def quantile_transform():
    """Sample a random QuantileTransformer configuration.

    Returns:
        (scaler, key): the configured transformer and a string key
        encoding n_quantiles, subsample, and output_distribution,
        suffixed with the class name.
    """
    n_quantiles = _randint(100, 1000)
    subsample = _randint(1000, 1e5)
    distribution = _randchoice(['normal', 'uniform'])
    scaler = QuantileTransformer(n_quantiles=n_quantiles,
                                 output_distribution=distribution,
                                 subsample=subsample)
    key = "_".join([str(n_quantiles), str(subsample), distribution,
                    QuantileTransformer.__name__])
    return scaler, key
def KNN():
    """Sample a random KNeighborsClassifier configuration.

    The Minkowski power parameter ``p`` is sampled only when the metric
    is 'minkowski'; for 'chebyshev' it is fixed at 2 (unused there).

    Returns:
        (model, key): the classifier and a string key encoding the
        sampled hyper-parameters, suffixed with the class name.
    """
    n_neighbors = _randint(2, 25)
    weighting = _randchoice(['uniform', 'distance'])
    metric = _randchoice(['minkowski', 'chebyshev'])
    power = _randint(1, 15) if metric == 'minkowski' else 2
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weighting,
                                 algorithm='auto', p=power, metric=metric,
                                 n_jobs=-1)
    key = "_".join([str(n_neighbors), weighting, metric, str(power),
                    KNeighborsClassifier.__name__])
    return model, key
def polynomial():
    """Sample a random PolynomialFeatures configuration.

    Returns:
        (scaler, key): the configured transformer and a string key
        encoding degree, interaction_only, and include_bias,
        suffixed with the class name.
    """
    degree = _randint(2, 10)
    interaction_only = _randchoice([True, False])
    include_bias = _randchoice([True, False])
    scaler = PolynomialFeatures(degree=degree,
                                interaction_only=interaction_only,
                                include_bias=include_bias)
    key = "_".join([str(degree), str(interaction_only), str(include_bias),
                    PolynomialFeatures.__name__])
    return scaler, key
def LDA_():
    """Sample a random CountVectorizer + LatentDirichletAllocation pair.

    Returns:
        ([vect, lda], key): the vectorizer and LDA model as a two-element
        list, plus a string key encoding the LDA hyper-parameters
        (the vectorizer's max_df/min_df are not part of the key),
        suffixed with the class name.
    """
    max_df = _randint(100, 1000)
    min_df = _randint(1, 10)
    vect = CountVectorizer(max_df=max_df, min_df=min_df)
    n_components = _randint(10, 50)
    doc_topic_prior = _randuniform(0, 1)
    topic_word_prior = _randuniform(0, 1)
    learning_decay = _randuniform(0.51, 1.0)
    learning_offset = _randuniform(1, 50)
    batch_size = _randchoice([150, 180, 210, 250, 300])
    lda = LatentDirichletAllocation(n_components=n_components,
                                    doc_topic_prior=doc_topic_prior,
                                    topic_word_prior=topic_word_prior,
                                    learning_decay=learning_decay,
                                    learning_offset=learning_offset,
                                    batch_size=batch_size, max_iter=100,
                                    learning_method='online')
    key = "_".join(str(v) for v in (n_components, doc_topic_prior,
                                    topic_word_prior, learning_decay,
                                    learning_offset, batch_size))
    key = key + "_" + LatentDirichletAllocation.__name__
    return [vect, lda], key
def LR():
    """Sample a random LogisticRegression configuration.

    Returns:
        (model, key): the classifier and a string key encoding penalty,
        tolerance (rounded to 5 places), and C, suffixed with the
        class name.
    """
    penalty = _randchoice(['l1', 'l2'])
    tolerance = _randuniform(0.0, 0.1)
    c_value = _randint(1, 500)
    model = LogisticRegression(penalty=penalty, tol=tolerance,
                               C=float(c_value), solver='liblinear',
                               multi_class='warn')
    key = "_".join([penalty, str(round(tolerance, 5)), str(c_value),
                    LogisticRegression.__name__])
    return model, key
def SVM():
    """Sample a random SVC configuration.

    NOTE(review): inputs are presumably expected to be pre-scaled by the
    caller (earlier revisions had MinMaxScaler code here) -- confirm.

    Returns:
        (model, key): the classifier and a string key encoding C, kernel,
        degree, gamma, coef0, and tol (floats rounded to 5 places),
        suffixed with the class name.
    """
    c_value = _randint(1, 500)
    kernel = _randchoice(['linear', 'poly', 'rbf', 'sigmoid'])
    degree = _randint(2, 10)
    gamma = _randuniform(0.0, 1.0)
    coef0 = _randuniform(0.0, 0.1)
    tolerance = _randuniform(0.0, 0.1)
    model = SVC(C=float(c_value), kernel=kernel, degree=degree, gamma=gamma,
                coef0=coef0, tol=tolerance, cache_size=20000)
    key = "_".join([str(c_value), kernel, str(degree),
                    str(round(gamma, 5)), str(round(coef0, 5)),
                    str(round(tolerance, 5)), SVC.__name__])
    return model, key
def RF():
    """Sample a random RandomForestClassifier configuration.

    Returns:
        (model, key): the classifier and a string key encoding
        n_estimators, criterion, and min_samples_split (rounded to
        5 places), suffixed with the class name.
    """
    n_estimators = _randint(50, 150)
    criterion = _randchoice(['gini', 'entropy'])
    min_split = _randuniform(0.0, 1.0)
    model = RandomForestClassifier(n_estimators=n_estimators,
                                   criterion=criterion,
                                   min_samples_split=min_split,
                                   max_features=None,
                                   min_impurity_decrease=0.0, n_jobs=-1)
    key = "_".join([str(n_estimators), criterion, str(round(min_split, 5)),
                    RandomForestClassifier.__name__])
    return model, key
def robust_scaler():
    """Sample a random RobustScaler configuration.

    The quantile range is drawn as (low in [0, 50], high in [51, 100]),
    so low < high always holds.

    Returns:
        (scaler, key): the configured scaler and a string key encoding
        the quantile range, suffixed with the class name.
    """
    q_low = _randint(0, 50)
    q_high = _randint(51, 100)
    scaler = RobustScaler(quantile_range=(q_low, q_high))
    key = "_".join([str(q_low), str(q_high), RobustScaler.__name__])
    return scaler, key
def TF():
    """Sample a random term-frequency CountVectorizer configuration.

    Returns:
        (vect, key): the vectorizer and a string key encoding max_df and
        min_df, suffixed with the class name.
    """
    max_df = _randint(100, 1000)
    min_df = _randint(1, 10)
    vect = CountVectorizer(max_df=max_df, min_df=min_df)
    key = "_".join([str(max_df), str(min_df), CountVectorizer.__name__])
    return vect, key
def TFIDF():
    """Sample a random TfidfVectorizer configuration.

    Returns:
        (vect, key): the vectorizer and a string key encoding max_df,
        min_df, and norm (which may be the literal string 'None'),
        suffixed with the class name.
    """
    max_df = _randint(100, 1000)
    min_df = _randint(1, 10)
    norm = _randchoice(['l1', 'l2', None])
    vect = TfidfVectorizer(max_df=max_df, min_df=min_df, norm=norm)
    key = "_".join([str(max_df), str(min_df), str(norm),
                    TfidfVectorizer.__name__])
    return vect, key