Example #1
def LDA_():
    # Randomly sample CountVectorizer and LatentDirichletAllocation settings.
    a, b = _randint(100, 1000), _randint(1, 10)
    vect = CountVectorizer(max_df=a, min_df=b)  # integer bounds act as absolute document counts
    a, b, c = _randint(10, 50), _randuniform(0, 1), _randuniform(0, 1)
    d, e, f = _randuniform(0.51, 1.0), _randuniform(1, 50), _randchoice(
        [150, 180, 210, 250, 300])
    lda = LatentDirichletAllocation(n_components=a,
                                    doc_topic_prior=b,
                                    topic_word_prior=c,
                                    learning_decay=d,
                                    learning_offset=e,
                                    batch_size=f,
                                    max_iter=100,
                                    learning_method='online')
    tmp = "_".join([str(a), str(b), str(c), str(d), str(e), str(f),
                    LatentDirichletAllocation.__name__])
    return [vect, lda], tmp
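
These snippets rely on small `_randint`, `_randuniform`, and `_randchoice` helpers and on scikit-learn imports that live elsewhere in the source module. A minimal sketch of what they are assumed to look like (the helper bodies here are assumptions, thin wrappers around the standard `random` module):

import random

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import Binarizer
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


def _randint(low, high):
    # assumed helper: uniform random integer in [low, high]
    return random.randint(low, high)


def _randuniform(low, high):
    # assumed helper: uniform random float in [low, high]
    return random.uniform(low, high)


def _randchoice(options):
    # assumed helper: pick one element of the sequence at random
    return random.choice(options)
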
Example #2
def DT():
    # Randomly sample DecisionTreeClassifier hyper-parameters; return (model, tag).
    a = _randuniform(0.0, 1.0)
    b = _randchoice(['gini', 'entropy'])
    c = _randchoice(['best', 'random'])
    model = DecisionTreeClassifier(criterion=b,
                                   splitter=c,
                                   min_samples_split=a,  # float: interpreted as a fraction of the samples
                                   max_features=None,
                                   min_impurity_decrease=0.0)
    tmp = str(a) + "_" + b + "_" + c + "_" + DecisionTreeClassifier.__name__
    return model, tmp
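
Each sampler returns a freshly configured estimator plus a tag string that records the drawn hyper-parameters. A minimal usage sketch on a toy dataset (the dataset and split are illustrative, not part of the original code):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    *load_iris(return_X_y=True), test_size=0.3, random_state=0)

model, tag = DT()            # draw one random configuration
model.fit(X_train, y_train)
print(tag, model.score(X_test, y_test))
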
Example #3
def LR():
    # Randomly sample LogisticRegression hyper-parameters; return (model, tag).
    a = _randchoice(['l1', 'l2'])
    b = _randuniform(0.0, 0.1)
    c = _randint(1, 500)
    model = LogisticRegression(penalty=a,
                               tol=b,
                               C=float(c),
                               solver='liblinear',
                               # 'warn' is only accepted by older scikit-learn (around 0.20);
                               # newer releases expect 'auto' or 'ovr' here
                               multi_class='warn')
    tmp = "_".join([a, str(round(b, 5)), str(c), LogisticRegression.__name__])
    return model, tmp
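
Because every draw comes back with its tag, a simple random search only needs to keep the best-scoring pair. A sketch of that loop around LR() (the dataset, split, and number of draws are illustrative; it assumes a scikit-learn version that still accepts the parameters used above):

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    *load_breast_cancer(return_X_y=True), random_state=42)

best_score, best_tag = -1.0, None
for _ in range(20):              # 20 independent random draws
    model, tag = LR()
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    if score > best_score:
        best_score, best_tag = score, tag

print(best_tag, best_score)
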
Example #4
def SVM():
    # Randomly sample SVC hyper-parameters; return (model, tag).
    # from sklearn.preprocessing import MinMaxScaler
    # scaling = MinMaxScaler(feature_range=(-1, 1)).fit(train_data)
    # train_data = scaling.transform(train_data)
    # test_data = scaling.transform(test_data)
    a = _randint(1, 500)
    b = _randchoice(['linear', 'poly', 'rbf', 'sigmoid'])
    c = _randint(2, 10)
    d = _randuniform(0.0, 1.0)
    e = _randuniform(0.0, 0.1)
    f = _randuniform(0.0, 0.1)
    model = SVC(C=float(a),
                kernel=b,
                degree=c,
                gamma=d,
                coef0=e,
                tol=f,
                cache_size=20000)  # kernel cache size in MB
    tmp = "_".join([str(a), b, str(c), str(round(d, 5)), str(round(e, 5)),
                    str(round(f, 5)), SVC.__name__])
    return model, tmp
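
The commented-out lines sketch scaling the data into [-1, 1] before the SVM, which usually matters a great deal for SVC. The same idea expressed as a Pipeline, so the scaling fitted on the training data is reapplied automatically (a sketch; `train_data` and `train_labels` are placeholders, not defined here):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

svm_model, svm_tag = SVM()
pipeline = make_pipeline(MinMaxScaler(feature_range=(-1, 1)), svm_model)
# pipeline.fit(train_data, train_labels) scales first, then fits the SVC;
# pipeline.predict(test_data) reuses the scaling learned during fit.
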
Example #5
def RF():
    # Randomly sample RandomForestClassifier hyper-parameters; return (model, tag).
    a = _randint(50, 150)
    b = _randchoice(['gini', 'entropy'])
    c = _randuniform(0.0, 1.0)
    model = RandomForestClassifier(n_estimators=a,
                                   criterion=b,
                                   min_samples_split=c,
                                   max_features=None,
                                   min_impurity_decrease=0.0,
                                   n_jobs=-1)
    tmp = "_".join([str(a), b, str(round(c, 5)),
                    RandomForestClassifier.__name__])
    return model, tmp
Example #6
def binarize():
    # Randomly sample a Binarizer threshold; return (scaler, tag).
    a = _randuniform(0, 100)
    scaler = Binarizer(threshold=a)
    tmp = str(round(a, 4)) + "_" + Binarizer.__name__
    return scaler, tmp
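
Binarizer maps every feature value above the threshold to 1 and everything else to 0; with thresholds drawn from [0, 100] this targets count-like features. A small illustrative transform (the input matrix is made up for the example):

import numpy as np

scaler, tag = binarize()
counts = np.array([[0.0, 12.0, 250.0],
                   [75.0, 3.0, 40.0]])
print(tag)
print(scaler.fit_transform(counts))  # values > threshold become 1.0, the rest 0.0
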