Ejemplo n.º 1
0
def run(analyzer, ngrams, lowercase, stop_words, max_df, min_df, norm, use_idf,
        smooth_idf, sublinear_tf):
    """Train an ``SVC_Model`` on TF-IDF features and return its test score.

    All ten arguments are forwarded unchanged, in the same order, to the
    ``TFIDF`` featurizer constructor; their meaning is defined there.

    Side effects: prints the accuracy measured on the test split and the
    elapsed wall-clock time of the train + evaluate phase.

    Returns:
        The value of ``stat_model.model.score(X_test, y_test)``.
    """
    def train_model(X_train, y_train, stat_model):
        # Hyperparameters are fixed here rather than searched.
        params = {'kernel': 'rbf', 'C': 10, 'probability': True, 'gamma': 0.1}
        stat_model.train(X_train, y_train, params)

    def test_model(stat_model, X_test, y_test, fscore_list):
        # Despite the parameter name, the recorded metric is accuracy_score.
        y_true, y_pred = y_test, stat_model.predict(X_test)
        fscore_list.append(accuracy_score(y_true, y_pred))
        print(fscore_list[-1])

    # Load the raw train/test splits.
    data = News()
    data_train_x, data_train_y = data.get_train()
    data_test_x, data_test_y = data.get_test()

    # Turn both splits into feature matrices with the requested TF-IDF setup.
    featurizer = TFIDF(analyzer, ngrams, lowercase, stop_words, max_df, min_df,
                       norm, use_idf, smooth_idf, sublinear_tf)
    (x, y, X_test, y_test) = featurizer.featurize(data_train_x, data_train_y,
                                                  data_test_x, data_test_y)

    stat_model = SVC_Model()

    # Starts with a 0 placeholder; test_model appends the measured accuracy.
    accuracy_list = [0]

    # The timer covers training and evaluation only, not loading/featurizing.
    start_time = time.time()

    train_model(x, y, stat_model)
    test_model(stat_model, X_test, y_test, accuracy_list)

    print("--- %s seconds ---" % (time.time() - start_time))

    return stat_model.model.score(X_test, y_test)
Ejemplo n.º 2
0
    #params = {'kernel': 'rbf', 'C': 1, 'probability': True, 'gamma': 0.001} #with scale
    params = {'alpha': 0.01}
    print params

    stat_model.train(X_train, y_train, params)


def test_model(stat_model, X_test, y_test, fscore_list):
    """Score ``stat_model`` on the test split and record the result.

    The predictions returned by ``stat_model.predict(X_test)`` are compared
    with ``y_test`` via ``accuracy_score``; the resulting value is appended
    to ``fscore_list`` and the list's last element is printed.
    """
    predictions = stat_model.predict(X_test)
    fscore_list.append(accuracy_score(y_test, predictions))

    latest = fscore_list[-1]
    print(latest)


# Load the raw train/test splits (Spam() is the commented-out alternative).
data = News()
data_train_x, data_train_y = data.get_train()
data_test_x, data_test_y = data.get_test()

random_seed = 42

# Build the TF-IDF featurizer with its default arguments and featurize
# both splits.
featurizer = TFIDF()
(x, y, X_test, y_test) = featurizer.featurize(data_train_x, data_train_y,
                                              data_test_x, data_test_y)

# First dimension of x (presumably the number of training samples).
# Written as a call so this line parses on Python 3 as well as Python 2.
print(x.shape[0])
'''
y_encoder = LabelBinarizer()
y_encoder.fit(y)
Ejemplo n.º 3
0
    train_start_time = time.time()
    stat_model.partial_train(X_train, y_train, params)
    traintime.append((time.time() - train_start_time))


def test_model(stat_model, X_test, y_test, fscore_list):
    y_true, y_pred = y_test, stat_model.predict(X_test)

    predictive_accuracy = np.mean(y_pred == y_true)
    fscore_list.append(predictive_accuracy)
    print(predictive_accuracy)


# Dataset selection: News is active, Spam() is the commented-out alternative.
data = News()
data_train_x, data_train_y = data.get_train()
data_test_x, data_test_y = data.get_test()

random_seed = 42

# Featurize both splits with a default-configured TFIDF.
featurizer = TFIDF()
x, y, X_test, y_test = featurizer.featurize(
    data_train_x, data_train_y, data_test_x, data_test_y)

# Result accumulators, both starting out empty.
all_list, time_list_all = [], []