コード例 #1
0
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

# Output handle for this experiment's results, created by the project's
# `write` helper.
# NOTE(review): `file` shadows the Python 2 builtin of the same name; the name
# is kept unchanged because code outside this excerpt may still refer to it.
file = write.initFile("ex12-linearSVC-part2")

# Emit log records at INFO level and above. With no `stream`/`filename`
# argument, logging.basicConfig attaches a handler that writes to stderr
# (not stdout).
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
# NOTE(review): `strength` is not used in the visible part of this script;
# presumably it is consumed further down - confirm before removing.
strength = 'soft'

# Alternative loader, kept for reference:
#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
data = ptd.getTrainingData()
# Drop every row whose stance label is 'NONE'; only the remaining labels are
# used below.
data = data[data.Stance != 'NONE']

# 10 shuffled, stratified folds over the stance labels, with a fixed seed so
# the split is reproducible.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

# Report the dataset size and the number of distinct stance labels that are
# actually present after filtering (previously a hard-coded 3, which did not
# account for the 'NONE' rows removed above).
# Assumes `data.Stance` is a pandas Series - TODO confirm.
summary_lines = (
    "%d training documents" % len(data.Abstract),
    "%d categories" % data.Stance.nunique(),
)
for summary_line in summary_lines:
    # Each line goes both to the console and to the results file.
    print(summary_line)
    write.writeTextToFile(summary_line, file)
print()

###############################################################################
# Classifiers
# Candidates considered:
# MultinomialNB(), BernoulliNB(), SVM(), LinearSVM(), SGDClassifier(), LogisticRegression()
clf = MultinomialNB()
コード例 #2
0
# Grid of candidate downsample rates. The nested loops below try every
# (favor rate, none rate) pair drawn from this list.
rates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# Result accumulators. They are not filled in the visible part of the loop;
# presumably that happens further down - TODO confirm.
training_scores = []
validation_scores = []

# NOTE(review): this fragment uses Python 2 `print` statements alongside
# `print(...)` calls, so it only runs under Python 2 as written.
# `use_downsample`, `strength`, `ptd` and `pd` are defined outside this excerpt.
for downsample_rate_favor in rates:
    # Scratch lists reset for each favor rate; their use is not visible here.
    tmp = []
    tmp2 = []
    for downsample_rate_none in rates:
        # Visual separator between grid iterations (120 asterisks).
        print 120 * '*'
        # ***** LOAD DATA   *****
        if use_downsample:
            print("using down sampling")
            print 'Downsample favor: ' + str(downsample_rate_favor)
            print 'Downsample none: ' + str(downsample_rate_none)
            # Data is reloaded on every iteration.
            train_data = ptd.getTrainingData()
            validate_data = ptd.getValidationData()
            # Test data is not loaded in this branch (only in the `else` below).
            #test_data = ptd.getTestData()
            # Presumably each call returns a subsample of the rows with the
            # given stance at the given rate - verify against
            # ptd.getDownsample2_0.
            sub_none = ptd.getDownsample2_0(train_data, "NONE", strength,
                                            downsample_rate_none)
            sub_favor = ptd.getDownsample2_0(train_data, "FAVOR", strength,
                                             downsample_rate_favor)
            # All rows labelled "AGAINST" are kept without downsampling.
            against = train_data[train_data.Stance == "AGAINST"]

            # Replace the training set with the two downsampled subsets plus
            # the full "AGAINST" subset.
            train_data = pd.concat([sub_favor, sub_none, against])

        else:
            # No downsampling: use the train, validation and test sets as
            # returned by the loader.
            print("using nothing")
            train_data = ptd.getTrainingData()
            validate_data = ptd.getValidationData()
            test_data = ptd.getTestData()