# Example #1
def load_caches():
    """Warm every on-disk cache used downstream: polarity scores,
    sentence parses, and sentence concepts."""
    cache_loaders = (
        polarity.load_cached_polarity,
        Sentence.load_cached_parses,
        Sentence.load_cached_concepts,
    )
    for loader in cache_loaders:
        loader()
def main():
    """Train and evaluate an SVM rating classifier via grid search.

    Loads the cached polarity data and the labeled result samples,
    runs setup() over (at most) the first 280 gold samples, then
    grid-searches SVC hyperparameters (rbf / poly / linear kernels)
    with 5-fold CV on a 50% train split and prints a classification
    report on the held-out 50%.

    Relies on module-level state: x, y, foodName, SAMPlE_DATA_PATH,
    load_results, setup, original_read_input, polarity, and the
    sklearn imports (cross_validation, GridSearchCV, SVC,
    classification_report).
    """
    polarity.load_cached_polarity()
    original_read_input()
    # NOTE(review): constant name contains a lowercase 'l' ("SAMPlE") --
    # looks like a typo, but it must match the module-level definition,
    # so it is left untouched here.
    results = load_results(SAMPlE_DATA_PATH)
    gold_samples = [r for r in results
                    if r["type"] == "manual_label" and r["food"] == foodName]

    # Partition by rating sign (fixed typo: was "positve_examples").
    # Currently unused beyond bookkeeping.
    positive_examples = [s for s in gold_samples if s["rating"] in (1, 2)]
    negative_examples = [s for s in gold_samples if s["rating"] in (-1, -2)]
    neutral_examples = [s for s in gold_samples if s["rating"] == 0]

    # Cap the working set (comment said "first 10" but the slice is 280).
    gold_samples = gold_samples[0:280]
    total = len(gold_samples)
    for i, gold_sample in enumerate(gold_samples):
        print(str(i) + '/' + str(total))
        setup(gold_sample, i)

    # Hyperparameter grids: C over powers of 2, gamma/coef0 over powers of 10.
    cArray = [math.pow(2, i) for i in range(-15, 15)]
    gammaArray = [math.pow(10, i) for i in range(-5, 0)]
    coeffArray = [math.pow(10, i) for i in range(-5, 0)]

    # Parameter grid for cross-validated model selection.
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': gammaArray,
                         'C': cArray},
                        {'kernel': ['poly'], 'gamma': gammaArray,
                         'C': cArray, 'coef0': coeffArray},
                        {'kernel': ['linear'], 'C': cArray}]

    global x
    #x = preprocessing.scale(x)
    # Force the first feature column to 1 (bias/intercept term).
    x[:,0] = 1

    # Class weights are currently uniform; the commented lines below show
    # the inverse-frequency alternative that was tried.
    # negativeWeight = len(y) / float(len(filter(lambda x : x == -1, y)))
    # zeroWeight = len(y) / float(len(filter(lambda x : x == 0, y)))
    # positiveWeight = len(y) / float(len(filter(lambda x : x == 1, y)))
    negativeWeight = 1
    zeroWeight = 1
    positiveWeight = 1
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        x, y, test_size=0.5, random_state=0)
    clf = GridSearchCV(SVC(class_weight={-1: negativeWeight,
                                         0: zeroWeight,
                                         1: positiveWeight}),
                       tuned_parameters,
                       cv=5, scoring='f1',
                       n_jobs=16)
    clf.fit(X_train, y_train)
    print("Best parameters set found on development set:")
    print()
    print(clf.best_estimator_)
    print()
   # print("Grid scores on development set:")
   # print()
   # for params, mean_score, scores in clf.grid_scores_:
   #     print("%0.3f (+/-%0.03f) for %r"
   #           % (mean_score, scores.std() / 2, params))
   # print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()