예제 #1
0
def execute(language):
    """Train the baseline on ``language``, plot learning curves and score the dev set.

    Steps, in order:

    1. Load ``Dataset(language)`` and print the train/dev set sizes.
    2. Build features with ``Baseline(language).train`` and hand them, together
       with an ``SVC(gamma=300)`` estimator, to ``plot_learning_curve``.
    3. Predict on the dev set and write target word, prediction and gold label
       for every dev item to ``out_s2.csv`` in the current working directory.
    4. Pass gold labels and predictions to ``report_score``.

    Args:
        language: Identifier forwarded to ``Dataset`` and ``Baseline``; its
            capitalised string form is also used in the plot title.
            NOTE(review): presumably a lowercase language name such as
            ``'spanish'`` -- confirm against the callers.

    Returns:
        None. Results are emitted through the plot, the CSV file and
        whatever ``report_score`` does with its arguments.
    """
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset),
                                            len(data.devset)))

    baseline = Baseline(language)
    estimator = SVC(gamma=300)
    # Derive the title from the requested language instead of hard-coding
    # "Spanish", so the plot is labelled correctly for any language.
    title = '{} Learning Curves (SVM, γ=300)'.format(
        str(language).capitalize())
    X, y = baseline.train(data.trainset)
    plot_learning_curve(estimator,
                        title,
                        X,
                        y,
                        ylim=None,
                        n_jobs=1,
                        train_sizes=np.linspace(.1, 1.0, 5))

    predictions = baseline.test(data.devset)

    gold_labels = [sent['gold_label'] for sent in data.devset]
    target_words = [sent['target_word'] for sent in data.devset]

    # Column order (target_word, prediction, gold_label) is kept stable so the
    # layout of the CSV does not change for downstream consumers.
    df = pd.DataFrame({
        'target_word': target_words,
        'prediction': list(predictions),
        'gold_label': gold_labels,
    })
    df.to_csv('out_s2.csv')
    report_score(gold_labels, predictions)