def execute(language):
    """Train the baseline for *language*, plot learning curves, and score it.

    Steps, in order:

    1. Load ``Dataset(language)`` and print the train/dev set sizes.
    2. Get ``X, y`` from ``Baseline(language).train`` on the training set and
       pass them, with an ``SVC`` estimator, to ``plot_learning_curve``.
    3. Run ``baseline.test`` on the dev set, write the target words,
       predictions and gold labels to ``out_s2.csv``, and call
       ``report_score`` on the gold labels and predictions.

    Args:
        language: Identifier handed to ``Dataset`` and ``Baseline``; its
            capitalised string form is also used in the plot title
            (presumably a name such as ``"spanish"`` -- confirm with callers).

    Returns:
        None. Results are emitted through stdout, the plot, the CSV file and
        ``report_score``.
    """
    # Single source of truth for the kernel coefficient so the estimator and
    # the plot title cannot drift apart.
    gamma = 300

    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))

    baseline = Baseline(language)
    estimator = SVC(gamma=gamma)
    # The title used to be hard-coded to "Spanish"; derive it from the
    # language actually being processed so other languages are labelled
    # correctly (identical output for "spanish").
    title = '{} Learning Curves (SVM, γ={})'.format(
        str(language).capitalize(), gamma
    )

    X, y = baseline.train(data.trainset)
    plot_learning_curve(
        estimator,
        title,
        X,
        y,
        ylim=None,
        n_jobs=1,
        train_sizes=np.linspace(.1, 1.0, 5),
    )

    predictions = baseline.test(data.devset)
    gold_labels = [sent['gold_label'] for sent in data.devset]
    target_words = [sent['target_word'] for sent in data.devset]

    # Column order (target_word, prediction, gold_label) matches the CSV
    # layout produced by the previous column-by-column construction.
    df = pd.DataFrame(
        {
            'target_word': target_words,
            'prediction': list(predictions),
            'gold_label': gold_labels,
        },
        columns=['target_word', 'prediction', 'gold_label'],
    )
    df.to_csv('out_s2.csv')

    report_score(gold_labels, predictions)