# NOTE(review): the next statements are the tail of a definition whose header
# lies before this chunk (it supplies `out_f`, `articles` and
# `feature_builder`); re-indent them under that header when merging.
out_f.write(','.join(feature_builder.feature_names) + '\n')
for a in articles:
    features = feature_builder.get_article_features(a)
    save_features(features, a.source, out_f)


def print_data(articles, feature_builder):
    """Print a comma-separated header line, then emit one row per article.

    The header is built from ``feature_builder.feature_names``. For each
    article, the features returned by
    ``feature_builder.get_article_features`` are passed to
    ``print_features`` together with the article's ``source`` attribute.

    Args:
        articles: Iterable of article objects exposing a ``source`` attribute.
        feature_builder: Object providing ``feature_names`` and
            ``get_article_features(article)``.
    """
    header = ','.join(feature_builder.feature_names)
    print(header)
    for article in articles:
        row = feature_builder.get_article_features(article)
        print_features(row, article.source)


if __name__ == '__main__':
    # Every command-line argument after the script name goes to the reader.
    dr = DataReader(sys.argv[1:])
    articles = dr._make_data()
    articles.normalize()

    sent_feature_builder = SentimentFeatureBuilder()
    # make_sets() yields two values, unpacked here as (testing, training).
    testing, training = articles.make_sets()

    # Disabled feature-dump step. The calls below name the same files
    # ("training.txt" / "testing.txt") that are read back further down.
    # save_data(training, sent_feature_builder, "training.txt")
    # save_data(testing, sent_feature_builder, "testing.txt")

    # Experiment: try fitting with a linear SVC.
    model = svm.LinearSVC()

    df_training = pd.read_csv("training.txt")
    df_testing = pd.read_csv("testing.txt")

    # Feature matrix = every column except 'label'; target = the 'label' column.
    training_data = df_training.loc[:, df_training.columns != 'label']
    training_label = df_training['label']