def main():
    """Command-line entry point: select a project, then preprocess for Brown clustering.

    Reads two positional command-line arguments, ``project`` and
    ``splits_file``, passes them to ``dirs.set_project`` and then calls
    ``preprocess_for_brown_clustering``.

    Exits with a usage error (status 2) when fewer than two positional
    arguments are given. Any additional arguments are ignored.
    """
    # Handle input options and arguments
    usage = "%prog project splits_file"
    parser = OptionParser(usage=usage)
    (_options, args) = parser.parse_args()

    # Report a usage error instead of failing with an IndexError on
    # args[0] / args[1] below.
    if len(args) < 2:
        parser.error(
            "expected 2 arguments (project, splits_file), got %d" % len(args)
        )

    project, splits_file = args[0], args[1]

    dirs.set_project(project, splits_file)
    preprocess_for_brown_clustering()
def main():
    """Command-line entry point: run a fixed series of feature-set experiments.

    Positional arguments (all required):
        project      -- passed to ``dirs.set_project``
        label        -- label file, forwarded to ``experiment2.run_experiment``
        splits_file  -- passed to ``dirs.set_project``

    Options:
        -t  target column (default 0), converted with ``int``
        -w  weight column (default -1), converted with ``int``

    For every feature set in the active experiment list and for each test
    fold in ``range(10)``, prints a progress line and calls
    ``experiment2.run_experiment`` with model type ``'LR'``.

    Exits through ``sys.exit`` with a message when fewer than three
    positional arguments are supplied.
    """
    usage = "%prog project label splits_file"
    parser = OptionParser(usage=usage)
    parser.add_option('-t', dest='target_col', default=0,
                      help='Target column; default=%default')
    parser.add_option('-w', dest='weight_col', default=-1,
                      help='weight column; default=%default')
    # NOTE: a '-m' option for choosing the model (LR|SVM|MNB|SVMNB) is
    # disabled; the model type is fixed to 'LR' below.

    (options, args) = parser.parse_args()

    # args[0], args[1] and args[2] are all read below, so three positional
    # arguments are required (the previous "< 2" check let two arguments
    # through and then failed with an IndexError on args[2]).
    if len(args) < 3:
        sys.exit("Please provide input arguments")

    project, label_file, splits_file = args[0], args[1], args[2]

    dirs.set_project(project, splits_file)

    # Option values arrive as strings when given on the command line.
    target_col = int(options.target_col)
    weight_col = int(options.weight_col)

    model_type = 'LR'
    verbose = 1

    # Feature specifications, built up incrementally.
    unigrams = ['ngrams,n=1,transform=binarize']
    unigrams_and_bigrams = unigrams + ['ngrams,n=2,transform=binarize']
    ub_personas_old = unigrams_and_bigrams + [
        'pkl,subdir=personas,source=personasdpm,transform=binarize',
    ]
    ub_personas_new = unigrams_and_bigrams + [
        'pkl,subdir=personas,source=personas,transform=binarize',
    ]
    ub_personas_and_stories = ub_personas_new + [
        'pkl,subdir=personas,source=storytypesold,transform=normalizel2',
    ]

    # NOTE: `all_feature_list` (and `unigrams` on its own) are defined but
    # are not part of the active experiment set below.
    all_feature_list = [
        'ngrams,n=1,transform=binarize',
        'ngrams,n=2,transform=binarize,min_df=2',
        'list,subdir=brown,source=brown',
        'pkl,subdir=lda,source=lda,transform=binarize',
        'pkl,subdir=personas,source=personas,transform=binarize',
        'list,subdir=stanford,source=pos,transform=binarize',
        'list,subdir=stanford,source=ner,transform=binarize',
        'list,subdir=stanford,source=dependency_links,transform=binarize,min_df=2,lower=1',
        'list,subdir=stanford,source=jkgrams,transform=binarize,min_df=2,lower=1',
        'list,subdir=stanford,source=sentiments,transform=binarize',
        'list,subdir=semafor,source=frames,transform=binarize,lower=1',
        'list,subdir=amalgram,source=ss_tags,transform=binarize,lower=1',
    ]

    # Active experiments: names[k] labels the feature list exps[k].
    exps = [unigrams_and_bigrams, ub_personas_old, ub_personas_new,
            ub_personas_and_stories]
    names = ['unigrams_and_bigrams', 'personas_dpm', 'personas_new',
             'personas_and_stories']

    n_eval_iters = 20
    dev_prop = 0.1

    for i, (name, features) in enumerate(zip(names, exps)):
        for t in range(10):
            # Single-argument print: same output under Python 2 and 3.
            print('experiment %s ; test_fold %s' % (i, t))
            # run experiment
            experiment2.run_experiment(name=name,
                                       label_file=label_file,
                                       target=target_col,
                                       test_fold=t,
                                       feature_list=features,
                                       model_type=model_type,
                                       n_eval_iters=n_eval_iters,
                                       eval_prop=dev_prop,
                                       reuse=False,
                                       verbose=verbose,
                                       weight_col=weight_col,
                                       best_alphas=None,
                                       additional_label_files=None,
                                       additional_label_weights=None,
                                       metric='f1',
                                       only_unanimous=True)