Esempio n. 1
0
from core.experiment import experiment2
from core.experiment import run_with_bayes_opt2

from core.util import dirs

# Disabled: explicit project setup, left here for reference.
#dirs.set_project('imm_labeled', 'framing')

# Start the Bayesian-optimisation driver with a fixed argument list instead of
# reading the command line.
# NOTE(review): the three entries look like project, label file and splits
# file -- confirm against run_with_bayes_opt2.main.
run_with_bayes_opt2.main([
    'imm_labeled',
    'Economic_labels',
    'framing',
])

# The bare string below is a disabled single-run experiment; as a string
# expression statement it is never executed.
"""
experiment2.run_experiment('test', 'Economic_labels', 2, -1,
                           ['ngrams,subdir=stanford,source=words,transform=binarize,min_df=4,lower=0,n=1',
                            'ngrams,subdir=stanford,source=lemmas,transform=binarize,min_df=6,lower=0,n=2',
                            'list,subdir=stanford,source=pos,transform=normalizel1',
                            'list,subdir=stanford,source=dependency_links,transform=normalizel1'],
                           'LR', 10, 0.1,
                            weight_col=1,
                            metric='f1', only_unanimous=True,
                            save_model=False)
"""
Esempio n. 2
0
def main():
    """Run a fixed set of feature-set experiments over ten test folds.

    Command line: ``%prog project label splits_file`` with the options
    ``-t`` (target column, default 0) and ``-w`` (weight column, default -1).

    The project is selected with ``dirs.set_project(project, splits_file)``.
    Then, for every named feature set and every test fold 0-9,
    ``experiment2.run_experiment`` is called with model type ``'LR'``,
    20 evaluation iterations, an evaluation proportion of 0.1 and the
    ``'f1'`` metric.

    Raises:
        SystemExit: if fewer than three positional arguments are given.
        ValueError: if the ``-t`` or ``-w`` value is not an integer string.
    """
    usage = "%prog project label splits_file"
    parser = OptionParser(usage=usage)
    parser.add_option('-t', dest='target_col', default=0,
                      help='Target column; default=%default')
    parser.add_option('-w', dest='weight_col', default=-1,
                      help='weight column; default=%default')

    #parser.add_option('-m', dest='model', default='LR',
    #                  help='Model: (LR|SVM|MNB|SVMNB); default=%default')

    (options, args) = parser.parse_args()
    # Three positionals are unpacked below, so all three must be present;
    # checking for only two let a missing splits_file surface as IndexError.
    if len(args) < 3:
        sys.exit("Please provide input arguments")

    project, label_file, splits_file = args[:3]

    dirs.set_project(project, splits_file)

    # Option values given on the command line arrive as strings.
    target_col = int(options.target_col)
    weight_col = int(options.weight_col)

    model_type = 'LR'
    reuse = False
    verbose = 1

    # Feature sets are built up incrementally; `+` creates a new list each
    # time, so the smaller sets are not mutated by the larger ones.
    unigrams = ['ngrams,n=1,transform=binarize']
    unigrams_and_bigrams = unigrams + ['ngrams,n=2,transform=binarize']
    ub_personas_old = unigrams_and_bigrams + ['pkl,subdir=personas,source=personasdpm,transform=binarize']
    ub_personas_new = unigrams_and_bigrams + ['pkl,subdir=personas,source=personas,transform=binarize']
    ub_personas_and_stories = ub_personas_new + ['pkl,subdir=personas,source=storytypesold,transform=normalizel2']
    # Not part of the active experiment list below; kept so the full
    # feature configuration remains available.
    all_feature_list = [
        'ngrams,n=1,transform=binarize',
        'ngrams,n=2,transform=binarize,min_df=2',
        'list,subdir=brown,source=brown',
        'pkl,subdir=lda,source=lda,transform=binarize',
        'pkl,subdir=personas,source=personas,transform=binarize',
        'list,subdir=stanford,source=pos,transform=binarize',
        'list,subdir=stanford,source=ner,transform=binarize',
        'list,subdir=stanford,source=dependency_links,transform=binarize,min_df=2,lower=1',
        'list,subdir=stanford,source=jkgrams,transform=binarize,min_df=2,lower=1',
        'list,subdir=stanford,source=sentiments,transform=binarize',
        'list,subdir=semafor,source=frames,transform=binarize,lower=1',
        'list,subdir=amalgram,source=ss_tags,transform=binarize,lower=1',
    ]

    # Each experiment keeps its name next to its feature list so the two
    # cannot drift out of step. Currently disabled entries:
    #   ('unigrams', unigrams) and ('all_features', all_feature_list)
    experiments = [
        ('unigrams_and_bigrams', unigrams_and_bigrams),
        ('personas_dpm', ub_personas_old),
        ('personas_new', ub_personas_new),
        ('personas_and_stories', ub_personas_and_stories),
    ]

    n_eval_iters = 20
    dev_prop = 0.1
    n_test_folds = 10

    for i, (name, features) in enumerate(experiments):
        for t in range(n_test_folds):
            # Single-argument form prints the same text under Python 2 and 3.
            print('experiment %s ; test_fold %s' % (i, t))
            experiment2.run_experiment(name=name,
                                       label_file=label_file,
                                       target=target_col,
                                       test_fold=t,
                                       feature_list=features,
                                       model_type=model_type,
                                       n_eval_iters=n_eval_iters,
                                       eval_prop=dev_prop,
                                       reuse=reuse,
                                       verbose=verbose,
                                       weight_col=weight_col,
                                       best_alphas=None,
                                       additional_label_files=None,
                                       additional_label_weights=None,
                                       metric='f1',
                                       only_unanimous=True)  # run experiment