Ejemplo n.º 1
0
def get_methods_singletask(header, random_restarts=-1):
    """Build the factories (and their names) for the single-task methods.

    Args:
        header: Column header handed to ``extract_feature_indices``; its first
            two return values are used as the bag-of-words and Brown feature
            indices.
        random_restarts: Forwarded unchanged to every ``MCGP`` built here.

    Returns:
        A ``(methods, names)`` pair. ``methods`` is a list of zero-argument
        callables, each of which builds a new method object when called.
        ``names`` is a list holding the ``name`` attribute of one object built
        from each factory, in the same order as ``methods``.
    """
    features_bow, features_brown, _, _ = extract_feature_indices(header)

    def make_gp(kernel_constructor, name):
        # Shared constructor for the GP variants; only kernel and name differ.
        return MCGP(kernel_constructor=kernel_constructor, labels=LABELS,
                    name=name, random_restarts=random_restarts)

    methods = [
        # ``strategy`` is passed by keyword: presumably this is scikit-learn's
        # DummyClassifier, whose newer releases reject positional arguments.
        lambda: SklearnBaseline(lambda: DummyClassifier(strategy="most_frequent"),
                                "MostFrequent", [0]),
        lambda: make_gp(
            kernel_constructor=lambda: single_task_kernel(features_bow, False, "BOW"),
            name="BOWGPjoinedfeatures"),
        lambda: make_gp(
            kernel_constructor=lambda: single_task_kernel(features_brown, False, "BROWN"),
            name="BROWNGPjoinedfeatures"),
    ]

    # Return a real list rather than a lazy ``map`` object: callers concatenate
    # and re-iterate the names, which a one-shot iterator does not support.
    names = [factory().name for factory in methods]
    return methods, names
Ejemplo n.º 2
0
 def __init__(self, X, y, train_set_ratios, foldtorun, splitter, evaluation_measures, 
              methodnamesmultitask, methodsmultitask, methodnamessingletask, methodssingletask, print_metrics,
              header, random_restarts=-1, results=None, filter_retweets=True):
     """Store the experiment configuration on the instance.

     Args:
         X: Feature data, stored as ``self.X``.
         y: Labels, stored as ``self.y``.
         train_set_ratios: Accepted for interface compatibility; not stored
             by this constructor.
         foldtorun: Stored as ``self.foldtorun``.
         splitter: Stored as ``self.splitter``.
         evaluation_measures: Stored as ``self.evaluation_measures``.
         methodnamesmultitask: Iterable of multi-task method names.
         methodsmultitask: Multi-task methods, stored as given.
         methodnamessingletask: Iterable of single-task method names.
         methodssingletask: Single-task methods, stored as given.
         print_metrics: Stored as ``self.print_metrics``.
         header: Column header; also passed to ``extract_feature_indices``
             to obtain the task and retweet-type column ids.
         random_restarts: Accepted for interface compatibility; not stored
             by this constructor.
         results: Optional dict used to collect results. A new empty dict
             is created per instance when omitted.
         filter_retweets: Stored as ``self.filter_retweets``.
     """
     self.X = X
     self.y = y
     # Materialise the name sequences so that lazy iterables (e.g. a Python 3
     # ``map`` object) can be concatenated below and iterated more than once.
     self.methodnamesmultitask = list(methodnamesmultitask)
     self.methodsmultitask = methodsmultitask
     self.methodnamessingletask = list(methodnamessingletask)
     self.methodssingletask = methodssingletask
     self.foldtorun = foldtorun
     self.splitter = splitter
     self.evaluation_measures = evaluation_measures
     self.print_metrics = print_metrics
     # A ``{}`` default would be created once and shared by every instance,
     # so the default is ``None`` and a fresh dict is made here instead.
     self.results = {} if results is None else results
     self.header = header
     # Only the third and fourth values returned for the header are needed here.
     _, _, self.postprocessed_task_column_id, self.rttypecol_processed_column_id = extract_feature_indices(header)
     self.methodnames_all = self.methodnamesmultitask + self.methodnamessingletask
     self.filter_retweets = filter_retweets
Ejemplo n.º 3
0
# Command-line driver. Positional arguments:
#   1: fold to run (int)
#   2: name of the method to keep
#   3: train set ratio (int; wrapped in a one-element list)
#   4: data file name, passed to load_data
#   5: number of random restarts (int)
#   6: integer flag, converted to bool, for filtering retweets
#   7: optional integer seed for numpy's random number generator
foldtorun = int(sys.argv[1])
methodname = sys.argv[2]
train_set_ratios = [int(sys.argv[3])]
fname = sys.argv[4]
random_restarts = int(sys.argv[5])
filter_retweets = bool(int(sys.argv[6]))
if len(sys.argv) >= 8:
    # A random number generator seed has been passed explicitly.
    seed = int(sys.argv[7])
    import numpy as np
    np.random.seed(seed)
else:
    initialize_seed_with_currtime()

X, y, header = load_data(fname, labels_to_keep=LABELS)
_, _, postprocessed_task_column_id, _ = extract_feature_indices(header)
splitter = foldsplitter(X, postprocessed_task_column_id, train_set_ratios)
evaluation_measures = [sklearn.metrics.accuracy_score]
# Number of distinct values found in the task column.
tasks_number = len(set(X[:, postprocessed_task_column_id]))

methodsmultitask, methodnamesmultitask = get_methods_multitask(
    tasks_number, header, random_restarts=random_restarts)
methodssingletask, methodnamessingletask = get_methods_singletask(
    header, random_restarts=random_restarts)

# Identity comparison is the correct way to test against None.
# NOTE: methodname comes straight from sys.argv, so it is always a string
# and this branch is always taken when run from the command line.
if methodname is not None:
    # Keep only the requested method.
    methodnamesmultitask, methodsmultitask = filter_methods(
        methodnamesmultitask, methodsmultitask, methodname)
    methodnamessingletask, methodssingletask = filter_methods(
        methodnamessingletask, methodssingletask, methodname)

experiment = Experiment(X, y, train_set_ratios, foldtorun, splitter, evaluation_measures,
                        methodnamesmultitask, methodsmultitask,
                        methodnamessingletask, methodssingletask,
                        print_metrics=print_metrics_multiclass,
                        random_restarts=random_restarts, results={}, header=header,
                        filter_retweets=filter_retweets)
Ejemplo n.º 4
0
# Command-line driver. Positional arguments:
#   1: fold to run (int)
#   2: name of the method to keep
#   3: train set ratio (int; wrapped in a one-element list)
#   4: data file name, passed to load_data
#   5: number of random restarts (int)
#   6: integer flag, converted to bool, for filtering retweets
#   7: optional integer seed for numpy's random number generator
foldtorun = int(sys.argv[1])
methodname = sys.argv[2]
train_set_ratios = [int(sys.argv[3])]
fname = sys.argv[4]
random_restarts = int(sys.argv[5])
filter_retweets = bool(int(sys.argv[6]))
if len(sys.argv) >= 8:
    # A random number generator seed has been passed explicitly.
    seed = int(sys.argv[7])
    import numpy as np
    np.random.seed(seed)
else:
    initialize_seed_with_currtime()

X, y, header = load_data(fname, labels_to_keep=LABELS)
_, _, postprocessed_task_column_id, _ = extract_feature_indices(header)
splitter = foldsplitter(X, postprocessed_task_column_id, train_set_ratios)
evaluation_measures = [sklearn.metrics.accuracy_score]
# Number of distinct values found in the task column.
tasks_number = len(set(X[:, postprocessed_task_column_id]))

methodsmultitask, methodnamesmultitask = get_methods_multitask(
    tasks_number, header, random_restarts=random_restarts)
methodssingletask, methodnamessingletask = get_methods_singletask(
    header, random_restarts=random_restarts)

# Identity comparison is the correct way to test against None.
# NOTE: methodname comes straight from sys.argv, so it is always a string
# and this branch is always taken when run from the command line.
if methodname is not None:
    # Keep only the requested method.
    methodnamesmultitask, methodsmultitask = filter_methods(
        methodnamesmultitask, methodsmultitask, methodname)
    methodnamessingletask, methodssingletask = filter_methods(
        methodnamessingletask, methodssingletask, methodname)