def test_foldsplitter(self):
    """Check the mask pairs produced by ``foldsplitter`` on a toy data set.

    The input has 12 rows: times 0..5 with task id 0, followed by times
    0..5 with task id 1. The task id is stored in the last column, hence
    the ``-1`` column index; the third argument passed is ``[1, 2]``.

    Four pairs of boolean row masks are expected. In every pair the second
    mask is the exact complement of the first (presumably train mask and
    test mask -- confirm against ``foldsplitter`` itself).
    """
    # ``range(6) + range(6)`` only works on Python 2, where range returns a
    # list; building the values from lists works on Python 2 and 3 alike.
    taskids = np.array([0] * 6 + [1] * 6)
    times = np.array(list(range(6)) * 2)
    # One row per sample; the columns are (time, task id).
    X = np.vstack((times, taskids)).T

    # First mask of each expected pair, written as runs of equal values.
    first_masks = [
        [True] * 1 + [False] * 5 + [True] * 6,
        [True] * 2 + [False] * 4 + [True] * 6,
        [True] * 7 + [False] * 5,
        [True] * 8 + [False] * 4,
    ]
    expected = []
    for values in first_masks:
        first = np.array(values, dtype=bool)
        # The second mask of a pair selects exactly the rows the first omits.
        expected.append((first, ~first))

    np.testing.assert_array_equal(list(foldsplitter(X, -1, [1, 2])), expected)
 def test_foldsplitter(self):
     """Compare ``foldsplitter`` output with hand-written boolean masks.

     X has 12 rows and two columns (time, task id): six rows for task 0
     followed by six rows for task 1. ``foldsplitter`` is called with the
     task column index ``-1`` and ``[1, 2]`` as its third argument, and is
     expected to yield four pairs of boolean masks over the rows
     (presumably (train, test) -- confirm against ``foldsplitter``).
     """
     taskids = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
     # Python 3 ``range`` objects cannot be concatenated with ``+``, so
     # convert them to lists first (also valid on Python 2).
     times = np.array(list(range(6)) + list(range(6)))
     X = np.transpose(np.vstack((times, taskids)))

     # Short aliases keep each 12-element mask on a single line.
     T, F = True, False
     expected = [
         (np.array([T, F, F, F, F, F, T, T, T, T, T, T], dtype=bool),
          np.array([F, T, T, T, T, T, F, F, F, F, F, F], dtype=bool)),
         (np.array([T, T, F, F, F, F, T, T, T, T, T, T], dtype=bool),
          np.array([F, F, T, T, T, T, F, F, F, F, F, F], dtype=bool)),
         (np.array([T, T, T, T, T, T, T, F, F, F, F, F], dtype=bool),
          np.array([F, F, F, F, F, F, F, T, T, T, T, T], dtype=bool)),
         (np.array([T, T, T, T, T, T, T, T, F, F, F, F], dtype=bool),
          np.array([F, F, F, F, F, F, F, F, T, T, T, T], dtype=bool)),
     ]
     np.testing.assert_array_equal(list(foldsplitter(X, -1, [1, 2])), expected)
# Experiment driver configured from the command line.
# Positional arguments read in this section:
#   argv[2]  name of the method to keep
#   argv[3]  train set ratio (int, wrapped in a one-element list)
#   argv[4]  data file name handed to load_data
#   argv[5]  number of random restarts (int)
#   argv[6]  retweet filtering flag, parsed as int and then bool ("0" -> False)
#   argv[7]  optional integer seed for numpy's random number generator
# NOTE(review): foldtorun is passed to Experiment below but is not assigned
# in this section; presumably it is read from argv[1] earlier -- confirm.
methodname = sys.argv[2]
train_set_ratios = [int(sys.argv[3])]
fname = sys.argv[4]
random_restarts = int(sys.argv[5])
filter_retweets = bool(int(sys.argv[6]))
if len(sys.argv) >= 8:
    # A random number generator seed has been passed: seed numpy with it.
    seed = int(sys.argv[7])
    import numpy as np
    np.random.seed(seed)
else:
    initialize_seed_with_currtime()

X, y, header = load_data(fname, labels_to_keep=LABELS)
# Only the third value returned by extract_feature_indices is used here.
_, _, postprocessed_task_column_id, _ = extract_feature_indices(header)
splitter = foldsplitter(X, postprocessed_task_column_id, train_set_ratios)
evaluation_measures = [sklearn.metrics.accuracy_score]
# Number of distinct values found in the task column.
tasks_number = len(set(X[:, postprocessed_task_column_id]))

methodsmultitask, methodnamesmultitask = get_methods_multitask(
    tasks_number, header, random_restarts=random_restarts)
methodssingletask, methodnamessingletask = get_methods_singletask(
    header, random_restarts=random_restarts)

# Identity comparison is the correct way to test against None. As written,
# methodname comes from sys.argv and is therefore always a string.
if methodname is not None:
    # Keep only the requested method in both method collections.
    methodnamesmultitask, methodsmultitask = filter_methods(
        methodnamesmultitask, methodsmultitask, methodname)
    methodnamessingletask, methodssingletask = filter_methods(
        methodnamessingletask, methodssingletask, methodname)

experiment = Experiment(
    X, y, train_set_ratios, foldtorun, splitter, evaluation_measures,
    methodnamesmultitask, methodsmultitask,
    methodnamessingletask, methodssingletask,
    print_metrics=print_metrics_multiclass,
    random_restarts=random_restarts, results={}, header=header,
    filter_retweets=filter_retweets)
experiment.run()
Exemple #4
0
# Command-line configuration for an experiment run:
#   argv[2] method name, argv[3] train set ratio (int), argv[4] data file,
#   argv[5] random restarts (int), argv[6] retweet filter flag (0/1),
#   argv[7] optional integer seed for numpy's random number generator.
methodname = sys.argv[2]
train_set_ratios = [int(sys.argv[3])]  # single ratio wrapped in a list
fname = sys.argv[4]
random_restarts = int(sys.argv[5])
filter_retweets = bool(int(sys.argv[6]))  # "0" -> False, other ints -> True
if len(sys.argv) >= 8:
    # A seed was supplied on the command line: use it for numpy's generator.
    seed = int(sys.argv[7])
    import numpy as np
    np.random.seed(seed)
else:
    initialize_seed_with_currtime()

X, y, header = load_data(fname, labels_to_keep=LABELS)
# Only the third value returned by extract_feature_indices is needed.
_, _, postprocessed_task_column_id, _ = extract_feature_indices(header)
splitter = foldsplitter(X, postprocessed_task_column_id, train_set_ratios)
evaluation_measures = [sklearn.metrics.accuracy_score]
# Count the distinct values present in the task column.
tasks_number = len(set(X[:, postprocessed_task_column_id]))

methodsmultitask, methodnamesmultitask = get_methods_multitask(
    tasks_number, header, random_restarts=random_restarts)
methodssingletask, methodnamessingletask = get_methods_singletask(
    header, random_restarts=random_restarts)

# Compare against None by identity rather than with ``!=``.
if methodname is not None:
    # Restrict both method collections to the requested method.
    methodnamesmultitask, methodsmultitask = filter_methods(
        methodnamesmultitask, methodsmultitask, methodname)
    methodnamessingletask, methodssingletask = filter_methods(
        methodnamessingletask, methodssingletask, methodname)