def test_foldsplitter(self):
    """Check the (train, test) boolean masks produced by ``foldsplitter``.

    The input has two tasks (ids 0 and 1) with six samples each; the first
    column of ``X`` holds the per-task time index and the last column the
    task id.  ``foldsplitter`` is called with task column ``-1`` and
    training-set sizes ``[1, 2]``.

    The expected folds are, in order: task 0 with 1 training sample,
    task 0 with 2, task 1 with 1, task 1 with 2.  In every fold the
    training mask also contains all samples of the other task, and the
    test mask is exactly the complement of the training mask.
    """
    taskids = np.array([0 for _ in range(6)] + [1 for _ in range(6)])
    # ``range`` objects cannot be concatenated with ``+`` on Python 3, so
    # materialise them as lists first (this also works on Python 2).
    times = np.array(list(range(6)) + list(range(6)))
    X = np.transpose(np.vstack((times, taskids)))

    expected = [
        # Task 0, one training sample.
        (
            np.array([
                True, False, False, False, False, False,
                True, True, True, True, True, True,
            ], dtype=bool),
            np.array([
                False, True, True, True, True, True,
                False, False, False, False, False, False,
            ], dtype=bool),
        ),
        # Task 0, two training samples.
        (
            np.array([
                True, True, False, False, False, False,
                True, True, True, True, True, True,
            ], dtype=bool),
            np.array([
                False, False, True, True, True, True,
                False, False, False, False, False, False,
            ], dtype=bool),
        ),
        # Task 1, one training sample.
        (
            np.array([
                True, True, True, True, True, True,
                True, False, False, False, False, False,
            ], dtype=bool),
            np.array([
                False, False, False, False, False, False,
                False, True, True, True, True, True,
            ], dtype=bool),
        ),
        # Task 1, two training samples.
        (
            np.array([
                True, True, True, True, True, True,
                True, True, False, False, False, False,
            ], dtype=bool),
            np.array([
                False, False, False, False, False, False,
                False, False, True, True, True, True,
            ], dtype=bool),
        ),
    ]

    np.testing.assert_array_equal(list(foldsplitter(X, -1, [1, 2])), expected)
def test_foldsplitter(self):
    """Verify the train/test masks yielded by ``foldsplitter``.

    Builds a 12-row matrix with columns (time, task id): two tasks, six
    samples each, time running 0..5 within a task.  ``foldsplitter`` is
    invoked with the task id in the last column (``-1``) and training-set
    sizes ``[1, 2]``; the four expected folds are spelled out below as
    bit strings ('1' is True, '0' is False), one (train, test) pair each.
    """
    samples_per_task = 6
    taskids = np.array([0] * samples_per_task + [1] * samples_per_task)
    # Build the repeated 0..5 sequence from a list: adding two ``range``
    # objects together is a TypeError on Python 3.
    times = np.array(list(range(samples_per_task)) * 2)
    X = np.vstack((times, taskids)).T

    def mask(bits):
        # Turn a string of '0'/'1' characters into a boolean numpy mask.
        return np.array([bit == "1" for bit in bits], dtype=bool)

    expected = [
        # (train mask, test mask)
        (mask("100000111111"), mask("011111000000")),  # task 0, 1 train
        (mask("110000111111"), mask("001111000000")),  # task 0, 2 train
        (mask("111111100000"), mask("000000011111")),  # task 1, 1 train
        (mask("111111110000"), mask("000000001111")),  # task 1, 2 train
    ]

    actual = list(foldsplitter(X, -1, [1, 2]))
    np.testing.assert_array_equal(actual, expected)
# Read the run configuration from the command line:
#   argv[2]  name of the method to keep
#   argv[3]  training-set ratio (int, wrapped in a one-element list)
#   argv[4]  data file name passed to load_data
#   argv[5]  number of random restarts (int)
#   argv[6]  integer flag; any non-zero value enables filter_retweets
#   argv[7]  optional seed for numpy's random number generator (int)
methodname = sys.argv[2]
train_set_ratios = [int(sys.argv[3])]
fname = sys.argv[4]
random_restarts = int(sys.argv[5])
filter_retweets = bool(int(sys.argv[6]))

if len(sys.argv) >= 8:
    # A random number generator seed has been passed.
    seed = int(sys.argv[7])
    import numpy as np
    np.random.seed(seed)
else:
    initialize_seed_with_currtime()

X, y, header = load_data(fname, labels_to_keep=LABELS)
_, _, postprocessed_task_column_id, _ = extract_feature_indices(header)

splitter = foldsplitter(X, postprocessed_task_column_id, train_set_ratios)
evaluation_measures = [sklearn.metrics.accuracy_score]

# Number of distinct values found in the task column.
tasks_number = len(set(X[:, postprocessed_task_column_id]))
methodsmultitask, methodnamesmultitask = get_methods_multitask(
    tasks_number, header, random_restarts=random_restarts)
methodssingletask, methodnamessingletask = get_methods_singletask(
    header, random_restarts=random_restarts)

# NOTE(review): methodname comes straight from sys.argv[2], which is always
# a str, so this condition is always true as written.
if methodname is not None:
    # We are interested in keeping only one method.
    methodnamesmultitask, methodsmultitask = filter_methods(
        methodnamesmultitask, methodsmultitask, methodname)
    methodnamessingletask, methodssingletask = filter_methods(
        methodnamessingletask, methodssingletask, methodname)

# NOTE(review): foldtorun is not assigned in this fragment; it must be
# defined earlier in the script — confirm.
experiment = Experiment(
    X, y, train_set_ratios, foldtorun, splitter, evaluation_measures,
    methodnamesmultitask, methodsmultitask,
    methodnamessingletask, methodssingletask,
    print_metrics=print_metrics_multiclass,
    random_restarts=random_restarts,
    results={},
    header=header,
    filter_retweets=filter_retweets)
experiment.run()
# Positional command-line arguments consumed by this fragment:
#   argv[2] -> method name used to filter the method lists
#   argv[3] -> training-set ratio (int), stored as a single-item list
#   argv[4] -> data file name handed to load_data
#   argv[5] -> random restart count (int)
#   argv[6] -> integer flag, non-zero turns filter_retweets on
#   argv[7] -> optional integer seed for numpy's RNG
methodname = sys.argv[2]
train_set_ratios = [int(sys.argv[3])]
fname = sys.argv[4]
random_restarts = int(sys.argv[5])
filter_retweets = bool(int(sys.argv[6]))

if len(sys.argv) >= 8:
    # A random number generator seed has been passed.
    seed = int(sys.argv[7])
    import numpy as np
    np.random.seed(seed)
else:
    initialize_seed_with_currtime()

X, y, header = load_data(fname, labels_to_keep=LABELS)
_, _, postprocessed_task_column_id, _ = extract_feature_indices(header)

splitter = foldsplitter(X, postprocessed_task_column_id, train_set_ratios)
evaluation_measures = [sklearn.metrics.accuracy_score]

# Count the distinct values present in the task column.
tasks_number = len(set(X[:, postprocessed_task_column_id]))
methodsmultitask, methodnamesmultitask = get_methods_multitask(
    tasks_number, header, random_restarts=random_restarts)
methodssingletask, methodnamessingletask = get_methods_singletask(
    header, random_restarts=random_restarts)

# NOTE(review): sys.argv[2] is always a str, so this test cannot fail as
# written; identity comparison is the correct way to test for None.
if methodname is not None:
    # We are interested in keeping only one method.
    methodnamesmultitask, methodsmultitask = filter_methods(
        methodnamesmultitask, methodsmultitask, methodname)
    methodnamessingletask, methodssingletask = filter_methods(
        methodnamessingletask, methodssingletask, methodname)