def launch():
    """Run an exhaustive pattern search on the SC2 dataset and print the results.

    Loads the first 5000 SC2 sequences, caps each sequence at length 10,
    then exhaustively mines the top-10 patterns for target class '1'
    (itemset extension enabled).
    """
    data = read_data_sc2('../data/sequences-TZ-45.txt')[:5000]
    data = reduce_k_length(10, data)
    top_patterns = exhaustive(data, '1', top_k=10, enable_i=True)
    print_results(top_patterns)
def compare_ground_truth():
    """Plot SeqScout's best WRAcc (normalised by the exhaustive-search optimum)
    as a function of the iteration budget on the SC2 dataset.

    Runs `seq_scout` `nb_launched` times per budget point (averaged) across 10
    increasing iteration limits, then saves a seaborn line plot and a pickled
    DataFrame under ./ground_truth/.
    """
    data = read_data_sc2('../data/sequences-TZ-45.txt')[:5000]
    data = reduce_k_length(10, data)
    target = '1'
    enable_i = True

    # if we want to average
    nb_launched = 5

    pool = Pool(processes=3)
    iterations_limit = 50
    iteration_step = 1000

    data_final = {'WRAcc': [], 'iterations': [], 'Algorithm': []}

    # found with exhaustive search
    ground_truth = 0.008893952000000009

    try:
        for i in range(10):
            print('Iteration: {}'.format(i))
            # BUG FIX: the inner loop previously reused `i`, shadowing the
            # outer iteration counter; use a distinct name for the repeats.
            for _run in range(nb_launched):
                result_ucb_opti = pool.apply_async(
                    seq_scout, (data, target), {
                        'enable_i': enable_i,
                        'time_budget': TIME_BUDGET_XP,
                        'iterations_limit': iterations_limit
                    })
                data_add_generic(
                    data_final,
                    WRAcc=max(0, average_results(result_ucb_opti.get())) /
                    ground_truth,
                    iterations=iterations_limit,
                    Algorithm='seqscout')
            iterations_limit += iteration_step
    finally:
        # Previously the pool was never released; make sure workers shut down.
        pool.close()
        pool.join()

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='iterations', y='WRAcc', hue='Algorithm')
    ax.set(xlabel='iterations', ylabel='WRAcc')
    plt.savefig('./ground_truth/gt.png')
    df.to_pickle('./ground_truth/result')
    if SHOW:
        plt.show()
def launch():
    """Run SeqScout on the SC2 dataset and print the mined patterns.

    Uses the first 5000 SC2 sequences (unreduced), mining the top-5 patterns
    for target class '1' with a 60 s budget capped at 10000 iterations;
    itemset extension and the vertical representation are both disabled.
    """
    data = read_data_sc2('../data/sequences-TZ-45.txt')[:5000]
    patterns = seq_scout(data,
                         '1',
                         time_budget=60,
                         top_k=5,
                         enable_i=False,
                         vertical=False,
                         iterations_limit=10000)
    print_results(patterns)
    # NOTE(review): the first statements below are the tail of a function whose
    # `def` line lies outside this chunk — it appears to add every memoised
    # value in `memo` on top of the base term w_k(memo, l_max, m). Confirm
    # against the full file before relying on this description.
    somme = w_k(memo, l_max, m)
    for i, value in memo.items():
        somme += value
    return somme


if __name__ == '__main__':
    # Each entry: (loaded dataset, target class label, human-readable name).
    datasets = [(read_data_kosarak('../data/aslbu.data'), '195', 'aslbu'),
                (read_data('../data/promoters.data'), '+', 'promoters'),
                (read_data_kosarak('../data/blocks.data'), '1', 'blocks'),
                (read_data_kosarak('../data/context.data'), '4', 'context'),
                (read_data('../data/splice.data'), 'EI', 'splice'),
                (read_data_sc2('../data/sequences-TZ-45.txt')[:5000], '1', 'sc2'),
                (read_data_kosarak('../data/skating.data'), '1', 'skating'),
                (read_jmlr('svm', '../data/jmlr/jmlr'), 'svm', 'jmlr')]

    for dataset in datasets:
        DATA, class_target, dataset_name = dataset

        # we remove the first element of each sequence, which is useless here
        for i in range(len(DATA)):
            DATA[i] = DATA[i][1:]

        #pattern_number, stages = compute_dataset_size(DATA)
        #print(pattern_number)
        # print(stages)
def seq_scout_api(dataset=conf.DATA, time_budget=conf.TIME_BUDGET, top_k=conf.TOP_K):
    '''
    Launch seq_scout on a named bundled dataset.

    This function is for the simplicity of the user, so that she does not
    need to specify an iterations number, which is here only for experiments.

    :param dataset: dataset name — one of 'splice', 'alsbu', 'blocks',
        'context', 'sc2', 'skating', 'jmlr'; anything else falls back to
        the promoters dataset.
    :param time_budget: time budget (seconds) passed to seq_scout.
    :param top_k: number of top patterns to return.
    :raises ValueError: if the dataset's target class never appears in the data.
    :return: the patterns found by seq_scout (encoded form).
    '''
    if dataset == 'splice':
        data = read_data(
            pathlib.Path(__file__).parent.parent / 'data/splice.data')
        target_class = 'EI'
        enable_i = False
    elif dataset == 'alsbu':
        data = read_data_kosarak(
            pathlib.Path(__file__).parent.parent / 'data/aslbu.data')
        target_class = '195'
        enable_i = False
    # BUG FIX: this branch was previously guarded by a duplicated
    # `dataset == 'alsbu'` test, making the blocks dataset unreachable.
    elif dataset == 'blocks':
        data = read_data_kosarak(
            pathlib.Path(__file__).parent.parent / 'data/blocks.data')
        target_class = '7'
        enable_i = False
    elif dataset == 'context':
        data = read_data_kosarak(
            pathlib.Path(__file__).parent.parent / 'data/context.data')
        target_class = '4'
        enable_i = False
    elif dataset == 'sc2':
        data = read_data_sc2(
            pathlib.Path(__file__).parent.parent /
            'data/sequences-TZ-45.txt')[:5000]
        target_class = '1'
        enable_i = True
    elif dataset == 'skating':
        data = read_data_kosarak(
            pathlib.Path(__file__).parent.parent / 'data/skating.data')
        target_class = '1'
        enable_i = False
    elif dataset == 'jmlr':
        data = read_jmlr(
            'svm',
            pathlib.Path(__file__).parent.parent / 'data/jmlr/jmlr')
        target_class = '+'
        enable_i = False
    else:
        # Default / fallback dataset.
        data = read_data(
            pathlib.Path(__file__).parent.parent / 'data/promoters.data')
        target_class = '+'
        enable_i = False

    # Sanity check: the target class must appear at least once
    # (each sequence stores its class label as its first element).
    class_present = False
    for sequence in data:
        if target_class == sequence[0]:
            class_present = True
            break

    if not class_present:
        raise ValueError('The target class does not appear in data')

    # Encode items as integers before mining, decode for display afterwards.
    items = extract_items(data)
    items, items_to_encoding, encoding_to_items = encode_items(items)
    data = encode_data(data, items_to_encoding)

    # Huge iterations_limit so that only time_budget effectively bounds the run.
    results = seq_scout(data,
                        target_class,
                        top_k=top_k,
                        vertical=False,
                        time_budget=time_budget,
                        iterations_limit=10000000000000,
                        enable_i=enable_i)

    print_results_decode(results, encoding_to_items)

    return results
# Shared location of the bundled benchmark datasets.
_DATA_DIR = pathlib.Path(__file__).parent.parent / 'data'

# Each entry: (loaded dataset, target class label, enable_i flag).
datasets = [
    (read_data_kosarak(_DATA_DIR / 'aslbu.data'), '195', False),
    (read_data(_DATA_DIR / 'promoters.data'), '+', False),
    (read_data_kosarak(_DATA_DIR / 'blocks.data'), '7', False),
    (read_data_kosarak(_DATA_DIR / 'context.data'), '4', False),
    (read_data(_DATA_DIR / 'splice.data'), 'EI', False),
    (read_data_sc2(_DATA_DIR / 'sequences-TZ-45.txt')[:5000], '1', False),
    (read_data_kosarak(_DATA_DIR / 'skating.data'), '1', False),
    (read_jmlr('svm', _DATA_DIR / 'jmlr/jmlr'), '+', False),
]

# Display names, positionally aligned with `datasets`.
datasets_names = [
    'aslbu', 'promoters', 'blocks', 'context', 'splice', 'sc2', 'skating',
    'jmlr'
]

# Whether plots should be displayed interactively (in addition to being saved).
SHOW = False