def fit_model():
    """Run the full clustering -> classification -> regression pipeline.

    Reads the datasets, fits a clusterer with the module-level ``K``
    clusters, trains a classifier on the resulting cluster sequence,
    fits one regressor per cluster, and evaluates the combined model.
    All heavy lifting is delegated to module-level helpers; this
    function only wires them together for its side effects.
    """
    n_clusters = K  # K is presumably a module-level constant -- TODO confirm
    print('')
    print('=' * 50)
    print("Let's start with {} clusters!".format(n_clusters))
    print('=' * 50)
    print('')
    # Load the clustering/regression splits plus exogenous weather data.
    train_test_clu, train_test_reg, df_weather = read_dataset()
    clusterer = create_clusterer(n_clusters)
    df_clustered_list, sequence, label_df_test = clusterer.fit(train_test_clu)
    classifier = create_classifier()
    best_classifier, test_clf = classifier.fit(train_test_clu, sequence, df_weather)
    regressor = create_regressor()
    best_model_set = regressor.fit(df_clustered_list, train_test_reg)
    # evaluate() is called for its side effects; its return values were
    # previously bound to unused locals (pred_for_plot / true_for_plot),
    # so the assignment and the trailing bare `return` were dropped.
    evaluate(label_df_test, train_test_reg, test_clf, best_classifier,
             best_model_set, sequence)
def init_train(root_dir='./', results_dir='./'):
    """Launch one training experiment per classifier on the MITECG dataset.

    For each classifier in a fixed list, builds the results directory
    path, skips the run when a ``df_metrics.csv`` already exists there,
    and otherwise loads the dataset, trains, and drops a DONE marker
    directory to flag completion.
    """
    archive_name = 'UCRArchive_2018'
    dataset_name = 'MITECG'
    itr = '_itr_1'
    for classifier_name in ['fcn', 'resnet', 'mlp', 'tlenet']:
        print(
            f'Empezando entrenamiento utilizando el algoritmo {classifier_name}'
        )
        output_directory = (
            f'{results_dir}/{classifier_name}/{archive_name}{itr}/{dataset_name}/'
        )
        test_dir_df_metrics = output_directory + 'df_metrics.csv'
        print('Method: ', archive_name, dataset_name, classifier_name, itr)
        # An existing metrics file marks an already-finished run.
        if os.path.exists(test_dir_df_metrics):
            print('Already done')
            continue
        create_directory(output_directory)
        datasets_dict = read_dataset(root_dir, archive_name, dataset_name)
        fit_classifier(datasets_dict, dataset_name, classifier_name,
                       output_directory)
        print('LISTO')
        # The DONE directory marks the experiment as completed.
        create_directory(output_directory + '/DONE')
def main():
    """Build a syllable-to-id dictionary from a corpus and dump it to disk."""
    # dir_ = sys.argv[1]
    dir_ = '/Users/alexander/Diploma/txt_acc_nkrja/'
    files = utils.read_dataset(dir_)
    # Ids 1-4 are reserved for special tokens; syllables get ids after them.
    all_syllables = {'UNC': 1, 'SEP': 2, 'END': 3, 'START': 4}
    print(len(files))
    for word in itertools.chain.from_iterable(files):
        word = word.replace("'", '')
        # Consider the 2-, 3- and 4-character suffixes, then the whole word
        # (word[-0:] == word), syllabified in lowercase and original case.
        for i in list(range(2, 5)) + [0]:
            suffix = word[-i:]
            for variant in (suffix.lower(), suffix):
                for syl in syllable.SyllableTrasformer.word_to_syllables(variant):
                    # First sighting of a syllable claims the next free id.
                    all_syllables.setdefault(syl, len(all_syllables) + 1)
    with open('Syllables.dict', 'w') as dict_file:
        for syl, idx in all_syllables.items():
            dict_file.write(f'{syl}${idx}\n')
for archive_name in archive_names: for dataset_name in mts_data_names: for classifier_name in classifier_names: output_directory = root_dir+'/results/'+classifier_name+'/'+archive_name+itr+'/'+\ dataset_name+'/' output_directory = create_directory(output_directory) print('Method: ',archive_name, dataset_name, classifier_name, itr) if output_directory is None: print('Already done') else: datasets_dict = read_dataset(root_dir,archive_name,dataset_name) fit_classifier() print('DONE') #the creation of this directory means create_directory(output_directory+'/DONE') else: # this is the code used to launch an experiment on a dataset archive_name = sys.argv[1] dataset_name = sys.argv[2] classifier_name=sys.argv[3]
Descripttion: Author: Moustafa Sadek Kahil Date: 13/11/2020 """ file = "../data/BCHI.csv" from utils.utils import read_dataset, spark, sorted_dict, sorted_cols, sorted_values, build_discrete_filters, \ discrete_columns, cols_for_clustering from utils.utils import data_dict, dict_values, dict_cols, save_data_hdfs, sorted_data, add_filter, column_to_list from vis.visualization import visualize_combinations, visualize_columns, hierarchical_visualization, \ visualize_cols_stats from clustering.kmeans import kmeans_list if __name__ == '__main__': spark = spark() data = read_dataset(file=file, spark=spark, delimiter=",") dict = data_dict(data) sorted_dict = sorted_dict(data) #print(sorted_cols(sorted_dict)) visualize_columns(dict_cols(dict), dict_values(dict), "Columns before sorting") visualize_columns(sorted_cols(sorted_dict), sorted_values(sorted_dict), "Columns After sorting") visualize_combinations(sorted_cols(sorted_dict), sorted_values(sorted_dict), "Columns Combinations")
# dataset_name = sys.argv[2] # classifier_name = sys.argv[3] # itr = sys.argv[4] output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + itr + '/' + \ dataset_name + '/' test_dir_df_metrics = output_directory + 'df_metrics.csv' print('Method: ', archive_name, dataset_name, classifier_name, itr) if args.retrain: if os.path.exists(test_dir_df_metrics): os.remove(test_dir_df_metrics) create_directory(output_directory) datasets_dict = read_dataset( root_dir, archive_name, dataset_name, args.file_ext, args.remove_docstr) fit_classifier(datasets_dict, dataset_name, args.verbose, args.val_proportion, args.do_pred_only, args.nb_epochs, args.batch_size, args.trainable_layers, args.nb_epochs_finetune, output_directory, args.min_lr) print('DONE') # the creation of this directory means create_directory(output_directory + '/DONE') else: if os.path.exists(test_dir_df_metrics): print(f'Already done in {test_dir_df_metrics}') else: print(f'Not retrain and no {test_dir_df_metrics}. Please set --retrain True')
# Train every requested classifier on every requested dataset for the
# configured number of iterations.  `arguments`, `root_dir`,
# `create_directory`, `read_dataset` and `fit_classifier` come from the
# enclosing scope.  NOTE(review): fit_classifier() takes no arguments, so
# it presumably reads the loop variables below (datasets_dict,
# classifier_name, dataset_name, output_dir) as globals -- confirm before
# renaming anything here.  `iter` also shadows the builtin.
for classifier_name in arguments.classifier_names:
    print('classifier_name', classifier_name)
    for iter in range(arguments.iterations):
        # 'emn_cv' is only run once; skip the extra iterations.
        if classifier_name == 'emn_cv' and iter > 0:
            continue
        print('\t\titer', iter)
        # Iteration 0 gets no suffix; later ones are tagged '_itr_<n>'.
        trr = ''
        if iter != 0:
            trr = '_itr_' + str(iter)
        tmp_output_dir = arguments.output_path + '/results/' + classifier_name + '/UCRArchive_2018/' + trr + '/'
        for dataset_name in arguments.dataset_names:
            print('\t\t\tdataset_name: ', dataset_name)
            output_dir = tmp_output_dir + dataset_name + '/'
            create_directory(output_dir)
            datasets_dict = read_dataset(root_dir, dataset_name)
            fit_classifier()
            print('\t\t\t\tDONE')
            # the creation of this directory means the run finished
            create_directory(output_dir + '/DONE')
# Sub-command: regenerate the aggregated results CSV.  The matching `if`
# for this `elif` is outside the visible chunk.
elif sys.argv[1] == 'generate_results_csv':
    res = utils.generate_results_csv('results.csv', ROOT_DIR)
    print(res.to_string())
else:
    # this is the code used to launch an experiment on a dataset
    archive_name = sys.argv[1]
    dataset_name = sys.argv[2]
    classifier_name = sys.argv[3]
    itr = sys.argv[4]
    # Iteration 0 carries no suffix in the results path.
    if itr == '_itr_0':
        itr = ''
    output_directory = os.path.join(ROOT_DIR, 'results', classifier_name,
                                    archive_name + itr, dataset_name)
    test_dir_df_metrics = os.path.join(output_directory, 'df_metrics.csv')
    print('Method: ', archive_name, dataset_name, classifier_name, itr)
    # An existing metrics file marks an already-completed experiment.
    if os.path.exists(test_dir_df_metrics):
        print('Already done')
    else:
        utils.create_directory(output_directory)
        dataset = utils.read_dataset(ROOT_DIR, archive_name, dataset_name)
        fit_classifier(classifier_name, dataset, output_directory)
        print('DONE')
        # the creation of this directory means the experiment finished
        utils.create_directory(os.path.join(output_directory, 'DONE'))