Beispiel #1
0
def fit_model():
    """Run the clustering -> classification -> regression pipeline for K clusters."""
    n_clusters = K
    banner = '=' * 50
    print('')
    print(banner)
    print("Let's start with {} clusters!".format(n_clusters))
    print(banner)
    print('')

    # Load the splits used by the three pipeline stages.
    train_test_clu, train_test_reg, df_weather = read_dataset()

    # Stage 1: cluster the training data.
    clusterer = create_clusterer(n_clusters)
    df_clustered_list, sequence, label_df_test = clusterer.fit(train_test_clu)

    # Stage 2: train a classifier on the cluster sequence.
    classifier = create_classifier()
    best_classifier, test_clf = classifier.fit(train_test_clu, sequence,
                                               df_weather)

    # Stage 3: fit the per-cluster regression models.
    regressor = create_regressor()
    best_model_set = regressor.fit(df_clustered_list, train_test_reg)

    # Evaluate the combined pipeline on the held-out data.
    pred_for_plot, true_for_plot = evaluate(label_df_test, train_test_reg,
                                            test_clf, best_classifier,
                                            best_model_set, sequence)

    return
Beispiel #2
0
def init_train(root_dir='./', results_dir='./'):
    """Train each configured classifier on the MITECG dataset.

    For every classifier, skip the run if its df_metrics.csv marker already
    exists; otherwise create the output directory, read the dataset, fit
    the classifier, and drop a DONE marker directory.

    Parameters
    ----------
    root_dir : str
        Root directory handed to read_dataset().
    results_dir : str
        Base directory under which per-classifier results are written.
    """
    archive_name = 'UCRArchive_2018'
    dataset_name = 'MITECG'
    classifier_names = ['fcn', 'resnet', 'mlp', 'tlenet']
    itr = '_itr_1'

    for classifier_name in classifier_names:
        print(
            f'Empezando entrenamiento utilizando el algoritmo {classifier_name}'
        )
        # Build the same slash-separated path the rest of the project expects
        # (note the trailing '/').
        output_directory = (f'{results_dir}/{classifier_name}/'
                            f'{archive_name}{itr}/{dataset_name}/')
        test_dir_df_metrics = f'{output_directory}df_metrics.csv'
        print('Method: ', archive_name, dataset_name, classifier_name, itr)

        # df_metrics.csv already present: this combination finished earlier.
        if os.path.exists(test_dir_df_metrics):
            print('Already done')
            continue

        create_directory(output_directory)
        datasets_dict = read_dataset(root_dir, archive_name, dataset_name)
        fit_classifier(datasets_dict, dataset_name, classifier_name,
                       output_directory)
        print('LISTO')
        # the creation of this directory means the run completed
        create_directory(f'{output_directory}/DONE')
def main():
    """Build a syllable-to-id dictionary from the corpus and save it.

    For every word in the corpus, collects syllables from the 2-, 3- and
    4-character suffixes plus the whole word (i == 0 makes word[-i:] the
    full word), in both lowercased and original case, then writes the
    mapping to 'Syllables.dict' as lines of the form 'syl$id'.
    """
    # dir_ = sys.argv[1]
    dir_ = '/Users/alexander/Diploma/txt_acc_nkrja/'
    files = utils.read_dataset(dir_)

    # Reserved ids for special tokens; real syllables are numbered after them.
    all_syllables = dict()
    all_syllables['UNC'] = 1
    all_syllables['SEP'] = 2
    all_syllables['END'] = 3
    all_syllables['START'] = 4
    print(len(files))

    for word in itertools.chain.from_iterable(files):
        word = word.replace("'", '')
        for i in list(range(2, 5)) + [0]:
            # The original duplicated this inner loop verbatim for the
            # lowered and the original-case suffix; iterate both variants
            # in the same order instead.
            for suffix in (word[-i:].lower(), word[-i:]):
                for syl in syllable.SyllableTrasformer.word_to_syllables(suffix):
                    if syl not in all_syllables:
                        # Ids are assigned densely in first-seen order.
                        all_syllables[syl] = len(all_syllables) + 1

    with open('Syllables.dict', 'w') as dict_file:
        for syl, idx in all_syllables.items():
            dict_file.write('{}${}\n'.format(syl, idx))
Beispiel #4
0
    
    for archive_name in archive_names:
        for dataset_name in mts_data_names:
            for classifier_name in classifier_names:
                output_directory = root_dir+'/results/'+classifier_name+'/'+archive_name+itr+'/'+\
                                    dataset_name+'/'

                output_directory = create_directory(output_directory)

                print('Method: ',archive_name, dataset_name, classifier_name, itr)

                if output_directory is None: 
                    print('Already done')
                else: 

                    datasets_dict = read_dataset(root_dir,archive_name,dataset_name)

                    fit_classifier()

                    print('DONE')

                    #the creation of this directory means
                    create_directory(output_directory+'/DONE')



else:
    # this is the code used to launch an experiment on a dataset
    archive_name = sys.argv[1]
    dataset_name = sys.argv[2]
    classifier_name=sys.argv[3]
Description:
Author: Moustafa Sadek Kahil
Date: 13/11/2020
"""

file = "../data/BCHI.csv"
from utils.utils import read_dataset, spark, sorted_dict, sorted_cols, sorted_values, build_discrete_filters, \
    discrete_columns, cols_for_clustering
from utils.utils import data_dict, dict_values, dict_cols, save_data_hdfs, sorted_data, add_filter, column_to_list
from vis.visualization import visualize_combinations, visualize_columns, hierarchical_visualization, \
    visualize_cols_stats
from clustering.kmeans import kmeans_list

if __name__ == '__main__':
    # Use distinct local names: the original rebound the imported `spark`
    # and `sorted_dict` helpers to their own results and shadowed the
    # builtin `dict`, which would break any later call to those names.
    spark_session = spark()
    data = read_dataset(file=file, spark=spark_session, delimiter=",")

    col_data = data_dict(data)

    sorted_stats = sorted_dict(data)

    # print(sorted_cols(sorted_stats))

    # Column statistics before and after sorting, then their combinations.
    visualize_columns(dict_cols(col_data), dict_values(col_data),
                      "Columns before sorting")

    visualize_columns(sorted_cols(sorted_stats), sorted_values(sorted_stats),
                      "Columns After sorting")

    visualize_combinations(sorted_cols(sorted_stats),
                           sorted_values(sorted_stats), "Columns Combinations")
Beispiel #6
0
        # dataset_name = sys.argv[2]
        # classifier_name = sys.argv[3]
        # itr = sys.argv[4]

        output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + itr + '/' + \
            dataset_name + '/'

        test_dir_df_metrics = output_directory + 'df_metrics.csv'

        print('Method: ', archive_name, dataset_name, classifier_name, itr)

        if args.retrain:
            if os.path.exists(test_dir_df_metrics):
                os.remove(test_dir_df_metrics)

            create_directory(output_directory)
            datasets_dict = read_dataset(
                root_dir, archive_name, dataset_name, args.file_ext, args.remove_docstr)
            fit_classifier(datasets_dict, dataset_name, args.verbose, args.val_proportion, args.do_pred_only, args.nb_epochs,
                           args.batch_size, args.trainable_layers, args.nb_epochs_finetune, output_directory, args.min_lr)

            print('DONE')

            # the creation of this directory means
            create_directory(output_directory + '/DONE')
        else:
            if os.path.exists(test_dir_df_metrics):
                print(f'Already done in {test_dir_df_metrics}')
            else:
                print(f'Not retrain and no {test_dir_df_metrics}. Please set --retrain True')
Beispiel #7
0
    for classifier_name in arguments.classifier_names:
        print('classifier_name', classifier_name)

        for iter in range(arguments.iterations):
            if classifier_name == 'emn_cv' and iter > 0:
                continue

            print('\t\titer', iter)

            trr = ''
            if iter != 0:
                trr = '_itr_' + str(iter)

            tmp_output_dir = arguments.output_path + '/results/' + classifier_name + '/UCRArchive_2018/' + trr + '/'

            for dataset_name in arguments.dataset_names:
                print('\t\t\tdataset_name: ', dataset_name)

                output_dir = tmp_output_dir + dataset_name + '/'

                create_directory(output_dir)

                datasets_dict = read_dataset(root_dir, dataset_name)

                fit_classifier()

                print('\t\t\t\tDONE')

                # the creation of this directory means
                create_directory(output_dir + '/DONE')
Beispiel #8
0
elif sys.argv[1] == 'generate_results_csv':
    res = utils.generate_results_csv('results.csv', ROOT_DIR)
    print(res.to_string())
else:
    # this is the code used to launch an experiment on a dataset
    archive_name = sys.argv[1]
    dataset_name = sys.argv[2]
    classifier_name = sys.argv[3]
    itr = sys.argv[4]

    if itr == '_itr_0':
        itr = ''

    output_directory = os.path.join(ROOT_DIR, 'results', classifier_name,
                                    archive_name + itr, dataset_name)
    test_dir_df_metrics = os.path.join(output_directory, 'df_metrics.csv')

    print('Method: ', archive_name, dataset_name, classifier_name, itr)

    if os.path.exists(test_dir_df_metrics):
        print('Already done')
    else:
        utils.create_directory(output_directory)
        dataset = utils.read_dataset(ROOT_DIR, archive_name, dataset_name)

        fit_classifier(classifier_name, dataset, output_directory)
        print('DONE')

        # the creation of this directory means
        utils.create_directory(os.path.join(output_directory, 'DONE'))