Example #1
import os
import pdb

import fparser as fp  # fp is assumed to alias the fparser module shown in the last example


def parsetest(parseOption):
    """Parse a sample dataset, then run SVM and clustering on it.

    ParseOptions:
    -------------
    COMMA_DL=0
    SPACE_DL=1
    LC_SINGLE=1
    LC_SEPERATE=2
    LC_SVM_A=3
    LC_SVM_B=4
    LC_SVM_C=5
    """
    import sk_handler as skh
    dname = '{}/data/datasets/libras/movement_libras_10.data'.format(
        os.getcwd())
    #dname = '{}/data/init/adult/adult.data'.format(os.getcwd())
    #pdb.set_trace()
    data = fp.Parser(parseOption, dname, True, .25)
    pdb.set_trace()  # debugging breakpoint
    # Convert the raw file, write train/test CSVs, and split off the label column.
    target_input = data.convert_file()
    train_data, test_data = data.write_csv(target_input)
    X_train, y_train = data.split_last_column(train_data)
    X_test, y_test = data.split_last_column(test_data)
    # Run SVM and clustering through the sk_handler wrapper; each returns (duration, accuracy).
    sk = skh.sk_handler(X_train, y_train, X_test, y_test)
    duration, acc = sk.svm()
    dur2, acc2 = sk.clustering()
    pdb.set_trace()  # inspect results interactively
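A minimal sketch of calling this helper directly, assuming parsetest is importable at module level; the option value 0 corresponds to COMMA_DL as listed in the docstring above:

if __name__ == '__main__':
    # 0 == COMMA_DL per the docstring; any of the other listed option values could be passed instead.
    parsetest(0)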
Example #2
    def populate_data_from_init_folder(self):
        """Walk ./data/init and add every *.data file found to the repository."""
        for dirpath, dirname, filelist in os.walk('./data/init'):
            for filename in filelist:
                if re.search(r".*[.]data$", filename):
                    print("dirpath: {}, dirname: {}, filename: {}"
                          .format(dirpath, dirname, filename))
                    dpath = '{}/{}'.format(dirpath, filename)
                    data = fp.Parser(fp.LC_SINGLE, dpath, False, .25, ',')
                    full_set = data.convert_file()
                    pdb.set_trace()  # debugging breakpoint
                    repo.add_dset(filename,
                                  dpath,
                                  full_set,
                                  self.session)
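For orientation, the walk above expects a layout roughly like the following under the working directory; the paths below are illustrative, not taken from the project:

# ./data/init/adult/adult.data                 <- ends in .data, gets parsed and added
# ./data/init/libras/movement_libras_10.data   <- ends in .data, gets parsed and added
# ./data/init/adult/adult.names                <- skipped, does not match the .data regex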
Example #3
    def populate_learning_curves(self):
        """Populate learning_curves database table with a curve for every dataset with every algorithm"""
        import sk_handler as skh
        from random import shuffle
        try:
            d_sets = self.session.query(repo.DatasetAll).all()
        except AttributeError:
            print('Repo metabases likely not defined, defining now')
            repo.defineMeta()
            d_sets = self.session.query(repo.DatasetAll).all()

        algs = self.session.query(repo.Algorithm).all()
        for d_set in d_sets:
            print("Crafting Learning Curve for  dataset: {}".format(d_set.data_name))
            data_id = d_set.data_id
            data = fp.Parser(fp.COMMA_DL, d_set.data_path,
                             fp.TP_TRUE, per=.25)
            target_input = data.convert_file()
            shuffle(target_input)
            target_input = data.limit_size(target_input)  # limiting size of datasets for sanity
            percents = [0.1, 0.2, 0.3]

            for alg in algs:
                results = []
                train_time = 0
                alg_id = alg.alg_id
                evstring = '{}()'.format(alg.alg_path)
                for percent in percents:
                    shuffle(target_input)
                    train_data, test_data = data.partition(target_input, per=percent)
                    X_train, y_train = data.split_last_column(train_data)
                    X_test, y_test = data.split_last_column(test_data)
                    sk = skh.SkHandler(X_train, y_train, X_test, y_test)
                    print('{} evaluated at {} percent'.format(evstring, str(percent)))
                    try:
                        durr, acc = eval(evstring)
                        train_time += durr
                        results.append(acc)
                    except Exception as ex:
                        print("Could not train dataset {} with method {}: {}".format(d_set.data_path,
                                                                                     alg.alg_path,
                                                                                     ex))
                        durr, acc = [float('inf'), 0]
                        results.append(acc)

                results.append(train_time)
                repo.add_curve(data_id, alg_id, results, self.session)
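For reference, the results list handed to repo.add_curve above holds one accuracy per training fraction followed by the summed training time; a sketch with placeholder numbers, not real output:

# [acc at 10%, acc at 20%, acc at 30%, total train_time]
results = [0.81, 0.84, 0.87, 12.6]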
Example #4
    def populate_data_all(self):
        all_dict = {'className': 'DatasetAll',
                    'tableName': 'all_data'}
        filelist = self.get_allowed_files()
        for dpath, filename in filelist:
            print("Adding set {} to all_sets at {}".format(filename, dpath))
            data = fp.Parser(fp.LC_SINGLE, dpath, False, .25, ',')
            target_input = data.convert_file()
            target_input = data.limit_size(target_input)  # limiting size of datasets for sanity

            try:
                repo.add_dset(all_dict['className'],
                              all_dict['tableName'],
                              filename,
                              dpath,
                              target_input,
                              self.session)
            except Exception as ex:
                print("Exception occured whilst trying to add dataset: {}".format(ex))
Example #5
    def populate_runs_all(self):
        """Populate runs_all database table with a run of every dataset with every algorithm"""
        import sk_handler as skh
        from random import shuffle
        try:
            d_sets = self.session.query(repo.DatasetAll).all()
        except AttributeError:
            print('Repo metabases likely not defined, defining now')
            repo.defineMeta()
            d_sets = self.session.query(repo.DatasetAll).all()

        algs = self.session.query(repo.Algorithm).all()
        for d_set in d_sets:
            print("Analyzing dataset: {}".format(d_set.data_name))
            data_id = d_set.data_id
            data = fp.Parser(fp.COMMA_DL, d_set.data_path,
                             fp.TP_TRUE, per=.25)
            target_input = data.convert_file()
            shuffle(target_input)  # randomize row order (comment this out while debugging)
            target_input = data.limit_size(target_input)  # limiting size of datasets for sanity
            train_data, test_data = data.partition(target_input)
            X_train, y_train = data.split_last_column(train_data)
            X_test, y_test = data.split_last_column(test_data)
            sk = skh.SkHandler(X_train, y_train, X_test, y_test)
            for alg in algs:
                alg_id = alg.alg_id
                evstring = '{}()'.format(alg.alg_path)
                print(evstring)
                try:
                    durr, acc = eval(evstring)
                except Exception as ex:
                    print("Could not train dataset {} with method {}: {}".format(d_set.data_path,
                                                                                 alg.alg_path,
                                                                                 ex))
                    durr, acc = [float('inf'), 0]
                    pdb.set_trace()

                repo.add_run(data_id, alg_id, durr, acc, self.session)
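The eval-based dispatch assumes alg.alg_path names a method on the handler bound to sk; the value 'sk.svm' below is only an illustrative guess (the first example does call sk.svm()):

# Illustration of the eval dispatch with an assumed alg_path value of 'sk.svm'.
evstring = '{}()'.format('sk.svm')   # -> 'sk.svm()'
durr, acc = eval(evstring)           # equivalent to: durr, acc = sk.svm()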
Example #6
import fparser
import misc as appmisc
import sys

# make sure this file is being run directly
if __name__ == '__main__':
    if len(sys.argv) == 1:
        appmisc.usage(sys.argv[0])
        sys.exit(1)
    else:
        file = appmisc.file_open(sys.argv[1])
        if file is not None:
            appparser = fparser.Parser(file)
            appparser.parse()
            appmisc.file_close(file)
        else:
            appmisc.print_err("{}: no such file or directory".format(sys.argv[1]))
            sys.exit(1)
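A sketch of how this entry point behaves from a shell; the script and data file names below are placeholders:

# $ python parse_cli.py                   -> usage() is printed and the script exits with status 1
# $ python parse_cli.py some_file.data    -> the file is opened, parsed with fparser.Parser, then closed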