def parsetest(parseOption):
    """Smoke-test the parse -> split -> sk_handler pipeline on the libras set.

    ParseOptions:
    -------------
    COMMA_DL=0; SPACE_DL=1; LC_SINGLE=1; LC_SEPARATE=2; LC_SVM_A=3;
    LC_SVM_B=4; LC_SVM_C=5

    :param parseOption: parse mode constant forwarded to fp.Parser.
    :return: ((svm_duration, svm_accuracy), (clustering_duration,
              clustering_accuracy)) as reported by the sk handler.
    """
    import sk_handler as skh
    dname = '{}/data/datasets/libras/movement_libras_10.data'.format(
        os.getcwd())
    data = fp.Parser(parseOption, dname, True, .25)
    target_input = data.convert_file()
    train_data, test_data = data.write_csv(target_input)
    X_train, y_train = data.split_last_column(train_data)
    X_test, y_test = data.split_last_column(test_data)
    # NOTE(review): other call sites use skh.SkHandler — confirm the module
    # still exports the lowercase sk_handler alias used here.
    sk = skh.sk_handler(X_train, y_train, X_test, y_test)
    duration, acc = sk.svm()
    dur2, acc2 = sk.clustering()
    return (duration, acc), (dur2, acc2)
def populate_data_from_init_folder(self):
    """Walk ./data/init and register every *.data file with the repo.

    Each matching file is parsed with fp.Parser (LC_SINGLE mode, comma
    delimiter) and the full converted set is handed to repo.add_dset.
    """
    for dirpath, dirname, filelist in os.walk('./data/init'):
        for filename in filelist:
            # Only ingest files with a .data extension.
            if re.search(r".*[.]data$", filename):
                print("dirpath: {}, dirname: {}, filename: {}"
                      .format(dirpath, dirname, filename))
                dpath = '{}/{}'.format(dirpath, filename)
                data = fp.Parser(fp.LC_SINGLE, dpath, False, .25, ',')
                full_set = data.convert_file()
                # NOTE(review): populate_data_all calls repo.add_dset with
                # six arguments; this four-argument call may be stale —
                # confirm against the current add_dset signature.
                repo.add_dset(filename, dpath, full_set, self.session)
def populate_learning_curves(self):
    """Populate learning_curves database table with a curve for every
    dataset with every algorithm"""
    import sk_handler as skh
    from random import shuffle
    try:
        d_sets = self.session.query(repo.DatasetAll).all()
    except AttributeError:
        # First access: the ORM metadata has not been defined yet.
        print('Repo metabases likely not defined, defining now')
        repo.defineMeta()
        d_sets = self.session.query(repo.DatasetAll).all()
    algs = self.session.query(repo.Algorithm).all()
    for d_set in d_sets:
        print("Crafting Learning Curve for dataset: {}".format(d_set.data_name))
        data_id = d_set.data_id
        data = fp.Parser(fp.COMMA_DL, d_set.data_path, fp.TP_TRUE, per=.25)
        target_input = data.convert_file()
        shuffle(target_input)
        target_input = data.limit_size(target_input)  # limiting size of datasets for sanity
        # Training-set fractions at which each curve point is sampled.
        percents = [0.1, 0.2, 0.3]
        for alg in algs:
            results = []
            train_time = 0
            alg_id = alg.alg_id
            # alg.alg_path is expected to be an expression such as
            # 'sk.svm'; evaluated below against the local name `sk`.
            evstring = '{}()'.format(alg.alg_path)
            for percent in percents:
                shuffle(target_input)
                train_data, test_data = data.partition(target_input, per=percent)
                X_train, y_train = data.split_last_column(train_data)
                X_test, y_test = data.split_last_column(test_data)
                # `sk` looks unused but is referenced by eval(evstring).
                sk = skh.SkHandler(X_train, y_train, X_test, y_test)
                print('{} evaluated at {} percent'.format(evstring, str(percent)))
                try:
                    # SECURITY: eval of a database-sourced string — safe only
                    # if the algorithms table is fully trusted.
                    durr, acc = eval(evstring)
                    train_time += durr
                    results.append(acc)
                except Exception as ex:
                    print("Could not train dataset {} with method {}: {}".format(d_set.data_path, alg.alg_path, ex))
                    # Sentinel: infinite duration, zero accuracy on failure.
                    durr, acc = [float('inf'), 0]
                    results.append(acc)
            # Final element of results is the cumulative training time.
            results.append(train_time)
            repo.add_curve(data_id, alg_id, results, self.session)
def populate_data_all(self):
    """Register every allowed dataset file in the all_data table.

    Parses each file returned by get_allowed_files, caps its size, and
    hands it to repo.add_dset; failures are logged and skipped.
    """
    class_name = 'DatasetAll'
    table_name = 'all_data'
    for path, fname in self.get_allowed_files():
        print("Adding set {} to all_sets at {}".format(fname, path))
        parser = fp.Parser(fp.LC_SINGLE, path, False, .25, ',')
        # limiting size of datasets for sanity
        rows = parser.limit_size(parser.convert_file())
        try:
            repo.add_dset(class_name, table_name, fname, path,
                          rows, self.session)
        except Exception as ex:
            print("Exception occured whilst trying to add dataset: {}".format(ex))
def populate_runs_all(self):
    """Populate runs_all database table with a run of every dataset with
    every algorithm"""
    import sk_handler as skh
    from random import shuffle
    try:
        d_sets = self.session.query(repo.DatasetAll).all()
    except AttributeError:
        # First access: the ORM metadata has not been defined yet.
        print('Repo metabases likely not defined, defining now')
        repo.defineMeta()
        d_sets = self.session.query(repo.DatasetAll).all()
    algs = self.session.query(repo.Algorithm).all()
    for d_set in d_sets:
        print("Analyzing dataset: {}".format(d_set.data_name))
        data_id = d_set.data_id
        data = fp.Parser(fp.COMMA_DL, d_set.data_path, fp.TP_TRUE, per=.25)
        target_input = data.convert_file()
        shuffle(target_input)  # keep commented while debugging
        target_input = data.limit_size(target_input)  # limiting size of datasets for sanity
        train_data, test_data = data.partition(target_input)
        X_train, y_train = data.split_last_column(train_data)
        X_test, y_test = data.split_last_column(test_data)
        # `sk` looks unused but is referenced by eval(evstring) below.
        sk = skh.SkHandler(X_train, y_train, X_test, y_test)
        for alg in algs:
            alg_id = alg.alg_id
            # alg.alg_path is expected to be an expression such as 'sk.svm'.
            evstring = '{}()'.format(alg.alg_path)
            print(evstring)
            try:
                # SECURITY: eval of a database-sourced string — safe only
                # if the algorithms table is fully trusted.
                durr, acc = eval(evstring)
            except Exception as ex:
                print("Could not train dataset {} with method {}: {}".format(d_set.data_path, alg.alg_path, ex))
                # Sentinel: infinite duration, zero accuracy on failure.
                durr, acc = [float('inf'), 0]
            repo.add_run(data_id, alg_id, durr, acc, self.session)
import fparser
import misc as appmisc
import sys

# Run only when this file is executed directly, not on import.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # No input file supplied: print usage and fail.
        appmisc.usage(sys.argv[0])
        sys.exit(1)
    fh = appmisc.file_open(sys.argv[1])
    if fh is not None:
        appparser = fparser.Parser(fh)
        appparser.parse()
        appmisc.file_close(fh)
    else:
        appmisc.print_err("{}: no such file or directory".format(sys.argv[1]))
        sys.exit(1)