def ReadFiles(options):
    """
    Read the ensemble list files and the input csv named in ``options``.

    Each path in ``options.ensemble_paths`` is read as a comma-separated list
    of query names (surrounding whitespace is stripped) and stored in a lookup
    keyed by the file's basename. The distinct query names across all
    ensembles are then handed to ``csv_interface.read_csv`` together with the
    absolute form of ``options.inputpath``.

    :param options: object exposing ``ensemble_paths`` (iterable of paths) and
        ``inputpath`` (path to the input csv); it is also passed through to
        ``csv_interface.read_csv`` unchanged.
    :return: ``(molecules, ensemble_lookup)`` on success, or the integer ``1``
        if an ensemble file cannot be read or contains an empty entry (an
        empty file, a trailing comma, or two adjacent commas).
    :raises SystemExit: with status 1 when ``csv_interface.read_csv`` returns
        a falsy value.
    """
    # read ensembles into ensemble lookup {'Ensemble_N' : [query_name_list]}
    ensemble_lookup = {}
    for ensemble_path in options.ensemble_paths:
        try:
            # the context manager closes the handle even if read() fails
            with open(ensemble_path, 'r') as ensemble_file:
                contents = ensemble_file.read()
        except IOError:
            print("\nUnable to open ensemble_list: {l}\n".format(l=ensemble_path))
            return 1

        ensemble_queries = [query.strip() for query in contents.split(',')]

        # str.split always yields at least one element, so an empty file
        # shows up as [''] and is caught by the '' membership test
        if not ensemble_queries or '' in ensemble_queries:
            print("\n{l} is empty\n".format(l=ensemble_path))
            return 1

        # NOTE: two paths sharing a basename overwrite each other here
        ensemble_lookup[os.path.basename(ensemble_path)] = ensemble_queries

    # collect every distinct query name, keeping first-seen order; the set
    # makes the membership test O(1) and also drops repeats that occur
    # inside a single ensemble file
    uniq = []
    seen = set()
    for ensemble_queries in ensemble_lookup.values():
        for query in ensemble_queries:
            if query not in seen:
                seen.add(query)
                uniq.append(query)

    # read input csv
    inputpath = os.path.abspath(options.inputpath)
    print('')
    print(" Reading input file ...")
    print('')
    molecules = csv_interface.read_csv(inputpath, options, uniq)
    if not molecules:
        # name the offending file instead of printing a raw '%s' placeholder
        print("\n '{flag}' was unable to be parsed\n".format(
            flag=os.path.basename(options.inputpath)))
        sys.exit(1)

    return molecules, ensemble_lookup
def run(itf):
    """
    Run optimize functions.

    Parses the user input, reads the input csv, calls ``optimizor`` once for
    every ensemble size from 1 up to ``options.ensemble_size`` and writes the
    accumulated results to ``auc_histogram.csv`` and ``ef_histogram.csv`` in
    the current working directory, one ``%f``-formatted value per line.

    :param itf: user input handed to ``SplitInput``; a falsy value aborts
        the run.
    :return: ``1`` when ``itf`` is falsy, otherwise ``None``.
    :raises SystemExit: with status 1 when ``csv_interface.read_csv`` returns
        a falsy value.
    """
    if not itf:
        return 1

    # access user input
    options = SplitInput(itf)

    # read input
    inputpath = os.path.abspath(options.inputpath)
    print(" Reading input file ...")
    molecules = csv_interface.read_csv(inputpath, options)
    if not molecules:
        print("\n '{flag}' was unable to be parsed\n".format(
            flag=os.path.basename(options.inputpath)))
        sys.exit(1)

    # the sort order is fixed to ascending rather than derived from the data
    sort_order = 'asc'
    ensemble_size = options.ensemble_size

    # loop over all ensemble sizes, pooling the values returned for each
    auc_list = []
    ef_list = []
    for size in range(1, ensemble_size + 1):
        auc, ef = optimizor(molecules, sort_order, size, options)
        auc_list += auc
        ef_list += ef

    # write the pooled values out for subsequent histogram construction;
    # the context manager closes each file even if formatting a value fails
    outputs = (('auc_histogram.csv', auc_list), ('ef_histogram.csv', ef_list))
    for filename, values in outputs:
        with open(filename, 'w') as handle:
            handle.writelines('%f\n' % value for value in values)
def run(itf):
    """
    Run optimize functions.

    Parses the user input, reads the input csv, calls ``optimizor`` once for
    every ensemble size from 1 up to ``options.ensemble_size`` and writes the
    accumulated results to ``auc_histogram.csv`` and ``ef_histogram.csv`` in
    the current working directory, one ``%f``-formatted value per line.

    :param itf: user input handed to ``SplitInput``; a falsy value aborts
        the run.
    :return: ``1`` when ``itf`` is falsy, otherwise ``None``.
    :raises SystemExit: with status 1 when ``csv_interface.read_csv`` returns
        a falsy value.
    """
    if not itf:
        return 1

    # access user input
    options = SplitInput(itf)

    # read input
    inputpath = os.path.abspath(options.inputpath)
    print(" Reading input file ...")
    molecules = csv_interface.read_csv(inputpath, options)
    if not molecules:
        print("\n '{flag}' was unable to be parsed\n".format(
            flag=os.path.basename(options.inputpath)))
        sys.exit(1)

    # the sort order is fixed to ascending rather than derived from the data
    sort_order = 'asc'
    ensemble_size = options.ensemble_size

    # loop over all ensemble sizes, pooling the values returned for each
    auc_list = []
    ef_list = []
    for size in range(1, ensemble_size + 1):
        auc, ef = optimizor(molecules, sort_order, size, options)
        auc_list += auc
        ef_list += ef

    # write the pooled values out for subsequent histogram construction;
    # the context manager closes each file even if formatting a value fails
    outputs = (('auc_histogram.csv', auc_list), ('ef_histogram.csv', ef_list))
    for filename, values in outputs:
        with open(filename, 'w') as handle:
            handle.writelines('%f\n' % value for value in values)
def run(itf):
    """
    Run approximate functions.

    Parses the user input, reads the input csv and hands the parsed
    molecules to ``approximator``.

    :param itf: user input handed to ``SplitInput``; a falsy value aborts
        the run.
    :return: ``1`` when ``itf`` is falsy, otherwise ``None``.
    :raises SystemExit: with status 1 when ``csv_interface.read_csv`` returns
        a falsy value.
    """
    if not itf:
        return 1

    # access user input
    options = SplitInput(itf)

    # read input
    print(" Reading input file ...")
    molecules = csv_interface.read_csv(os.path.abspath(options.inputpath), options)
    if not molecules:
        # closing quote added so the file name is delimited on both sides
        print("\n '{f}' was unable to be parsed\n".format(
            f=os.path.basename(options.inputpath)))
        sys.exit(1)

    approximator(molecules, options)
def ReadFiles(options):
    """
    Read the ensemble list files and the input csv named in ``options``.

    Each path in ``options.ensemble_paths`` is read as a comma-separated list
    of query names (surrounding whitespace is stripped) and stored in a lookup
    keyed by the file's basename. The distinct query names across all
    ensembles are then handed to ``csv_interface.read_csv`` together with the
    absolute form of ``options.inputpath``.

    :param options: object exposing ``ensemble_paths`` (iterable of paths) and
        ``inputpath`` (path to the input csv); it is also passed through to
        ``csv_interface.read_csv`` unchanged.
    :return: ``(molecules, ensemble_lookup)`` on success, or the integer ``1``
        if an ensemble file cannot be read or contains an empty entry (an
        empty file, a trailing comma, or two adjacent commas).
    :raises SystemExit: with status 1 when ``csv_interface.read_csv`` returns
        a falsy value.
    """
    # read ensembles into ensemble lookup {'Ensemble_N' : [query_name_list]}
    ensemble_lookup = {}
    for ensemble_path in options.ensemble_paths:
        try:
            # the context manager closes the handle even if read() fails
            with open(ensemble_path, 'r') as ensemble_file:
                contents = ensemble_file.read()
        except IOError:
            print("\nUnable to open ensemble_list: {l}\n".format(l=ensemble_path))
            return 1

        ensemble_queries = [query.strip() for query in contents.split(',')]

        # str.split always yields at least one element, so an empty file
        # shows up as [''] and is caught by the '' membership test
        if not ensemble_queries or '' in ensemble_queries:
            print("\n{l} is empty\n".format(l=ensemble_path))
            return 1

        # NOTE: two paths sharing a basename overwrite each other here
        ensemble_lookup[os.path.basename(ensemble_path)] = ensemble_queries

    # collect every distinct query name, keeping first-seen order; the set
    # makes the membership test O(1) and also drops repeats that occur
    # inside a single ensemble file
    uniq = []
    seen = set()
    for ensemble_queries in ensemble_lookup.values():
        for query in ensemble_queries:
            if query not in seen:
                seen.add(query)
                uniq.append(query)

    # read input csv
    inputpath = os.path.abspath(options.inputpath)
    print('')
    print(" Reading input file ...")
    print('')
    molecules = csv_interface.read_csv(inputpath, options, uniq)
    if not molecules:
        # name the offending file instead of printing a raw '%s' placeholder
        print("\n '{flag}' was unable to be parsed\n".format(
            flag=os.path.basename(options.inputpath)))
        sys.exit(1)

    return molecules, ensemble_lookup