import json  # module-level import assumed; needed by json.loads below


@classmethod
def load(cls, fname=None):
    '''Loads a config file from the specified file name, `fname`.

    If `fname` is not specified, uses `DEFAULT_PATH`.

    Args:
        fname (str): The file name.

    Raises:
        json.JSONDecodeError: If the config file is malformed.

    Returns:
        The `Config` object found in `fname`.
    '''
    fname = fname or DEFAULT_PATH
    with open(fname) as f:
        file_contents = f.read()
    dbg('got', file_contents)
    # cls is Config (or a subclass) when called as Config.load(...)
    return cls(json.loads(file_contents))
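# All of the snippets in this section call a `dbg` helper that is defined
# elsewhere in the repo. A minimal stand-in consistent with how it is
# invoked here (positional arguments, printed for debugging) could be the
# following; this is an assumption, not the repo's actual helper:
def dbg(*args):
    print('[dbg]', *args)

# Hedged usage sketch for the loader above; 'local_config.json' is a
# hypothetical file name:
cfg = Config.load()                     # falls back to DEFAULT_PATH
cfg = Config.load('local_config.json')  # explicit file name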
"-p", "--path", required=True, help="Path to the directory containing fasta and binding.txt files") ap.add_argument("-a", "--allelle", required=True, help="Name of allelle") ap.add_argument("-l", "--alignment-file-name", required=True, help="Name of alignment file") ap.add_argument("-b", "--binding-file-name", required=True, help="Name of binding file") args = vars(ap.parse_args()) dbg('args["path"]') dbg(args["path"]) dbg('args["allelle"]') dbg(args["allelle"]) data_path_align = args["path"] + args["alignment_file_name"] data_path_bind = args["path"] + args["binding_file_name"] dbg('data_path_align') dbg(data_path_align) dbg('data_path_bind') dbg(data_path_bind) seq_array = pfun.read_seq_data(data_path=data_path_align)
"-p", "--path", required=True, help="Path to the directory containing fasta and binding.txt files") ap.add_argument("-a", "--allelle", required=True, help="Name of allelle") ap.add_argument("-l", "--alignment-file-name", required=True, help="Name of alignment file") ap.add_argument("-b", "--binding-file-name", required=True, help="Name of binding file") args = vars(ap.parse_args()) dbg('args["path"]') dbg(args["path"]) dbg('args["allelle"]') dbg(args["allelle"]) data_path_align = args["path"] + args["alignment_file_name"] data_path_bind = args["path"] + args["binding_file_name"] model_prefix_str = 'TEST_catboost_model.allelle_._' + args["allelle"] dbg('data_path_align') dbg(data_path_align) dbg('data_path_bind') dbg(data_path_bind)
ap = argparse.ArgumentParser()
ap.add_argument(
    "-pi", "--path-input", required=True,
    help="Path to the directory containing face images and corresponding gan input vectors")
ap.add_argument(
    "-po", "--path-output", required=True,
    help="Path to the directory where numpy array files will be saved")
args = vars(ap.parse_args())

dbg('args["path_input"]')
dbg(args["path_input"])
fpi = args["path_input"]

dbg('args["path_output"]')
dbg(args["path_output"])
fpo = args["path_output"]

# walk through the input path and collect image files
path_img = []
for (dirpath, dirnames, filenames) in os.walk(fpi):
    for x in filenames:
        tmp_ext = os.path.basename(x).split('.')[-1]
        tmp_ext_lower = tmp_ext.lower()
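        # Hedged sketch of the likely continuation (the original snippet
        # ends just above): keep files whose extension looks like an
        # image. The whitelist here is an assumption, not the original's.
        if tmp_ext_lower in ('jpg', 'jpeg', 'png', 'bmp'):
            path_img.append(os.path.join(dirpath, x))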
#fig.savefig(var_name+'.png', bbox_inches='tight')

# define empty lists for per-folder and aggregated scores
auc_tmp = []
ppvn_tmp = []
auc_comp_list = []
ppvn_comp_list = []
folders = []

for (root, dirs, files) in os.walk(
        '../Reda-BG/MHC-I/generation-1/competition/MHCflurry/HPV/'):
    dbg(root)
    dbg(dirs)
    dbg(files)
    dbg('--------------------------------')
    break  # only the top level is needed; `root` and `dirs` keep its contents

for item in dirs:
    dbg('folder name')
    dbg(item)
    # join against `root` so the path works from any cwd (the original
    # used './' + item + '/', which assumes the script runs inside the
    # walked directory)
    tmp_folder = os.path.join(root, item)
    dbg('tmp_folder')
    dbg(tmp_folder)
    for (subroot, subdirs, subfiles) in os.walk(tmp_folder):
        dbg('subfiles')
        dbg(subfiles)
        if ('auc.txt' in subfiles) and ('ppvn.txt' in subfiles):
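            # Hedged sketch of the likely continuation (the original
            # breaks off at the `if`), assuming each auc.txt / ppvn.txt
            # holds a single numeric score; that format is a guess.
            folders.append(item)
            with open(os.path.join(subroot, 'auc.txt')) as f:
                auc_tmp.append(float(f.read().strip()))
            with open(os.path.join(subroot, 'ppvn.txt')) as f:
                ppvn_tmp.append(float(f.read().strip()))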
import requests
import pandas as pd
from io import StringIO


def search_ct_gov(search_term='sars'):
    # max number of results clinicaltrials.gov returns per get request
    max_res_pull = 100

    # Preprocess search string:
    # -- replace spaces with '+' and '&' with its URL escape '%26'
    search_term = search_term.replace(" ", "+")
    search_term = search_term.replace("&", "%26")

    # Build search URL
    # -- example: 'https://clinicaltrials.gov/api/query/study_fields?fmt=CSV&expr=heart+attack&fields=NCTId,Condition,InterventionName'
    # Use one field list everywhere so the header row and the paged data
    # rows have the same columns (the original requested five fields here
    # but only three inside the loop, which produced ragged CSV rows).
    fields = 'NCTId,Condition,InterventionName,InterventionType,OfficialTitle'
    search_url = ('https://clinicaltrials.gov/api/query/study_fields?fmt=CSV'
                  '&expr=' + search_term + '&fields=' + fields)

    # Execute get request
    response = requests.get(search_url)
    tmp_search_res = response.text
    dbg(response.text)

    # Split on the newline delimiter
    res_list = tmp_search_res.splitlines()
    dbg(res_list)

    # This code is written for clinicaltrials.gov API version "1.01.02";
    # ensure the implemented API version has not changed.
    api_ver_str = res_list[0].replace('"', '')
    api_ver_num_str = api_ver_str.split(':')[1].strip()
    dbg(api_ver_num_str)
    api_ver_check = ("1.01.02" == api_ver_num_str)
    dbg(api_ver_check)
    if not api_ver_check:
        raise RuntimeError('Unexpected clinicaltrials.gov API version: '
                           + api_ver_num_str)

    # Extract the total number of trials available for the search query
    num_trials_res_str = res_list[4].replace('"', '')
    num_trials_res_value = int(num_trials_res_str.split(':')[1].strip())
    dbg(num_trials_res_value)

    # Calculate the number of get requests needed to grab all results
    # -- max_res_pull is the per-request maximum set by clinicaltrials.gov
    total_num_pulls = (num_trials_res_value // max_res_pull) + 1
    dbg(total_num_pulls)

    # Line 10 of the CSV response is the header row
    all_search_results_list = [res_list[10].replace('"', '').split(',')]
    all_search_results_str = res_list[10] + '\n'
    dbg(all_search_results_list)
    dbg(all_search_results_str)

    for pull_idx in range(total_num_pulls):
        min_rank = pull_idx * max_res_pull + 1
        # Ranks are inclusive; the original used
        # (pull_idx + 1) * max_res_pull + 1, which overlapped each page
        # by one trial and duplicated the boundary rows.
        max_rank = (pull_idx + 1) * max_res_pull
        search_url = ('https://clinicaltrials.gov/api/query/study_fields?fmt=CSV'
                      '&expr=' + search_term + '&fields=' + fields +
                      '&min_rnk=' + str(min_rank) + '&max_rnk=' + str(max_rank))
        response = requests.get(search_url)
        tmp_search_res = response.text

        # Data rows start at line 11 of each paged response
        res_list = tmp_search_res.splitlines()
        res_list_comma_split = [x.split(',') for x in res_list]
        dbg(res_list_comma_split[11:])
        all_search_results_list.extend(res_list_comma_split[11:])
        all_search_results_str += '\n'.join(res_list[11:]) + '\n'

    dbg(all_search_results_list)

    # Parse the accumulated CSV text into a DataFrame
    df_search_results = pd.read_csv(StringIO(all_search_results_str))
    dbg(df_search_results.head())
    dbg(df_search_results.tail())
    dbg(df_search_results.columns)
    return df_search_results
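# Hedged usage sketch for search_ct_gov; the query string is just an
# example, and network access to clinicaltrials.gov is assumed.
df = search_ct_gov('heart attack')
dbg(df['NCTId'].nunique())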
def load_csv_ct_data(trial_file_list):
    tot_df = pd.DataFrame()
    for count, fn in enumerate(trial_file_list):
        dbg('fn')
        dbg(fn)
        # 'ANSI' is not a valid Python codec name; cp1252 is the usual
        # Windows-ANSI equivalent
        tmp_df = pd.read_csv(fn, encoding='cp1252')
        # label each file's rows with a class index for later training
        tmp_df['class'] = count
        dbg(tmp_df.head())
        tot_df = pd.concat([tot_df, tmp_df.head(n=50)],
                           ignore_index=True, sort=False)
        #tot_df = pd.merge(tot_df, tmp_df, on='NCT Number')
    dbg(tot_df)
    dbg(tot_df.head())
    dbg(tot_df.tail())
    return tot_df
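# Hedged usage sketch for load_csv_ct_data; the CSV file names below are
# hypothetical exports, one file per class.
trial_files = ['sars_trials.csv', 'influenza_trials.csv']
labeled_df = load_csv_ct_data(trial_files)
dbg(labeled_df['class'].value_counts())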