Example #1
def load(cls, fname=None):
  '''Loads a config file from the specified file name, `fname`. If `fname` is
  not specified, uses `DEFAULT_PATH`.

  Args:
    fname (str): The file name.

  Raises:
    ParseError: If the config file was malformed.

  Returns:
    The `Config` object found in `fname`.
  '''
  fname = fname or DEFAULT_PATH

  with open(fname) as f:
    file_contents = f.read()
  dbg('got', file_contents)

  return Config(json.loads(file_contents))
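
A minimal usage sketch, assuming the method above is bound as a classmethod on `Config`, and that `Config`, `DEFAULT_PATH`, `dbg`, and `json` come from the surrounding module (not shown in the excerpt):

# Hypothetical usage
cfg = Config.load()              # falls back to DEFAULT_PATH
cfg = Config.load('app.json')    # explicit file name (placeholder path)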
        "-p",
        "--path",
        required=True,
        help="Path to the directory containing fasta and binding.txt files")
    ap.add_argument("-a", "--allelle", required=True, help="Name of allelle")
    ap.add_argument("-l",
                    "--alignment-file-name",
                    required=True,
                    help="Name of alignment file")
    ap.add_argument("-b",
                    "--binding-file-name",
                    required=True,
                    help="Name of binding file")
    args = vars(ap.parse_args())

    dbg('args["path"]')
    dbg(args["path"])

    dbg('args["allelle"]')
    dbg(args["allelle"])

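    # Note: assumes args["path"] ends with a path separator; os.path.join
    # would be more robust here.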
    data_path_align = args["path"] + args["alignment_file_name"]
    data_path_bind = args["path"] + args["binding_file_name"]

    dbg('data_path_align')
    dbg(data_path_align)

    dbg('data_path_bind')
    dbg(data_path_bind)

    seq_array = pfun.read_seq_data(data_path=data_path_align)
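
For orientation, a hypothetical invocation of the script this fragment belongs to (the flags come from the `add_argument` calls above; the script and file names are placeholders):

python predict_binding.py -p ./data/ -a HLA-A0201 -l alignment.fasta -b binding.txt

Note the trailing slash on `-p`: the data paths are built by plain string concatenation.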
        "-p",
        "--path",
        required=True,
        help="Path to the directory containing fasta and binding.txt files")
    ap.add_argument("-a", "--allelle", required=True, help="Name of allelle")
    ap.add_argument("-l",
                    "--alignment-file-name",
                    required=True,
                    help="Name of alignment file")
    ap.add_argument("-b",
                    "--binding-file-name",
                    required=True,
                    help="Name of binding file")
    args = vars(ap.parse_args())

    dbg('args["path"]')
    dbg(args["path"])

    dbg('args["allelle"]')
    dbg(args["allelle"])

    data_path_align = args["path"] + args["alignment_file_name"]
    data_path_bind = args["path"] + args["binding_file_name"]
    model_prefix_str = 'TEST_catboost_model.allelle_._' + args["allelle"]

    dbg('data_path_align')
    dbg(data_path_align)

    dbg('data_path_bind')
    dbg(data_path_bind)
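
With `--allelle HLA-A0201` (a placeholder allele name), the model prefix built above evaluates to:

>>> 'TEST_catboost_model.allelle_._' + 'HLA-A0201'
'TEST_catboost_model.allelle_._HLA-A0201'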
Example #4

ap = argparse.ArgumentParser()
ap.add_argument(
    "-pi",
    "--path-input",
    required=True,
    help="Path to the directory containing face images and corresponding "
    "gan input vectors")
ap.add_argument(
    "-po",
    "--path-output",
    required=True,
    help="Path to the directory where numpy array files will be saved")
args = vars(ap.parse_args())

dbg('args["path_input"]')
dbg(args["path_input"])

fpi = args["path_input"]

dbg('args["path_output"]')
dbg(args["path_output"])

fpo = args["path_output"]
""" walk though path and get images"""

path_img = []
for (dirpath, dirnames, filenames) in os.walk(fpi):
    for x in filenames:
        tmp_ext = os.path.basename(x).split('.')[-1]
        tmp_ext_lower = tmp_ext.lower()
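
The extension handling above can also be written with `os.path.splitext`; a small equivalent sketch (the accepted extensions are an assumption, since the fragment is cut off before the actual check):

import os

def is_image_file(name, exts=('png', 'jpg', 'jpeg')):  # hypothetical extension list
    # splitext('img.PNG') -> ('img', '.PNG'); strip the dot and lowercase
    return os.path.splitext(name)[1].lstrip('.').lower() in exts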
Example #5


# define empty accumulator lists
auc_tmp = []
ppvn_tmp = []

auc_comp_list = []
ppvn_comp_list = []

folders = []

for (root, dirs, files) in os.walk(
        '../Reda-BG/MHC-I/generation-1/competition/MHCflurry/HPV/'):
    dbg(root)
    dbg(dirs)
    dbg(files)
    dbg('--------------------------------')
    break
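
# The break above keeps only the top-level (root, dirs, files) triple; an
# equivalent one-liner (a sketch, not from the source) would be:
#   root, dirs, files = next(os.walk(path))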

for item in dirs:
    dbg('folder name')
    dbg(item)
    tmp_folder = './' + item + '/'
    dbg('tmp_folder')
    dbg(tmp_folder)
    for (subroot, subdirs, subfiles) in os.walk(tmp_folder):
        dbg('subfiles')
        dbg(subfiles)
        if ('auc.txt' in subfiles) and ('ppvn.txt' in subfiles):
Example #6
# Imports assumed by this snippet (not shown in the excerpt); dbg() is the
# author's logging helper, defined elsewhere.
import requests
import pandas as pd
from io import StringIO


def search_ct_gov(search_term='sars'):

    max_res_pull = 100
    # preprocess search string
    # -- Replace space with '+'
    search_term = search_term.replace(" ", "+")
    # -- Replace & with '%26'
    search_term = search_term.replace("&", "%26")

    # Build Search URL
    # -- example: search_url = 'https://clinicaltrials.gov/api/query/study_fields?fmt=CSV&expr=heart+attack&fields=NCTId,Condition,InterventionName'
    search_url = 'https://clinicaltrials.gov/api/query/study_fields?fmt=CSV&expr=' + search_term + '&fields=NCTId,Condition,InterventionName,InterventionType,OfficialTitle'

    # Execute get request
    response = requests.get(search_url)
    tmp_search_res = response.text
    dbg(response.text)

    # Splitting on newline delimiter
    res_list = tmp_search_res.splitlines()
    dbg(res_list)

    # This code is written for clinicaltrials.gov API version "1.01.02"
    # Ensure that the implemented API version has not changed
    api_ver_str = res_list[0].replace('\"', '')
    api_ver_num_str = api_ver_str.split(':')[1].strip()

    dbg(api_ver_num_str)

    api_ver_check = ("1.01.02" == api_ver_num_str)
    dbg(api_ver_check)

    # Extract total number of trials available for search query
    num_trials_res_str = res_list[4].replace('\"', '')
    num_trials_res_value = int(num_trials_res_str.split(':')[1].strip())

    dbg(num_trials_res_value)

    # Calculate number of get requests needed to grab all results
    # -- max_res_pull is the max number of results that can be requested in each get request
    # -- this limit is set by clinicaltrials.gov
    total_num_pulls = (num_trials_res_value // max_res_pull) + 1
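    # e.g. 243 results -> 243 // 100 + 1 = 3 pulls; an exact multiple such as
    # 200 still yields 3 pulls, the last of which simply returns no rows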

    dbg(total_num_pulls)

    all_search_results_list = []
    all_search_results_str = ''

    # Keep the CSV header row (row 10 of the response) once, up front
    all_search_results_list.append(res_list[10].replace('\"', '').split(','))
    all_search_results_str = res_list[10] + '\n'

    dbg(all_search_results_list)
    dbg(all_search_results_str)

    #&min_rnk=1198&max_rnk=1249&fmt=xml

    for pull_idx in range(total_num_pulls):
        # Ranks are 1-based and inclusive; stepping min/max by max_res_pull
        # keeps consecutive pulls from overlapping
        min_rank = pull_idx * max_res_pull + 1
        max_rank = (pull_idx + 1) * max_res_pull

        # Request the same field list as the initial query so each data row
        # lines up with the header captured above
        search_url = (
            'https://clinicaltrials.gov/api/query/study_fields?fmt=CSV&expr=' +
            search_term +
            '&fields=NCTId,Condition,InterventionName,InterventionType,OfficialTitle' +
            '&min_rnk=' + str(min_rank) + '&max_rnk=' + str(max_rank))

        response = requests.get(search_url)
        #tmp_search_res = response.text.replace('\"', '')
        tmp_search_res = response.text
        # Splitting on newline delimiter
        res_list = tmp_search_res.splitlines()
        res_list_comma_split = [x.split(',') for x in res_list]
        dbg(res_list_comma_split[11:])

        all_search_results_list.extend(res_list_comma_split[11:])
        all_search_results_str += '\n'.join(res_list[11:]) + '\n'

    dbg(all_search_results_list)

    all_search_results_str_IO = StringIO(all_search_results_str)
    df_search_results = pd.read_csv(all_search_results_str_IO)
    dbg(df_search_results.head())
    dbg(df_search_results.tail())
    dbg(df_search_results.columns)

    return df_search_results
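
A hypothetical call, with a placeholder search term (requires network access to clinicaltrials.gov):

df = search_ct_gov('heart attack')   # spaces are replaced with '+' internally
print(df.shape)
print(df.columns)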
Example #7
import pandas as pd  # assumed import; dbg() is the author's logging helper


def load_csv_ct_data(trial_file_list):
    tot_df = pd.DataFrame()
    for count, fn in enumerate(trial_file_list):
        dbg('fn')
        dbg(fn)

        # 'ANSI' is not a codec name Python recognizes; cp1252 is the usual
        # Windows-ANSI equivalent (an assumption about the files' encoding)
        tmp_df = pd.read_csv(fn, encoding='cp1252')
        tmp_df['class'] = count
        dbg(tmp_df.head())

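        # Append only the first 50 trials from each file to the running total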
        tot_df = pd.concat([tot_df, tmp_df.head(n=50)],
                           ignore_index=True,
                           sort=False)
        #tot_df = pd.merge(tot_df, tmp_df, on='NCT Number')

    dbg(tot_df)
    dbg(tot_df.head())
    dbg(tot_df.tail())

    return tot_df
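
A hypothetical call; the CSV file names are placeholders, and each file's rows get its position in the list as their `class` label:

trial_files = ['sars_trials.csv', 'mers_trials.csv']
df = load_csv_ct_data(trial_files)
print(df['class'].value_counts())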