def tolerance_pass(line, headlist, yaml_commands):
    """Return True if any configured tolerance z-score on ``line`` exceeds the cutoff.

    Args:
        line: one tab-separated record; a trailing newline is stripped.
        headlist: column headers, positionally aligned with the fields of
            ``line`` (looked up with ``headlist.index``).
        yaml_commands: parsed configuration; the tiering section supplies the
            z-score column list and the cutoff, and the dataset defaults
            supply the multimatch delimiter.

    Returns:
        True as soon as a non-empty z-score value strictly greater than the
        cutoff is found, otherwise False.

    Raises:
        ValueError: if a non-empty z-score value (or the cutoff) cannot be
            parsed as a float. ``headlist.index`` also raises ValueError for
            an unknown header when ``headlist`` is a list.
    """
    fields = line.rstrip("\n").split("\t")
    tiering_cfg = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering]
    zscore_headers = yaml_utils.convertColumns(
        tiering_cfg[yaml_keys.kTToleranceZScoreCols], yaml_commands)
    cutoff = tiering_cfg[yaml_keys.kTToleranceZScoreCutoff]

    for header in zscore_headers:
        cell = fields[headlist.index(header)]
        # A single cell may hold several values joined by the dataset
        # multimatch delimiter.
        delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
            yaml_keys.kDMultimatchDelimiter]
        for raw_value in cell.split(delimiter):
            if raw_value == '':
                continue  # empty entries carry no z-score
            if float(raw_value) > float(cutoff):
                return True
    return False
def is_rare(templine, freq_thresh, bp_indexlist, yaml_commands):
    """Return 1 unless a frequency in the selected columns exceeds the threshold.

    Args:
        templine: one tab-separated record, with or without a trailing
            newline.
        freq_thresh: frequency threshold; converted with ``float``.
        bp_indexlist: indices of the columns in ``templine`` to inspect.
        yaml_commands: parsed configuration; the dataset defaults supply the
            multimatch delimiter used inside a cell.

    Returns:
        0 if any value in the selected columns is strictly greater than
        ``freq_thresh``; 1 otherwise (including when every selected cell is
        empty).

    Raises:
        ValueError: if a non-empty cell contains a value that is not a float.
            This is deliberate, so every selected cell is scanned even after
            a value above the threshold has been seen.
        IndexError: if an index in ``bp_indexlist`` is past the last column.
    """
    multimatch_delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
        yaml_keys.kDMultimatchDelimiter]
    # Strip the line terminator first (as tolerance_pass does); otherwise an
    # empty last column arrives as "\n", passes the non-empty check below and
    # makes float() raise.
    templinelist = templine.rstrip("\n").split("\t")

    rare_flag = 1  # rare until proven otherwise
    for i in bp_indexlist:
        cell = templinelist[i]
        if cell == "":
            continue
        # A cell may hold several frequencies joined by the multimatch delimiter.
        for elt in cell.split(multimatch_delimiter):
            if float(elt) > float(freq_thresh):
                rare_flag = 0  # not rare
    return rare_flag
def tolerance_pass(line, headlist, yaml_commands):
    """Check a record's tolerance z-score columns against the configured cutoff.

    The columns to inspect and the cutoff come from the tiering section of
    ``yaml_commands``; each inspected cell is split on the dataset multimatch
    delimiter and empty entries are ignored.

    Args:
        line: tab-separated record (a trailing newline is removed).
        headlist: headers aligned with the record's columns.
        yaml_commands: parsed configuration.

    Returns:
        True if at least one non-empty z-score is strictly greater than the
        cutoff, False otherwise.
    """
    columns = line.rstrip("\n").split("\t")
    tiering = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering]
    headers = yaml_utils.convertColumns(
        tiering[yaml_keys.kTToleranceZScoreCols], yaml_commands)
    cutoff = tiering[yaml_keys.kTToleranceZScoreCutoff]

    def _column_exceeds(header):
        # Fetch the cell before the delimiter so lookups happen in the same
        # order as a straight-line implementation.
        cell = columns[headlist.index(header)]
        delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
            yaml_keys.kDMultimatchDelimiter]
        return any(
            value != '' and float(value) > float(cutoff)
            for value in cell.split(delimiter))

    # any() short-circuits, so scanning stops at the first value over the cutoff.
    return any(_column_exceeds(header) for header in headers)
def is_rare(templine, freq_thresh, bp_indexlist, yaml_commands):
    """Flag a record as rare (1) unless a selected frequency is above the threshold.

    Args:
        templine: one tab-separated record, with or without a trailing
            newline.
        freq_thresh: frequency threshold; converted with ``float``.
        bp_indexlist: indices of the columns in ``templine`` to inspect.
        yaml_commands: parsed configuration; the dataset defaults supply the
            multimatch delimiter used inside a cell.

    Returns:
        1 when no inspected value is strictly greater than ``freq_thresh``
        (empty cells are ignored), 0 otherwise.

    Raises:
        ValueError: if a non-empty cell holds something that is not a float.
            All selected cells are scanned, even after the flag has dropped
            to 0, so malformed values are still reported.
        IndexError: if an index in ``bp_indexlist`` is past the last column.
    """
    multimatch_delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
        yaml_keys.kDMultimatchDelimiter]
    # Remove the line terminator before splitting, matching tolerance_pass.
    # Without this, an empty final column is the string "\n", which is not
    # treated as empty and crashes float().
    templinelist = templine.rstrip("\n").split("\t")

    rare_flag = 1  # rare
    for i in bp_indexlist:
        if templinelist[i] == "":
            continue
        templinelistElts = templinelist[i].split(multimatch_delimiter)
        for elt in templinelistElts:
            if float(elt) > float(freq_thresh):
                rare_flag = 0  # not rare
    return rare_flag
def is_rare(templine, freq_thresh, bp_indexlist, yaml_commands):
    """Return 1 only if some selected frequency is below the threshold.

    A record is kept as rare only when there is positive evidence: at least
    one value in the selected columns strictly less than ``freq_thresh``.

    Args:
        templine: one tab-separated record, with or without a trailing
            newline.
        freq_thresh: frequency threshold; converted with ``float``.
        bp_indexlist: indices of the columns in ``templine`` to inspect.
        yaml_commands: parsed configuration; the dataset defaults supply the
            multimatch delimiter used inside a cell.

    Returns:
        1 if any inspected value is strictly less than ``freq_thresh``;
        0 otherwise (including when every selected cell is empty).

    Raises:
        ValueError: if a non-empty cell contains an element that is not a
            float.
        IndexError: if an index in ``bp_indexlist`` is past the last column.
    """
    logger = logging.getLogger(__name__)
    logger.debug('bp_indexlist: %s', bp_indexlist)
    multimatch_delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
        yaml_keys.kDMultimatchDelimiter]
    # Strip the line terminator so an empty last column is seen as "" rather
    # than "\n" (which would reach float() and raise).
    templinelist = templine.rstrip("\n").split("\t")

    rare_flag = 0  # not rare until a value below the threshold is found
    for i in bp_indexlist:
        cell = templinelist[i]
        if cell == "":
            continue
        # Log the raw cell: it may hold several delimiter-joined values, so
        # converting the whole cell with float() would raise ValueError even
        # when debug output is disabled.
        logger.debug('templinelist[%s]: %s', i, cell)
        logger.debug('freq thresh: %s', float(freq_thresh))
        for elt in cell.split(multimatch_delimiter):
            if float(elt) < float(freq_thresh):
                rare_flag = 1  # rare
    return rare_flag
# Register the command-line options on the already-created parser.
parser.add_argument('--tab_file', help='.tab file to be expanded')
parser.add_argument(
    '--out_suffix',
    default='.tsv',
    help='suffix for output file (stored in same location as input file), e.g. ".tsv"')
parser.add_argument('--config_modules',
                    help='path to modules configuration file (YAML)')
parser.add_argument('--config_datasets',
                    help='path to datasets configuration file (YAML)')
args = parser.parse_args()

# Load the datasets and modules configuration in one call.
config_yaml = yaml_utils.parse_yaml_input_files(args.config_datasets,
                                                args.config_modules)
modules_yaml = config_yaml[yaml_keys.kModules]

# Both BED delimiters are read from the annotation section of the modules config.
annotation_yaml = modules_yaml[yaml_keys.kAnnotation]
bed_delimiter = annotation_yaml[yaml_keys.kABedInternalDelimiter]
bed_multimatch_internal_delimiter = annotation_yaml[
    yaml_keys.kABedMultimatchInternalDelimiter]

# For now only the default multimatch delimiter is used. Later, check whether
# a given dataset defines its own delimiter and use that instead.
dataset_defaults = yaml_utils.get_dataset_defaults(config_yaml)
dataset_multimatch_delimiter = dataset_defaults[yaml_keys.kDMultimatchDelimiter]

expandBED(args.tab_file, bed_multimatch_internal_delimiter, bed_delimiter,
          dataset_multimatch_delimiter, args.out_suffix)
flc = fl.split(bed_delimiter) lineContents.extend(flc) out_file.write("\t".join(lineContents) + "\n") return out_file_path ##### MAIN CODE #### if __name__ == '__main__': # parse commandline args. parser = argparse.ArgumentParser(description = 'IntersectBed Expander') parser.add_argument('--tab_file', help='.tab file to be expanded') parser.add_argument('--out_suffix', default='.tsv', help='suffix for output file (stored in same location as input file), e.g. ".tsv"') parser.add_argument('--config_modules', help='path to modules configuration file (YAML)') parser.add_argument('--config_datasets', help='path to datasets configuration file (YAML)') args = parser.parse_args() config_yaml = yaml_utils.parse_yaml_input_files(args.config_datasets, args.config_modules) modules_yaml = config_yaml[yaml_keys.kModules] # modules_yaml = yaml_utils.parse_yaml(args.config_modules) # datasets_yaml = yaml_utils.parse_yaml(args.config_datasets) bed_delimiter = modules_yaml[yaml_keys.kAnnotation][yaml_keys.kABedInternalDelimiter] bed_multimatch_internal_delimiter = modules_yaml[yaml_keys.kAnnotation][yaml_keys.kABedMultimatchInternalDelimiter] # for now, just using the default delimiter. Later, check if there's a specific delimiter for a given dataset and use that instead. dataset_multimatch_delimiter = yaml_utils.get_dataset_defaults(config_yaml)[yaml_keys.kDMultimatchDelimiter] expandBED(args.tab_file, bed_multimatch_internal_delimiter, bed_delimiter, dataset_multimatch_delimiter, args.out_suffix)