Ejemplo n.º 1
0
def tolerance_pass(line, headlist, yaml_commands):
    """Report whether any configured z-score column exceeds the cutoff.

    Args:
        line: One tab-delimited record; a trailing newline is stripped
            before splitting.
        headlist: Column headers, positionally aligned with the fields of
            ``line`` (each configured header is located via ``.index``).
        yaml_commands: Parsed configuration holding the tiering module
            settings and the dataset defaults.

    Returns:
        True as soon as one non-empty value in a tolerance z-score column
        is strictly greater than the configured cutoff, otherwise False.
    """
    fields = line.rstrip("\n").split("\t")
    tiering_cfg = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering]
    zscore_headers = yaml_utils.convertColumns(
        tiering_cfg[yaml_keys.kTToleranceZScoreCols], yaml_commands)
    cutoff = tiering_cfg[yaml_keys.kTToleranceZScoreCutoff]

    for header in zscore_headers:
        cell = fields[headlist.index(header)]
        # A cell may hold several values joined by the dataset-default
        # multimatch delimiter.
        delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
            yaml_keys.kDMultimatchDelimiter]
        for raw_value in cell.split(delimiter):
            if raw_value == '':
                continue  # blank entries carry no score
            if float(raw_value) > float(cutoff):
                return True
    return False
Ejemplo n.º 2
0
def is_rare(templine, freq_thresh, bp_indexlist, yaml_commands):
    """Flag a record as rare unless some frequency exceeds the threshold.

    Args:
        templine: One tab-delimited record.
        freq_thresh: Frequency threshold; converted with ``float``.
        bp_indexlist: Positions of the columns in ``templine`` to inspect.
        yaml_commands: Parsed configuration used to look up the
            dataset-default multimatch delimiter.

    Returns:
        1 (rare) when no inspected value is strictly greater than
        ``freq_thresh``; 0 (not rare) otherwise. Empty cells are ignored.

    Raises:
        ValueError: If an element of a non-empty inspected cell cannot be
            parsed as a float.
    """
    delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
        yaml_keys.kDMultimatchDelimiter]
    columns = templine.split("\t")
    verdict = 1  # rare until a value above the threshold is seen
    for col_idx in bp_indexlist:
        cell = columns[col_idx]
        if cell == "":
            continue
        # Every element is parsed, even after the verdict has flipped, so a
        # malformed value anywhere in the inspected columns still raises.
        for value in cell.split(delimiter):
            if float(value) > float(freq_thresh):
                verdict = 0  # not rare
    return verdict
Ejemplo n.º 3
0
def tolerance_pass(line, headlist, yaml_commands):
    """Check a record's tolerance z-scores against the configured cutoff.

    Args:
        line: One tab-delimited record; a trailing newline is stripped
            before splitting.
        headlist: Column headers, positionally aligned with the fields of
            ``line`` (each configured header is located via ``.index``).
        yaml_commands: Parsed configuration holding the tiering module
            settings and the dataset defaults.

    Returns:
        True if any non-empty value in one of the configured z-score
        columns is strictly greater than the cutoff, otherwise False.
    """
    record = line.rstrip("\n").split("\t")
    tiering = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering]
    column_names = yaml_utils.convertColumns(
        tiering[yaml_keys.kTToleranceZScoreCols], yaml_commands)
    zscore_cutoff = tiering[yaml_keys.kTToleranceZScoreCutoff]

    for column_name in column_names:
        entry = record[headlist.index(column_name)]
        separator = yaml_utils.get_dataset_defaults(yaml_commands)[
            yaml_keys.kDMultimatchDelimiter]  # our delimiter
        # any() stops at the first passing value, matching an early return.
        if any(token != '' and float(token) > float(zscore_cutoff)
               for token in entry.split(separator)):
            return True
    return False
Ejemplo n.º 4
0
def is_rare(templine, freq_thresh, bp_indexlist, yaml_commands):
    """Return 1 if no inspected frequency exceeds ``freq_thresh``, else 0.

    Args:
        templine: One tab-delimited record.
        freq_thresh: Frequency threshold; converted with ``float``.
        bp_indexlist: Positions of the columns in ``templine`` to inspect.
        yaml_commands: Parsed configuration used to look up the
            dataset-default multimatch delimiter.

    Returns:
        1 when the record is considered rare, 0 when at least one value in
        the inspected columns is strictly greater than the threshold.

    Raises:
        ValueError: If an element of a non-empty inspected cell cannot be
            parsed as a float.
    """
    separator = yaml_utils.get_dataset_defaults(yaml_commands)[
        yaml_keys.kDMultimatchDelimiter]
    fields = templine.split("\t")
    above_threshold = False
    for position in bp_indexlist:
        raw_cell = fields[position]
        if raw_cell != "":
            # A full list (not a lazy generator) is built on purpose: every
            # element must be parsed so that a non-numeric one raises.
            comparisons = [
                float(piece) > float(freq_thresh)
                for piece in raw_cell.split(separator)
            ]
            if True in comparisons:
                above_threshold = True
    return 0 if above_threshold else 1
Ejemplo n.º 5
0
def is_rare(templine, freq_thresh, bp_indexlist, yaml_commands):
    """Return 1 only when there is proof that the record is rare.

    A record is kept as rare if at least one value in the inspected columns
    is strictly below ``freq_thresh``; records with no frequency data at all
    are reported as not rare.

    Args:
        templine: One tab-delimited record.
        freq_thresh: Frequency threshold; converted with ``float``.
        bp_indexlist: Positions of the columns in ``templine`` to inspect.
        yaml_commands: Parsed configuration used to look up the
            dataset-default multimatch delimiter.

    Returns:
        1 (rare) if any inspected value is below the threshold, else 0.

    Raises:
        ValueError: If an element of a non-empty inspected cell, or
            ``freq_thresh`` itself, cannot be parsed as a float.
    """
    logger = logging.getLogger(__name__)
    logger.debug('bp_indexlist: %s', bp_indexlist)
    multimatch_delimiter = yaml_utils.get_dataset_defaults(yaml_commands)[
        yaml_keys.kDMultimatchDelimiter]
    templinelist = templine.split("\t")
    rare_flag = 0  # not rare
    # Note: a variant is only kept if there is proof that it is rare.
    for i in bp_indexlist:
        cell = templinelist[i]
        if cell != "":
            # Log the raw cell. It may contain several values joined by the
            # multimatch delimiter, and calling float() on the whole cell
            # (as the log statement used to) raised ValueError for exactly
            # those cells, even with debug logging disabled.
            logger.debug('templinelist[%s]: %s', i, cell)
            threshold = float(freq_thresh)
            logger.debug('freq thresh: %s', threshold)
            for elt in cell.split(multimatch_delimiter):
                # Still raises if an individual element is not a float.
                if float(elt) < threshold:
                    rare_flag = 1  # rare
    return rare_flag
Ejemplo n.º 6
0
    parser.add_argument('--tab_file', help='.tab file to be expanded')
    parser.add_argument(
        '--out_suffix',
        default='.tsv',
        help=
        'suffix for output file (stored in same location as input file), e.g. ".tsv"'
    )
    parser.add_argument('--config_modules',
                        help='path to modules configuration file (YAML)')
    parser.add_argument('--config_datasets',
                        help='path to datasets configuration file (YAML)')

    args = parser.parse_args()

    config_yaml = yaml_utils.parse_yaml_input_files(args.config_datasets,
                                                    args.config_modules)
    modules_yaml = config_yaml[yaml_keys.kModules]
    #     modules_yaml = yaml_utils.parse_yaml(args.config_modules)
    #     datasets_yaml = yaml_utils.parse_yaml(args.config_datasets)

    bed_delimiter = modules_yaml[yaml_keys.kAnnotation][
        yaml_keys.kABedInternalDelimiter]
    bed_multimatch_internal_delimiter = modules_yaml[yaml_keys.kAnnotation][
        yaml_keys.kABedMultimatchInternalDelimiter]
    # for now, just using the default delimiter. Later, check if there's a specific delimiter for a given dataset and use that instead.
    dataset_multimatch_delimiter = yaml_utils.get_dataset_defaults(
        config_yaml)[yaml_keys.kDMultimatchDelimiter]

    expandBED(args.tab_file, bed_multimatch_internal_delimiter, bed_delimiter,
              dataset_multimatch_delimiter, args.out_suffix)
Ejemplo n.º 7
0
            flc = fl.split(bed_delimiter)
            lineContents.extend(flc)
        out_file.write("\t".join(lineContents) + "\n")
    return out_file_path


##### MAIN CODE ####
if __name__ == '__main__':
    # Command-line interface: the input file, the output suffix and the two
    # YAML configuration files.
    parser = argparse.ArgumentParser(description='IntersectBed Expander')
    parser.add_argument(
        '--tab_file',
        help='.tab file to be expanded',
    )
    parser.add_argument(
        '--out_suffix',
        default='.tsv',
        help='suffix for output file (stored in same location as input file), e.g. ".tsv"',
    )
    parser.add_argument(
        '--config_modules',
        help='path to modules configuration file (YAML)',
    )
    parser.add_argument(
        '--config_datasets',
        help='path to datasets configuration file (YAML)',
    )
    args = parser.parse_args()

    # Both configuration files are parsed into a single structure; the
    # modules section is read from it.
    config_yaml = yaml_utils.parse_yaml_input_files(
        args.config_datasets, args.config_modules)
    modules_yaml = config_yaml[yaml_keys.kModules]

    # The two BED delimiters are read from the annotation module settings.
    bed_delimiter = modules_yaml[yaml_keys.kAnnotation][
        yaml_keys.kABedInternalDelimiter]
    bed_multimatch_internal_delimiter = modules_yaml[yaml_keys.kAnnotation][
        yaml_keys.kABedMultimatchInternalDelimiter]

    # For now only the default delimiter is used. Later, check whether a
    # given dataset defines its own delimiter and use that instead.
    dataset_multimatch_delimiter = yaml_utils.get_dataset_defaults(
        config_yaml)[yaml_keys.kDMultimatchDelimiter]

    expandBED(
        args.tab_file,
        bed_multimatch_internal_delimiter,
        bed_delimiter,
        dataset_multimatch_delimiter,
        args.out_suffix,
    )