Esempio n. 1
0
def compute_score(pLoopFile, pProteinFile, pMaximumNumberOfLoops, pResolution, pChrPrefixLoops):
    """Score a loop file by validating it against a protein peak file.

    Runs ``hicValidateLocations`` with ``--method loops`` and turns the
    statistics it writes into a single number. The result is ``1`` minus a
    combination of the 'Loops match protein' value and the ratio of
    'Matched Loops' to ``pMaximumNumberOfLoops``, so larger match values
    give a smaller score.

    Args:
        pLoopFile: Path of the loop file passed as ``--data``.
        pProteinFile: Path of the protein file passed as ``--validationData``.
        pMaximumNumberOfLoops: Reference loop count; any value accepted by
            ``float()`` (number or numeric string).
        pResolution: Value passed as ``--resolution``.
        pChrPrefixLoops: Value passed as ``-cl``, or ``None`` to omit the
            option.

    Returns:
        ``1`` if the loop file is empty, otherwise the computed score.

    Raises:
        KeyError: If the statistics output lacks a key the scoring needs.
        ValueError: If a statistics value or ``pMaximumNumberOfLoops`` is
            not numeric.
    """
    import os

    # An empty loop file gets the fixed score 1 without running the validation.
    with open(pLoopFile, 'r') as loop_file:
        if not loop_file.read(1):
            return 1

    # Convert once so every comparison and division below sees the same
    # numeric value, even when the caller passed a string.
    max_loops = float(pMaximumNumberOfLoops)

    statistics = {}
    # The temporary file only reserves a unique name; the validation tool is
    # expected to write its results next to it using suffixes.
    with NamedTemporaryFile() as outfile_statistics:
        out_prefix = outfile_statistics.name

        # Build the argument list directly instead of splitting a formatted
        # string, so paths containing whitespace stay intact.
        args = ['--data', str(pLoopFile), '--validationData', str(pProteinFile)]
        if pChrPrefixLoops is not None:
            args += ['-cl', str(pChrPrefixLoops)]
        args += ['--resolution', str(pResolution),
                 '--outFileName', out_prefix,
                 '--method', 'loops']

        try:
            hicValidateLocations.main(args)

            with open(out_prefix + '_statistics', 'r') as stats_file:
                for line in stats_file:
                    if line.startswith('#'):
                        continue
                    fields = line.split(':')
                    if len(fields) < 2:
                        # Blank or otherwise colon-less line: nothing to parse.
                        continue
                    statistics[fields[0]] = float(fields[1])
        finally:
            # Best-effort cleanup of the side files written under the
            # temporary prefix; the name is never exposed to the caller, so
            # nothing else can use them. A missing file is not an error.
            for suffix in ('_statistics', '_matched_locations'):
                try:
                    os.remove(out_prefix + suffix)
                except OSError:
                    pass

    matched_loops = int(statistics['Matched Loops'])

    # More matched loops than the reference count.
    if matched_loops > max_loops:
        return 1 - ((statistics['Loops match protein'] * 2 + 1.0) / 3)
    # Large reference count but fewer than 500 matches: score by the loop
    # ratio alone.
    if max_loops > 500 and matched_loops < 500:
        return 1 - (matched_loops / max_loops)
    return 1 - ((statistics['Loops match protein'] * 2 + (matched_loops / max_loops / 2)) / 3)
def test_loop_narrow_peak():
    """Run the loops validation on the Smc3 narrowPeak file and compare both outputs with the stored references."""
    # Only an unused path prefix is needed; leaving the context closes the
    # temporary file, which with delete=True also removes it.
    with NamedTemporaryFile(suffix='out', delete=True) as tmp_handle:
        out_prefix = tmp_handle.name

    command_line = "--data {} --protein {} --method {} --outFileName {} -r {} --addChrPrefixLoops".format(
        ROOT + 'loops_1.bedgraph',
        ROOT + 'GSM935376_hg19_Gm12878_Smc3.narrowPeak',
        'loops',
        out_prefix,
        10000)
    hicValidateLocations.main(command_line.split())

    expected_locations = ROOT + 'overlap_smc3_matched_locations'
    produced_locations = out_prefix + '_matched_locations'
    assert are_files_equal(expected_locations, produced_locations)

    expected_statistics = ROOT + 'overlap_smc3_statistics'
    produced_statistics = out_prefix + '_statistics'
    assert are_files_equal(expected_statistics, produced_statistics, skip=3)
def test_loop_broad_peak():
    """Run the loops validation on the CTCF broadPeak file and compare both outputs with the stored references."""
    # Only an unused path prefix is needed; leaving the context closes the
    # temporary file, which with delete=True also removes it.
    with NamedTemporaryFile(suffix='out', delete=True) as tmp_handle:
        out_prefix = tmp_handle.name

    command_line = "--data {} --protein {} --method {} --outFileName {} -r {}".format(
        ROOT + 'loops_1.bedgraph',
        ROOT + 'GSM733752_hg19_ctcf_GM12878.broadPeak',
        'loops',
        out_prefix,
        10000)
    hicValidateLocations.main(command_line.split())

    expected_locations = ROOT + 'overlap_ctcf_matched_locations'
    produced_locations = out_prefix + '_matched_locations'
    assert are_files_equal(expected_locations, produced_locations)

    expected_statistics = ROOT + 'overlap_ctcf_statistics'
    produced_statistics = out_prefix + '_statistics'
    assert are_files_equal(expected_statistics, produced_statistics, skip=3)