def compute_score(pLoopFile, pProteinFile, pMaximumNumberOfLoops, pResolution, pChrPrefixLoops):
    """Score a loop file by validating it against protein locations.

    Runs ``hicValidateLocations`` with ``--method loops`` on the given files,
    parses the ``<prefix>_statistics`` file it writes and combines the
    'Matched Loops' and 'Loops match protein' entries into a single number.

    Parameters
    ----------
    pLoopFile
        Path of the loop file, passed as ``--data``.
    pProteinFile
        Path of the protein file, passed as ``--validationData``.
    pMaximumNumberOfLoops
        Number (or numeric string) the matched-loop count is compared with.
    pResolution
        Value passed as ``--resolution``.
    pChrPrefixLoops
        Value passed as ``-cl``; ``None`` omits the option entirely.

    Returns
    -------
    ``1`` if the loop file is empty. Otherwise ``1 - x`` where ``x`` is

    * ``(2 * protein_match + 1) / 3`` when more loops matched than
      ``pMaximumNumberOfLoops``;
    * ``matched / maximum`` when the maximum exceeds 500 but fewer than 500
      loops matched;
    * ``(2 * protein_match + matched / maximum / 2) / 3`` in all other cases.

    Raises
    ------
    KeyError
        If the statistics file lacks a required entry.
    ValueError
        If ``pMaximumNumberOfLoops`` or a statistics value is not numeric.
    """
    import os  # function-scope import: only needed to remove temporary artifacts

    with open(pLoopFile, 'r') as file:
        # A single character is enough to tell an empty file from a non-empty one.
        if not file.read(1):
            return 1

    # Convert once so every comparison below uses the same numeric value
    # (the raw argument may be a string coming from a command line).
    max_loops = float(pMaximumNumberOfLoops)

    data_dict = {}
    with NamedTemporaryFile() as outfile_statistics:
        output_prefix = outfile_statistics.name

        # Build the argument vector as a list instead of formatting and
        # splitting a string, so paths containing whitespace stay intact.
        args = ['--data', str(pLoopFile), '--validationData', str(pProteinFile)]
        if pChrPrefixLoops is not None:
            args += ['-cl', str(pChrPrefixLoops)]
        args += ['--resolution', str(pResolution),
                 '--outFileName', output_prefix,
                 '--method', 'loops']

        try:
            hicValidateLocations.main(args)

            with open(output_prefix + '_statistics', 'r') as file:
                for line in file:
                    # Skip comment lines and blank lines.
                    if line.startswith('#') or not line.strip():
                        continue
                    line_split = line.split(':')
                    data_dict[line_split[0]] = float(line_split[1])
        finally:
            # The validation step writes files named after the prefix; remove
            # them so repeated calls do not pile up in the temp directory.
            for suffix in ('_statistics', '_matched_locations'):
                try:
                    os.remove(output_prefix + suffix)
                except OSError:
                    pass

    data_dict['Matched Loops'] = int(data_dict['Matched Loops'])
    matched_loops = data_dict['Matched Loops']

    if matched_loops > max_loops:
        return 1 - ((data_dict['Loops match protein'] * 2 + 1.0) / 3)
    if max_loops > 500 and matched_loops < 500:
        return 1 - (matched_loops / max_loops)
    return 1 - ((data_dict['Loops match protein'] * 2 + (matched_loops / max_loops / 2)) / 3)
def test_loop_narrow_peak():
    """Validate loops_1.bedgraph against the Smc3 narrowPeak file.

    Runs hicValidateLocations with the 'loops' method and compares the
    generated matched-locations and statistics files with the stored
    reference files.
    """
    # Only a unique name is needed: leaving the context closes the
    # placeholder file, which (delete=True) also removes it.
    with NamedTemporaryFile(suffix='out', delete=True) as placeholder:
        out_prefix = placeholder.name

    loops_path = ROOT + 'loops_1.bedgraph'
    peaks_path = ROOT + 'GSM935376_hg19_Gm12878_Smc3.narrowPeak'
    command_line = "--data {} --protein {} --method {} --outFileName {} -r {} --addChrPrefixLoops".format(
        loops_path, peaks_path, 'loops', out_prefix, 10000)

    hicValidateLocations.main(command_line.split())

    expected_locations = ROOT + 'overlap_smc3_matched_locations'
    expected_statistics = ROOT + 'overlap_smc3_statistics'
    assert are_files_equal(expected_locations, out_prefix + '_matched_locations')
    assert are_files_equal(expected_statistics, out_prefix + '_statistics', skip=3)
def test_loop_broad_peak():
    """Validate loops_1.bedgraph against the CTCF broadPeak file.

    Runs hicValidateLocations with the 'loops' method and compares the
    generated matched-locations and statistics files with the stored
    reference files.
    """
    # Only a unique name is needed: leaving the context closes the
    # placeholder file, which (delete=True) also removes it.
    with NamedTemporaryFile(suffix='out', delete=True) as placeholder:
        out_prefix = placeholder.name

    loops_path = ROOT + 'loops_1.bedgraph'
    peaks_path = ROOT + 'GSM733752_hg19_ctcf_GM12878.broadPeak'
    command_line = "--data {} --protein {} --method {} --outFileName {} -r {}".format(
        loops_path, peaks_path, 'loops', out_prefix, 10000)

    hicValidateLocations.main(command_line.split())

    expected_locations = ROOT + 'overlap_ctcf_matched_locations'
    expected_statistics = ROOT + 'overlap_ctcf_statistics'
    assert are_files_equal(expected_locations, out_prefix + '_matched_locations')
    assert are_files_equal(expected_statistics, out_prefix + '_statistics', skip=3)