def multithreadedJaccardBed(*bedSortedPairs):
    """Compute the Jaccard score for one pair of BED files.

    Only the first positional argument is used; it must be an indexable
    pair ``(pathA, pathB)``. The file that is at least as large on disk is
    handed to ``BedTool.jaccard`` as the first operand.

    Returns:
        A tuple ``(jaccardScores, errorBedSortedPairs)``.
        ``jaccardScores`` maps ``str((file1, file2))`` to the ``'jaccard'``
        entry of the result. ``errorBedSortedPairs`` holds one
        ``[file1, file2]`` entry if anything failed; an element is the
        ``BedTool`` when it was built, otherwise the plain path.

    Raises:
        IndexError: if no pair was passed in.
    """
    jaccardScores = dict()
    errorBedSortedPairs = []
    pathA = bedSortedPairs[0][0]
    pathB = bedSortedPairs[0][1]

    # Pre-bind everything the error handler reads, so a failure in os.stat()
    # or BedTool() cannot turn into an UnboundLocalError inside the handler.
    file1, file2 = pathA, pathB
    largerFirst = None  # stays None when the size check itself failed

    try:
        largerFirst = os.stat(pathA).st_size >= os.stat(pathB).st_size
        if largerFirst:
            print('true')
            first, second = pathA, pathB
        else:
            print('yuck')
            first, second = pathB, pathA
        # Keep the fallback paths in the same order as the operands.
        file1, file2 = first, second
        file1 = BedTool(first)
        file2 = BedTool(second)
        jaccardResults = BedTool.jaccard(file1, file2)
        jaccardScores[str((file1, file2))] = jaccardResults['jaccard']
    except Exception as error:
        # Reuse the comparison made above instead of calling os.stat() again:
        # if stat was what failed, repeating it here would raise out of the
        # handler and lose the error record.
        if largerFirst is not None:
            if largerFirst:
                label, rule = 'ok!', '***'
            else:
                label, rule = 'youch!', '-----'
            print(label)
            print(rule)
            print(pathA)
            print(pathB)
            print(rule)
        errorBedSortedPairs.append([file1, file2])
        print('error')
        print(error)
    return jaccardScores, errorBedSortedPairs
def run_jaccard(fileA, fileB, genomefile):
    """Sort two interval files and compute their Jaccard statistics.

    Both inputs are loaded as ``BedTool`` objects and sorted with
    ``g=genomefile`` before ``jaccard`` is called with the same genome file.
    The result mapping is extended with ``"fileA"`` and ``"fileB"`` entries
    holding the last ``/``-separated component of each input path.

    Returns:
        A tuple ``(data, keylist)`` where ``keylist`` is the sorted list of
        result keys and ``data`` is the matching list of values, each
        converted with ``str``.
    """
    sorted_a = BedTool(fileA).sort(g=genomefile)
    sorted_b = BedTool(fileB).sort(g=genomefile)

    stats = sorted_a.jaccard(sorted_b, g=genomefile)
    stats["fileA"] = fileA.split("/")[-1]
    stats["fileB"] = fileB.split("/")[-1]

    keylist = sorted(stats.keys())
    data = [str(stats[name]) for name in keylist]
    return (data, keylist)
def multithreadedJaccardBed(*bedSortedPairs):
    """Compute the Jaccard score for one pair of BED files.

    Only the first positional argument is used; it must be an indexable
    pair ``(pathA, pathB)``.

    Returns:
        A tuple ``(jaccardScores, errorBedSortedPairs)``.
        ``jaccardScores`` maps ``str((file1, file2))`` to the ``'jaccard'``
        entry of the result. ``errorBedSortedPairs`` holds one
        ``[file1, file2]`` entry if anything failed; an element is the
        ``BedTool`` when it was built, otherwise the plain path.

    Raises:
        IndexError: if no pair was passed in.
    """
    jaccardScores = dict()
    errorBedSortedPairs = []
    pathA = bedSortedPairs[0][0]
    pathB = bedSortedPairs[0][1]

    # Pre-bind the names the handler reads: if BedTool() itself raises, the
    # failure is still recorded (as paths) instead of surfacing as an
    # UnboundLocalError from inside the except block.
    file1, file2 = pathA, pathB
    try:
        file1 = BedTool(pathA)
        file2 = BedTool(pathB)
        jaccardResults = BedTool.jaccard(file1, file2)
        jaccardScores[str((file1, file2))] = jaccardResults['jaccard']
    except Exception:
        # Deliberately best-effort: the failing pair is reported to the
        # caller through the second return value.
        errorBedSortedPairs.append([file1, file2])
    return jaccardScores, errorBedSortedPairs
def sequentialJaccardBed():
    """Compute Jaccard scores for every pair from ``jaccardBed()`` in a loop.

    Each pair is processed independently; a failure on one pair is recorded
    and the loop continues. The elapsed wall-clock time is printed at the
    end.

    Returns:
        None. NOTE(review): the collected scores and failed pairs are not
        returned or stored anywhere visible here — confirm whether this
        function is only meant as a timing benchmark.
    """
    sequential_time = time.time()
    bedSortedPairs = jaccardBed()
    jaccardScores = []
    errorBedSortedPairs = []
    for pair in bedSortedPairs:
        # Reset per iteration so the handler can never report the previous
        # pair's BedTools (or hit unbound names on the first pair).
        file1 = file2 = None
        try:
            file1, file2 = pair[0], pair[1]
            file1 = BedTool(file1)
            file2 = BedTool(file2)
            jaccardResults = BedTool.jaccard(file1, file2)
            jaccardScores.append(jaccardResults['jaccard'])
        except Exception:
            # Best-effort: remember the failing pair and keep going. Elements
            # are BedTools when construction succeeded, otherwise the raw
            # path (or None if the pair itself could not be unpacked).
            errorBedSortedPairs.append([file1, file2])
    print('Sequentially calculating jaccard scores --- %.2f seconds ---' % (time.time() - sequential_time))
# Pairwise comparison of the files in `subset`: prints a tab-separated header
# followed by one row per unordered pair with both cell types and the
# Jaccard value.
cons_bedgraph = BedTool('/vol1/opt/data/hg19.100way.phyloP100way.bg.gz')

# NOTE(review): the header names five columns but each row below emits only
# three values (no 'type' or 'mean.cons') — confirm the intended layout.
header_fields = [
    '#cell.type.1',
    'cell.type.2',
    'type',
    'jaccard',
    'mean.cons',
]
print('\t'.join(header_fields))

for fname1, fname2 in combinations(subset, r=2):
    ctype1 = get_cell_type(fname1)
    ctype2 = get_cell_type(fname2)

    tool1 = BedTool(fname1)
    tool2 = BedTool(fname2)

    # Jaccard statistic for the pair.
    result = tool1.jaccard(tool2, f=0.5, r=False)
    stat = result['jaccard']

    # Conservation measurement: intervals of each file selected by
    # intersect(v=True) against the other, mapped onto the conservation
    # track with the mean of column 4.
    result1 = tool1.intersect(tool2, v=True)
    result2 = tool2.intersect(tool1, v=True)
    # NOTE(review): cons1 / cons2 are computed but never written out.
    cons1 = result1.map(cons_bedgraph, o='mean', c=4)
    cons2 = result2.map(cons_bedgraph, o='mean', c=4)

    # NOTE(review): `pbd` is presumably an alias (or a typo) for `pdb`;
    # this stops in the debugger on every pair — confirm it is still wanted.
    pbd.set_trace()

    fields = [ctype1, ctype2, stat]
    print('\t'.join(str(value) for value in fields))
# Pairwise comparison of a random sample of 10 entries from `filenames`:
# prints a tab-separated header followed by one row per unordered pair with
# both cell types and the Jaccard value.
subset = random.sample(filenames, 10)

cons_bedgraph = BedTool('/vol1/opt/data/hg19.100way.phyloP100way.bg.gz')

# NOTE(review): the header names five columns but each row below emits only
# three values (no 'type' or 'mean.cons') — confirm the intended layout.
header_fields = [
    '#cell.type.1',
    'cell.type.2',
    'type',
    'jaccard',
    'mean.cons',
]
print('\t'.join(header_fields))

for fname1, fname2 in combinations(subset, r=2):
    ctype1 = get_cell_type(fname1)
    ctype2 = get_cell_type(fname2)

    tool1 = BedTool(fname1)
    tool2 = BedTool(fname2)

    # Jaccard statistic for the pair.
    result = tool1.jaccard(tool2, f=0.5, r=False)
    stat = result['jaccard']

    # Conservation measurement: intervals of each file selected by
    # intersect(v=True) against the other, mapped onto the conservation
    # track with the mean of column 4.
    result1 = tool1.intersect(tool2, v=True)
    result2 = tool2.intersect(tool1, v=True)
    # NOTE(review): cons1 / cons2 are computed but never written out.
    cons1 = result1.map(cons_bedgraph, o='mean', c=4)
    cons2 = result2.map(cons_bedgraph, o='mean', c=4)

    # NOTE(review): `pbd` is presumably an alias (or a typo) for `pdb`;
    # this stops in the debugger on every pair — confirm it is still wanted.
    pbd.set_trace()

    fields = [ctype1, ctype2, stat]
    print('\t'.join(str(value) for value in fields))
def score(onlyfiles):
    """Record the Jaccard score of each candidate file that beats a threshold.

    For every name ``bed`` in ``onlyfiles`` the file at ``mypath + bed`` is
    compared against the sorted ``userfile``. When the ``'jaccard'`` value of
    the result is greater than ``jaccard`` it is stored as ``scores[bed]``.

    ``mypath``, ``userfile``, ``jaccard`` and ``scores`` are not parameters;
    they are read from the enclosing scope.

    Args:
        onlyfiles: iterable of file names, each appended to ``mypath``.

    Returns:
        None; results are written into ``scores``.
    """
    # Sorting the user file does not depend on the candidate, so do it once.
    # It is created lazily, after the first candidate is opened, so an empty
    # `onlyfiles` still performs no work and the call order is unchanged.
    sorted_userfile = None
    for bed in onlyfiles:
        candidate = BedTool(mypath + bed)
        if sorted_userfile is None:
            sorted_userfile = userfile.sort()
        jaccard_value = BedTool.jaccard(sorted_userfile, candidate)['jaccard']
        if jaccard_value > jaccard:
            scores[bed] = jaccard_value