def create_trees_dict(intervals_file):
    """
    Create a dictionary with one interval tree per chromosome.

    Every non-blank line of the file is split on whitespace into at most
    four fields: chromosome, start, end and, optionally, the remainder of
    the line.  Start and end are converted to integers.  The intervals
    gathered for a chromosome are sorted and passed to build_tree().

    Blank or whitespace-only lines are skipped, so a trailing empty line
    does not make an otherwise valid file fail.

    :param intervals_file: path of the intervals file to read.
    :return: dictionary mapping each chromosome name to the value returned
        by build_tree() for that chromosome's sorted intervals.  It is
        empty when the file contains no intervals.
    :raises InputFormatError: if a non-blank line has fewer than three
        fields, or if its start/end fields are not integers.
    """
    # Temp dictionary that holds a list of all the intervals in each
    # chromosome.
    chromosomes = defaultdict(list)
    with open(intervals_file) as fin:
        for line in fin:
            # maxsplit=3 keeps everything after the end coordinate as one
            # field; rstrip() drops the newline from that last field.
            fields = line.rstrip().split(None, 3)
            if not fields:
                continue
            try:
                interval = (int(fields[1]), int(fields[2]))
            # Check that the input file is in BED format.
            except (IndexError, ValueError):
                raise InputFormatError('Tree file must be in BED format.')
            if len(fields) > 3:
                interval += (fields[3],)
            chromosomes[fields[0]].append(interval)
    # Final dictionary with one tree per chromosome.  A plain dict is used
    # because a defaultdict without a factory behaves like one anyway.
    trees = {}
    for chromosome, intervals in chromosomes.items():
        trees[chromosome] = build_tree(sorted(intervals))
    return trees
def create_trees_dict(intervals_file):
    """
    Create a dictionary with one interval tree per chromosome.

    Every non-blank line of the file is split on whitespace into at most
    four fields: chromosome, start, end and, optionally, the remainder of
    the line.  Start and end are converted to integers.  The intervals
    gathered for a chromosome are sorted and passed to build_tree().

    Blank or whitespace-only lines are skipped, so a trailing empty line
    does not make an otherwise valid file fail.

    :param intervals_file: path of the intervals file to read.
    :return: dictionary mapping each chromosome name to the value returned
        by build_tree() for that chromosome's sorted intervals.  It is
        empty when the file contains no intervals.
    :raises InputFormatError: if a non-blank line has fewer than three
        fields, or if its start/end fields are not integers.
    """
    # Temp dictionary that holds a list of all the intervals in each
    # chromosome.
    chromosomes = defaultdict(list)
    with open(intervals_file) as fin:
        for line in fin:
            # maxsplit=3 keeps everything after the end coordinate as one
            # field; rstrip() drops the newline from that last field.
            fields = line.rstrip().split(None, 3)
            if not fields:
                continue
            try:
                interval = (int(fields[1]), int(fields[2]))
            # Check that the input file is in BED format.
            except (IndexError, ValueError):
                raise InputFormatError("Tree file must be in BED format.")
            if len(fields) > 3:
                interval += (fields[3],)
            chromosomes[fields[0]].append(interval)
    # Final dictionary with one tree per chromosome.  A plain dict is used
    # because a defaultdict without a factory behaves like one anyway.
    trees = {}
    for chromosome, intervals in chromosomes.items():
        trees[chromosome] = build_tree(sorted(intervals))
    return trees
raise InputFormatError('Query file must be in BED format.') except ValueError: raise InputFormatError('Query file must be in BED format.') if len(interval) > 2: try: query.append(int(interval[2])) except ValueError: raise InputFormatError('Query file must be in BED format.') query = tuple(query) for overlap in find_overlaps(trees, query): print '\t'.join(str(i) for i in overlap) if __name__ == '__main__': try: tree_file = sys.argv[1] query_file = sys.argv[2] trees = create_trees_dict(tree_file) with open(query_file) as quf: slines = (line.rstrip().split(None, 3) for line in quf) for sl in slines: _query_from_main(trees, sl) except IndexError: print('Please use the following format:\n' + 'ting.py <tree_file> <query_file>')
except IndexError: raise InputFormatError("Query file must be in BED format.") except ValueError: raise InputFormatError("Query file must be in BED format.") if len(interval) > 2: try: query.append(int(interval[2])) except ValueError: raise InputFormatError("Query file must be in BED format.") query = tuple(query) for overlap in find_overlaps(trees, query): print "\t".join(str(i) for i in overlap) if __name__ == "__main__": try: tree_file = sys.argv[1] query_file = sys.argv[2] trees = create_trees_dict(tree_file) with open(query_file) as quf: slines = (line.rstrip().split(None, 3) for line in quf) for sl in slines: _query_from_main(trees, sl) except IndexError: print ("Please use the following format:\n" + "ting.py <tree_file> <query_file>")