Esempio n. 1
0
def create_trees_dict(intervals_file):
    """
    Create a dictionary in which each key is a chromosome and its value is an
    interval tree containing all the intervals in that chromosome.

    intervals_file -- path to a whitespace-delimited file whose first three
        columns are chromosome, start and end. Everything after the third
        column, if present, is kept as a single extra field.

    Returns a mapping from the first column of each line to the result of
    build_tree() on that key's sorted intervals.

    Raises InputFormatError when a line has fewer than three columns or a
    start/end that is not an integer (this includes blank lines).
    """

    # Temp dictionary that holds a list of all the intervals in each
    # chromosome.
    chromosomes = defaultdict(list)
    with open(intervals_file) as fin:
        for line in fin:
            # At most four fields, so a trailing label keeps its whitespace.
            fields = line.rstrip().split(None, 3)

            # Check that the input file is in BED format.
            try:
                interval = (int(fields[1]), int(fields[2]))
            except (IndexError, ValueError):
                raise InputFormatError('Tree file must be in BED format.')

            if len(fields) > 3:
                interval += (fields[3],)

            # Every interval is stored as a tuple, with or without the extra
            # field. Mixing lists and tuples would make sorted() order them
            # by type on Python 2 and raise TypeError on Python 3.
            chromosomes[fields[0]].append(interval)

    # Final dictionary with one tree per chromosome.
    trees = defaultdict()
    for chromosome, intervals in chromosomes.items():
        trees[chromosome] = build_tree(sorted(intervals))

    return trees
Esempio n. 2
0
def create_trees_dict(intervals_file):
    """
    Create a dictionary in which each key is a chromosome and its value is an
    interval tree containing all the intervals in that chromosome.

    intervals_file -- path to a whitespace-delimited file whose first three
        columns are chromosome, start and end. Everything after the third
        column, if present, is kept as a single extra field.

    Returns a mapping from the first column of each line to the result of
    build_tree() on that key's sorted intervals.

    Raises InputFormatError when a line has fewer than three columns or a
    start/end that is not an integer (this includes blank lines).
    """

    # Temp dictionary that holds a list of all the intervals in each
    # chromosome.
    chromosomes = defaultdict(list)
    with open(intervals_file) as fin:
        for raw_line in fin:
            # At most four fields, so a trailing label keeps its whitespace.
            columns = raw_line.rstrip().split(None, 3)

            # Check that the input file is in BED format.
            try:
                start, end = int(columns[1]), int(columns[2])
            except (IndexError, ValueError):
                raise InputFormatError("Tree file must be in BED format.")

            # Always build a tuple, whether or not the extra field exists.
            # A mix of lists and tuples is ordered by type by sorted() on
            # Python 2 and raises TypeError on Python 3.
            if len(columns) > 3:
                interval = (start, end, columns[3])
            else:
                interval = (start, end)

            chromosomes[columns[0]].append(interval)

    # Final dictionary with one tree per chromosome.
    trees = defaultdict()
    for chromosome, intervals in chromosomes.items():
        trees[chromosome] = build_tree(sorted(intervals))

    return trees
Esempio n. 3
0
        raise InputFormatError('Query file must be in BED format.')
    except ValueError:
        raise InputFormatError('Query file must be in BED format.')

    if len(interval) > 2:
        try:
            query.append(int(interval[2]))
        except ValueError:
            raise InputFormatError('Query file must be in BED format.')

    query = tuple(query)
    for overlap in find_overlaps(trees, query):
        print '\t'.join(str(i) for i in overlap)


if __name__ == '__main__':
    # Script entry point: build the trees from the first file, then run each
    # line of the second file as a query.
    try:
        tree_file = sys.argv[1]
        query_file = sys.argv[2]
    except IndexError:
        # Only a missing command-line argument is a usage error. The try is
        # kept this narrow so an IndexError raised while building or querying
        # the trees is not misreported as a bad invocation.
        print('Please use the following format:\n' +
              'ting.py <tree_file> <query_file>')
    else:
        trees = create_trees_dict(tree_file)

        with open(query_file) as quf:
            for line in quf:
                # Same field splitting as the tree file: at most four fields.
                _query_from_main(trees, line.rstrip().split(None, 3))
Esempio n. 4
0
    except IndexError:
        raise InputFormatError("Query file must be in BED format.")
    except ValueError:
        raise InputFormatError("Query file must be in BED format.")

    if len(interval) > 2:
        try:
            query.append(int(interval[2]))
        except ValueError:
            raise InputFormatError("Query file must be in BED format.")

    query = tuple(query)
    for overlap in find_overlaps(trees, query):
        print "\t".join(str(i) for i in overlap)


if __name__ == "__main__":
    # Script entry point: build the trees from the first file, then run each
    # line of the second file as a query.
    try:
        tree_file = sys.argv[1]
        query_file = sys.argv[2]
    except IndexError:
        # Only a missing command-line argument is a usage error. The try is
        # kept this narrow so an IndexError raised while building or querying
        # the trees is not misreported as a bad invocation.
        print("Please use the following format:\n" +
              "ting.py <tree_file> <query_file>")
    else:
        trees = create_trees_dict(tree_file)

        with open(query_file) as quf:
            slines = (line.rstrip().split(None, 3) for line in quf)
            for sl in slines:
                _query_from_main(trees, sl)