Example #1
0
def create_gene_tree(bed_file_path):
    """Collect the IDs of non-overlapping records from a tab-separated file.

    Only rows with exactly nine tab-separated fields whose first field does
    not start with ``##`` are considered; every other row is skipped
    silently. Going by the field names used here, the columns are read
    positionally as: seqid, start, end, strand, model ID, gene ID, coverage,
    identity, matches.

    Rows are processed in file order. A row is kept only when the interval
    tree for its seqid reports no overlap with ``(start, end)``; otherwise
    it is dropped, so the first record seen for a region wins.

    Note that the interval tree is a local working structure and is NOT
    returned; only the ID dictionary is.

    Args:
        bed_file_path: Path of the tab-separated annotations file to read.

    Returns:
        A dict whose keys are the model IDs and gene IDs of all kept rows,
        each mapped to ``1``.

    Raises:
        ValueError: If a nine-field row has a non-numeric start, end,
            coverage, identity or matches field.
    """
    models = {}
    # One interval tree per seqid, used purely for overlap detection.
    tree = GenomeIntervalTree()
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for file objects passed to csv.reader.
    with open(bed_file_path, 'r', newline='') as annotations_file:
        reader = csv.reader(annotations_file, delimiter='\t')
        for row in reader:
            if len(row) != 9 or row[0].startswith('##'):
                continue

            seqid = row[0]
            start = int(row[1])
            end = int(row[2])
            m_id = row[4]
            g_id = row[5]
            # The last three fields are not used below, but they are still
            # parsed so that a malformed row raises ValueError instead of
            # being accepted quietly.
            float(row[6])
            float(row[7])
            int(row[8])

            if tree[seqid].overlaps(start, end):
                # Region already claimed by an earlier row: drop this one.
                continue

            models[m_id] = 1
            models[g_id] = 1
            tree[seqid].addi(start, end, data={"ID": m_id, "Parent": g_id})
    return models
Example #2
0
def test_pickling():
    """Check that a populated GenomeIntervalTree survives a pickle round trip.

    Only sizes are compared: the number of top-level keys, and the number of
    intervals stored under key ``'a'``.
    """
    fixtures = (
        ('a', slice(1, 2), ['some', 'data']),
        ('a', slice(1.5, 2.5), ['more', 'data']),
        ('b', slice(10, 12), ['even', 'more', 'data']),
    )

    original = GenomeIntervalTree()
    for key, span, payload in fixtures:
        # Same as ``original[key][begin:end] = payload``.
        original[key][span] = payload

    restored = pickle.loads(pickle.dumps(original))

    assert len(original) == len(restored)
    assert len(original['a']) == len(restored['a'])