def create_gene_tree(bed_file_path):
    """Collect model/gene IDs of non-overlapping records from a tab-separated file.

    The file is read row by row. Only rows with exactly nine columns whose
    first column does not start with '##' are considered. Columns are read
    positionally: 0 = sequence id, 1 = start, 2 = end, 4 = model id,
    5 = gene id; columns 6-8 are parsed as float, float and int.

    Records are accepted first-come, first-served: a record is skipped when
    its (start, end) range overlaps an interval already stored for the same
    sequence id. Accepted records are added to a per-sequence interval tree
    that is local to this call and is not returned.

    Args:
        bed_file_path: Path of the tab-delimited annotation file to read.

    Returns:
        A dict mapping the model id and the gene id of every accepted record
        to the value 1.

    Raises:
        ValueError: If a numeric column of a considered row cannot be parsed.
    """
    accepted_ids = {}
    placed = GenomeIntervalTree()

    with open(bed_file_path, 'r') as handle:
        for fields in csv.reader(handle, delimiter='\t'):
            # Guard clause: ignore '##' header lines and rows of the wrong width.
            if len(fields) != 9 or fields[0].startswith('##'):
                continue

            (chrom, begin_txt, stop_txt, _strand, model_id, gene_id,
             cov_txt, idty_txt, matches_txt) = fields

            begin = int(begin_txt)
            stop = int(stop_txt)

            # These values are not used further; they are still parsed so a
            # malformed numeric column raises ValueError at this point.
            float(cov_txt)
            float(idty_txt)
            int(matches_txt)

            chrom_tree = placed[chrom]
            if chrom_tree.overlaps(begin, stop):
                # An earlier record already occupies part of this range.
                continue

            accepted_ids[model_id] = 1
            accepted_ids[gene_id] = 1
            chrom_tree.addi(begin, stop, data={"ID": model_id, "Parent": gene_id})

    return accepted_ids
def test_pickling():
    """Check that a GenomeIntervalTree keeps its sizes across a pickle round trip.

    Two intervals are stored under key 'a' and one under key 'b'; after
    dumping and reloading, both the container length and the length of the
    'a' entry must match the original.
    """
    original = GenomeIntervalTree()

    # Populate via slice assignment: tree[begin:end] = data
    chrom_a = original['a']
    chrom_a[1:2] = ['some', 'data']
    chrom_a[1.5:2.5] = ['more', 'data']
    original['b'][10:12] = ['even', 'more', 'data']

    restored = pickle.loads(pickle.dumps(original))

    assert len(original) == len(restored)
    assert len(original['a']) == len(restored['a'])