def test_get_bulge_dimensions(self):
    """Check the tuples returned by ``get_bulge_dimensions``.

    Covers interior loops (``i*``) and multiloop segments (``m*``).  The
    expected values below use 1000 as the second entry for every
    multiloop segment.
    """
    bg = fgb.BulgeGraph(dotbracket_str='(.(.))')
    self.assertEqual(bg.get_bulge_dimensions('i0'), (1, 0))

    bg = fgb.BulgeGraph(dotbracket_str='((.).)')
    self.assertEqual(bg.get_bulge_dimensions('i0'), (0, 1))

    # No expectation is asserted here; the call only has to succeed.
    bg = fgb.BulgeGraph(dotbracket_str='().()')
    bg.get_bulge_dimensions('m0')

    dotbracket = '(.(.).(.).(.))'
    bg = fgb.BulgeGraph(dotbracket_str=dotbracket)
    self.assertEqual(bg.get_bulge_dimensions('m0'), (1, 1000))
    self.assertEqual(bg.get_bulge_dimensions('m1'), (0, 1000))
    self.assertEqual(bg.get_bulge_dimensions('m2'), (1, 1000))
    self.assertEqual(bg.get_bulge_dimensions('m3'), (1, 1000))

    bg = fgb.BulgeGraph(dotbracket_str='((..((..))....))..((..((..))...))')
    self.assertEqual(bg.get_bulge_dimensions('i0'), (2, 4))
    self.assertEqual(bg.get_bulge_dimensions('i1'), (2, 3))
def test_get_flanking_handles(self):
    """Compare ``get_flanking_handles`` against hand-computed 4-tuples.

    Exercises a hairpin, multiloop segments, and both sides of interior
    loops (selected with the ``side`` keyword).
    """
    hairpin = fgb.BulgeGraph(dotbracket_str='((..))')
    self.assertEqual(hairpin.get_flanking_handles('h0'), (2, 5, 1, 4))

    multiloop = fgb.BulgeGraph(dotbracket_str='((.((.)).(.).))')
    multiloop_cases = (
        ('m0', (2, 4, 1, 3)),
        ('m2', (8, 10, 1, 3)),
        ('m1', (12, 14, 0, 2)),
    )
    for element, expected in multiloop_cases:
        self.assertEqual(multiloop.get_flanking_handles(element), expected)

    short_stems = fgb.BulgeGraph(dotbracket_str='(.(.).).(.(.))')
    short_stem_cases = (
        ('i0', 0, (1, 3, 0, 2)),
        ('i0', 1, (5, 7, 0, 2)),
        ('i1', 0, (9, 11, 0, 2)),
        ('i1', 1, (13, 14, 0, 1)),
    )
    for element, side, expected in short_stem_cases:
        self.assertEqual(
            short_stems.get_flanking_handles(element, side=side), expected)

    long_stems = fgb.BulgeGraph(dotbracket_str='((.((.)).)).((.((.))))')
    #                                           1234567890123456789012
    long_stem_cases = (
        ('i0', 0, (2, 4, 1, 3)),
        ('i0', 1, (8, 10, 1, 3)),
        ('i1', 0, (14, 16, 1, 3)),
        ('i1', 1, (20, 21, 1, 2)),
    )
    for element, side, expected in long_stem_cases:
        self.assertEqual(
            long_stems.get_flanking_handles(element, side=side), expected)
def test_define_residue_num_iterator(self):
    """Check the residue numbers yielded by ``define_residue_num_iterator``.

    Covers the ``adjacent`` and ``seq_ids`` keywords, on graphs built from
    a dot-bracket string and from a fasta record.
    """
    graph = fgb.BulgeGraph(dotbracket_str='((..((..))((..))))')
    # Both multiloop segments are expected to yield exactly two residues
    # when the adjacent nucleotides are included.
    for multiloop in ('m2', 'm1'):
        residues = list(
            graph.define_residue_num_iterator(multiloop, adjacent=True))
        self.assertEqual(len(residues), 2)
    # The iterator is requested once more but never consumed.
    graph.define_residue_num_iterator('m1', adjacent=True)

    graph = fgb.BulgeGraph()
    graph.from_dotbracket('..((..((...))..))..((..))..')
    plain_cases = (
        ('f1', [1, 2]),
        ('t1', [26, 27]),
        ('s1', [7, 8, 12, 13]),
        ('i0', [5, 6, 14, 15]),
    )
    for element, expected in plain_cases:
        self.assertEqual(
            list(graph.define_residue_num_iterator(element)), expected)

    fa = """>blah
AAAAAAAAAA
((((.)).))
"""
    # The same graph object is re-populated from the fasta record.
    graph.from_fasta(fa, dissolve_length_one_stems=True)
    numbers = list(graph.define_residue_num_iterator('i0', adjacent=True))
    self.assertEqual(numbers, [2, 3, 7, 8, 9])
    identifiers = list(
        graph.define_residue_num_iterator('i0', adjacent=True, seq_ids=True))
    self.assertEqual(
        identifiers,
        [(' ', 2, ' '), (' ', 3, ' '), (' ', 7, ' '), (' ', 8, ' '),
         (' ', 9, ' ')])
def test_from_dotplot(self):
    """Load dot-bracket strings and check the recorded sequence length.

    Uses ``self.dotbracket`` (set up outside this method) and also loads
    a structure made only of unpaired positions, for which nothing is
    asserted beyond the call succeeding.
    """
    bg = fgb.BulgeGraph()
    bg.from_dotbracket(self.dotbracket)
    self.assertEqual(bg.seq_length, len(self.dotbracket))

    bg = fgb.BulgeGraph()
    bg.from_dotbracket('....')
def test_get_any_sides(self):
    """Check ``get_any_sides`` for both argument orders of two element pairs."""
    cases = (
        ('((..((..))..)).((..))', 's0', 'i0', (1, 0)),
        ('((..((..))((..))))', 's1', 'm1', (0, 1)),
    )
    for dotbracket, first, second, expected in cases:
        graph = fgb.BulgeGraph(dotbracket_str=dotbracket)
        self.assertEqual(graph.get_any_sides(first, second), expected)
        # Swapping the arguments swaps the two entries of the result.
        self.assertEqual(graph.get_any_sides(second, first), expected[::-1])
def annotate_structures(input_file, output_file):
    """
    Annotate secondary structure predictions with structural contexts.

    Given dot-bracket strings this function will annotate every character
    as either 'H' (hairpin), 'S' (stem), 'I' (internal loop/bulge) or
    'M' (multi loop). The input file must be a fasta formatted file and
    each sequence and structure must span a single line:

        >header
        CCCCAUAGGGG
        ((((...)))) (-3.3)

    This is the default format of e.g. RNAfold. The output file will
    contain the annotated string:

        >header
        CCCCAUAGGGG
        SSSSHHHSSSS

    Both handles are closed even if parsing or writing raises.

    Parameters
    ----------
    input_file : str
        A fasta file containing secondary structure predictions.
    output_file : str
        A fasta file with secondary structure annotations.
    """
    handle_in = get_handle(input_file, "rt")
    try:
        handle_out = get_handle(output_file, "wt")
        try:
            # NOTE(review): parse_fasta is given "_" and each entry is then
            # split on "_", so it presumably joins the record lines with
            # that character -- confirm against parse_fasta.
            for header, entry in parse_fasta(handle_in, "_"):
                fields = entry.split("_")
                sequence = fields[0]
                # Keep only the first token of the structure line; the
                # example above shows an energy such as "(-3.3)" after it.
                dotbracket = fields[1].split()[0]
                bg = cgb.BulgeGraph()
                bg.from_dotbracket(dotbracket)
                handle_out.write(">{}\n".format(header))
                handle_out.write("{}\n{}\n".format(
                    sequence, bg.to_element_string().upper()))
        finally:
            handle_out.close()
    finally:
        handle_in.close()
def removeGUWobble(seq1, seq2):
    """Return the consensus dot-bracket string with shared GU pairs opened.

    Runs ``RNAalifold`` on the alignment file
    ``dummyClustal_structureCalc2way4wayStoreData_v2_20190416.aln`` (which
    must already exist in the working directory), reads the predicted
    structure back through ``returnDotBracket`` and replaces with '.' both
    positions of every stem base pair for which ``isGU`` is true in
    ``seq1`` and in ``seq2``.

    The function waits for RNAalifold to exit before reading its output
    file, instead of sleeping for a fixed time and hoping it has finished.

    Parameters: ``seq1`` and ``seq2`` are only passed on to ``isGU``.
    Returns: the modified dot-bracket string.
    """
    outputPath = 'dummyAl_structureCalc2way4wayStoreData_v2_20190416.txt'
    # temperature had to be lowered from default of 37C as otherwise some
    # AT rich structures were not folding--note that this does change the
    # folding for some RNAs as well
    command = [
        'RNAalifold', '--noPS', '--noGU', '--temp=20',
        'dummyClustal_structureCalc2way4wayStoreData_v2_20190416.aln',
    ]
    with open(outputPath, 'w') as f:
        processCall = subprocess.Popen(command, stdout=f,
                                       stdin=subprocess.PIPE)
        # communicate() closes stdin and blocks until the process exits,
        # so the output file is complete before it is read below.
        processCall.communicate()

    dotBracket = returnDotBracket(outputPath)
    bg = fgb.BulgeGraph(dotbracket_str=dotBracket)
    listDotBracket = list(dotBracket)
    for eachStem in bg.stem_iterator():
        # stem_bp_iterator returns 1-index based numbering
        for eachBp in bg.stem_bp_iterator(eachStem):
            i1 = eachBp[0] - 1
            i2 = eachBp[1] - 1
            if isGU(i1, i2, seq1) and isGU(i1, i2, seq2):
                listDotBracket[i1] = '.'
                listDotBracket[i2] = '.'
    return ''.join(listDotBracket)
def translateIntoContexts(dotBracketString):
    """Return the upper-cased element string with 'F' and 'T' mapped to 'E'.

    The dot-bracket string is loaded into a ``BulgeGraph`` and its element
    string is upper-cased with the ``str.upper`` method, which exists on
    both Python 2 and Python 3 (the free function ``upper`` does not exist
    on Python 3).  Every 'F' and every 'T' is then replaced by 'E'.
    """
    bg = fgb.BulgeGraph()
    bg.from_dotbracket(dotBracketString)
    rawContextString = bg.to_element_string().upper()
    return rawContextString.replace('F', 'E').replace('T', 'E')
def main():
    """Read a bpseq file named on the command line and print its bulge graph.

    If the file does not start with a digit, everything before the first
    line beginning with ``"1 "`` is discarded before parsing.  Exits with
    status 1 after printing the help text when no file argument is given.
    """
    usage = """
    python bpseq_to_bulge_graph.py secondary_structure.bpseq
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    with open(args[0], 'r') as f:
        text = f.read()

    try:
        # text[:1] rather than text[0]: an empty file then takes the
        # ValueError path below instead of raising IndexError.
        int(text[:1])
    except ValueError:
        # When the marker is absent find() returns -1, so the slice starts
        # at 0 and the text is left unchanged.
        i = text.find("\n1 ")
        text = text[i + 1:]

    bg = fgb.BulgeGraph()
    bg.from_bpseq_str(text)

    # Parenthesised single argument: valid on both Python 2 and Python 3.
    print(bg.to_bg_string())
def main(seq):
    """Fold the sequence read from ``seq`` and print its annotation.

    Builds the W and V matrices, fills them, runs the traceback (which
    writes into the module-level ``structure`` list), converts that list
    to dot-bracket notation and prints the sequence, the dot-bracket
    string, the element annotation and the elapsed time.

    Returns the tuple ``(calcW, calcV)`` of filled matrices.
    """
    # time.clock() was removed in Python 3.8; default_timer exists on both
    # Python 2 and Python 3.
    from timeit import default_timer

    global structure

    start = default_timer()
    s1 = readInSeq(seq)
    length = len(s1)

    structure = [None] * length
    # (length + 2) x (length + 2) matrices; each row is its own list.
    W = [[None] * (length + 2) for _ in range(length + 2)]
    V = [[None] * (length + 2) for _ in range(length + 2)]

    initW, initV = initialize(W, V, length)
    calcW, calcV, _maxScore = fillMatrix(initW, initV, s1, length)
    # traceWpath fills the global 'structure' as a side effect.
    traceWpath(0, length - 1, calcW, calcV)

    # Unpaired -> '.', partner earlier in the sequence -> ')', else '('.
    annot = ''.join(
        '.' if j is None else (')' if j < i else '(')
        for i, j in enumerate(structure))

    # Annotate dot-bracket notation
    bg = fgb.BulgeGraph()
    bg.from_dotbracket(annot)

    print(s1)
    print("The structure in string form: " + annot)
    print("The corresponding annotated notation: " + bg.to_element_string())
    # Two spaces after "in" reproduce the output of the former
    # comma-separated print statement.
    print("\nFinished in  {} seconds".format(default_timer() - start))
    return (calcW, calcV)
def test_get_sides(self):
    """Load the 1ymo bpseq file into a graph, dissolving length-one stems.

    Nothing is asserted; the test passes when loading does not raise.
    """
    with open('test/forgi/data/1ymo.bpseq', 'r') as handle:
        bpseq_text = handle.read()

    graph = fgb.BulgeGraph()
    graph.from_bpseq_str(bpseq_text, dissolve_length_one_stems=True)
def main():
    """Print the length of the longest stem in a dot-bracket structure.

    The structure is read from the file named by the first argument, or
    from standard input when that argument is ``-``; newlines are removed
    before parsing.  Prints -1 when the structure contains no stem.  Exits
    with status 1 after printing the help text when no argument is given.
    """
    usage = """
    ./longest_stem.py dotbracket_file
    """
    num_args = 1
    # Pass the usage text so that print_help() actually shows it.
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    if args[0] == '-':
        raw = sys.stdin.read()
    else:
        # The context manager closes the file once it has been read.
        with open(args[0]) as f:
            raw = f.read()
    brackets = raw.replace('\n', '')

    bg = cgb.BulgeGraph()
    bg.from_dotbracket(brackets)

    biggest_stem = (-1, 'x')
    for s in bg.stem_iterator():
        length = bg.stem_length(s)
        # Strict comparison keeps the first stem among equally long ones.
        if length > biggest_stem[0]:
            biggest_stem = (length, s)

    print(biggest_stem[0])
def test_from_dotplot3(self):
    """Run the integrity check on a graph before and after loading a structure."""
    structure = (
        '(.(.((((((...((((((....((((.((((.(((..(((((((((....)))))))))'
        '..((.......))....)))......))))))))...))))))..)).))))).)..'
        '((((..((((((((((...))))))))).))))).......'
    )
    graph = fgb.BulgeGraph()
    # The freshly constructed graph is checked first ...
    self.check_graph_integrity(graph)
    # ... and again once the structure has been loaded.
    graph.from_dotbracket(structure)
    self.check_graph_integrity(graph)
def test_get_length(self):
    """Check ``get_length`` for stems, a hairpin, multiloops and an interior loop."""
    # Constructed only to show that building this graph does not raise.
    bg = fgb.BulgeGraph(dotbracket_str='(())')

    bg = fgb.BulgeGraph(dotbracket_str='((..))..(((.)))')
    self.assertEqual(bg.get_length('s0'), 2)
    self.assertEqual(bg.get_length('h0'), 2)
    self.assertEqual(bg.get_length('m0'), 2)
    self.assertEqual(bg.get_length('s1'), 3)

    bg = fgb.BulgeGraph(dotbracket_str='(())(())')
    self.assertEqual(bg.get_length('m0'), 0)

    bg = fgb.BulgeGraph(dotbracket_str='(((((((((..(((..((((.(((((((((.....(((((.(((((....((((....))))....))))).....(((((((((.......)))))))))....))))).((........))...)))))))))))))...)))..))....))))))).')
    self.assertEqual(bg.get_length('i4'), 2)
def test_find_multiloop_loops(self):
    """Call ``find_multiloop_loops`` once and reload two more structures.

    Nothing is asserted; the test passes when none of the calls raises.
    """
    graph = fgb.BulgeGraph()
    graph.from_dotbracket('((..((..))..((..))..))')
    graph.find_multiloop_loops()

    # The remaining structures are only loaded into the same graph object.
    remaining = (
        '((..((..((..))..((..))..))..((..))..))',
        '(.(.(.(.).(.).).(.).))',
    )
    for dotbracket in remaining:
        graph.from_dotbracket(dotbracket)
def test_to_bg_string(self):
    """Store the 1y26 fasta record on the test case and load it into a graph.

    Nothing is asserted; the test passes when loading does not raise.
    """
    self.fasta = """>1y26
CGCUUCAUAUAAUCCUAAUGAUAUGGUUUGGGAGUUUCUACCAAGAGCCUUAAACUCUUGAUUAUGAAGUG
(((((((((...((((((.........))))))........((((((.......))))))..)))))))))
"""
    graph = fgb.BulgeGraph()
    graph.from_fasta(self.fasta, dissolve_length_one_stems=True)
def test_stem_length(self):
    """Check ``stem_length`` for stems and for non-stem elements."""
    graph = fgb.BulgeGraph(dotbracket_str='.((..(((..))).))((..))')
    expected_lengths = (
        ('s0', 2),
        ('s1', 3),
        ('m0', 0),
        ('i0', 1),
        ('f1', 1),
    )
    for element, expected in expected_lengths:
        self.assertEqual(graph.stem_length(element), expected)
def test_connection_type(self):
    """Check that ``connection_type`` changes sign when the stems are swapped."""
    graph = fgb.BulgeGraph(dotbracket_str='(.(.).).(.(.))')
    cases = (
        ('m0', ['s0', 's2'], 3),
        ('i0', ['s0', 's1'], 1),
    )
    for loop, stems, expected in cases:
        self.assertEqual(graph.connection_type(loop, stems), expected)
        # The reversed stem order is expected to give the negated value.
        self.assertEqual(
            graph.connection_type(loop, stems[::-1]), -expected)
def test_pairing_partner(self):
    """Check ``pairing_partner`` on a two-base-pair hairpin."""
    # documented
    bg = fgb.BulgeGraph()
    bg.from_dotbracket('((..))')

    self.assertEqual(bg.pairing_partner(1), 6)
    self.assertEqual(bg.pairing_partner(2), 5)
    self.assertEqual(bg.pairing_partner(5), 2)
def test_remove_pseudoknots(self):
    """Check that ``forna.remove_pseudoknots`` returns something other than None."""
    pseudoknot_record = '>hi\nAAAAAAAAAAAAAAAA\n((..[[[..))..]]]'
    graph = fgb.BulgeGraph()
    graph.from_fasta(pseudoknot_record)

    result = forna.remove_pseudoknots(graph)
    self.assertIsNotNone(result)
def get_element_with_id_list_from_dotbracket(dot_bracket_str):
    """Return a list of ``(element, id)`` pairs for a dot-bracket string.

    ``to_element_string(True)`` is split on newlines; the first line is
    taken as the element characters and the second as the id characters,
    and the two are paired position by position.  The pairs are returned
    as a real list on both Python 2 and Python 3, so the result can be
    indexed and iterated more than once.
    """
    graph = cgb.BulgeGraph()
    graph.from_dotbracket(dot_bracket_str)
    lines = graph.to_element_string(True).split('\n')
    elements = lines[0]
    ids = lines[1]
    return list(zip([str(elem) for elem in elements],
                    [str(elem_id) for elem_id in ids]))
def test_are_adjacent_stems(self):
    """Check ``are_adjacent_stems`` with and without counting multiloops."""
    graph = fgb.BulgeGraph(dotbracket_str='((..((..))..))..((..))')

    for first, second in (('s0', 's1'), ('s0', 's2')):
        self.assertTrue(graph.are_adjacent_stems(first, second))

    self.assertFalse(graph.are_adjacent_stems('s1', 's2'))
    # s0 and s2 stop counting as adjacent once multiloops are excluded.
    self.assertFalse(
        graph.are_adjacent_stems('s0', 's2', multiloops_count=False))
def test_dissolve_stem(self):
    '''
    Test to make sure length one stems can be dissolved.

    Also dissolves 's0' twice in a row on a two-hairpin structure and
    runs the integrity check after each call.
    '''
    bg = fgb.BulgeGraph()
    bg.from_dotbracket('((.(..((..))..).))', dissolve_length_one_stems=True)
    self.assertEqual(bg.to_dotbracket_string(), '((....((..))....))')
    self.check_graph_integrity(bg)

    bg = fgb.BulgeGraph(dotbracket_str='((..))..((..))')
    self.assertEqual(bg.to_dotbracket_string(), '((..))..((..))')

    bg.dissolve_stem('s0')
    self.check_graph_integrity(bg)
    self.assertEqual(bg.to_dotbracket_string(), '........((..))')

    bg.dissolve_stem('s0')
    self.check_graph_integrity(bg)
def test_from_fasta(self):
    """Load the 3V2F fasta file and call ``stem_length`` on every stem.

    Nothing is asserted; the test passes when none of the calls raises.
    """
    graph = fgb.BulgeGraph()

    with open('test/forgi/threedee/data/3V2F.fa', 'r') as handle:
        fasta_text = handle.read()
    graph.from_fasta(fasta_text, dissolve_length_one_stems=False)

    for stem in graph.stem_iterator():
        graph.stem_length(stem)
def test_create_mst(self):
    '''
    Test the creation of a minimum spanning tree from the graph.

    For each structure the listed multiloop segments must be in the
    result of get_mst(); traverse_graph() is then called and its result
    is discarded.
    '''
    cases = (
        ('....((((((...((((((.....(((.((((.(((..(((((((((....)))))))))..((.......))....)))......)))))))....))))))..)).)))).....((((...(((((((((...)))))))))..)))).......',
         ('m0',)),
        ('..((.(())..(())...)).',
         ('m0', 'm2')),
    )
    for dotbracket, expected_members in cases:
        graph = fgb.BulgeGraph(dotbracket_str=dotbracket)
        spanning_tree = graph.get_mst()
        for member in expected_members:
            self.assertTrue(member in spanning_tree)
        graph.traverse_graph()
def returnElementNotation(sequence):
    """Fold ``sequence`` with RNAfold and return its element string.

    The sequence is sent to ``RNAfold --noPS --noGU`` on standard input
    and the program's output goes to
    ``dummy_structureCalc2way4way_v1_20190404.txt``.  That file is parsed
    with ``returnDotBracket`` and the resulting dot-bracket string is
    converted with ``BulgeGraph.to_element_string``.

    ``sequence`` may be text or bytes; text is encoded before being sent,
    because the pipe is opened in binary mode.
    """
    outputPath = 'dummy_structureCalc2way4way_v1_20190404.txt'
    if not isinstance(sequence, bytes):
        # On Python 3 a str passed to a binary pipe raises TypeError.
        sequence = sequence.encode()

    with open(outputPath, 'w') as f:
        processCall = subprocess.Popen(['RNAfold', '--noPS', '--noGU'],
                                       stdout=f, stdin=subprocess.PIPE)
        # communicate() sends the input, closes stdin and waits for exit.
        processCall.communicate(input=sequence)

    dotBracket = returnDotBracket(outputPath)
    bg = fgb.BulgeGraph(dotbracket_str=dotBracket)
    return bg.to_element_string()
def stemThere(dotBracket):
    """Return True if the dot-bracket structure contains at least one stem.

    The structure is loaded into a ``BulgeGraph`` and the answer is True
    as soon as ``stem_iterator`` yields its first element.
    """
    bg = fgb.BulgeGraph(dotbracket_str=dotBracket)
    # any() stops at the first stem instead of counting all of them.
    return any(True for _ in bg.stem_iterator())
def test_random_subgraph(self):
    """Check ``random_subgraph`` and rebuilding a graph from its result.

    The subgraph must not contain duplicates, and the graph built by
    ``bg_from_subgraph`` must define exactly the subgraph's elements.
    """
    bg = fgb.BulgeGraph(dotbracket_str='(.(.).).(.(.))..((..((..((..))..))..))')

    sg = bg.random_subgraph()

    # check to make sure there are no duplicate elements
    self.assertEqual(len(sg), len(set(sg)))

    nbg = fgb.bg_from_subgraph(bg, sg)
    # assertEqual reports the differing elements when the sets disagree.
    self.assertEqual(set(nbg.defines.keys()), set(sg))
def test_get_define_seq_str(self):
    """Check ``get_define_seq_str`` with and without its second argument.

    In the cases below, passing ``True`` as the second argument yields one
    extra nucleotide on each available side of every returned segment.
    """
    bg = fgb.BulgeGraph(dotbracket_str="(.(.))")
    bg.seq = 'acgauu'
    self.assertEqual(bg.get_define_seq_str("i0"), ['c', ''])

    bg = fgb.BulgeGraph(dotbracket_str="(.(.))")
    bg.seq = 'acgauu'
    self.assertEqual(bg.get_define_seq_str("i0", True), ['acg', 'uu'])

    bg = fgb.BulgeGraph(dotbracket_str='(.(.).(.).)')
    bg.seq = 'acguaaccggu'
    self.assertEqual(bg.get_define_seq_str('m0'), ['c'])
    self.assertEqual(bg.get_define_seq_str('m0', True), ['acg'])

    self.assertEqual(bg.get_define_seq_str('m1'), ['g'])
    self.assertEqual(bg.get_define_seq_str('m1', True), ['ggu'])

    self.assertEqual(bg.get_define_seq_str('m2'), ['a'])
    self.assertEqual(bg.get_define_seq_str('m2', True), ['aac'])
def json_to_json(rna_json_str):
    '''
    Convert an RNA json string to fasta, then to a bulge graph per
    molecule, and then back to a single json-style dictionary.

    The purpose is to maintain the integrity of the molecule and to
    maintain the positions of all the hidden nodes after modification.

    The input string is also written to the file 'test.out' in the
    current working directory on every call.

    Returns a dictionary with the keys 'nodes' and 'links'.
    '''
    with open('test.out', 'w') as f:
        f.write(rna_json_str)

    (all_fastas, all_xs, all_ys, all_uids,
     different_tree_links) = json_to_fasta(rna_json_str)

    big_json = {'nodes': [], 'links': []}
    coords_to_index = dict()

    for fasta_text, xs, ys, uids in zip(all_fastas, all_xs, all_ys, all_uids):
        bg = fgb.BulgeGraph()
        bg.from_fasta(fasta_text)
        new_json = bg_to_json(bg, xs=xs, ys=ys, uids=uids)

        # Nodes of this molecule are appended after those already
        # collected, so every index local to it shifts by this amount.
        offset = len(big_json['nodes'])

        for link in new_json['links']:
            # the links have to have their node pointers adjusted
            link['source'] += offset
            link['target'] += offset
            big_json['links'].append(link)

        # Map the coordinates of each nucleotide node to its index in the
        # combined node list; links between different molecules are
        # stored by the coordinates of the nodes they join.
        for position, node in enumerate(new_json['nodes'], offset):
            if node['node_type'] == 'nucleotide':
                coords_to_index[(node['x'], node['y'])] = position

        big_json['nodes'].extend(new_json['nodes'])

    # add the links that are between different molecules
    # NOTE(review): fud.pv is passed variable *names* as strings and
    # presumably looks them up in this frame -- keep the locals dtl, n1
    # and n2 named exactly as in those strings.
    for dtl in different_tree_links:
        fud.pv('dtl')
        n1 = coords_to_index[(dtl[0])]
        n2 = coords_to_index[(dtl[1])]
        fud.pv('n1,n2')
        big_json['links'].append({
            'source': n1,
            'target': n2,
            'link_type': 'basepair',
            'value': 1,
        })

    return big_json