Ejemplo n.º 1
0
    def test_get_bulge_dimensions(self):
        """Check get_bulge_dimensions() for interior loops and multiloops."""
        # One unpaired nucleotide on the first strand only.
        bg = fgb.BulgeGraph(dotbracket_str='(.(.))')
        bd = bg.get_bulge_dimensions('i0')
        self.assertEqual(bd, (1, 0))

        # One unpaired nucleotide on the second strand only.
        bg = fgb.BulgeGraph(dotbracket_str='((.).)')
        bd = bg.get_bulge_dimensions('i0')
        self.assertEqual(bd, (0, 1))

        # Smoke check: the call only has to succeed, no value is asserted.
        bg = fgb.BulgeGraph(dotbracket_str='().()')
        bd = bg.get_bulge_dimensions('m0')

        # Multiloop segments are expected to report 1000 as second dimension.
        dotbracket = '(.(.).(.).(.))'
        bg = fgb.BulgeGraph(dotbracket_str=dotbracket)
        bd = bg.get_bulge_dimensions('m0')
        self.assertEqual(bd, (1, 1000))
        bd = bg.get_bulge_dimensions('m1')
        self.assertEqual(bd, (0, 1000))
        bd = bg.get_bulge_dimensions('m2')
        self.assertEqual(bd, (1, 1000))
        bd = bg.get_bulge_dimensions('m3')
        self.assertEqual(bd, (1, 1000))

        bg = fgb.BulgeGraph(dotbracket_str='((..((..))....))..((..((..))...))')

        bd = bg.get_bulge_dimensions('i0')
        self.assertEqual(bd, (2, 4))
        bd = bg.get_bulge_dimensions('i1')
        self.assertEqual(bd, (2, 3))
Ejemplo n.º 2
0
    def test_get_flanking_handles(self):
        """Verify get_flanking_handles() on hairpins, multiloops and interior loops."""
        hairpin_graph = fgb.BulgeGraph(dotbracket_str='((..))')
        handles = hairpin_graph.get_flanking_handles('h0')
        self.assertEqual(handles, (2, 5, 1, 4))

        multiloop_graph = fgb.BulgeGraph(dotbracket_str='((.((.)).(.).))')
        multiloop_cases = (
            ('m0', (2, 4, 1, 3)),
            ('m2', (8, 10, 1, 3)),
            ('m1', (12, 14, 0, 2)),
        )
        for element, expected in multiloop_cases:
            self.assertEqual(multiloop_graph.get_flanking_handles(element),
                             expected)

        # Interior loops are queried once per strand through ``side``.
        interior_cases = (
            ('(.(.).).(.(.))', (
                ('i0', 0, (1, 3, 0, 2)),
                ('i0', 1, (5, 7, 0, 2)),
                ('i1', 0, (9, 11, 0, 2)),
                ('i1', 1, (13, 14, 0, 1)),
            )),
            # 1234567890123456789012  <- 1-based positions of the string below
            ('((.((.)).)).((.((.))))', (
                ('i0', 0, (2, 4, 1, 3)),
                ('i0', 1, (8, 10, 1, 3)),
                ('i1', 0, (14, 16, 1, 3)),
                ('i1', 1, (20, 21, 1, 2)),
            )),
        )
        for structure, expectations in interior_cases:
            graph = fgb.BulgeGraph(dotbracket_str=structure)
            for element, strand, expected in expectations:
                self.assertEqual(
                    graph.get_flanking_handles(element, side=strand),
                    expected)
Ejemplo n.º 3
0
    def test_define_residue_num_iterator(self):
        """Exercise define_residue_num_iterator() on several element types."""
        graph = fgb.BulgeGraph(dotbracket_str='((..((..))((..))))')
        # Both multiloop segments checked here must yield exactly two
        # residues once the adjacent nucleotides are included.
        for multiloop in ('m2', 'm1'):
            residues = list(
                graph.define_residue_num_iterator(multiloop, adjacent=True))
            self.assertEqual(len(residues), 2)

        # Requesting the iterator again (without consuming it) must not raise.
        graph.define_residue_num_iterator('m1', adjacent=True)

        graph = fgb.BulgeGraph()
        graph.from_dotbracket('..((..((...))..))..((..))..')

        expected_by_element = (
            ('f1', [1, 2]),
            ('t1', [26, 27]),
            ('s1', [7, 8, 12, 13]),
            ('i0', [5, 6, 14, 15]),
        )
        for element, expected in expected_by_element:
            self.assertEqual(
                list(graph.define_residue_num_iterator(element)), expected)

        # The same graph object is reloaded from a fasta record.
        fasta_text = ">blah\n" "AAAAAAAAAA\n" "((((.)).))\n"
        graph.from_fasta(fasta_text, dissolve_length_one_stems=True)

        adjacent_numbers = [2, 3, 7, 8, 9]
        self.assertEqual(
            list(graph.define_residue_num_iterator('i0', adjacent=True)),
            adjacent_numbers)

        # With seq_ids=True every residue number comes wrapped as
        # (' ', number, ' ').
        self.assertEqual(
            list(graph.define_residue_num_iterator('i0', adjacent=True,
                                                   seq_ids=True)),
            [(' ', number, ' ') for number in adjacent_numbers])
Ejemplo n.º 4
0
    def test_from_dotplot(self):
        """Load dot-bracket strings and check the recorded sequence length."""
        bg = fgb.BulgeGraph()
        bg.from_dotbracket(self.dotbracket)

        self.assertEqual(bg.seq_length, len(self.dotbracket))

        # A structure without any base pair only has to load without raising.
        bg = fgb.BulgeGraph()
        bg.from_dotbracket('....')
Ejemplo n.º 5
0
    def test_get_any_sides(self):
        """Check get_any_sides() in both argument orders for two structures."""
        cases = (
            # (structure, first element, second element, expected sides)
            ('((..((..))..)).((..))', 's0', 'i0', (1, 0)),
            ('((..((..))((..))))', 's1', 'm1', (0, 1)),
        )
        for structure, first, second, sides in cases:
            graph = fgb.BulgeGraph(dotbracket_str=structure)
            self.assertEqual(graph.get_any_sides(first, second), sides)
            # Swapping the arguments is expected to swap the returned pair.
            self.assertEqual(graph.get_any_sides(second, first), sides[::-1])
Ejemplo n.º 6
0
def annotate_structures(input_file, output_file):
    """ Annotate secondary structure predictions with structural contexts.

    Given dot-bracket strings this function will annotate every character
    as either 'H' (hairpin), 'S' (stem), 'I' (internal loop/bulge) or 'M' (multi loop). The input file
    must be a fasta formatted file and each sequence and structure must span a single line:

    '>header
    'CCCCAUAGGGG
    '((((...)))) (-3.3)

    This is the default format of e.g. RNAfold. The output file will contain the annotated string:

    '>header
    'CCCCAUAGGGG
    'SSSSHHHSSSS

    Parameters
    ----------
    input_file : str
        A fasta file containing secondary structure predictions.

    output_file : str
        A fasta file with secondary structure annotations.
    """
    handle_in = get_handle(input_file, "rt")
    try:
        handle_out = get_handle(output_file, "wt")
        try:
            # parse_fasta is asked to join the lines of a record with "_",
            # so the split yields [sequence, structure line].
            for header, entry in parse_fasta(handle_in, "_"):
                sequence, structure_line = entry.split("_")[:2]
                bg = cgb.BulgeGraph()
                # Keep only the dot-bracket token; anything after the first
                # whitespace (e.g. the energy "(-3.3)") is dropped.
                bg.from_dotbracket(structure_line.split()[0])
                handle_out.write(">{}\n".format(header))
                handle_out.write("{}\n{}\n".format(
                    sequence, bg.to_element_string().upper()))
        finally:
            # Close the handles even when parsing or writing fails.
            handle_out.close()
    finally:
        handle_in.close()
def removeGUWobble(seq1, seq2):
    """Fold the dummy alignment with RNAalifold and unpair shared G-U pairs.

    RNAalifold is run on the fixed alignment file
    'dummyClustal_structureCalc2way4wayStoreData_v2_20190416.aln' and its
    output is written to a fixed text file, from which returnDotBracket()
    extracts the structure. Every stem base pair for which isGU() is true in
    both ``seq1`` and ``seq2`` is then replaced by two dots.

    Returns the modified dot-bracket string.
    """
    outputName = 'dummyAl_structureCalc2way4wayStoreData_v2_20190416.txt'
    with open(outputName, 'w') as f:
        # temperature had to be lowered from default of 37C as otherwise some
        # AT rich structures were not folding--note that this does change the
        # folding for some RNAs as well
        processCall = subprocess.Popen(
            'RNAalifold --noPS --noGU --temp=20 dummyClustal_structureCalc2way4wayStoreData_v2_20190416.aln',
            shell=True,
            stdout=f,
            stdin=subprocess.PIPE)
        # Close stdin and block until RNAalifold has exited, so the output
        # file is complete before it is read (replaces a fixed one-second
        # sleep that only usually was long enough).
        processCall.communicate()
    dotBracket = returnDotBracket(outputName)
    bg = fgb.BulgeGraph(dotbracket_str=dotBracket)
    listDotBracket = list(dotBracket)
    for eachStem in bg.stem_iterator():
        # stem_bp_iterator returns 1-index based numbering
        for eachBp in bg.stem_bp_iterator(eachStem):
            i1 = eachBp[0] - 1
            i2 = eachBp[1] - 1
            if isGU(i1, i2, seq1) and isGU(i1, i2, seq2):
                listDotBracket[i1] = '.'
                listDotBracket[i2] = '.'
    return ''.join(listDotBracket)
Ejemplo n.º 8
0
def translateIntoContexts(dotBracketString):
    """Return the upper-cased element string with 'F' and 'T' mapped to 'E'.

    The dot-bracket string is loaded into a BulgeGraph, its element string is
    upper-cased, and every 'F' and 'T' character is replaced by 'E'.
    """
    bg = fgb.BulgeGraph()
    bg.from_dotbracket(dotBracketString)
    # Use the str method rather than a free ``upper`` function, which is not
    # a builtin and would have to be imported from elsewhere.
    rawContextString = bg.to_element_string().upper()
    return rawContextString.replace('F', 'E').replace('T', 'E')
Ejemplo n.º 9
0
def main():
    """Print the bulge-graph string for the bpseq file given on the command line.

    Exits with status 1 after printing the help text when no file argument is
    supplied.
    """
    usage = """
    python bpseq_to_bulge_graph.py secondary_structure.bpseq
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    # Only the read needs the file open; parse after it has been closed.
    with open(args[0], 'r') as f:
        text = f.read()

    # When the first character is not a digit, skip ahead to the line that
    # starts with "1 " before handing the text to the parser.
    try:
        int(text[0])
    except ValueError:
        i = text.find("\n1 ")
        text = text[i + 1:]

    bg = fgb.BulgeGraph()
    bg.from_bpseq_str(text)
    # Call form of print: valid on both Python 2 and Python 3.
    print(bg.to_bg_string())
Ejemplo n.º 10
0
def main(seq):
    """Fold the sequence read from ``seq`` and print its annotated structure.

    The W and V matrices are initialised and filled, the structure is traced
    back into the module-level ``structure`` list, and the sequence, its
    dot-bracket string, the BulgeGraph element string and the elapsed time
    are printed.

    Returns the filled ``(W, V)`` matrices as a tuple.
    """
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()

    s1 = readInSeq(seq)
    length = len(s1)

    # NOTE(review): traceWpath() presumably fills this module-level list with
    # pairing partners -- confirm against its definition.
    global structure
    structure = [None] * length
    size = length + 2
    W = [[None] * size for _ in range(size)]
    V = [[None] * size for _ in range(size)]

    initW, initV = initialize(W, V, length)
    calcW, calcV, maxScore = fillMatrix(initW, initV, s1, length)

    traceWpath(0, length - 1, calcW, calcV)

    # Unpaired -> '.', partner at a lower index -> ')', otherwise '('.
    annot = ''.join(['.' if j is None else ')' if j < i else '('
                     for i, j in enumerate(structure)])

    # Annotate dot-bracket notation
    bg = fgb.BulgeGraph()
    bg.from_dotbracket(annot)

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(s1)
    print("The structure in string form:         " + annot)
    print("The corresponding annotated notation: " + bg.to_element_string())
    print("\nFinished in  {} seconds".format(timer() - start))

    return (calcW, calcV)
Ejemplo n.º 11
0
 def test_get_sides(self):
     """Load the 1ymo bpseq fixture while dissolving length-one stems."""
     with open('test/forgi/data/1ymo.bpseq', 'r') as handle:
         bpseq_text = handle.read()

     graph = fgb.BulgeGraph()
     graph.from_bpseq_str(bpseq_text, dissolve_length_one_stems=True)
Ejemplo n.º 12
0
def main():
    """Print the length of the longest stem in a dot-bracket file.

    The single positional argument names the file to read; ``-`` reads from
    standard input. Newlines are removed before parsing. Prints -1 when the
    structure contains no stems. Exits with status 1 after printing the help
    text when no argument is supplied.
    """
    usage = """
    ./longest_stem.py dotbracket_file
    """
    num_args = 1
    # Hand the usage text to the parser so print_help() actually shows it.
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    if args[0] == '-':
        brackets = sys.stdin.read()
    else:
        # Context manager so the file is closed once it has been read.
        with open(args[0]) as f:
            brackets = f.read()
    brackets = brackets.replace('\n', '')

    bg = cgb.BulgeGraph()
    bg.from_dotbracket(brackets)

    biggest_stem = (-1, 'x')

    for s in bg.stem_iterator():
        length = bg.stem_length(s)
        if length > biggest_stem[0]:
            biggest_stem = (length, s)

    print(biggest_stem[0])
Ejemplo n.º 13
0
    def test_from_dotplot3(self):
        """Run the integrity check on an empty graph and after loading a long structure."""
        graph = fgb.BulgeGraph()
        # An empty graph must already pass the integrity check.
        self.check_graph_integrity(graph)

        structure = '(.(.((((((...((((((....((((.((((.(((..(((((((((....)))))))))..((.......))....)))......))))))))...))))))..)).))))).)..((((..((((((((((...))))))))).))))).......'
        graph.from_dotbracket(structure)
        self.check_graph_integrity(graph)
Ejemplo n.º 14
0
    def test_get_length(self):
        """Check get_length() for stems, hairpins, multiloops and interior loops."""
        # Construction only: a hairpin-less structure has to be accepted.
        bg = fgb.BulgeGraph(dotbracket_str='(())')

        bg = fgb.BulgeGraph(dotbracket_str='((..))..(((.)))')

        self.assertEqual(bg.get_length('s0'), 2)
        self.assertEqual(bg.get_length('h0'), 2)
        self.assertEqual(bg.get_length('m0'), 2)
        self.assertEqual(bg.get_length('s1'), 3)

        # Two stems with nothing between them: the multiloop has length 0.
        bg = fgb.BulgeGraph(dotbracket_str='(())(())')
        self.assertEqual(bg.get_length('m0'), 0)

        bg = fgb.BulgeGraph(dotbracket_str='(((((((((..(((..((((.(((((((((.....(((((.(((((....((((....))))....))))).....(((((((((.......)))))))))....))))).((........))...)))))))))))))...)))..))....))))))).')

        self.assertEqual(bg.get_length('i4'), 2)
Ejemplo n.º 15
0
 def test_find_multiloop_loops(self):
     """Call find_multiloop_loops() once and reload two further structures."""
     graph = fgb.BulgeGraph()

     # Only the first structure is passed through find_multiloop_loops().
     graph.from_dotbracket('((..((..))..((..))..))')
     graph.find_multiloop_loops()

     # The remaining structures are only loaded into the same graph object.
     for structure in ('((..((..((..))..((..))..))..((..))..))',
                       '(.(.(.(.).(.).).(.).))'):
         graph.from_dotbracket(structure)
Ejemplo n.º 16
0
    def test_to_bg_string(self):
        """Load the 1y26 fasta record while dissolving length-one stems."""
        # Header, sequence and structure, each terminated by a newline.
        self.fasta = (
            ">1y26\n"
            "CGCUUCAUAUAAUCCUAAUGAUAUGGUUUGGGAGUUUCUACCAAGAGCCUUAAACUCUUGAUUAUGAAGUG\n"
            "(((((((((...((((((.........))))))........((((((.......))))))..)))))))))\n"
        )
        graph = fgb.BulgeGraph()
        graph.from_fasta(self.fasta, dissolve_length_one_stems=True)
Ejemplo n.º 17
0
    def test_stem_length(self):
        """Check stem_length() for stems and for non-stem elements."""
        graph = fgb.BulgeGraph(dotbracket_str='.((..(((..))).))((..))')

        expected_lengths = (
            ('s0', 2),
            ('s1', 3),
            ('m0', 0),
            ('i0', 1),
            ('f1', 1),
        )
        for element, length in expected_lengths:
            self.assertEqual(graph.stem_length(element), length)
Ejemplo n.º 18
0
    def test_connection_type(self):
        """Check connection_type() for a multiloop and an interior loop."""
        graph = fgb.BulgeGraph(dotbracket_str='(.(.).).(.(.))')

        cases = (
            # (element, connected stems, expected connection type)
            ('m0', ['s0', 's2'], 3),
            ('m0', ['s2', 's0'], -3),
            ('i0', ['s0', 's1'], 1),
            ('i0', ['s1', 's0'], -1),
        )
        for element, stems, expected in cases:
            self.assertEqual(graph.connection_type(element, stems), expected)
Ejemplo n.º 19
0
    def test_pairing_partner(self):
        """Check pairing_partner() for the base pairs of '((..))'."""
        # documented
        bg = fgb.BulgeGraph()
        bg.from_dotbracket('((..))')

        # Positions are 1-based: 1 pairs with 6 and 2 pairs with 5.
        self.assertEqual(bg.pairing_partner(1), 6)
        self.assertEqual(bg.pairing_partner(2), 5)
        self.assertEqual(bg.pairing_partner(5), 2)
Ejemplo n.º 20
0
    def test_remove_pseudoknots(self):
        """remove_pseudoknots() must return something for a pseudoknotted fasta."""
        # The '[[[ ... ]]]' pairs cross the '(( ... ))' pairs.
        pseudoknot_fasta = '>hi\nAAAAAAAAAAAAAAAA\n((..[[[..))..]]]'

        graph = fgb.BulgeGraph()
        graph.from_fasta(pseudoknot_fasta)

        self.assertIsNotNone(forna.remove_pseudoknots(graph))
def get_element_with_id_list_from_dotbracket(dot_bracket_str):
    """Pair each element character with the id character at the same position.

    The structure is loaded into a BulgeGraph and to_element_string(True) is
    split on newlines; the first line supplies the element characters and the
    second line the id characters. The two lines are zipped character by
    character, each converted with str().
    """
    bulge_graph = cgb.BulgeGraph()
    bulge_graph.from_dotbracket(dot_bracket_str)
    lines = bulge_graph.to_element_string(True).split('\n')
    element_line = lines[0]
    id_line = lines[1]
    return zip(map(str, element_line), map(str, id_line))
Ejemplo n.º 22
0
    def test_are_adjacent_stems(self):
        """Check are_adjacent_stems() with and without multiloops counted."""
        graph = fgb.BulgeGraph(dotbracket_str='((..((..))..))..((..))')

        for neighbour in ('s1', 's2'):
            self.assertTrue(graph.are_adjacent_stems('s0', neighbour))
        self.assertFalse(graph.are_adjacent_stems('s1', 's2'))

        # s0 and s2 stop being adjacent once multiloops are not counted.
        adjacent_without_multiloops = graph.are_adjacent_stems(
            's0', 's2', multiloops_count=False)
        self.assertFalse(adjacent_without_multiloops)
Ejemplo n.º 23
0
    def test_dissolve_stem(self):
        '''
        Test to make sure length one stems can be dissolved.
        '''
        bg = fgb.BulgeGraph()
        bg.from_dotbracket('((.(..((..))..).))', dissolve_length_one_stems=True)
        # The single-pair stem at positions 4/15 is turned into dots.
        self.assertEqual(bg.to_dotbracket_string(), '((....((..))....))')
        self.check_graph_integrity(bg)

        bg = fgb.BulgeGraph(dotbracket_str='((..))..((..))')
        self.assertEqual(bg.to_dotbracket_string(), '((..))..((..))')
        bg.dissolve_stem('s0')
        self.check_graph_integrity(bg)

        self.assertEqual(bg.to_dotbracket_string(), '........((..))')

        # Dissolving 's0' a second time must leave a consistent graph.
        bg.dissolve_stem('s0')
        self.check_graph_integrity(bg)
Ejemplo n.º 24
0
    def test_from_fasta(self):
        """Load the 3V2F fasta fixture and query the length of every stem."""
        graph = fgb.BulgeGraph()

        with open('test/forgi/threedee/data/3V2F.fa', 'r') as handle:
            fasta_text = handle.read()
        graph.from_fasta(fasta_text, dissolve_length_one_stems=False)

        # No values are asserted: each call only has to succeed.
        for stem in graph.stem_iterator():
            graph.stem_length(stem)
Ejemplo n.º 25
0
    def test_create_mst(self):
        """Build the minimum spanning tree for two structures and traverse the graph."""
        long_structure = '....((((((...((((((.....(((.((((.(((..(((((((((....)))))))))..((.......))....)))......)))))))....))))))..)).)))).....((((...(((((((((...)))))))))..)))).......'
        graph = fgb.BulgeGraph(dotbracket_str=long_structure)
        spanning_tree = graph.get_mst()
        self.assertIn("m0", spanning_tree)
        # The traversal result is not inspected; the call only has to succeed.
        graph.traverse_graph()

        short_structure = '..((.(())..(())...)).'
        graph = fgb.BulgeGraph(dotbracket_str=short_structure)
        spanning_tree = graph.get_mst()

        for multiloop in ('m0', 'm2'):
            self.assertIn(multiloop, spanning_tree)

        graph.traverse_graph()
Ejemplo n.º 26
0
def returnElementNotation(sequence):
    """Fold ``sequence`` with RNAfold and return its BulgeGraph element string.

    RNAfold (``--noPS --noGU``) receives the sequence on stdin and its output
    is written to a fixed text file, from which returnDotBracket() extracts
    the dot-bracket structure.

    ``sequence`` may be text or bytes.
    """
    # The stdin pipe is binary, so text has to be encoded before it is sent;
    # passing str straight through raises TypeError on Python 3.
    if not isinstance(sequence, bytes):
        sequence = sequence.encode()

    outputName = 'dummy_structureCalc2way4way_v1_20190404.txt'
    with open(outputName, 'w') as f:
        processCall = subprocess.Popen(['RNAfold', '--noPS', '--noGU'],
                                       stdout=f,
                                       stdin=subprocess.PIPE)
        # communicate() sends the input, closes stdin and waits for exit.
        processCall.communicate(input=sequence)
    dotBracket = returnDotBracket(outputName)
    bg = fgb.BulgeGraph(dotbracket_str=dotBracket)
    return bg.to_element_string()
def stemThere(dotBracket):
    """Return True when the dot-bracket structure contains at least one stem."""
    bg = fgb.BulgeGraph(dotbracket_str=dotBracket)
    # Stop at the first stem instead of counting all of them.
    return any(True for _ in bg.stem_iterator())
Ejemplo n.º 28
0
    def test_random_subgraph(self):
        """A random subgraph has unique elements and rebuilds into a matching graph."""
        bg = fgb.BulgeGraph(dotbracket_str='(.(.).).(.(.))..((..((..((..))..))..))')

        sg = bg.random_subgraph()

        # check to make sure there are no duplicate elements
        self.assertEqual(len(sg), len(set(sg)))

        # The graph built from the subgraph must define exactly its elements;
        # assertEqual shows the differing members when this fails.
        nbg = fgb.bg_from_subgraph(bg, sg)
        self.assertEqual(set(nbg.defines.keys()), set(sg))
Ejemplo n.º 29
0
    def test_get_define_seq_str(self):
        """Check get_define_seq_str() with and without the adjacent nucleotides."""
        bg = fgb.BulgeGraph(dotbracket_str="(.(.))")
        bg.seq = 'acgauu'
        # One entry per strand; the second strand of i0 has no nucleotides.
        self.assertEqual(bg.get_define_seq_str("i0"), ['c', ''])

        bg = fgb.BulgeGraph(dotbracket_str="(.(.))")
        bg.seq = 'acgauu'
        # Passing True as second argument adds the flanking nucleotides.
        self.assertEqual(bg.get_define_seq_str("i0", True), ['acg', 'uu'])

        bg = fgb.BulgeGraph(dotbracket_str='(.(.).(.).)')
        bg.seq = 'acguaaccggu'
        self.assertEqual(bg.get_define_seq_str('m0'), ['c'])
        self.assertEqual(bg.get_define_seq_str('m0', True), ['acg'])

        self.assertEqual(bg.get_define_seq_str('m1'), ['g'])
        self.assertEqual(bg.get_define_seq_str('m1', True), ['ggu'])

        self.assertEqual(bg.get_define_seq_str('m2'), ['a'])
        self.assertEqual(bg.get_define_seq_str('m2', True), ['aac'])
Ejemplo n.º 30
0
def json_to_json(rna_json_str):
    """
    Round-trip an RNA json string through fasta and BulgeGraph back to json.

    The input is written to 'test.out', split into per-molecule fasta texts
    by json_to_fasta(), and each molecule is rebuilt with bg_to_json() using
    its original coordinates and uids. The per-molecule results are merged
    into one dictionary with 'nodes' and 'links' lists, and the links between
    different molecules are appended at the end.

    The purpose is to maintain the integrity of the molecule and to keep the
    positions of all the hidden nodes after modification.
    """
    with open('test.out', 'w') as out_file:
        out_file.write(rna_json_str)

    converted = json_to_fasta(rna_json_str)
    all_fastas, all_xs, all_ys, all_uids, different_tree_links = converted

    merged_nodes = []
    merged_links = []
    big_json = {'nodes': merged_nodes, 'links': merged_links}

    # Maps the (x, y) coordinates of a nucleotide node to its index in the
    # merged node list; links between molecules are stored by coordinates.
    coords_to_index = {}

    for fasta_text, xs, ys, uids in zip(all_fastas, all_xs, all_ys, all_uids):
        graph = fgb.BulgeGraph()
        graph.from_fasta(fasta_text)
        molecule_json = bg_to_json(graph, xs=xs, ys=ys, uids=uids)

        # Nodes of this molecule are appended after the ones already merged,
        # so every index it refers to is shifted by the current node count.
        offset = len(merged_nodes)

        for link in molecule_json['links']:
            link['source'] += offset
            link['target'] += offset
            merged_links.append(link)

        for position, node in enumerate(molecule_json['nodes']):
            if node['node_type'] == 'nucleotide':
                coords_to_index[(node['x'], node['y'])] = position + offset

        merged_nodes.extend(molecule_json['nodes'])

    # Add the links that connect different molecules. The names dtl, n1 and
    # n2 must stay as they are: fud.pv() is handed them as strings.
    for dtl in different_tree_links:
        fud.pv('dtl')
        n1 = coords_to_index[dtl[0]]
        n2 = coords_to_index[dtl[1]]

        fud.pv('n1,n2')
        merged_links.append({
            'source': n1,
            'target': n2,
            'link_type': 'basepair',
            'value': 1
        })

    return big_json