예제 #1
0
파일: rnasubopt.py 프로젝트: bgruening/EDeN
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the RNAsubopt structures of one sequence.

    Parameters
    ----------
    header : value stored under the ``'id'`` graph attribute.
    sequence : sequence passed to ``rnasubopt_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    options : keyword options. Read here: ``energy_range`` (default 10),
        ``max_num`` (default 3), ``max_num_subopts`` (default 100) and
        ``split_components`` (default False). The whole set is also
        forwarded to ``seq_to_networkx`` when a fallback is needed.

    Yields
    ------
    With ``split_components`` one graph per structure returned by the
    wrapper; otherwise a single graph that is the disjoint union of all
    structure graphs. A graph with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)
    structures, energies = rnasubopt_wrapper(
        sequence,
        energy_range=energy_range,
        max_num=max_num,
        max_num_subopts=max_num_subopts)

    if not split_components:
        # Single-graph mode: accumulate every structure as a component.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (
            energy_range, max_num)
        merged.graph['sequence'] = sequence
        for dotbracket in structures:
            component = sequence_dotbracket_to_graph(seq_info=sequence,
                                                     seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    # Split mode: one graph per (structure, energy) pair.
    for dotbracket, energy in zip(structures, energies):
        candidate = sequence_dotbracket_to_graph(seq_info=sequence,
                                                 seq_struct=dotbracket)
        info = 'RNAsubopt energy=%s max_num=%s' % (energy, max_num)
        if candidate.number_of_nodes() >= 2:
            candidate.graph['info'] = info
        else:
            # The fallback graph does not receive the 'info' attribute here.
            candidate = seq_to_networkx(header, sequence, **options)
        candidate.graph['id'] = header
        candidate.graph['sequence'] = sequence
        candidate.graph['structure'] = dotbracket
        yield candidate
예제 #2
0
 def _seq_to_eden(self, header, sequence, struct, energy):
     """Return the annotated graph for one sequence/structure pair.

     The graph comes from ``sequence_dotbracket_to_graph``; when that graph
     has fewer than two nodes, ``seq_to_networkx(header, sequence)`` is used
     instead. The ``'id'``, ``'info'``, ``'energy'`` and ``'sequence'``
     graph attributes are then set on whichever graph was chosen.
     """
     folded = sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=struct)
     if folded.number_of_nodes() >= 2:
         result = folded
     else:
         result = seq_to_networkx(header, sequence)
     result.graph.update(
         id=header,
         info='muscle+RNAalifold energy=%.3f' % (energy),
         energy=energy,
         sequence=sequence)
     return result
예제 #3
0
def rnafold_to_eden(iterable=None, **options):
    """Yield one graph per ``(header, seq)`` pair, falling back on error.

    Parameters
    ----------
    iterable : iterable of ``(header, seq)`` pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The result of ``string_to_networkx(header, seq, **options)``, or the
    result of ``seq_to_networkx(header, seq, **options)`` when the former
    raises an ``Exception``.
    """
    assert (is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            G = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            G = seq_to_networkx(header, seq, **options)
        yield G
예제 #4
0
파일: rnafold.py 프로젝트: bgruening/EDeN
def rnafold_to_eden(iterable=None, **options):
    """Yield one graph per ``(header, seq)`` pair, falling back on error.

    Parameters
    ----------
    iterable : iterable of ``(header, seq)`` pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The result of ``string_to_networkx(header, seq, **options)``, or the
    result of ``seq_to_networkx(header, seq, **options)`` when the former
    raises an ``Exception``.
    """
    assert(is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            G = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            G = seq_to_networkx(header, seq, **options)
        yield G
예제 #5
0
def rnashapes_struct_to_eden(iterable, **options):
    """Yield the graphs produced for every ``(header, seq)`` pair.

    Parameters
    ----------
    iterable : iterable of ``(header, seq)`` pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    Every graph generated by ``string_to_networkx(header, seq, **options)``.
    If an ``Exception`` is raised while generating them, a diagnostic is
    printed and ``seq_to_networkx(header, seq, **options)`` is yielded in
    addition to any graphs already yielded for that pair.
    """
    assert(is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            for G in string_to_networkx(header, seq, **options):
                yield G
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s %s' % (header, seq))
            G = seq_to_networkx(header, seq, **options)
            yield G
예제 #6
0
def rnasubopt_to_eden(iterable, **options):
    """Yield the graphs produced for every ``(header, seq, const)`` triple.

    Parameters
    ----------
    iterable : iterable of ``(header, seq, const)`` triples; ``const`` is
        passed as the third positional argument of ``string_to_networkx``.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    Every graph generated by
    ``string_to_networkx(header, seq, const, **options)``. If an
    ``Exception`` is raised while generating them, a diagnostic is printed
    and ``seq_to_networkx(header, seq, **options)`` (which does not receive
    ``const``) is yielded in addition to any graphs already yielded for
    that triple.
    """
    assert(is_iterable(iterable)), 'Not iterable'
    for header, seq, const in iterable:
        try:
            for G in string_to_networkx(header, seq, const, **options):
                yield G
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            G = seq_to_networkx(header, seq, **options)
            yield G
예제 #7
0
def rnashapes_struct_to_eden(iterable, **options):
    """Yield the graphs produced for every ``(header, seq)`` pair.

    Parameters
    ----------
    iterable : iterable of ``(header, seq)`` pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    Every graph generated by ``string_to_networkx(header, seq, **options)``.
    If an ``Exception`` is raised while generating them, a diagnostic is
    printed and ``seq_to_networkx(header, seq, **options)`` is yielded in
    addition to any graphs already yielded for that pair.
    """
    assert(is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            for G in string_to_networkx(header, seq, **options):
                yield G
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s %s' % (header, seq))
            graph = seq_to_networkx(header, seq, **options)
            yield graph
def rnasubopt_to_eden(iterable, **options):
    """Yield the graphs produced for every ``(header, seq)`` pair.

    Parameters
    ----------
    iterable : iterable of ``(header, seq)`` pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    Every graph generated by ``string_to_networkx(header, seq, **options)``.
    If an ``Exception`` is raised while generating them, a diagnostic is
    printed and ``seq_to_networkx(header, seq, **options)`` is yielded in
    addition to any graphs already yielded for that pair.
    """
    assert (is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
            yield graph
예제 #9
0
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the RNAshapes structures of one sequence.

    Parameters
    ----------
    header : value used for the ``'id'`` graph attribute.
    sequence : sequence passed to ``rnashapes_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    options : keyword options. Read here: ``shape_type`` (default 5),
        ``energy_range`` (default 10), ``max_num`` (default 3),
        ``split_components`` (default False) and ``rnashapes_version``
        (default 2). The whole set is also forwarded to
        ``seq_to_networkx`` when a fallback is needed.

    Yields
    ------
    With ``split_components`` one graph per structure, with id
    ``header + '_' + struct``; otherwise a single graph that is the
    disjoint union of all structure graphs. A graph with fewer than two
    nodes is replaced by ``seq_to_networkx(header, sequence, **options)``
    with id ``header``.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)
    seq_info, dotbrackets, shapes = rnashapes_wrapper(
        sequence,
        shape_type=shape_type,
        energy_range=energy_range,
        max_num=max_num,
        rnashapes_version=options.get('rnashapes_version', 2))
    # The same description is attached in both output modes.
    info_text = 'RNAshapes shape_type=%s energy_range=%s max_num=%s' % (
        shape_type, energy_range, max_num)

    if not split_components:
        # Single-graph mode: accumulate every structure as a component.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = info_text
        merged.graph['sequence'] = sequence
        for dotbracket in dotbrackets:
            component = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                     seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    # Split mode: one graph per (structure, shape) pair.
    for dotbracket, shape in zip(dotbrackets, shapes):
        candidate = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                 seq_struct=dotbracket)
        shape_id = header + '_' + shape
        if candidate.number_of_nodes() < 2:
            # The fallback graph keeps the plain header as id and does not
            # receive the 'info' attribute here.
            candidate = seq_to_networkx(header, sequence, **options)
            candidate.graph['id'] = header
        else:
            candidate.graph['info'] = info_text
            candidate.graph['id'] = shape_id
        candidate.graph['sequence'] = sequence
        candidate.graph['structure'] = dotbracket
        yield candidate
예제 #10
0
def rnaplfold_to_eden(iterable, **options):
    """Yield one graph per ``(header, seq)`` pair, falling back on error.

    Parameters
    ----------
    iterable : iterable of ``(header, seq)`` pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The result of ``string_to_networkx(header, seq, **options)``, or the
    result of ``seq_to_networkx(header, seq, **options)`` when the former
    raises an ``Exception``.
    """
    assert(is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print('')  # blank separator line
            print('-' * 80)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s %s' % (header, seq))
            print('Reverting to path graph from sequence')
            graph = seq_to_networkx(header, seq, **options)
        yield graph
예제 #11
0
def rnaplfold_to_eden(iterable, **options):
    """Yield one graph per ``(header, seq)`` pair, falling back on error.

    Parameters
    ----------
    iterable : iterable of ``(header, seq)`` pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The result of ``string_to_networkx(header, seq, **options)``, or the
    result of ``seq_to_networkx(header, seq, **options)`` when the former
    raises an ``Exception``.
    """
    assert (is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print('')  # blank separator line
            print('-' * 80)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s %s' % (header, seq))
            print('Reverting to path graph from sequence')
            graph = seq_to_networkx(header, seq, **options)
        yield graph
예제 #12
0
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the RNAshapes structures of one sequence.

    Parameters
    ----------
    header : value used for the ``'id'`` graph attribute.
    sequence : sequence passed to ``rnashapes_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    options : keyword options. Read here: ``shape_type`` (default 5),
        ``energy_range`` (default 10), ``max_num`` (default 3),
        ``split_components`` (default False) and ``rnashapes_version``
        (default 2). The whole set is also forwarded to
        ``seq_to_networkx`` when a fallback is needed.

    Yields
    ------
    With ``split_components`` one graph per structure, with id
    ``header + '_' + struct``; otherwise a single graph that is the
    disjoint union of all structure graphs. A graph with fewer than two
    nodes is replaced by ``seq_to_networkx(header, sequence, **options)``
    with id ``header``.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)
    wrapper_output = rnashapes_wrapper(
        sequence,
        shape_type=shape_type,
        energy_range=energy_range,
        max_num=max_num,
        rnashapes_version=options.get('rnashapes_version', 2))
    seq_info, dotbrackets, shapes = wrapper_output
    # The same description is attached in both output modes.
    description = 'RNAshapes shape_type=%s energy_range=%s max_num=%s' % (
        shape_type, energy_range, max_num)

    if split_components:
        # One graph per (structure, shape) pair.
        for dotbracket, shape in zip(dotbrackets, shapes):
            single = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                  seq_struct=dotbracket)
            shape_id = header + '_' + shape
            if single.number_of_nodes() >= 2:
                single.graph['info'] = description
                single.graph['id'] = shape_id
            else:
                # The fallback graph keeps the plain header as id and does
                # not receive the 'info' attribute here.
                single = seq_to_networkx(header, sequence, **options)
                single.graph['id'] = header
            single.graph['sequence'] = sequence
            single.graph['structure'] = dotbracket
            yield single
        return

    # Single-graph mode: accumulate every structure as a component.
    combined = nx.Graph()
    combined.graph['id'] = header
    combined.graph['info'] = description
    combined.graph['sequence'] = sequence
    for dotbracket in dotbrackets:
        combined = nx.disjoint_union(
            combined,
            sequence_dotbracket_to_graph(seq_info=seq_info,
                                         seq_struct=dotbracket))
    if combined.number_of_nodes() < 2:
        combined = seq_to_networkx(header, sequence, **options)
    yield combined
def string_to_networkx(header, sequence, constraint, **options):
    """Yield graph(s) built from constrained RNAsubopt structures.

    Parameters
    ----------
    header : value stored under the ``'id'`` graph attribute.
    sequence : sequence passed to ``rnasubopt_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    constraint : passed as the second positional argument of
        ``rnasubopt_wrapper``.
    options : keyword options. Read here: ``energy_range`` (default 10),
        ``max_num`` (default 3), ``max_num_subopts`` (default 100) and
        ``split_components`` (default False). The whole set is also
        forwarded to ``seq_to_networkx`` when a fallback is needed.

    Yields
    ------
    With ``split_components`` one graph per structure returned by the
    wrapper; otherwise a single graph that is the disjoint union of all
    structure graphs. A graph with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)
    structures, energies = rnasubopt_wrapper(sequence,
                                             constraint,
                                             energy_range=energy_range,
                                             max_num=max_num,
                                             max_num_subopts=max_num_subopts)

    if not split_components:
        # Single-graph mode: accumulate every structure as a component.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (
            energy_range, max_num)
        merged.graph['sequence'] = sequence
        for dotbracket in structures:
            component = sequence_dotbracket_to_graph(seq_info=sequence,
                                                     seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    # Split mode: one graph per (structure, energy) pair.
    for dotbracket, energy in zip(structures, energies):
        candidate = sequence_dotbracket_to_graph(seq_info=sequence,
                                                 seq_struct=dotbracket)
        info = 'RNAsubopt energy=%s max_num=%s' % (energy, max_num)
        if candidate.number_of_nodes() >= 2:
            candidate.graph['info'] = info
        else:
            # The fallback graph does not receive the 'info' attribute here.
            candidate = seq_to_networkx(header, sequence, **options)
        candidate.graph['id'] = header
        candidate.graph['sequence'] = sequence
        candidate.graph['structure'] = dotbracket
        yield candidate
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) encoding RNAshapes output strings for one sequence.

    Each entry of the wrapper's structure list is unpacked as
    ``(shape_str, energy_str, dotbracket_str)``; the strings selected by the
    ``shape``, ``energy`` and ``dotbracket`` options are each converted with
    ``seq_to_networkx('', text)`` and combined by disjoint union.

    Parameters
    ----------
    header : value used for the ``'id'`` graph attribute.
    sequence : sequence passed to ``rnashapes_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    options : keyword options. Read here: ``shape_type`` (default 5),
        ``energy_range`` (default 10), ``max_num`` (default 3), ``shape``
        (default False), ``energy`` (default False), ``dotbracket``
        (default True) and ``split_components`` (default False).

    Yields
    ------
    With ``split_components`` one graph per entry, with id
    ``header + '_' + shape_str``; otherwise a single graph with id
    ``header`` that is the disjoint union of all per-entry graphs.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    shape = options.get('shape', False)
    energy = options.get('energy', False)
    dotbracket = options.get('dotbracket', True)
    split_components = options.get('split_components', False)
    # The first element of the wrapper's result is not used in this function.
    _seq_info, entries = rnashapes_wrapper(sequence,
                                           shape_type=shape_type,
                                           energy_range=energy_range,
                                           max_num=max_num)

    def entry_to_graph(shape_text, energy_text, dotbracket_text):
        # Union of the enabled string encodings, always in the order
        # shape, energy, dotbracket.
        built = nx.Graph()
        selections = ((shape, shape_text),
                      (energy, energy_text),
                      (dotbracket, dotbracket_text))
        for enabled, text in selections:
            if enabled:
                built = nx.disjoint_union(built, seq_to_networkx('', text))
        return built

    if split_components:
        for shape_str, energy_str, dotbracket_str in entries:
            single = entry_to_graph(shape_str, energy_str, dotbracket_str)
            single.graph['id'] = header + '_' + shape_str
            single.graph['info'] = 'RNAshapes shape_type=%s energy_range=%s max_num=%s shape=%s energy=%s dotbracket=%s' % (
                shape_type, energy_range, max_num, shape, energy, dotbracket)
            single.graph['sequence'] = sequence
            yield single
        return

    combined = nx.Graph()
    for shape_str, energy_str, dotbracket_str in entries:
        combined = nx.disjoint_union(
            combined, entry_to_graph(shape_str, energy_str, dotbracket_str))
    # Attributes are set after the unions, as in the original ordering.
    combined.graph['id'] = header
    combined.graph['info'] = 'RNAshapes shape_type=%s energy_range=%s max_num=%s shape=%s energy=%s dotbracket=%s' % (
        shape_type, energy_range, max_num, shape, energy, dotbracket)
    combined.graph['sequence'] = sequence
    yield combined
예제 #15
0
def rnafold_to_eden(iterable=None, **options):
    """Yield one graph per ``(header, seq)`` pair, falling back on error.

    Parameters
    ----------
    iterable : iterable over (header_string, sequence_string)

    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Returns
    -------
        nx.graph generator: for each pair, the result of
        ``string_to_networkx(header, seq, **options)``, or the result of
        ``seq_to_networkx(header, seq, **options)`` when the former raises
        an ``Exception``.
    """
    assert (is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
        yield graph
예제 #16
0
def rnafold_to_eden(iterable=None, **options):
    """Yield one graph per ``(header, seq)`` pair, falling back on error.

    Parameters
    ----------
    iterable : iterable over (header_string, sequence_string)

    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Returns
    -------
        nx.graph generator: for each pair, the result of
        ``string_to_networkx(header, seq, **options)``, or the result of
        ``seq_to_networkx(header, seq, **options)`` when the former raises
        an ``Exception``.
    """
    assert (is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
        yield graph
예제 #17
0
def rnashapes_to_eden(iterable, **options):
    """Transforms sequences to graphs that encode secondary structure information
    according to the RNAShapes algorithm.

    If generating the graphs for a sequence raises an exception, a diagnostic
    is printed and the graph returned by ``seq_to_networkx`` for that sequence
    is yielded instead of (or after) the structure graphs.

    Parameters
    ----------
    iterable : iterable
        iterable pairs of header and sequence strings

    rnashapes_version : int (default 2)
        The version of RNAshapes that is in the path.
        2   e.g. RNAshapes version 2.1.6
        3   e.g. RNAshapes version 3.3.0

    shape_type : int (default 5)
        Is the level of abstraction or dissimilarity which defines a different shape.
        In general, helical regions are depicted by a pair of opening and closing brackets
        and unpaired regions are represented as a single underscore. The differences of the
        shape types are due to whether a structural element (bulge loop, internal loop, multiloop,
        hairpin loop, stacking region and external loop) contributes to the shape representation:
        Five types are implemented.
        1   Most accurate - all loops and all unpaired  [_[_[]]_[_[]_]]_
        2   Nesting pattern for all loop types and unpaired regions in external loop
        and multiloop [[_[]][_[]_]]
        3   Nesting pattern for all loop types but no unpaired regions [[[]][[]]]
        4   Helix nesting pattern in external loop and multiloop [[][[]]]
        5   Most abstract - helix nesting pattern and no unpaired regions [[][]]

    energy_range : float (default 10)
        Sets the energy range as percentage value of the minimum free energy.
        For example, when relative deviation is specified as 5.0, and the minimum free energy
        is -10.0 kcal/mol, the energy range is set to -9.5 to -10.0 kcal/mol.
        Relative deviation must be a positive floating point number; by default it is set to 10 %.

    max_num : int (default 3)
        Is the maximum number of structures that are generated.

    split_components : bool (default False)
        If True each structure is yielded as an independent graph. Otherwise all structures
        are part of the same graph that has therefore several disconnected components.

    example: transform a simple sequence using RNAshapes version 3+
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get edge types
        >>> [(start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    example: transform a simple sequence using RNAshapes version 3+, splitting components
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], split_components=True, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get dotbracket structure annotation
        >>> g.graph["structure"]
        '(((...))).'
        >>> # get edge types
        >>> [ (start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    test max_num parameter with RNAshapes version 3+
        >>> seq = "CGUCGUCGCAUCGUACGCAUGACUCAGCAUCAGACUACGUACGCAUACGUCAGCAUCAGUCAGCAUCAGCAUGCAUCACUAGCAUGCACCCCCGGGGGCACAUCGUACGUACGCUCAGUACACUGCAUGACUACGU"
        >>> graphs = rnashapes_to_eden([("ID", seq)], split_components=True, max_num=2, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # get dotbracket structure annotations
        >>> len([g.graph["structure"] for g in graphs])
        2
    """

    assert(is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
            yield graph
예제 #18
0
def rnashapes_to_eden(iterable, **options):
    """Transforms sequences to graphs that encode secondary structure information
    according to the RNAShapes algorithm.

    If generating the graphs for a sequence raises an exception, a diagnostic
    is printed and the graph returned by ``seq_to_networkx`` for that sequence
    is yielded instead of (or after) the structure graphs.

    Parameters
    ----------
    iterable : iterable
        iterable pairs of header and sequence strings

    rnashapes_version : int (default 2)
        The version of RNAshapes that is in the path.
        2   e.g. RNAshapes version 2.1.6
        3   e.g. RNAshapes version 3.3.0

    shape_type : int (default 5)
        Is the level of abstraction or dissimilarity which defines a different shape.
        In general, helical regions are depicted by a pair of opening and closing brackets
        and unpaired regions are represented as a single underscore. The differences of the
        shape types are due to whether a structural element (bulge loop, internal loop, multiloop,
        hairpin loop, stacking region and external loop) contributes to the shape representation:
        Five types are implemented.
        1   Most accurate - all loops and all unpaired  [_[_[]]_[_[]_]]_
        2   Nesting pattern for all loop types and unpaired regions in external loop
        and multiloop [[_[]][_[]_]]
        3   Nesting pattern for all loop types but no unpaired regions [[[]][[]]]
        4   Helix nesting pattern in external loop and multiloop [[][[]]]
        5   Most abstract - helix nesting pattern and no unpaired regions [[][]]

    energy_range : float (default 10)
        Sets the energy range as percentage value of the minimum free energy.
        For example, when relative deviation is specified as 5.0, and the minimum free energy
        is -10.0 kcal/mol, the energy range is set to -9.5 to -10.0 kcal/mol.
        Relative deviation must be a positive floating point number; by default it is set to 10 %.

    max_num : int (default 3)
        Is the maximum number of structures that are generated.

    split_components : bool (default False)
        If True each structure is yielded as an independent graph. Otherwise all structures
        are part of the same graph that has therefore several disconnected components.

    example: transform a simple sequence using RNAshapes version 3+
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get edge types
        >>> [(start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    example: transform a simple sequence using RNAshapes version 3+, splitting components
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], split_components=True, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get dotbracket structure annotation
        >>> g.graph["structure"]
        '(((...))).'
        >>> # get edge types
        >>> [ (start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    test max_num parameter with RNAshapes version 3+
        >>> seq = "CGUCGUCGCAUCGUACGCAUGACUCAGCAUCAGACUACGUACGCAUACGUCAGCAUCAGUCAGCAUCAGCAUGCAUCACUAGCAUGCACCCCCGGGGGCACAUCGUACGUACGCUCAGUACACUGCAUGACUACGU"
        >>> graphs = rnashapes_to_eden([("ID", seq)], split_components=True, max_num=2, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # get dotbracket structure annotations
        >>> len([g.graph["structure"] for g in graphs])
        2
    """

    assert (is_iterable(iterable)), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Best-effort conversion: report the failure, then fall back.
            print(e.__doc__)
            # Print the exception itself rather than the deprecated
            # ``e.message``, which is empty for exceptions built from zero
            # or several arguments (e.g. OSError).
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
            yield graph