예제 #1
0
    def fit(self, inputs, vectorizer):
        """Store the vectorizer and fit a fresh EdenNNF model on the inputs.

        Parameters
        ----------
        inputs: sequence list
            handed unchanged to ``EdenNNF.fit``
        vectorizer: a vectorizer
            kept on the instance as ``self.vectorizer``

        Returns
        -------
        self
        """
        self.vectorizer = vectorizer

        # Publish the model on the instance before training it, so the
        # attribute exists even if fitting fails part-way.
        nn_model = EdenNNF(n_neighbors=4)
        self.NNmodel = nn_model
        nn_model.fit(inputs)
        return self
예제 #2
0
class GraphTransformerForgi(GraphTransformer):
    """Turn RNA sequences into base graphs paired with forgi abstract graphs.

    For every sequence the fitted ``EdenNNF`` model supplies a
    (structure, energy, sequence) triple; from it a base graph is built and
    an abstract graph is derived through the ``forgi`` helpers.
    """

    def __init__(self, base_thickness_list=None, structure_mod=True,
                 include_base=False, ignore_inserts=False):
        """
        Parameters
        ----------
        base_thickness_list: list of int
            thickness list for the base graph
        structure_mod : bool
            should we introduce "F" nodes to keep multiloop flexible regarding substitution
        include_base : bool
            if asked for all cips, also yield "normal" cips (whose core is not
            radius of abstract, but radius of base graph)
        ignore_inserts: bool
            bulges will be ignored and merged into their adjacent stems;
            forwarded to ``forgi.get_abstr_graph`` by ``abstract_graph``

        Notes
        -----
        All four values are stored on the instance under their parameter
        names.  Only ``ignore_inserts`` is read by a method of this class.
        """
        self.base_thickness_list = base_thickness_list
        self.structure_mod = structure_mod
        self.include_base = include_base
        self.ignore_inserts = ignore_inserts

    def fit(self, inputs, vectorizer):
        """Store the vectorizer and fit a fresh EdenNNF model on the inputs.

        Parameters
        ----------
        inputs: sequence list
            handed unchanged to ``EdenNNF.fit``
        vectorizer: a vectorizer
            kept as ``self.vectorizer``; ``transform`` and ``abstract_graph``
            call its ``_edge_to_vertex_transform``

        Returns
        -------
        self
        """
        self.vectorizer = vectorizer
        self.NNmodel = EdenNNF(n_neighbors=4)
        self.NNmodel.fit(inputs)
        return self

    def fit_transform(self, inputs):
        """Fit on the inputs, then transform their sequences.

        Parameters
        ----------
        inputs: iterable of 2-item entries
            each entry is unpacked into two values and the second one is
            used as the sequence (presumably (name, sequence) pairs)

        Returns
        -------
        whatever ``transform`` returns for the extracted sequences
        """
        inputs = list(inputs)
        # NOTE: self.vectorizer must already exist on the instance here (it is
        # only assigned by fit); otherwise this line raises AttributeError.
        self.fit(inputs, self.vectorizer)
        sequences = [sequence for _, sequence in inputs]
        return self.transform(sequences)

    def re_transform_single(self, graph):
        """Re-derive the transform result for a single graph.

        Parameters
        ----------
        graph: digraph
            graph from which ``get_sequence`` can read a sequence

        Returns
        -------
        the first (and only) entry of ``transform`` for the recovered
        sequence, or None when no sequence could be extracted
        """
        try:
            sequence = get_sequence(graph)
        except Exception:
            # Only ordinary errors mean "not an RNA"; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            logger.debug('sequenceproblem: this is not an rna')
            return None

        # "F" characters are stripped before the sequence is processed again
        sequence = sequence.replace("F", '')
        return self.transform([sequence])[0]

    def abstract_graph(self, base_graph=None):
        """Build the abstract graph belonging to a base graph.

        Parameters
        ----------
        base_graph: graph, optional
            base graph whose ``graph['structure']`` entry holds the structure
            to abstract.  When omitted, ``self._base_graph`` and
            ``self.structure`` are used instead.

        Returns
        -------
        the abstract graph, in which every node carrying an 'edge' attribute
        is labelled 'e' and every node has a 'contracted' attribute
        """
        if base_graph is None:
            base_graph = self._base_graph
            structure = self.structure
        else:
            structure = base_graph.graph['structure']

        # create the abstract graph and populate the contracted set
        abstract_graph = forgi.get_abstr_graph(structure, ignore_inserts=self.ignore_inserts)
        abstract_graph = self.vectorizer._edge_to_vertex_transform(abstract_graph)
        completed_abstract_graph = forgi.edge_parent_finder(abstract_graph, base_graph)

        for _, attributes in completed_abstract_graph.nodes(data=True):
            # eden requires a label on every node, edge nodes included
            if 'edge' in attributes:
                attributes['label'] = 'e'
            # Every node needs a contracted attribute.  Originally this
            # happened naturally, but since multiloops are merged into one
            # loop some nodes are left without it.
            if 'contracted' not in attributes:
                attributes['contracted'] = set()
        return completed_abstract_graph

    def transform(self, sequences):
        """Convert sequences into (sequence, structure, base graph, abstract graph) tuples.

        Parameters
        ----------
        sequences : iterable over rna sequences

        Returns
        -------
        list with one entry per input sequence: None when ``fix_structure``
        yields no structure, otherwise the tuple
        (sequence, structure, base_graph, abstract_graph)
        """
        result = []
        for sequence in sequences:

            # a tuple is most likely a (name, sequence) pair; only the bare
            # sequence is expected here, so complain but carry on
            if isinstance(sequence, tuple):
                logger.warning('YOUR INPUT IS A TUPPLE, GIVE ME A SEQUENCE, SINCERELY -- YOUR RNA PREPROCESSOR')

            # get structure
            structure, energy, sequence = self.NNmodel.transform_single(('fake', sequence))
            # fixing structure
            structure, sequence = fix_structure(structure, sequence)
            if structure is None:
                result.append(None)
                continue

            # build base_graph
            base_graph = converter.sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=structure)
            base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
            base_graph = expanded_rna_graph_to_digraph(base_graph)
            base_graph.graph['energy'] = energy
            base_graph.graph['sequence'] = sequence
            # abstract_graph reads the structure back from this entry
            base_graph.graph['structure'] = structure
            result.append(
                (sequence, structure, base_graph, self.abstract_graph(base_graph))
            )

        return result