def _fit_predictive_model(self, seqs, neg_seqs=None):
     """Fit the discriminative estimator on positive vs. negative sequences.

     Both sequence streams are converted to graphs with ``sequence_to_eden``
     through ``mp_pre_process``; the object returned by ``fit`` is stored in
     ``self.estimator``.

     Args:
         seqs: iterable of positive sequences.
         neg_seqs: optional iterable of negative sequences. When ``None``,
             negatives are derived from ``seqs`` via ``seq_to_seq`` with
             ``shuffle_modifier`` (``times=self.negative_ratio``,
             ``order=self.shuffle_order``).

     Returns:
         None. The fitted estimator is assigned to ``self.estimator``.
     """
     make_negatives = neg_seqs is None
     if make_negatives:
         # Duplicate the stream only when the copy is actually consumed: an
         # unread ``tee`` branch keeps every item buffered in memory.
         pos_seqs, pos_seqs_ = tee(seqs)
     else:
         # Hand an iterator downstream, exactly as ``tee`` used to do.
         pos_seqs = iter(seqs)
     pos_graphs = mp_pre_process(pos_seqs,
                                 pre_processor=sequence_to_eden,
                                 n_blocks=self.pre_processor_n_blocks,
                                 block_size=self.pre_processor_block_size,
                                 n_jobs=self.pre_processor_n_jobs)
     if make_negatives:
         # shuffle seqs to obtain negatives
         neg_seqs = seq_to_seq(pos_seqs_,
                               modifier=shuffle_modifier,
                               times=self.negative_ratio,
                               order=self.shuffle_order)
     neg_graphs = mp_pre_process(neg_seqs,
                                 pre_processor=sequence_to_eden,
                                 n_blocks=self.pre_processor_n_blocks,
                                 block_size=self.pre_processor_block_size,
                                 n_jobs=self.pre_processor_n_jobs)
     # fit discriminative estimator
     self.estimator = fit(pos_graphs,
                          neg_graphs,
                          vectorizer=self.vectorizer,
                          n_iter_search=self.n_iter_search,
                          n_jobs=self.n_jobs,
                          n_blocks=self.n_blocks,
                          block_size=self.block_size,
                          random_state=self.random_state)
Beispiel #2
0
 def _fit_predictive_model(self, seqs, neg_seqs=None):
     """Fit the discriminative estimator on positive vs. negative sequences.

     Both sequence streams are converted to graphs with ``sequence_to_eden``
     through ``mp_pre_process``; the object returned by ``fit`` is stored in
     ``self.estimator``.

     Args:
         seqs: iterable of positive sequences.
         neg_seqs: optional iterable of negative sequences. When ``None``,
             negatives are derived from ``seqs`` via ``seq_to_seq`` with
             ``shuffle_modifier`` (``times=self.negative_ratio``,
             ``order=self.shuffle_order``).

     Returns:
         None. The fitted estimator is assigned to ``self.estimator``.
     """
     needs_shuffled_negatives = neg_seqs is None
     if needs_shuffled_negatives:
         # Duplicate the stream only when the copy is actually consumed: an
         # unread ``tee`` branch keeps every item buffered in memory.
         pos_seqs, pos_seqs_ = tee(seqs)
     else:
         # Hand an iterator downstream, exactly as ``tee`` used to do.
         pos_seqs = iter(seqs)
     pos_graphs = mp_pre_process(pos_seqs, pre_processor=sequence_to_eden,
                                 n_blocks=self.pre_processor_n_blocks,
                                 block_size=self.pre_processor_block_size,
                                 n_jobs=self.pre_processor_n_jobs)
     if needs_shuffled_negatives:
         # shuffle seqs to obtain negatives
         neg_seqs = seq_to_seq(pos_seqs_,
                               modifier=shuffle_modifier,
                               times=self.negative_ratio,
                               order=self.shuffle_order)
     neg_graphs = mp_pre_process(neg_seqs, pre_processor=sequence_to_eden,
                                 n_blocks=self.pre_processor_n_blocks,
                                 block_size=self.pre_processor_block_size,
                                 n_jobs=self.pre_processor_n_jobs)
     # fit discriminative estimator
     self.estimator = fit(pos_graphs, neg_graphs,
                          vectorizer=self.vectorizer,
                          n_iter_search=self.n_iter_search,
                          n_jobs=self.n_jobs,
                          n_blocks=self.n_blocks,
                          block_size=self.block_size,
                          random_state=self.random_state)
Beispiel #3
0
 def annotate(self, iterable):
     """Pre-process ``iterable`` into graphs and annotate them.

     The graphs produced by ``mp_pre_process`` are passed, together with
     ``self.estimator``, to ``self.vectorizer.annotate``; its result is
     returned unchanged.
     """
     assert is_iterable(iterable), 'Not iterable'
     # Gather the pre-processing configuration held on the instance.
     pre_process_opts = dict(
         pre_processor=self.pre_processor,
         pre_processor_args=self.pre_processor_args,
         n_blocks=self.pre_processor_n_blocks,
         block_size=self.pre_processor_block_size,
         n_jobs=self.pre_processor_n_jobs,
     )
     graphs = mp_pre_process(iterable, **pre_process_opts)
     return self.vectorizer.annotate(graphs, self.estimator)
Beispiel #4
0
 def annotate(self, iterable):
     """Return the vectorizer's annotation of the graphs built from ``iterable``.

     ``iterable`` is converted to graphs by ``mp_pre_process`` using the
     instance's pre-processor settings, then forwarded with
     ``self.estimator`` to ``self.vectorizer.annotate``.
     """
     assert is_iterable(iterable), 'Not iterable'
     settings = {
         'pre_processor': self.pre_processor,
         'pre_processor_args': self.pre_processor_args,
         'n_blocks': self.pre_processor_n_blocks,
         'block_size': self.pre_processor_block_size,
         'n_jobs': self.pre_processor_n_jobs,
     }
     annotated_input = mp_pre_process(iterable, **settings)
     return self.vectorizer.annotate(annotated_input, self.estimator)
Beispiel #5
0
 def _data_matrix(self, iterable, fit_vectorizer=False):
     """Pre-process ``iterable`` into graphs and vectorize them.

     Args:
         iterable: input accepted by ``self.pre_processor``; must satisfy
             ``is_iterable``.
         fit_vectorizer: when true, ``self.vectorizer`` is fitted on the
             graphs before they are vectorized.

     Returns:
         The value returned by ``vectorize`` for the pre-processed graphs.
     """
     assert(is_iterable(iterable)), 'Not iterable'
     graphs = mp_pre_process(iterable,
                             pre_processor=self.pre_processor,
                             pre_processor_args=self.pre_processor_args,
                             n_blocks=self.pre_processor_n_blocks,
                             block_size=self.pre_processor_block_size,
                             n_jobs=self.pre_processor_n_jobs)
     if fit_vectorizer:
         # Two passes are needed (fit, then vectorize): duplicate the stream.
         graphs, graphs_ = tee(graphs)
     else:
         # Single pass: skip ``tee`` so no unread branch forces every graph
         # to stay buffered, but still hand an iterator downstream.
         graphs = iter(graphs)
     self.vectorizer.set_params(**self.vectorizer_args)
     if fit_vectorizer:
         self.vectorizer.fit(graphs_)
     X = vectorize(graphs, vectorizer=self.vectorizer, n_jobs=self.n_jobs, n_blocks=self.n_blocks)
     return X
    def seq_to_data_matrix(self, sequences=None):
        """Convert ``sequences`` into a dense, standardized data matrix.

        The sequences are pre-processed into graphs, vectorized, densified
        with ``toarray()`` and finally scaled by ``self.scale``, which is
        re-fitted on the dense matrix on every call.
        """
        # Sequences -> graphs -> vectorized representation.
        graphs = mp_pre_process(sequences,
                                pre_processor=self.pre_processor,
                                pre_processor_args={},
                                n_jobs=-1)
        vectorized = vectorize(graphs, vectorizer=self.vectorizer, n_jobs=-1)

        # Densify before scaling.
        dense_matrix = vectorized.toarray()

        # Fit the scaler on this data, then standardize it.
        self.scale.fit(dense_matrix)
        return self.scale.transform(dense_matrix)