Ejemplo n.º 1
0
 def regeression_sampler(self, test_id, batch, le):
     """Build a regression sample: held-out target protein(s) vs. the rest of a batch.

     Args:
         test_id: Positional row index(es) into ``self.data.pthway_NameList``
             selecting the held-out target. Must be list-like (a bare scalar
             makes the ``.values`` lookups below fail), and every selected
             row must have ``GenomeType == 'protein'``.
         batch: Positional row indexes of the whole batch; the rows used as
             inputs are ``batch`` minus ``test_id``.
         le: Encoder whose ``transform`` maps ``GenomeName`` values to
             indexes into ``self.data.activ_free`` / ``self.data.activ_cancer``.

     Returns:
         A ``Data`` object carrying ``test_id``, ``operate_id``,
         ``pthway_NameList``, ``Genome_NameList``, ``Test_NameList``,
         ``activ_free``, ``activ_cancer``, ``activ_free_target`` and
         ``activ_cancer_target``.

     Raises:
         IndexError: If a target row, or any remaining batch row, is not of
             type ``'protein'``.
     """
     name_table = self.data.pthway_NameList

     pthway = Data()
     pthway.test_id = test_id
     # Rows of the batch that remain once the held-out target is removed.
     pthway.operate_id = np.setdiff1d(batch, test_id)
     pthway.pthway_NameList = name_table.iloc[pthway.operate_id, :]

     target_rows = name_table.iloc[pthway.test_id, :]
     # np.any keeps the check well-defined when several targets are given;
     # a bare array in an ``if`` is ambiguous for more than one element.
     if np.any(target_rows['GenomeType'].values != 'protein'):
         raise IndexError('The test element is not the protein type. \t')

     # ``np.all`` returns a numpy bool, which is never the ``False`` singleton,
     # so an ``is False`` comparison would silently disable this validation.
     if not np.all(pthway.pthway_NameList['GenomeType'].values == 'protein'):
         raise IndexError('Some of Related Genome ar not protein type.\t')

     # Names of the batch members, excluding the target protein.
     pthway.Genome_NameList = pthway.pthway_NameList['GenomeName'].values
     # Name(s) of the target protein only.
     pthway.Test_NameList = target_rows['GenomeName'].values

     # Activation values of the input (non-target) genomes.
     activ_id = le.transform(pthway.Genome_NameList)
     pthway.activ_free = self.data.activ_free[activ_id]
     pthway.activ_cancer = self.data.activ_cancer[activ_id]

     # Activation values of the target.
     activ_id_test = le.transform(pthway.Test_NameList)
     pthway.activ_free_target = self.data.activ_free[activ_id_test]
     pthway.activ_cancer_target = self.data.activ_cancer[activ_id_test]

     return pthway
Ejemplo n.º 2
0
    def sampler_generater(self, batch, le):
        """Assemble the sub-pathway sample induced by the rows in ``batch``.

        Args:
            batch: Positional row indexes into ``self.data.pthway_NameList``
                selecting the nodes of the sub-pathway.
            le: Encoder whose ``transform`` maps protein ``GenomeName`` values
                to indexes into ``self.data.activ_free`` /
                ``self.data.activ_cancer``.

        Returns:
            A ``Data`` object with:
              * ``genome_Namelist``, ``activ_free``, ``activ_cancer`` for the
                protein nodes of the batch;
              * ``pth_Namelist``: the re-indexed node table of the batch;
              * ``Edgelist``: rows of ``self.data.Edgelist`` whose first two
                columns both name a node of the batch, re-indexed;
              * ``edge_index``: an ``(n_edges, 2)`` array of node codes, with
                ``all_elem_className`` listing the node name for each code;
              * ``edge_class`` / ``edge_className``: encoded ``edgeType``;
              * ``node_class`` / ``node_className``: encoded ``GenomeType``.
        """
        deep_pthway = Data()

        node_table = self.data.pthway_NameList.iloc[batch, :].reset_index(drop=True)
        protein_rows = node_table[node_table['GenomeType'] == 'protein']
        deep_pthway.genome_Namelist = protein_rows['GenomeName'].values
        activ_id = le.transform(deep_pthway.genome_Namelist)
        deep_pthway.activ_free = self.data.activ_free[activ_id]
        deep_pthway.activ_cancer = self.data.activ_cancer[activ_id]

        deep_pthway.pth_Namelist = node_table

        # Keep only edges whose two endpoints both belong to the batch.
        # Rows are selected by position (iloc) so the result is correct even
        # when the edge table does not carry a default 0..n-1 index; a set
        # makes each membership test O(1).
        all_edges = self.data.Edgelist
        node_names = set(node_table['GenomeName'].values)
        endpoint_pairs = zip(all_edges.iloc[:, 0].values, all_edges.iloc[:, 1].values)
        kept_positions = [
            pos
            for pos, (src, dst) in enumerate(endpoint_pairs)
            if src in node_names and dst in node_names
        ]
        deep_pthway.Edgelist = all_edges.iloc[kept_positions].reset_index(drop=True)

        # Encode edge endpoints with codes derived from the batch's node names.
        name_encoder = LabelEncoder().fit(deep_pthway.pth_Namelist['GenomeName'].values)
        endpoints = deep_pthway.Edgelist.iloc[:, :2].values
        deep_pthway.edge_index = name_encoder.transform(endpoints.reshape(-1)).reshape(-1, 2)
        deep_pthway.all_elem_className = list(name_encoder.classes_)

        # Label edge classes.
        edge_encoder = LabelEncoder().fit(deep_pthway.Edgelist['edgeType'])
        deep_pthway.edge_class = edge_encoder.transform(deep_pthway.Edgelist['edgeType'])
        deep_pthway.edge_className = list(edge_encoder.classes_)

        # Label node classes.
        type_encoder = LabelEncoder().fit(deep_pthway.pth_Namelist['GenomeType'])
        deep_pthway.node_class = type_encoder.transform(deep_pthway.pth_Namelist['GenomeType'])
        deep_pthway.node_className = list(type_encoder.classes_)

        return deep_pthway