Ejemplo n.º 1
0
    def construct_coder(self, method):
        """Build the sparse coder selected by ``method`` and cache it on ``self``.

        Recognised keys, the attribute each one fills, and how it is built:

        * ``"n1"``  -> ``coder_n1``: ``SparseCoder1`` over ``dictionary_builder()``
        * ``"n2"``  -> ``coder_n2``: ``SparseCoder``  over ``dictionary_builder()``
        * ``"l13"`` -> ``coder_l1``: ``SparseCoder1`` over ``dictionary_learning_1()``
        * ``"l24"`` -> ``coder_l2``: ``SparseCoder1`` over ``dictionary_learning_2()``
        * ``"l57"`` -> ``coder_l5``: ``SparseCoder``  over ``dictionary_learning_1()``
        * ``"l68"`` -> ``coder_l6``: ``SparseCoder``  over ``dictionary_learning_2()``

        Every coder receives ``transform_alpha=self.para``,
        ``transform_n_nonzero_coefs=3`` and
        ``transform_algorithm=self.dictionary_loss``.  Any other value of
        ``method`` leaves the instance untouched.
        """
        # (key, attribute to fill, method producing the dictionary,
        #  True -> SparseCoder1 / False -> SparseCoder)
        layouts = (
            ("n1", "coder_n1", "dictionary_builder", True),
            ("n2", "coder_n2", "dictionary_builder", False),
            ("l13", "coder_l1", "dictionary_learning_1", True),
            ("l24", "coder_l2", "dictionary_learning_2", True),
            ("l57", "coder_l5", "dictionary_learning_1", False),
            ("l68", "coder_l6", "dictionary_learning_2", False),
        )
        for key, target, source, wants_coder1 in layouts:
            if method == key:
                break
        else:
            # No key matched: nothing is built.
            return

        atoms = getattr(self, source)()
        # The coder class is looked up only after the dictionary exists,
        # keeping the same order of operations as a straight if/elif chain.
        coder_cls = SparseCoder1 if wants_coder1 else SparseCoder
        setattr(self, target, coder_cls(
            dictionary=atoms,
            transform_alpha=self.para,
            transform_n_nonzero_coefs=3,
            transform_algorithm=self.dictionary_loss))
Ejemplo n.º 2
0
class Dictionary_spam():
    """Sparse-coding dictionary whose atoms are subtopic TF/IDF vectors.

    The dictionary size equals the number of subtopic clusters of the
    original query.  A single ``SparseCoder`` is created lazily and cached
    in ``self.coder``.
    """

    def __init__(self, num_subtopic, subtopic_candidates, original_A1):
        """Store the configuration; the coder itself is built on first use.

        num_subtopic        -- stored as ``dictionary_size``.
        subtopic_candidates -- stored as-is and exposed through the accessors.
        original_A1         -- sequence of equally long row vectors used as
                               the dictionary atoms (see dictionary_builder).
        """
        self.dictionary_size = num_subtopic
        self.subtopic_candidates = subtopic_candidates
        self.dictionary_A = original_A1
        self.coder = None

    def get_dictionary_size(self):
        """Return the stored dictionary size."""
        return self.dictionary_size

    def get_subtopic_candidates(self):
        """Return the stored subtopic candidates."""
        return self.subtopic_candidates

    def get_dictionary_a(self):
        """Return the raw dictionary rows as given to the constructor/setter."""
        return self.dictionary_A

    def set_dictionary_size(self, value):
        """Replace the stored dictionary size."""
        self.dictionary_size = value

    def set_subtopic_candidates(self, value):
        """Replace the stored subtopic candidates."""
        self.subtopic_candidates = value

    def set_dictionary_a(self, value):
        """Replace the raw dictionary rows (an existing coder is not rebuilt)."""
        self.dictionary_A = value

    def get_coder(self):
        """Return the cached coder, building it with ``alpha=1`` if missing.

        NOTE(review): the transform methods below build a missing coder with
        the default ``alpha=0.5`` instead, so whichever entry point runs
        first decides the alpha of the cached coder.
        """
        if self.coder is None:
            self.construct_coder(1, 4, "lars")
        return self.coder

    def construct_coder(self, alpha=0.5, n_nonzero=4, algo="lars"):
        """Build a ``SparseCoder`` over the normalised dictionary.

        ``alpha`` is forwarded as ``transform_alpha``.

        NOTE(review): ``n_nonzero`` and ``algo`` are accepted but not used;
        the coder is always created with ``transform_n_nonzero_coefs=3`` and
        ``transform_algorithm="threshold"``.  Left unchanged because honouring
        them would alter the coder that existing callers get by default.
        """
        D = self.dictionary_builder()
        self.coder = SparseCoder(dictionary=D, transform_n_nonzero_coefs=3,
                                 transform_alpha=alpha,
                                 transform_algorithm="threshold")

    def dictionary_learning(self):
        """
            use the subtopics as training data learning a dictionary.

            Placeholder: currently only prints a marker string.
        """
        # Function-call form is valid on Python 3 and prints the same text
        # on Python 2 (the former ``print" dict "`` statement is Python 2 only).
        print(" dict ")

    def dictionary_builder(self):
        """Return ``dictionary_A`` as a 2-D float array with unit-norm rows.

        The array has one row per entry of ``dictionary_A`` and as many
        columns as the first entry has elements.  Each row is divided by its
        Euclidean norm; an all-zero row therefore divides by zero and comes
        out as NaN.
        """
        D = np.empty((len(self.dictionary_A), len(self.dictionary_A[0])))
        for i, vec in enumerate(self.dictionary_A):
            D[i] = vec
        D /= np.sqrt(np.sum(D ** 2, axis=1))[:, np.newaxis]
        return D

    def _ensure_coder(self):
        """Return the cached coder, building it with default arguments if missing."""
        if self.coder is None:
            self.construct_coder()
        return self.coder

    def document_sparse_representation(self, document):
        """Return the sparse code of ``document.get_tfidf_vec()``."""
        # Fetch the vector before touching the coder, as the original did.
        vec = document.get_tfidf_vec()
        return self._ensure_coder().transform(vec)

    def subtopic_sparse_representation(self, subtopic):
        """Return the sparse code of ``subtopic.get_query_vec_tfidf()``."""
        vec = subtopic.get_query_vec_tfidf()
        return self._ensure_coder().transform(vec)
Ejemplo n.º 3
0
 def construct_coder(self, alpha=0.5, n_nonzero=4, algo="lars"):
     """Create the sparse coder for this instance and store it in ``self.coder``.

     ``alpha`` is forwarded as ``transform_alpha``.  ``n_nonzero`` and
     ``algo`` are accepted but never read: the coder is always created with
     ``transform_n_nonzero_coefs=3`` and ``transform_algorithm="threshold"``.
     """
     # The dictionary is produced first, then handed to the coder together
     # with the fixed transform settings.
     coder_options = {
         "dictionary": self.dictionary_builder(),
         "transform_n_nonzero_coefs": 3,
         "transform_alpha": alpha,
         "transform_algorithm": "threshold",
     }
     self.coder = SparseCoder(**coder_options)
Ejemplo n.º 4
0
class Dictionary():
    """Sparse-coding dictionary built from subtopic TF/IDF vectors.

    The dictionary size equals the number of subtopic clusters of the
    original query.  Six coder variants exist; each is built lazily on first
    use and cached on its own attribute:

    ========  ===========  =========================  ==============
    method    attribute    dictionary comes from      coder class
    ========  ===========  =========================  ==============
    ``n1``    coder_n1     dictionary_builder()       SparseCoder1
    ``n2``    coder_n2     dictionary_builder()       SparseCoder
    ``l13``   coder_l1     dictionary_learning_1()    SparseCoder1
    ``l24``   coder_l2     dictionary_learning_2()    SparseCoder1
    ``l57``   coder_l5     dictionary_learning_1()    SparseCoder
    ``l68``   coder_l6     dictionary_learning_2()    SparseCoder
    ========  ===========  =========================  ==============

    Per the original inline comments, ``SparseCoder1`` is the "CNU" sparse
    coder and ``SparseCoder`` is the sklearn one.
    """

    # method key -> (cache attribute, dictionary-producing method name,
    #                True for SparseCoder1 / False for SparseCoder)
    _CODER_RECIPES = {
        "n1": ("coder_n1", "dictionary_builder", True),
        "n2": ("coder_n2", "dictionary_builder", False),
        "l13": ("coder_l1", "dictionary_learning_1", True),
        "l24": ("coder_l2", "dictionary_learning_2", True),
        "l57": ("coder_l5", "dictionary_learning_1", False),
        "l68": ("coder_l6", "dictionary_learning_2", False),
    }

    def __init__(self, num_subtopic, train_subtopics, train_subtopics_code,
                 original_A1, dictionary_loss, para_1):
        """Store the configuration; every coder is built on first use.

        num_subtopic         -- stored as ``dictionary_size``; passed to the
                                dictionary-learning routines.
        train_subtopics      -- training data handed to dict_learning1/2.
        train_subtopics_code -- used as ``code_init`` by dictionary_learning_2.
        original_A1          -- sequence of equally long row vectors forming
                                the initial dictionary.
        dictionary_loss      -- forwarded to every coder as
                                ``transform_algorithm``.
        para_1               -- forwarded to every coder as ``transform_alpha``.
        """
        self.dictionary_size = num_subtopic
        self.train_subtopics = train_subtopics
        # Was never initialised, so get_subtopic_candidates() raised
        # AttributeError unless the setter had been called first.
        self.subtopic_candidates = None
        self.dictionary_A = original_A1
        self.dictionary_loss = dictionary_loss
        self.coder_n1 = None
        self.coder_n2 = None
        self.coder_l1 = None
        self.coder_l2 = None
        self.coder_l6 = None
        self.coder_l5 = None
        self.para = para_1
        self.train_subtopics_code = train_subtopics_code

    def get_dictionary_size(self):
        """Return the stored dictionary size."""
        return self.dictionary_size

    def get_subtopic_candidates(self):
        """Return the subtopic candidates (``None`` until the setter is used)."""
        return self.subtopic_candidates

    def get_dictionary_a(self):
        """Return the current dictionary rows.

        After dictionary_builder() has run this is the float array it built.
        """
        return self.dictionary_A

    def set_dictionary_size(self, value):
        """Replace the stored dictionary size."""
        self.dictionary_size = value

    def set_subtopic_candidates(self, value):
        """Replace the stored subtopic candidates."""
        self.subtopic_candidates = value

    def set_dictionary_a(self, value):
        """Replace the dictionary rows (already built coders are not rebuilt)."""
        self.dictionary_A = value

    def get_coder(self, method="n2"):
        """Return the coder for ``method``, building it on first use.

        The previous body read a nonexistent ``self.coder`` and called
        construct_coder() with three arguments, so it could only raise.

        NOTE(review): ``"n2"`` (sklearn coder, no dictionary learning) is an
        assumed default -- confirm which variant callers expect.

        Raises ValueError when ``method`` is not a known key.
        """
        return self._coder_for(method)

    def construct_coder(self, method):
        """Build the coder selected by ``method`` and cache it.

        Every coder receives ``transform_alpha=self.para``,
        ``transform_n_nonzero_coefs=3`` and
        ``transform_algorithm=self.dictionary_loss``.  Unknown keys are
        ignored, as they were by the former if/elif chain.
        """
        recipe = self._CODER_RECIPES.get(method)
        if recipe is None:
            return
        attr_name, source_name, use_coder1 = recipe
        D = getattr(self, source_name)()
        coder_cls = SparseCoder1 if use_coder1 else SparseCoder
        setattr(self, attr_name, coder_cls(
            dictionary=D,
            transform_alpha=self.para,
            transform_n_nonzero_coefs=3,
            transform_algorithm=self.dictionary_loss))

    def _coder_for(self, method):
        """Return the cached coder for ``method``, constructing it if missing."""
        recipe = self._CODER_RECIPES.get(method)
        if recipe is None:
            raise ValueError("unknown coder method: %r" % (method,))
        attr_name = recipe[0]
        if getattr(self, attr_name) is None:
            self.construct_coder(method)
        return getattr(self, attr_name)

    def _encode(self, vec, method):
        """Sparse-code ``vec`` with the coder selected by ``method``."""
        return self._coder_for(method).transform(vec)

    def dictionary_learning_2(self):
        """
            use the subtopics as training data learning a dictionary.

            Runs dict_learning2 seeded with ``train_subtopics_code`` and the
            current dictionary, and returns the second element of its result.
        """
        # Called for its side effect: it replaces self.dictionary_A with a
        # float array, which is then passed as dict_init below.
        self.dictionary_builder()
        # Third positional argument is presumably the sparsity penalty
        # (alpha), as in sklearn's dict_learning -- TODO confirm.
        _, U, _ = dict_learning2(self.train_subtopics, self.dictionary_size, 1,
                                 tol=1e-10, max_iter=1000,
                                 method='cd',
                                 n_jobs=1,
                                 code_init=self.train_subtopics_code,
                                 dict_init=self.dictionary_A,
                                 verbose=False,
                                 random_state=None)
        return U

    def dictionary_learning_1(self):
        """
            use the subtopics as training data learning a dictionary.

            Runs dict_learning1 without any initial code or dictionary and
            returns the second element of its result.
        """
        # Only the side effect matters here (self.dictionary_A becomes a
        # float array); the learner itself gets no initial dictionary.
        self.dictionary_builder()
        _, U, _ = dict_learning1(self.train_subtopics, self.dictionary_size, 1,
                                 tol=1e-10, max_iter=1000,
                                 method='cd',
                                 n_jobs=1,
                                 code_init=None,
                                 dict_init=None,
                                 verbose=False,
                                 random_state=None)
        return U

    def dictionary_builder(self):
        """Copy ``dictionary_A`` into a 2-D float array, store and return it.

        The array has one row per entry of ``dictionary_A`` and as many
        columns as the first entry has elements.  Rows are NOT normalised.
        ``self.dictionary_A`` is replaced by the new array.
        """
        D = np.empty((len(self.dictionary_A), len(self.dictionary_A[0])))
        for i, vec in enumerate(self.dictionary_A):
            D[i] = vec
        self.dictionary_A = D
        return D

    # without dictionary learning and sparse with CNU Sparse Coding
    def document_sparse_representation_n1(self, document):
        """Sparse-code ``document.get_tfidf_vec()`` with the ``n1`` coder."""
        return self._encode(document.get_tfidf_vec(), "n1")

    # without dictionary learning and sparse with sklearn Sparse Coding
    def document_sparse_representation_n2(self, document):
        """Sparse-code ``document.get_tfidf_vec()`` with the ``n2`` coder."""
        return self._encode(document.get_tfidf_vec(), "n2")

    # dictionary learning with CNU Learning1 and sparse coding with CNU
    def document_sparse_representation_l13(self, document):
        """Sparse-code ``document.get_tfidf_vec()`` with the ``l13`` coder."""
        return self._encode(document.get_tfidf_vec(), "l13")

    # dictionary learning with CNU Learning2 and sparse coding with CNU
    def document_sparse_representation_l24(self, document):
        """Sparse-code ``document.get_tfidf_vec()`` with the ``l24`` coder."""
        return self._encode(document.get_tfidf_vec(), "l24")

    # dictionary learning with CNU Learning1 and sparse coding with sk learn
    def document_sparse_representation_l57(self, document):
        """Sparse-code ``document.get_tfidf_vec()`` with the ``l57`` coder."""
        return self._encode(document.get_tfidf_vec(), "l57")

    # dictionary learning with CNU Learning2 and sparse coding with sk learn
    def document_sparse_representation_l68(self, document):
        """Sparse-code ``document.get_tfidf_vec()`` with the ``l68`` coder."""
        return self._encode(document.get_tfidf_vec(), "l68")

    def subtopic_sparse_representation(self, subtopic, method="n2"):
        """Sparse-code ``subtopic.get_query_vec_tfidf()``.

        The previous body used a nonexistent ``self.coder`` and called
        construct_coder() without its required argument, so it always raised.

        NOTE(review): the ``"n2"`` default is an assumption -- confirm which
        coder variant subtopics should be encoded with.
        """
        return self._encode(subtopic.get_query_vec_tfidf(), method)