예제 #1
0
 def test(self, ds, qa_pair):
     """Predict on the pairs built from ``qa_pair`` using the best weights.

     Reloads ``self.best_params`` into ``self.model``, builds the three
     model inputs for every pair returned by ``ds.build_qa_pairs(qa_pair)``
     and returns the predictions.

     Args:
         ds: Object exposing ``build_qa_pairs``.
         qa_pair: Forwarded unchanged to ``ds.build_qa_pairs``.

     Returns:
         The result of ``self.model.predict`` on
         ``[question_vectors, answer_vectors, third_input]``.
     """
     def embed(text):
         # Embed ``text`` and force the result to ``self.timesteps`` rows.
         vect = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(text, self.prep_step))
         if len(vect) == 0:
             # No word could be embedded: fall back to a single 300-d zero
             # vector, otherwise the array is 1-D and the two-axis pad
             # specification below is rejected by np.pad.
             vect = [np.zeros(300)]
         # Truncate before padding; a text longer than ``timesteps`` would
         # otherwise produce a negative pad width, which np.pad refuses.
         vect = np.array(vect)[:self.timesteps]
         return np.pad(vect,
                       ((0, self.timesteps - vect.shape[0]), (0, 0)),
                       mode='constant')

     # Reload the best weights before scoring.
     self.model.load_weights(self.best_params)

     # Build the three test inputs, one entry per question/answer pair.
     test_X1 = []
     test_X2 = []
     test_X3 = []
     for qa in ds.build_qa_pairs(qa_pair):
         test_X1.append(embed(qa.q))
         test_X2.append(embed(qa.a))
         test_X3.append(self.proc_third_input(qa.q, qa.a))

     return self.model.predict(
         [np.array(test_X1), np.array(test_X2), np.array(test_X3)])
예제 #2
0
 def buildCosineSimMatrix(self, questions_answer_pairs, max_terms=20):
     """Build one padded cosine-distance matrix per question/answer pair.

     For each pair, entry ``[i][k]`` is the cosine distance between the
     i-th question word vector and the k-th answer word vector. Only the
     first ``max_terms`` vectors of each side are used and the matrix is
     zero-padded to ``(max_terms, max_terms)``.

     Args:
         questions_answer_pairs: Iterable of objects with ``q``, ``a`` and
             ``l`` attributes.
         max_terms: Side length of every produced matrix.

     Returns:
         ``(x, y)``: ``x`` is a list of arrays of shape
         ``(1, max_terms, max_terms)``, ``y`` the list of ``pair.l`` values.
     """
     x = []
     y = []
     for pair in questions_answer_pairs:
         q_vect = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(pair.q, self.prep_step))
         # The answer embedding does not depend on the question term, so it
         # is computed once per pair rather than once per question term.
         a_vect = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(pair.a, self.prep_step))
         q_terms = q_vect[:max_terms]
         a_terms = a_vect[:max_terms]
         if len(q_terms) == 0 or len(a_terms) == 0:
             # Nothing to compare. An empty question used to yield a 1-D
             # array and crash on shape[1]; an empty answer already padded
             # out to all zeros, so both cases now give the zero matrix.
             cos_matrix = np.zeros((max_terms, max_terms))
         else:
             cos_matrix = np.array(
                 [[spatial.distance.cosine(q_i, a_k) for a_k in a_terms]
                  for q_i in q_terms])
             rows, cols = cos_matrix.shape
             cos_matrix = np.pad(cos_matrix,
                                 ((0, max_terms - rows),
                                  (0, max_terms - cols)),
                                 mode='constant')
         if np.isnan(cos_matrix).any():
             print 'ERROR IS NAN: ', pair
         x.append(np.expand_dims(cos_matrix, 0))
         y.append(pair.l)
     return x, y
예제 #3
0
 def generateXYBatches(self, qadata, dataset, samples, prep_step, proc_func, positive_rate=0.5):
     """Endlessly yield the same batch built from one random sample draw.

     The samples are drawn once through ``qadata.get_random_samples`` and
     never change afterwards, so the arrays are built a single time and
     re-yielded on every iteration instead of being recomputed.

     Args:
         qadata: Object exposing ``get_random_samples``.
         dataset: Forwarded to ``get_random_samples``.
         samples: Forwarded to ``get_random_samples`` as its second argument.
         prep_step: Forwarded to ``nlp_utils.data_preprocess``.
         proc_func: Callable ``(q, a)`` producing the third model input.
         positive_rate: Forwarded to ``get_random_samples``.

     Yields:
         ``([X1, X2, X3], Y)`` where ``X1``/``X2`` hold the padded question
         and answer vectors, ``X3`` the ``proc_func`` outputs and ``Y`` the
         ``l`` attribute of each sample.
     """
     def embed(text):
         # Embed ``text`` and force the result to ``self.timesteps`` rows.
         vect = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(text, prep_step))
         if len(vect) == 0:
             # No embeddable word: use a single 300-d zero vector so the
             # array stays 2-D for the padding below.
             vect = [np.zeros(300)]
         # Truncate before padding; a text longer than ``timesteps`` would
         # otherwise produce a negative pad width, which np.pad refuses.
         vect = np.array(vect)[:self.timesteps]
         return np.pad(vect,
                       ((0, self.timesteps - vect.shape[0]), (0, 0)),
                       mode='constant')

     samples = qadata.get_random_samples(dataset, samples, positive_rate)

     train_X1 = []
     train_X2 = []
     train_X3 = []
     train_Y = []
     for qa_pair in samples:
         train_X1.append(embed(qa_pair.q))
         train_X2.append(embed(qa_pair.a))
         train_X3.append(proc_func(qa_pair.q, qa_pair.a))
         train_Y.append(np.array(qa_pair.l))

     batch_X1 = np.array(train_X1)
     batch_X2 = np.array(train_X2)
     batch_X3 = np.array(train_X3)
     batch_Y = np.array(train_Y)
     while True:
         # A fresh list/tuple wrapper per iteration keeps a consumer that
         # rearranges the container from affecting later batches.
         yield ([batch_X1, batch_X2, batch_X3], batch_Y)
예제 #4
0
 def proc_third_input(self, q, a):
     """Return the cosine distance between the summed word vectors of q and a.

     Each text is preprocessed with ``self.prep_step`` and embedded once.
     A text that yields no vectors is replaced by a single 300-d zero
     vector.

     Args:
         q: Question text.
         a: Answer text.

     Returns:
         ``spatial.distance.cosine`` of the two summed vectors.
     """
     def summed_vector(text):
         # Embed once; the original ran the preprocessing and the embedding
         # twice per text (once for the length test, once for the value).
         vects = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(text, self.prep_step))
         # len() rather than truthiness: the result may be a numpy array.
         if len(vects) == 0:
             vects = [np.zeros(300)]
         # Built-in sum adds the rows, giving one vector per text.
         return sum(np.array(vects))

     # NOTE(review): if either text has no vectors the zero placeholder makes
     # the cosine distance undefined; that behaviour is unchanged here.
     q_sum = summed_vector(q)
     a_sum = summed_vector(a)
     return spatial.distance.cosine(q_sum, a_sum)
예제 #5
0
 def proc_third_input(self, q, a):
     """Return the summed word vectors of q and a concatenated together.

     Each text is preprocessed with ``self.prep_step`` and embedded once.
     A text that yields no vectors is replaced by a single 300-d zero
     vector.

     Args:
         q: Question text.
         a: Answer text.

     Returns:
         The concatenation of the two summed vectors, or ``None`` if summing
         or concatenating fails (the error is printed, as before).
     """
     def embed(text):
         # Embed once; the original ran the preprocessing and the embedding
         # twice per text (once for the length test, once for the value).
         vects = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(text, self.prep_step))
         # len() rather than truthiness: the result may be a numpy array.
         if len(vects) == 0:
             vects = [np.zeros(300)]
         return vects

     # Embedding stays outside the try block so its errors still propagate.
     q_w2v = embed(q)
     a_w2v = embed(a)
     try:
         return np.concatenate((sum(np.array(q_w2v)), sum(np.array(a_w2v))))
     except Exception as e:
         # Best-effort behaviour kept: report the problem and hand back None,
         # now stated explicitly instead of falling off the end.
         print(e)
         return None
예제 #6
0
    def buildCosineSimMatrix(self,
                             questions_answer_pairs,
                             ordered_matrix=1,
                             salience_weight=0,
                             max_terms=40):
        #Construct Question Answer Matrix Pairs
        x = []
        y = []
        for pair in questions_answer_pairs:
            #Question Processin
            q_list = nlp_utils.data_preprocess(pair.q, self.prep_step)
            q_vect = self.w2vutil.transform2Word2Vect(q_list)
            #Answer processing
            a_list = nlp_utils.data_preprocess(pair.a, self.prep_step)
            a_vect = self.w2vutil.transform2Word2Vect(a_list)
            #Get cosine distance
            #distance = np.absolute( spatial.distance.cdist(q_vect[0:max_terms], a_vect[0:max_terms], 'cosine') )
            ''' with that param the MAP and Loss are  highly correlated
                and the improvement in MAP is very fast, but in test the results are almost equal
                cos_matrix = 1 - (1/(1+np.exp(-distance*3)))
                it changes a bit with
                cos_matrix = 1 - (1/(1+np.exp(-distance*2)))

            distance = spatial.distance.cdist(q_vect[0:max_terms], a_vect[0:max_terms],
              lambda u, v: (np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))
                    if (np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))) >= 0
                    else -1.0*(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))) )
                    '''
            if len(q_vect) < 1 or len(a_vect) < 1:
                cos_matrix = np.zeros((max_terms, max_terms))
            else:
                distance = spatial.distance.cdist(q_vect[0:max_terms],
                                                  a_vect[0:max_terms],
                                                  'cosine')
                #cos_matrix = 1 - distance
                cos_matrix = 1 - distance / 2
            shape_cos_matrix = cos_matrix.shape
            cos_matrix = np.pad(cos_matrix,
                                ((0, max_terms - shape_cos_matrix[0]),
                                 (0, max_terms - shape_cos_matrix[1])),
                                mode='constant')
            if np.isnan(cos_matrix).any():
                print 'ERROR IS NAN: ', pair
            if salience_weight == 1:
                #Get salience score
                sal_matrix = self.getSalienceScore(q_list, a_list, max_terms)
                cos_matrix = np.multiply(cos_matrix, sal_matrix)
            #x.append( np.expand_dims( np.multiply(cos_matrix,sal_matrix) ,0) )
            if ordered_matrix == 1:
                cos_matrix.sort()
                cos_matrix = cos_matrix[::-1]
                cos_matrix = cos_matrix[:, ::-1]
            y.append(pair.l)
            x.append(np.expand_dims(cos_matrix, 0))
        return np.array(x), np.array(y)
예제 #7
0
 def buildCosineSimMatrix(self, questions_answer_pairs, max_terms=20):
     """Build a POS-weighted cosine-similarity matrix for every pair.

     Entry ``[i][k]`` is ``1 - cosine_distance`` between the i-th question
     vector and the k-th answer vector, kept at full weight when either
     word carries one of the salient POS tags below and scaled by 0.2
     otherwise. Only the first ``max_terms`` vectors per side are used and
     the matrix is zero-padded to ``(max_terms, max_terms)``.

     Args:
         questions_answer_pairs: Iterable of objects with ``q``, ``a`` and
             ``l`` attributes.
         max_terms: Side length of every produced matrix.

     Returns:
         ``(x, y)`` as numpy arrays: the stacked matrices (each expanded
         with a leading axis) and the ``l`` values.
     """
     # Tags that keep a cell at full weight. The original literal listed
     # 'WP' twice; NOTE(review): possibly 'WP$' was intended - confirm.
     salient_tags = frozenset(['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',
                               'WDT', 'WP', 'WRB',
                               'NN', 'NNS', 'NNP', 'NNPS', 'MD'])

     def is_salient(token):
         # pos_tag expects a list of tokens; given a bare string it tags the
         # individual characters, so the word is wrapped in a list.
         # NOTE(review): assumes data_preprocess returns token strings.
         return nltk.pos_tag([token])[0][1] in salient_tags

     x = []
     y = []
     for pair in questions_answer_pairs:
         q_list = nlp_utils.data_preprocess(pair.q, self.prep_step)
         q_vect = self.w2vutil.transform2Word2Vect(q_list)
         # The answer side is independent of the question term, so it is
         # processed once per pair instead of once per question term.
         a_list = nlp_utils.data_preprocess(pair.a, self.prep_step)
         a_vect = self.w2vutil.transform2Word2Vect(a_list)
         q_terms = q_vect[:max_terms]
         a_terms = a_vect[:max_terms]
         if len(q_terms) == 0 or len(a_terms) == 0:
             # Nothing to compare. An empty question used to yield a 1-D
             # array and crash on shape[1]; an empty answer already padded
             # out to all zeros, so both cases now give the zero matrix.
             cos_matrix = np.zeros((max_terms, max_terms))
         else:
             # Tag every answer word once rather than once per cell.
             # NOTE(review): like the original, this pairs a_list[k] with
             # a_vect[k] by index - confirm the two stay aligned.
             a_salient = [is_salient(a_list[k])
                          for k in range(len(a_terms))]
             rows = []
             for i, q_i in enumerate(q_terms):
                 q_salient = is_salient(q_list[i])
                 row = []
                 for a_k, a_flag in zip(a_terms, a_salient):
                     similarity = 1 - spatial.distance.cosine(q_i, a_k)
                     if q_salient or a_flag:
                         row.append(similarity * 1)
                     else:
                         row.append(similarity * 0.2)
                 rows.append(row)
             cos_matrix = np.array(rows)
             n_rows, n_cols = cos_matrix.shape
             cos_matrix = np.pad(cos_matrix,
                                 ((0, max_terms - n_rows),
                                  (0, max_terms - n_cols)),
                                 mode='constant')
         if np.isnan(cos_matrix).any():
             print 'ERROR IS NAN: ', pair
         x.append(np.expand_dims(cos_matrix, 0))
         y.append(pair.l)
     return np.array(x), np.array(y)
예제 #8
0
 def buildCosineSimMatrix(self, questions_answer_pairs, max_terms=20):
     """Build one padded cosine-distance matrix per question/answer pair.

     For each pair, entry ``[i][k]`` is the cosine distance between the
     i-th question word vector and the k-th answer word vector. Only the
     first ``max_terms`` vectors of each side are used and the matrix is
     zero-padded to ``(max_terms, max_terms)``.

     Args:
         questions_answer_pairs: Iterable of objects with ``q``, ``a`` and
             ``l`` attributes.
         max_terms: Side length of every produced matrix.

     Returns:
         ``(x, y)`` as numpy arrays: the stacked matrices (each expanded
         with a leading axis) and the ``l`` values.
     """
     x = []
     y = []
     for pair in questions_answer_pairs:
         q_vect = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(pair.q, self.prep_step))
         # The answer embedding does not depend on the question term, so it
         # is computed once per pair rather than once per question term.
         a_vect = self.w2vutil.transform2Word2Vect(
             nlp_utils.data_preprocess(pair.a, self.prep_step))
         q_terms = q_vect[:max_terms]
         a_terms = a_vect[:max_terms]
         if len(q_terms) == 0 or len(a_terms) == 0:
             # Nothing to compare. An empty question used to yield a 1-D
             # array and crash on shape[1]; an empty answer already padded
             # out to all zeros, so both cases now give the zero matrix.
             cos_matrix = np.zeros((max_terms, max_terms))
         else:
             cos_matrix = np.array(
                 [[spatial.distance.cosine(q_i, a_k) for a_k in a_terms]
                  for q_i in q_terms])
             rows, cols = cos_matrix.shape
             cos_matrix = np.pad(cos_matrix,
                                 ((0, max_terms - rows),
                                  (0, max_terms - cols)),
                                 mode='constant')
         if np.isnan(cos_matrix).any():
             print 'ERROR IS NAN: ', pair
         x.append(np.expand_dims(cos_matrix, 0))
         y.append(pair.l)
     return np.array(x), np.array(y)
예제 #9
0
 def buildCosineSimMatrix(self,
                          questions_answer_pairs,
                          ordered_matrix=1,
                          salience_weight=0,
                          max_terms=40):
     #Construct Question Answer Matrix Pairs
     x = []
     y = []
     for pair in questions_answer_pairs:
         #Question Processin
         q_list = nlp_utils.data_preprocess(pair.q, self.prep_step)
         #Answer processing
         a_list = nlp_utils.data_preprocess(pair.a, self.prep_step)
         #Get composed similarity matrix
         cos_matrix = self.composed_similarity(self.w2vutil.w2v_model, q_list, a_list, \
                 wg_wordnet=1, wg_levenshtein=1, maxterms=max_terms)
         #Reshape
         shape_cos_matrix = cos_matrix.shape
         cos_matrix = np.pad(cos_matrix,
                             ((0, max_terms - shape_cos_matrix[0]),
                              (0, max_terms - shape_cos_matrix[1])),
                             mode='constant')
         if np.isnan(cos_matrix).any():
             print 'ERROR IS NAN: ', pair
         if salience_weight == 1:
             #Get salience score
             sal_matrix = self.getSalienceScore(q_list, a_list, max_terms)
             cos_matrix = np.multiply(cos_matrix, sal_matrix)
         #x.append( np.expand_dims( np.multiply(cos_matrix,sal_matrix) ,0) )
         if ordered_matrix == 1:
             cos_matrix.sort()
             cos_matrix = cos_matrix[::-1]
             cos_matrix = cos_matrix[:, ::-1]
         y.append(pair.l)
         x.append(np.expand_dims(cos_matrix, 0))
     return np.array(x), np.array(y)