def getFeature(sentence, f_index):
    """Build a 0/1 feature vector for ``sentence``.

    Keywords are extracted with ``WordSeg.getKeyWords(sentence,
    kw_each_predict)``.  For every extracted keyword that is a key of
    ``f_index``, the vector slot ``f_index[keyword]`` is set to 1; all
    other slots stay 0.

    Args:
        sentence: text handed unchanged to ``WordSeg.getKeyWords``.
        f_index: mapping from keyword to its position in the vector.

    Returns:
        A list of length ``kw_each_label * num_of_labels`` holding 0/1
        flags.
    """
    kw = WordSeg.getKeyWords(sentence, kw_each_predict)
    f_vector = [0] * kw_each_label * num_of_labels  # all 0 list
    # get the f_vector for each text
    for keyword in kw:
        # `in` instead of dict.has_key(): has_key() no longer exists on
        # Python 3, while `in` gives the same answer on Python 2 and 3.
        if keyword in f_index:
            f_vector[f_index[keyword]] = 1
    return f_vector
def getFeatureSpace(train_data_dir, labels, sample_num=100):
    """Collect per-label keywords and build the training matrix.

    For each entry of ``labels`` this calls
    ``getAllText(train_data_dir, label, sample_num)``, extracts
    ``kw_each_label`` keywords from the first returned value with
    ``WordSeg.getKeyWords`` and remembers the second returned value as
    that label's training texts.

    Returns:
        A tuple ``(f_index, y, X)``: ``f_index`` maps every keyword to
        its position in the concatenated keyword list (when a keyword
        occurs more than once, its last position is kept), and ``y``,
        ``X`` are whatever ``getTrainMatrix(train_set, f_index)``
        returns.
    """
    all_keywords = []  # keywords of all labels, in label order
    texts_per_label = []

    for label in labels:
        merged_text, label_texts = getAllText(train_data_dir, label, sample_num)
        all_keywords.extend(WordSeg.getKeyWords(merged_text, kw_each_label))
        texts_per_label.append(label_texts)

    # keyword -> position; a later duplicate overwrites an earlier one
    f_index = dict(
        (keyword, position) for position, keyword in enumerate(all_keywords)
    )
    y, X = getTrainMatrix(texts_per_label, f_index)
    return f_index, y, X
def getFeatureSpace(train_data_dir, labels, sample_num=100):
    """Derive the keyword index and training data for ``labels``.

    Each label is passed to ``getAllText(train_data_dir, label,
    sample_num)``, which yields a pair: the first element is fed to
    ``WordSeg.getKeyWords`` (limited by ``kw_each_label``) and the second
    is stored as one entry of the training set.

    Returns:
        ``(f_index, y, X)`` where ``f_index`` maps keyword -> position in
        the accumulated keyword list (the last position is kept for a
        repeated keyword) and ``y, X`` is the result of
        ``getTrainMatrix(train_set, f_index)``.
    """
    keyword_pool = []
    train_set = []
    for current_label in labels:
        fetched = getAllText(train_data_dir, current_label, sample_num)
        alltext, texts = fetched
        label_keywords = WordSeg.getKeyWords(alltext, kw_each_label)
        keyword_pool.extend(label_keywords)
        train_set.append(texts)

    # Assign each keyword its offset in the pool; repeated keywords end
    # up with the offset of their final occurrence.
    f_index = {}
    for offset, keyword in enumerate(keyword_pool):
        f_index[keyword] = offset

    y, X = getTrainMatrix(train_set, f_index)
    return f_index, y, X
def getFeature(sentence):
    """Return the keywords of ``sentence``.

    Delegates to ``WordSeg.getKeyWords`` with the module-level
    ``kw_each_predict`` as its second argument and returns the result
    unchanged.
    """
    keywords = WordSeg.getKeyWords(sentence, kw_each_predict)
    return keywords
def getFeature(sentence):
    """Extract prediction features from ``sentence``.

    The features are exactly what ``WordSeg.getKeyWords(sentence,
    kw_each_predict)`` produces; nothing is post-processed here.
    """
    extracted = WordSeg.getKeyWords(sentence, kw_each_predict)
    return extracted