def getFeature(sentence, f_index):
    """Build a 0/1 keyword-presence vector for ``sentence``.

    Keywords are obtained from
    ``WordSeg.getKeyWords(sentence, kw_each_predict)``. For every returned
    keyword that is a key of ``f_index``, the vector slot at the mapped
    index is set to 1; all other slots stay 0.

    Args:
        sentence: Text handed unchanged to ``WordSeg.getKeyWords``.
        f_index: Mapping from keyword to its position in the vector.

    Returns:
        A list of length ``kw_each_label * num_of_labels`` containing
        only 0 and 1.

    Raises:
        IndexError: If ``f_index`` maps a matched keyword to a position
            outside the vector.
    """
    keywords = WordSeg.getKeyWords(sentence, kw_each_predict)
    f_vector = [0] * kw_each_label * num_of_labels  # start with all zeros
    for word in keywords:
        # Membership via ``in`` works on Python 2 and 3; ``dict.has_key``
        # no longer exists in Python 3.
        if word in f_index:
            f_vector[f_index[word]] = 1
    return f_vector
def getFeature(sentence, f_index):
    """Encode ``sentence`` as a binary feature list indexed by ``f_index``.

    The sentence's keywords come from
    ``WordSeg.getKeyWords(sentence, kw_each_predict)``. Each keyword found
    in ``f_index`` switches the corresponding position of the result to 1.

    Args:
        sentence: Text passed as-is to ``WordSeg.getKeyWords``.
        f_index: Dict-like mapping of keyword -> index into the result.

    Returns:
        A list with ``kw_each_label * num_of_labels`` entries, each 0 or 1.
    """
    f_vector = [0] * kw_each_label * num_of_labels  # all-zero baseline
    for keyword in WordSeg.getKeyWords(sentence, kw_each_predict):
        # ``dict.has_key`` was removed in Python 3; the ``in`` operator is
        # the equivalent test and is valid on Python 2 as well.
        if keyword in f_index:
            f_vector[f_index[keyword]] = 1
    return f_vector
Beispiel #3
0
def getFeatureSpace(train_data_dir, labels, sample_num=100):
    """Assemble the keyword index and the training matrix for ``labels``.

    For each label, ``getAllText(train_data_dir, label, sample_num)`` is
    called and its first result is passed to ``WordSeg.getKeyWords``
    together with ``kw_each_label``. The keywords of all labels are
    concatenated in label order and numbered by position to form the
    feature index; the second result of ``getAllText`` is collected per
    label and handed to ``getTrainMatrix``.

    Args:
        train_data_dir: Forwarded to ``getAllText``.
        labels: Iterable of labels to process, in order.
        sample_num: Forwarded to ``getAllText`` (default 100).

    Returns:
        A tuple ``(f_index, y, X)`` where ``f_index`` maps keyword ->
        position and ``y, X`` are the two values returned by
        ``getTrainMatrix``.
    """
    keywords = []
    samples_by_label = []
    for label in labels:
        merged_text, label_texts = getAllText(train_data_dir, label, sample_num)
        keywords += WordSeg.getKeyWords(merged_text, kw_each_label)
        samples_by_label.append(label_texts)
    # A keyword that occurs more than once keeps the position of its last
    # occurrence, because later entries overwrite earlier ones.
    f_index = {word: position for position, word in enumerate(keywords)}
    y, X = getTrainMatrix(samples_by_label, f_index)
    return f_index, y, X
Beispiel #4
0
def getFeatureSpace(train_data_dir, labels, sample_num=100):
    """Create the feature index and training data from per-label texts.

    Each label is processed in turn: ``getAllText`` supplies a pair whose
    first element is given to ``WordSeg.getKeyWords`` (with
    ``kw_each_label``) and whose second element is stored as that label's
    training samples. The collected keywords are numbered by their
    position in the combined list.

    Args:
        train_data_dir: Passed through to ``getAllText``.
        labels: Iterable of labels, processed in iteration order.
        sample_num: Passed through to ``getAllText`` (default 100).

    Returns:
        ``(f_index, y, X)``: the keyword -> position dict, followed by the
        two values produced by ``getTrainMatrix(train_set, f_index)``.
    """
    all_keywords = []
    per_label_texts = []
    for current_label in labels:
        combined, samples = getAllText(train_data_dir, current_label, sample_num)
        label_keywords = WordSeg.getKeyWords(combined, kw_each_label)
        all_keywords.extend(label_keywords)
        per_label_texts.append(samples)
    # Repeated keywords end up with the index of their final occurrence.
    f_index = dict((term, slot) for slot, term in enumerate(all_keywords))
    targets, matrix = getTrainMatrix(per_label_texts, f_index)
    return f_index, targets, matrix
Beispiel #5
0
def getFeature(sentence):
    """Return the result of keyword extraction for ``sentence``.

    Thin wrapper around ``WordSeg.getKeyWords`` that always passes the
    module-level ``kw_each_predict`` as the second argument.

    Args:
        sentence: Text handed unchanged to ``WordSeg.getKeyWords``.

    Returns:
        Whatever ``WordSeg.getKeyWords(sentence, kw_each_predict)`` returns.
    """
    keywords = WordSeg.getKeyWords(sentence, kw_each_predict)
    return keywords
Beispiel #6
0
def getFeature(sentence):
    """Extract the features of ``sentence`` via ``WordSeg.getKeyWords``.

    The call is made with ``kw_each_predict`` as the second argument and
    its result is returned unmodified.

    Args:
        sentence: Text to forward to ``WordSeg.getKeyWords``.

    Returns:
        The value produced by ``WordSeg.getKeyWords``.
    """
    extract_keywords = WordSeg.getKeyWords
    return extract_keywords(sentence, kw_each_predict)