def nmf30_feature(data):
    """Run the ``nmf30`` worker over ``data`` through ``process_data``.

    The number of blocks handed to ``process_data`` as ``data_n`` is derived
    from the row count and the module-level ``block_size``; for non-empty
    input and a positive ``block_size`` this equals
    ``ceil(rows / block_size)``.

    Args:
        data: Object exposing ``shape[0]`` as its row count
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    row_count = data.shape[0]
    # int() truncates toward zero, so the "- 1 ... + 1" form rounds up.
    n_blocks = int((row_count - 1) / block_size) + 1
    return process_data(nmf30, data, n=2, tag='topic model', data_n=n_blocks)
def local_vec_feature(data):
    """Run the ``local_vec`` worker over ``data`` through ``process_data``.

    Computes the block count from the row count and the module-level
    ``block_size``, prints it together with the row count for diagnostics,
    and forwards it to ``process_data`` as ``data_n``.

    Args:
        data: Object exposing ``shape[0]`` as its row count
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    # Rounds up for non-empty input: int() truncates toward zero.
    n_blocks = int((data.shape[0] - 1) / block_size) + 1
    print("block num for loc vec", n_blocks, data.shape[0])
    return process_data(
        local_vec, data, n=4, tag='local word2vec', data_n=n_blocks
    )
def basic_feature():
    """Load the configured CSV and run ``base_process_thread`` over it.

    Reads ``data/<data_file>`` with pandas, optionally restricts the rows to
    the slice ``[start:n]`` when the module-level ``n`` is positive, drops the
    ``id``/``qid1``/``qid2`` columns unless the file is ``test.csv``, and then
    hands the frame to ``process_data``.

    Relies on the module-level names ``data_file``, ``n`` and ``start``.

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    csv_path = 'data/{f}'.format(f=data_file)
    frame = pd.read_csv(csv_path, sep=',')

    # A non-positive ``n`` means "use every row".
    if n > 0:
        frame = frame[start:n]

    # NOTE(review): presumably test.csv lacks these id columns -- confirm.
    if data_file != "test.csv":
        frame = frame.drop(['id', 'qid1', 'qid2'], axis=1)

    print("basic data is read")
    return process_data(base_process_thread, frame, n=8, tag="base_feature")
def pos_vec_feature(data):
    """Run the ``pos_vec`` worker over ``data`` through ``process_data``.

    No ``data_n`` is passed here: the block-count computation
    (``int((data.shape[0]-1) / block_size) + 1``) and the matching
    ``data_n`` argument were disabled in the original code.

    Args:
        data: Input forwarded unchanged to ``process_data``.

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    return process_data(pos_vec, data, n=8, tag='pos vec')
def common_ratio_feature(ret):
    """Run ``common_ratio_thread`` over ``ret`` through ``process_data``.

    Args:
        ret: Input forwarded unchanged to ``process_data``.

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    return process_data(common_ratio_thread, ret, n=6, tag="common_ratio")
def dist_features(ret):
    """Run ``dist_thread`` over ``ret`` through ``process_data``.

    Args:
        ret: Input forwarded unchanged to ``process_data``.

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    return process_data(dist_thread, ret, n=3, tag='dist_feature')
def norm_wmd_feature(ret):
    """Run ``norm_wmd_tread`` over ``ret`` through ``process_data``.

    Args:
        ret: Input forwarded unchanged to ``process_data``.

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    # NOTE(review): "norm_wmd_tread" looks like a misspelling of "thread";
    # the name is defined elsewhere, so it is referenced as-is here.
    return process_data(norm_wmd_tread, ret, n=3, tag="norm_wmd_feature")
def wmd_feature(ret):
    """Run ``wmd_thread`` over ``ret`` through ``process_data``.

    Args:
        ret: Input forwarded unchanged to ``process_data``.

    Returns:
        Whatever ``process_data`` returns for this worker.
    """
    return process_data(wmd_thread, ret, n=3, tag="wmd_feature")