def loadData(path, k=100):
    """Load a pickled CNN-sentence dataset and split it into train/test arrays.

    NOTE(review): a second ``loadData`` definition later in this file shadows
    this one at import time — confirm which version callers actually need.

    Parameters
    ----------
    path : str
        Path to a pickle produced by the preprocessing step; expected to hold
        ``(revs, W, W2, word_idx_map, vocab)``.
    k : int
        Word-vector dimensionality forwarded to ``make_idx_data_cv``
        (default 100).

    Returns
    -------
    ((train_x, train_y), (test_x, test_y), W)
        Index matrices, int32 label vectors, and the embedding matrix ``W``.
    """
    # Fix: the original leaked the file handle (open() was never closed).
    with open(path, "rb") as f:
        x = cPickle.load(f)
    revs, W, W2, word_idx_map, vocab = x[:5]
    # Longest sentence in the corpus drives the padded sequence length.
    max_l = np.max(pd.DataFrame(revs)["num_words"])
    print(len(word_idx_map))
    print(len(vocab))
    print(len(revs))
    datasets = make_idx_data_cv(revs, word_idx_map, 1, max_l=max_l, k=k, filter_h=5)
    # Each row is [token indices..., label]; drop the trailing label column.
    img_h = len(datasets[0][0]) - 1
    print('img_h', img_h)
    print('max len', max_l)
    print(datasets[0].shape)
    test_set_x = datasets[1][:, :img_h]
    test_set_y = np.asarray(datasets[1][:, -1], "int32")
    train_set_x = datasets[0][:, :img_h]
    train_set_y = np.asarray(datasets[0][:, -1], "int32")
    print(np.shape(train_set_x))
    print('load data...')
    print(np.shape(W))
    print(type(W))
    return (train_set_x, train_set_y), (test_set_x, test_set_y), W
def loadData(path, max_l=10, k=100):
    """Load a pickled CNN-sentence dataset and split it into train/test arrays.

    NOTE(review): this redefines ``loadData`` and shadows the earlier version
    in this file; the two differ only in how ``max_l`` is chosen (fixed here,
    computed from the data there). Consider keeping one.

    Parameters
    ----------
    path : str
        Path to a pickle holding ``(revs, W, W2, word_idx_map, vocab)``.
    max_l : int
        Maximum sentence length used for padding (default 10 — previously a
        hard-coded constant; now a backward-compatible parameter).
    k : int
        Word-vector dimensionality (default 100 — likewise generalized).

    Returns
    -------
    ((train_x, train_y), (test_x, test_y), W)
        Index matrices, int32 label vectors, and the embedding matrix ``W``.
    """
    # Fix: the original leaked the file handle (open() was never closed).
    with open(path, "rb") as f:
        x = cPickle.load(f)
    revs, W, W2, word_idx_map, vocab = x[:5]
    print(len(word_idx_map))
    print(len(vocab))
    datasets = make_idx_data_cv(revs, word_idx_map, 1, max_l=max_l, k=k, filter_h=5)
    # Each row is [token indices..., label]; drop the trailing label column.
    img_h = len(datasets[0][0]) - 1
    test_set_x = datasets[1][:, :img_h]
    test_set_y = np.asarray(datasets[1][:, -1], "int32")
    train_set_x = datasets[0][:, :img_h]
    train_set_y = np.asarray(datasets[0][:, -1], "int32")
    print(np.shape(train_set_x))
    print('load data...')
    print(np.shape(W))
    print(type(W))
    return (train_set_x, train_set_y), (test_set_x, test_set_y), W