def LSM_method(train_set, test_set, num_feature, alpha):
    """NIPS'13: A latent source model for nonparametric time series classification."""
    from utils import transform_ts
    # Extract count features for the training set and transform each series.
    x_train, comment_cnt_train, y_train = MLR_extract_feature(train_set, num_feature)
    train_cnt = len(train_set)
    for i in range(train_cnt):
        x_train[i, :] = transform_ts(x_train[i, :])
    # Do the same for the test set.
    x_test, comment_cnt_true, y_true = MLR_extract_feature(test_set, num_feature)
    test_cnt = len(x_test)
    for i in range(test_cnt):
        x_test[i, :] = transform_ts(x_test[i, :])
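
# `transform_ts` is imported from utils but its definition is not shown in
# this excerpt. As a hypothetical illustration (an assumption, not the
# project's actual code), a NIPS'13-style transform of a count series could
# be a normalization so that classification depends on the shape of the
# series rather than its magnitude:
import numpy as np

def transform_ts_sketch(ts):
    """Hypothetical stand-in for utils.transform_ts: scale a 1-D series to
    unit Euclidean norm. The real utils.transform_ts may differ."""
    ts = np.asarray(ts, dtype=float)
    norm = np.linalg.norm(ts)
    return ts / norm if norm > 0 else ts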
import numpy as np

from utils import transform_ts


def transform_count_feature(topic_feature, factor_index_list):
    """Transform the feature vector following the method in the NIPS'13 paper.

    factor_index_list: list of factor (column) indices to normalize.
    Note: the NIPS'13 method only applies to count-related features.
    """
    num_feature = len(topic_feature)  # number of rows (topics) in the feature matrix
    p = np.zeros(num_feature, dtype=float)
    for i in factor_index_list:
        # Gather factor i across all rows, transform it, and write it back.
        for j in range(num_feature):
            p[j] = topic_feature[j][i]
        p = transform_ts(p)
        for j in range(num_feature):
            topic_feature[j][i] = p[j]
    return topic_feature
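
# Toy usage of transform_count_feature (illustrative values only): rows are
# topics, columns are features, and columns 0 and 2 are the count-related
# factors to be normalized in place.
toy_feature = [
    [10.0, 1.0, 5.0, 0.0],
    [20.0, 2.0, 3.0, 1.0],
    [30.0, 3.0, 2.0, 0.0],
]
toy_feature = transform_count_feature(toy_feature, factor_index_list=[0, 2])
# Columns 0 and 2 now hold their transform_ts-normalized values; columns 1
# and 3 are unchanged.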
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from tqdm import tqdm

import utils

# part_size, first_sig, max_line, last_part, sample_size, n_dim, min_num,
# max_num, meta_test, model and N_SPLITS are defined in earlier cells.

# Here we create a list of [start, end] index pairs for each of the 10 parts,
# plus one for the last partial part.
start_end = [[x, x + part_size] for x in range(first_sig, max_line + first_sig, part_size)]
start_end = start_end[:-1] + [[start_end[-1][0], start_end[-1][0] + last_part]]
print(start_end)

X_test = []
# Now, much as we did above with the train data, we convert the test data part
# by part, transforming the 800,000 measurements of each 3-phase signal into a
# (160, 57) matrix.
for start, end in start_end:
    subset_test = pq.read_pandas('./data/test.parquet',
                                 columns=[str(i) for i in range(start, end)]).to_pandas()
    for i in tqdm(subset_test.columns):
        id_measurement, phase = meta_test.loc[int(i)]
        subset_test_col = subset_test[i]
        subset_trans = utils.transform_ts(subset_test_col, sample_size, n_dim, min_num, max_num)
        X_test.append([i, id_measurement, phase, subset_trans])

# Stack the 3 phases of each measurement side by side, giving one input row
# per id_measurement.
X_test_input = np.asarray([
    np.concatenate([X_test[i][3], X_test[i + 1][3], X_test[i + 2][3]], axis=1)
    for i in range(0, len(X_test), 3)
])
np.save("X_test.npy", X_test_input)

submission = pd.read_csv('./data/sample_submission.csv')
print(len(submission))

# Predict with each fold's weights and collect the per-fold probabilities.
preds_test = []
for i in range(N_SPLITS):
    model.load_weights('weights_{}.h5'.format(i))
    pred = model.predict(X_test_input, batch_size=300, verbose=1)
    preds_test.append(pred)
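
# The excerpt stops at the fold loop. A plausible continuation (an assumption,
# not the original script's code) would average the per-fold probabilities,
# expand them back to one row per phase signal, threshold, and write the
# submission file. The 0.5 threshold and the 'target' column name are
# assumptions based on the usual sample_submission layout.
preds_mean = np.mean(preds_test, axis=0)      # average over folds; assumed shape (n_measurements, 1)
preds_per_signal = np.repeat(preds_mean, 3)   # one prediction per phase / signal_id
submission['target'] = (preds_per_signal > 0.5).astype(int)
submission.to_csv('submission.csv', index=False)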