def LSM_method(train_set, test_set, num_feature, alpha):
    """ NIPS'13: A latent source model for nonparametric time series classification

    Extracts feature matrices for the train and test sets, then applies the
    `transform_ts` normalization to every example's time series.

    NOTE(review): this function appears truncated — the final loop assigns
    `s` without using it and nothing is returned; the classification step
    of the LSM algorithm (and any use of `alpha`) is missing. Confirm
    against the original source.
    """
    # Function-scope import of the shared time-series transform.
    from utils import transform_ts
    # NOTE(review): MLR_extract_feature is defined elsewhere in this file/
    # project; presumably returns (features, comment counts, labels) — verify.
    x_train, comment_cnt_train, y_train = MLR_extract_feature(train_set, num_feature)
    train_cnt = len(train_set)
    # Transform each training time series in place, row by row.
    for i in range(train_cnt):
        x_train[i, :] = transform_ts(x_train[i, :])
        
    x_test, comment_cnt_true, y_true = MLR_extract_feature(test_set, num_feature)
    
    test_cnt = len(x_test)
    for i in range(test_cnt):
        # `s` is never used below — see the truncation note in the docstring.
        s = x_test[i, :] = transform_ts(x_test[i, :])
def transform_count_feature(topic_feature, factor_index_list):
    """Normalize count-type features following the NIPS'13 method.

    For each factor index in ``factor_index_list``, gather that factor's
    value from every topic into a single vector, run it through
    ``transform_ts``, and write the transformed values back into
    ``topic_feature`` in place.

    Note: this normalization is only meaningful for count-related features.

    Returns the (mutated) ``topic_feature``.
    """
    num_topics = len(topic_feature)
    for factor_idx in factor_index_list:
        # Collect this factor's value across all topics as a float vector.
        column = np.array([topic_feature[t][factor_idx] for t in range(num_topics)], float)
        transformed = transform_ts(column)
        # Scatter the normalized values back into the feature table.
        for t in range(num_topics):
            topic_feature[t][factor_idx] = transformed[t]
    return topic_feature
# --- Example #3 (snippet separator; original marker: "Exemple #3" / "0") ---
# Here we create a list of lists with start index and end index for each of the 10 parts and one for the last partial part
start_end = [[x, x + part_size]
             for x in range(first_sig, max_line + first_sig, part_size)]
# The final generated window may overshoot max_line; replace it with a
# window of length `last_part` starting at the same offset.
start_end = start_end[:-1] + [[start_end[-1][0], start_end[-1][0] + last_part]]
print(start_end)
X_test = []
# now, very like we did above with the train data, we convert the test data part by part
# transforming the 3 phases 800000 measurement in matrix (160,57)
for start, end in start_end:
    # Read only this window's signal columns from the parquet file to keep
    # memory bounded; column names are stringified integer signal ids.
    subset_test = pq.read_pandas('./data/test.parquet',
                                 columns=[str(i) for i in range(start, end)
                                          ]).to_pandas()
    for i in tqdm(subset_test.columns):
        # meta_test maps each signal id to its (measurement id, phase) pair.
        id_measurement, phase = meta_test.loc[int(i)]
        subset_test_col = subset_test[i]
        # Downsample/featurize the raw signal with the shared transform.
        subset_trans = utils.transform_ts(subset_test_col, sample_size, n_dim,
                                          min_num, max_num)
        X_test.append([i, id_measurement, phase, subset_trans])

# Group consecutive triplets (the 3 phases of one measurement) and stack
# their transformed features side by side along the feature axis.
X_test_input = np.asarray([
    np.concatenate([X_test[i][3], X_test[i + 1][3], X_test[i + 2][3]], axis=1)
    for i in range(0, len(X_test), 3)
])
# Cache the prepared test tensor so the expensive parquet pass can be skipped.
np.save("X_test.npy", X_test_input)

# Load the competition's sample submission to get the expected row layout.
submission = pd.read_csv('./data/sample_submission.csv')
print(len(submission))

preds_test = []
for i in range(N_SPLITS):
    model.load_weights('weights_{}.h5'.format(i))
    pred = model.predict(X_test_input, batch_size=300, verbose=1)