Example #1
def sequential_prediction_command(args):
    data, to_predict, true_data, tide_height_nans = process_data(normalise_data=True)
    max_time = data.index.max()
    means = []
    variances = []
    predictions = []
    data_chunks = []
    for data_chunk in chunk(data):
        prediction, mean, var = sequential_predictions(data_chunk, max_time=max_time)
        means.append(mean)
        variances.append(var)
        predictions.append(prediction)
        data_chunks.append(data_chunk)

    @gif.frame
    def animate(i):
        plot = GPPlot(
            data_chunks[i],
            true_data,
            means[i],
            variances[i],
            [predictions[i]],
            TIDE_HEIGHT,
            join=False,
        )
        plot.init_plot()
        plot.plot()

    frames = [animate(i) for i in range(len(means))]

    gif.save(frames, f"{args.fig_name}.gif", duration=60, unit="s", between="startend")
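
This example relies on a chunk helper that is not shown here. Below is a minimal sketch of what such a helper might look like, assuming it yields successively larger prefixes of the time-indexed DataFrame so that each animation frame conditions the model on more observations; the step parameter and its default of 50 are assumptions, not the original implementation.

import pandas as pd

def chunk(data: pd.DataFrame, step: int = 50):
    """Yield successively larger prefixes of data.

    Each prefix becomes one animation frame: the model is refit on
    data.iloc[:step], data.iloc[:2 * step], ... up to the full series.
    """
    # assumed behaviour: growing prefixes, clamped to the series length
    for end in range(step, len(data) + step, step):
        yield data.iloc[:end]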
Example #2
def plot_scatter_command(args):
    data, to_predict, true_data, tide_height_nans = process_data(normalise_data=False)
    fig, ax = plt.subplots()
    plot_scatter(
        ax,
        data,
        true_data,
        TIDE_HEIGHT,
        savefig=args.save_figures,
        fig_name=args.fig_name,
    )
Example #3
def classify(args):
    input_path = args.input_file
    df_input = get_input_data(input_path)
    print "Fetching features for {} input samples".format(len(df_input))
    df_input = process_data(data_frame=df_input)
    if args.training_file:
        if args.processed:
            df_train = pd.read_csv(args.training_file)
        else:
            df_train = pd.read_csv(args.training_file,
                                   sep=' ',
                                   names=["repository", "label"])
            print "Fetching features for {} training samples".format(
                len(df_train))
            df_train = process_data(data_frame=df_train)
        loops = args.loops or 1
        loops = int(loops)
        train_and_predict(df_train, df_input, loops)
    else:
        predict(df_input)
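
classify reads its options from an argparse namespace. For context, here is a minimal sketch of the CLI wiring the function implies; the flag names are inferred from the attributes input_file, training_file, processed, and loops, and the original parser may differ.

import argparse

parser = argparse.ArgumentParser(description="Classify repositories")
parser.add_argument("input_file", help="file with input samples")
parser.add_argument("--training-file", dest="training_file",
                    help="labelled training data (repository/label pairs)")
parser.add_argument("--processed", action="store_true",
                    help="training file already contains extracted features")
parser.add_argument("--loops", help="number of train-and-predict iterations")
args = parser.parse_args()
classify(args)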
Example #4
def train_command(args):
    data, to_predict, true_data, tide_height_nans = process_data(normalise_data=True)
    predictions, mean, var, _ = train(
        to_predict,
        data,
    )
    # filter the true tide_height to keep only the non-NaN points
    true_data_filtered = true_data.loc[tide_height_nans.values]
    plot = GPPlot(
        data,
        true_data_filtered,
        mean,
        var,
        [predictions],
        TIDE_HEIGHT,
    )
    plot.init_plot()
    plot.plot()
    if args.save_figures:
        plot.savefig(args.fig_name)
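
The true_data.loc[tide_height_nans.values] step above is ordinary pandas boolean-mask indexing. A self-contained illustration of the pattern on toy data (not the project's series):

import pandas as pd

true_data = pd.Series([1.0, 2.0, 3.0, 4.0])
mask = pd.Series([True, False, True, False])  # e.g. "observation present" flags

# .loc with a boolean array keeps only the rows where the mask is True
filtered = true_data.loc[mask.values]
print(filtered)  # rows 0 and 2 survive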
Example #5
                                   global_step=step,
                                   write_meta_graph=False)
                        print('step:{}, error:{}'.format(step, _error))
            except Exception as e:
                print(e)
                train_data_iter = process_data.generate_batch(
                    batch_size,
                    process_data.train_data,
                    features=[
                        'word_embedding', 'postag', 'p', 'sequence_lengths'
                    ],
                    label_type='s')


if __name__ == '__main__':
    train_data_path_list = ['../../../data/train_data_ps.json']
    test_data_path = '../../../data/dev_data_ps.json'
    pre_word_embedding_path = '../../../data/embedding/sgns.target.word-ngram.1-2.dynwin5.thr10.neg5.dim300.iter5.table'
    baike_word_embedding_path = '../../../data/embedding/sgns.target.word-ngram.1-2.dynwin5.thr10.neg5.dim300.iter5.table'
    postag_path = '../../../data/pos'
    p_path = '../../../data/all_50_schemas'
    process_data = process_data(train_data_path_list, test_data_path,
                                pre_word_embedding_path,
                                baike_word_embedding_path, postag_path, p_path)

    batch_size = 128
    learning_rate = 0.001  # 0.0000001 converges too slowly
    epoch = 20
    #train(learning_rate, batch_size, epoch, process_data)
    evaluate(process_data)
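
The try/except in this example recreates the batch iterator once it is exhausted so that training can continue for further epochs. A generic sketch of that pattern, assuming generate_batch returns a Python generator; the make_iter helper below is a stand-in, and catching StopIteration is the narrower, more idiomatic variant of the broad except Exception used above.

def make_iter():
    # stand-in for process_data.generate_batch(batch_size, ...)
    return iter(range(3))

train_data_iter = make_iter()
for step in range(10):
    try:
        batch = next(train_data_iter)
    except StopIteration:
        # iterator exhausted: start a new pass over the training data
        train_data_iter = make_iter()
        batch = next(train_data_iter)
    # ... run one training step on batch ...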
Example #6
    pso_sess.close()

if __name__ == '__main__':
    p_train_data_path_list = ['../../../data/train_data.json']
    p_test_data_path = '../../../data/dev_data.json'
    ps_train_data_path_list = ['../../../data/train_data_ps.json']
    ps_test_data_path = '../../../data/dev_data_ps.json'
    pso_train_data_path_list = ['../../../data/train_data_pso.json']
    pso_test_data_path = '../../../data/dev_data_pso.json'

    pre_word_embedding_path = '../../../data/embedding/sgns.target.word-ngram.1-2.dynwin5.thr10.neg5.dim300.iter5.table'
    baike_word_embedding_path = '../../../data/embedding/sgns.target.word-ngram.1-2.dynwin5.thr10.neg5.dim300.iter5.table'
    postag_path = '../../../data/pos'
    p_path = '../../../data/all_50_schemas'

    p_process_data = process_data(p_train_data_path_list, p_test_data_path, pre_word_embedding_path,
                                  baike_word_embedding_path, postag_path, p_path, '../../../data/test_data_postag.json')
    del p_process_data.train_data, p_process_data.test_data
    gc.collect()

    ps_process_data = process_data(ps_train_data_path_list, ps_test_data_path, pre_word_embedding_path,
                                   baike_word_embedding_path, postag_path, p_path, '../../../data/test_data_postag.json')
    del ps_process_data.train_data, ps_process_data.test_data
    gc.collect()

    pso_process_data = process_data(pso_train_data_path_list, pso_test_data_path, pre_word_embedding_path,
                                    baike_word_embedding_path, postag_path, p_path, '../../../data/test_data_postag.json')
    del pso_process_data.train_data, pso_process_data.test_data
    gc.collect()

    batch_size = 1
    out_len = 49
Example #7
def estimate_noise_command(args):
    data, _, true_data, tide_height_nans = process_data(normalise_data=False)
    true_data = true_data.loc[~tide_height_nans.values]
    diff = true_data - data
    LOG.info(f"Estimate of the noise: {diff.std()}")