def generate_valid_data(self, path, validation_nr=0, batch_size=64):
        """Yield validation batches from one preprocessed log file, forever.

        Selects the ``validation_nr``-th ``log_*.csv`` (sorted) under
        ``path/training_set_preproc`` together with its matching
        ``session_`` file, draws one fixed random index array, and cycles
        over it in ``batch_size``-sized slices.

        Yields:
            (inputs, targets) dicts shaped for a Keras two-input model:
            ``{'tracks_input', 'session_input'}`` and ``{'output'}``.
        """
        log_files = sorted(
            glob.glob(path + "/training_set_preproc/log_*.csv"))
        track_file = log_files[validation_nr]
        session_file = (os.path.dirname(track_file) + '/session_'
                        + os.path.basename(track_file))

        x_rnn, x_fc, y = utils.load_training_data_simple(
            track_file, session_file)

        n_samples = x_rnn.shape[0]
        n_batches = n_samples // batch_size
        # NOTE(review): indices are drawn WITH replacement once and then
        # reused on every pass — presumably intentional; confirm a
        # permutation was not meant here.
        sample_idx = np.random.randint(n_samples, size=n_samples)

        while True:
            for b in range(n_batches):
                idx = sample_idx[b * batch_size:(b + 1) * batch_size]
                yield ({'tracks_input': x_rnn[idx, :, :],
                        'session_input': x_fc[idx, :]},
                       {'output': y[idx, :]})
def run_on_server_grid_search():
    """Run the hyper-parameter grid search on the cluster data set.

    Loads one preprocessed log/session pair, shuffles and splits it
    50/25/25 into train/valid/test, appends the split sizes to a
    timestamped text file under ``models/``, and delegates the search to
    ``grid_search``.  Prints CPU time used at the end.
    """
    start = time.process_time()
    path = '/cluster/scratch/cspreche/spotify_challenge'

    train_path = path + '/training_set_preproc'
    test_path = path + '/test_set_preproc'
    submission_path = path + '/submissions'

    tracks_path = train_path + '/log_8_20180902_000000000000.csv'
    sessions_path = train_path + '/session_log_8_20180902_000000000000.csv'

    now = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M")
    path_write = 'models/grid_search_' + now + '.txt'

    x_rnn, x_fc, y = utils.load_training_data_simple(tracks_path, sessions_path)

    # Shuffle once, then carve out the three contiguous index ranges.
    n = x_rnn.shape[0]
    perm = np.random.permutation(np.arange(n))
    idx_train = perm[:int(n * 0.5)]
    idx_valid = perm[int(n * 0.5):int(n * 0.75)]
    idx_test = perm[int(n * 0.75):]

    x_rnn_train, x_fc_train, y_train = (
        x_rnn[idx_train, :, :], x_fc[idx_train, :], y[idx_train, :])
    x_rnn_valid, x_fc_valid, y_valid = (
        x_rnn[idx_valid, :, :], x_fc[idx_valid, :], y[idx_valid, :])
    x_rnn_test, x_fc_test, y_test = (
        x_rnn[idx_test, :, :], x_fc[idx_test, :], y[idx_test, :])

    print(x_rnn.shape)
    print(x_rnn_train.shape, x_rnn_valid.shape, x_rnn_test.shape)

    # Record the split sizes in the grid-search log file.
    with open(path_write, 'a') as f:
        f.write('x_rnn: (%u,%u,%u)' % (x_rnn.shape[0], x_rnn.shape[1], x_rnn.shape[2]))
        f.write(', x_rnn_train: (%u)' % (x_rnn_train.shape[0]))
        f.write(', x_rnn_valid: (%u)' % (x_rnn_valid.shape[0]))
        f.write(', x_rnn_test: (%u) \n' % (x_rnn_test.shape[0]))

    # Free the unsplit arrays before the (memory-hungry) search starts.
    del x_rnn, x_fc, y

    batch_size = 64
    epochs = 100

    grid_search(x_rnn_train, x_fc_train, y_train, x_rnn_valid,
                x_fc_valid, y_valid, x_rnn_test, x_fc_test, y_test,
                batch_size=batch_size, epochs=epochs, path=path_write)

    end = time.process_time()
    print("Time used: %4.2f seconds" % (end - start))
def continue_on_server_simple():
    """Resume training a saved Hybrid model on the cluster data set.

    Reloads the ``hybrid_concatenate_2018-12-31_16:52`` checkpoint,
    recompiles it with a lower learning rate, continues fitting on a
    fresh random split of one log file, then saves, plots and evaluates.
    """
    print(device_lib.list_local_devices())

    start = time.process_time()
    path = '/cluster/scratch/cspreche/spotify_challenge'

    train_path = path + '/training_set_preproc'
    test_path = path + '/test_set_preproc'
    submission_path = path + '/submissions'

    tracks_path = train_path + '/log_8_20180902_000000000000.csv'
    sessions_path = train_path + '/session_log_8_20180902_000000000000.csv'

    x_rnn, x_fc, y = utils.load_training_data_simple(tracks_path, sessions_path)

    # NOTE(review): 30% train / 10% valid / last 25% test — the 40%–75%
    # range is unused, presumably a deliberate subsample for the
    # continuation run; confirm before reusing these ratios elsewhere.
    n = x_rnn.shape[0]
    perm = np.random.permutation(np.arange(n))
    idx_train = perm[:int(n * 0.3)]
    idx_valid = perm[int(n * 0.3):int(n * 0.4)]
    idx_test = perm[int(n * 0.75):]

    x_rnn_train, x_fc_train, y_train = (
        x_rnn[idx_train, :, :], x_fc[idx_train, :], y[idx_train, :])
    x_rnn_valid, x_fc_valid, y_valid = (
        x_rnn[idx_valid, :, :], x_fc[idx_valid, :], y[idx_valid, :])
    x_rnn_test, x_fc_test, y_test = (
        x_rnn[idx_test, :, :], x_fc[idx_test, :], y[idx_test, :])

    # Free the unsplit arrays before training.
    del x_rnn, x_fc, y

    model = Hybrid('hybrid_concatenate')
    model.load_model('hybrid_concatenate_2018-12-31_16:52')
    # Lower LR than a fresh run — we are fine-tuning an existing model.
    model.compile(optimizer='Adam', loss='m_hinge_acc', lr=0.0001)

    model.print_summary()

    model.fit(x_rnn_train, x_fc_train, y_train, x_rnn_valid,
              x_fc_valid, y_valid, epochs=200, batch_size=64,
              verbosity=2, patience=40)

    model.plot_training()
    model.save_model()

    model.evaluate(x_rnn_test, x_fc_test, y_test, verbosity=2)

    end = time.process_time()
    print("Model trained, time used: %4.2f seconds" % (end - start))
def run_local_grid_search():
    """Run a (smaller) hyper-parameter grid search on the local data set.

    Mirrors ``run_on_server_grid_search`` but loads data via the
    argument-less ``utils.load_training_data_simple()`` and uses fewer
    epochs.  Splits 50/25/25 into train/valid/test, logs the split sizes
    to a timestamped file under ``models/``, and delegates to
    ``grid_search``.
    """
    start = time.process_time()

    now = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M")
    path_write = 'models/grid_search_' + now + '.txt'

    x_rnn, x_fc, y = utils.load_training_data_simple()

    print(x_rnn.shape)

    # Generate validation set
    s = x_rnn.shape[0]
    shuffle_indices = np.random.permutation(np.arange(s))
    indices_train = shuffle_indices[:int(s*0.5)]
    indices_valid = shuffle_indices[int(s*0.5):int(s*0.75)]
    indices_test = shuffle_indices[int(s*0.75):]

    x_rnn_train = x_rnn[indices_train,:,:]
    x_fc_train = x_fc[indices_train,:]
    y_train = y[indices_train,:]

    x_rnn_valid = x_rnn[indices_valid,:,:]
    x_fc_valid = x_fc[indices_valid,:]
    y_valid = y[indices_valid,:]

    x_rnn_test = x_rnn[indices_test,:,:]
    x_fc_test = x_fc[indices_test,:]
    y_test = y[indices_test,:]

    # Record the split sizes in the grid-search log file.
    with open(path_write, 'a') as f:
        f.write('x_rnn: (%u,%u,%u)' % (x_rnn.shape[0],x_rnn.shape[1],x_rnn.shape[2]))
        f.write(', x_rnn_train: (%u)' % (x_rnn_train.shape[0]))
        f.write(', x_rnn_valid: (%u)' % (x_rnn_valid.shape[0]))
        f.write(', x_rnn_test: (%u) \n' % (x_rnn_test.shape[0]))

    # Free the unsplit arrays before the search starts.
    del x_rnn, x_fc, y

    # Generate model
    batch_size = 64
    epochs = 25

    grid_search(x_rnn_train, x_fc_train, y_train, x_rnn_valid,
            x_fc_valid, y_valid, x_rnn_test, x_fc_test, y_test,
            batch_size = batch_size, epochs = epochs, path = path_write)

    # Fix: `start` was captured but never reported; match the timing
    # output of run_on_server_grid_search.
    end = time.process_time()
    print("Time used: %4.2f seconds" % (end - start))
def run_on_server_simple():
    """Train a single large Hybrid model from scratch on the cluster.

    Uses three separate preprocessed log files as train, validation and
    test sets, builds a concatenate-merge Hybrid network, trains with
    early stopping, then saves, plots and evaluates the model.
    """
    print(device_lib.list_local_devices())

    start = time.process_time()
    path = '/cluster/scratch/cspreche/spotify_challenge'

    train_path = path + '/training_set_preproc'
    test_path = path + '/test_set_preproc'
    submission_path = path + '/submissions'

    # One whole log file per split (no shuffling/slicing needed).
    tracks_path1 = train_path + '/log_8_20180902_000000000000.csv'
    sessions_path1 = train_path + '/session_log_8_20180902_000000000000.csv'

    tracks_path2 = train_path + '/log_0_20180807_000000000000.csv'
    sessions_path2 = train_path + '/session_log_0_20180807_000000000000.csv'

    tracks_path3 = train_path + '/log_6_20180801_000000000000.csv'
    sessions_path3 = train_path + '/session_log_6_20180801_000000000000.csv'

    x_rnn_train, x_fc_train, y_train = utils.load_training_data_simple(tracks_path1, sessions_path1)
    x_rnn_valid, x_fc_valid, y_valid = utils.load_training_data_simple(tracks_path2, sessions_path2)
    x_rnn_test, x_fc_test, y_test = utils.load_training_data_simple(tracks_path3, sessions_path3)

    # Architecture hyper-parameters (cluster-sized configuration).
    rnn_layer_sizes = np.array([1024, 1024, 512, 512])
    dense_layer_parallel_sizes = np.array([512, 512])
    dense_layer_sequential_sizes = np.array([512, 64, 1])
    dropout_prob_rnn = 0.3
    dropout_prob_dense = 0.3
    lambda_reg_dense = 0.001
    lambda_reg_rnn = 0.001
    merge = 'concatenate'

    model = Hybrid()
    model.build_model(
        rnn_layer_sizes = rnn_layer_sizes,
        dense_layer_parallel_sizes = dense_layer_parallel_sizes,
        dense_layer_sequential_sizes = dense_layer_sequential_sizes,
        dropout_prob_rnn = dropout_prob_rnn,
        dropout_prob_dense = dropout_prob_dense,
        lambda_reg_dense = lambda_reg_dense,
        lambda_reg_rnn = lambda_reg_rnn,
        merge = merge)
    model.compile(optimizer = 'Adam', loss = 'm_hinge_acc', lr = 0.001)

    model.print_summary()

    model.fit(x_rnn_train, x_fc_train, y_train, x_rnn_valid,
        x_fc_valid, y_valid, epochs=300, batch_size = 128,
        verbosity=2, patience = 20)

    model.plot_training()
    model.save_model()

    model.evaluate(x_rnn_test, x_fc_test, y_test, verbosity=2)

    end = time.process_time()
    print("Model trained, time used: %4.2f seconds" % (end-start))
def run_local_test():
    """Quick local smoke test: train a small Hybrid model for 10 epochs.

    Loads the default local data set, splits it 50/25/25 into
    train/valid/test, builds a small maximum-merge Hybrid network,
    trains briefly with early stopping, then plots and evaluates.
    """
    x_rnn, x_fc, y = utils.load_training_data_simple()

    # Generate train, valid and test set
    s = x_rnn.shape[0]
    shuffle_indices = np.random.permutation(np.arange(s))
    indices_train = shuffle_indices[:int(s*0.5)]
    indices_valid = shuffle_indices[int(s*0.5):int(s*0.75)]
    indices_test = shuffle_indices[int(s*0.75):]

    x_rnn_train = x_rnn[indices_train,:,:]
    x_fc_train = x_fc[indices_train,:]
    y_train = y[indices_train,:]

    x_rnn_valid = x_rnn[indices_valid,:,:]
    x_fc_valid = x_fc[indices_valid,:]
    y_valid = y[indices_valid,:]

    x_rnn_test = x_rnn[indices_test,:,:]
    x_fc_test = x_fc[indices_test,:]
    y_test = y[indices_test,:]

    # Free the unsplit arrays before training.
    del x_rnn, x_fc, y

    # Architecture hyper-parameters (small, local-sized configuration).
    rnn_layer_sizes = np.array([128, 32, 32])
    dense_layer_parallel_sizes = np.array([32])
    dense_layer_sequential_sizes = np.array([32, 20, 1])
    dropout_prob_rnn = 0.1
    dropout_prob_dense = 0.1
    lambda_reg_dense = 0.001
    lambda_reg_rnn = 0.001

    model = Hybrid()
    model.build_model(
        rnn_layer_sizes = rnn_layer_sizes,
        dense_layer_parallel_sizes = dense_layer_parallel_sizes,
        dense_layer_sequential_sizes = dense_layer_sequential_sizes,
        dropout_prob_rnn = dropout_prob_rnn,
        dropout_prob_dense = dropout_prob_dense,
        lambda_reg_dense = lambda_reg_dense,
        lambda_reg_rnn = lambda_reg_rnn,
        merge = 'maximum')

    model.compile(optimizer = 'Adam', loss = 'm_hinge_acc')

    model.print_summary()

    model.fit(x_rnn_train, x_fc_train, y_train, x_rnn_valid,
        x_fc_valid, y_valid, epochs=10, verbosity=2, patience = 5)

    model.plot_training()

    model.evaluate(x_rnn_test, x_fc_test, y_test, verbosity=2)