Example 1
 def test_network(self):
     model_path = self.args.save_dir + 'model.pt'
     self.deep_q_network.load_state_dict(
         torch.load(model_path, map_location=lambda storage, loc: storage))
     self.deep_q_network.eval()
     while True:
         state = self.env.reset()
         state = pre_processing(state)
         # for the first state we need to stack them together....
         state = np.stack((state, state, state, state), axis=0)
         # reset the pipe counter...
         pipe_sum = 0
         # I haven't set a max step here, but you could set it...
         while True:
             self.env.render()
             state_tensor = torch.tensor(state).unsqueeze(0)
             with torch.no_grad():
                 _, _, actions = self.deep_q_network(state_tensor)
             # action...deterministic...
             action_selected = int(actions.data.numpy()[0])
             state_, reward, done, _ = self.env.step(
                 self.action_space[action_selected])
             if reward > 0:
                 pipe_sum += 1
             # process the output state...
             state_ = pre_processing(state_)
             # concatenate them together...
             state_temp = state[0:3, :, :].copy()
             state_ = np.expand_dims(state_, 0)
             state_ = np.concatenate((state_, state_temp), axis=0)
             if done:
                 break
             state = state_
         print('In this episode, the bird passed ' + str(pipe_sum) +
               ' pipes in total!')
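The loop above assumes a `pre_processing` helper that reduces a raw RGB frame to a single 2-D float array, which the four-frame `np.stack` then turns into the network input. The helper itself is not shown; a minimal sketch, assuming OpenCV is available and an 80x80 target size (both are assumptions, not taken from the example):

import cv2
import numpy as np

def pre_processing(frame, width=80, height=80):
    # Grayscale, shrink, and binarize the frame so the network sees a
    # compact, near-binary view of the screen. Sizes are assumptions.
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, (width, height))
    _, binary = cv2.threshold(resized, 1, 255, cv2.THRESH_BINARY)
    return binary.astype(np.float32) / 255.0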
Example 2
    def warm_up_buffer(self):
        print('Warming up')

        for i in range(self.warm_up_episodes):

            states = []
            rewards = []
            actions = []

            dead = False
            done = False
            desired_return = 1
            desired_horizon = 1

            step, score, start_life = 0, 0, 5
            observe = self.environment.reset()

            observe, reward, terminal = self.environment.step(1)

            state = utils.pre_processing(observe)
            history = np.stack((state, state, state, state), axis=2)
            history = np.reshape([history], (1, 84, 84, 4))

            while not done:

                states.append(history)
                command = np.asarray([
                    desired_return * self.return_scale,
                    desired_horizon * self.horizon_scale
                ])
                command = np.reshape(command, [1, len(command)])

                action = self.get_action(history, command)
                actions.append(action)

                next_state, reward, done = self.environment.step(action)
                next_state = utils.pre_processing(next_state)  # process the new frame, not the stale first observation
                next_state = np.reshape([next_state], (1, 84, 84, 1))
                next_history = np.append(next_state,
                                         history[:, :, :, :3],
                                         axis=3)

                rewards.append(reward)

                state = next_state
                history = next_history

                desired_return -= reward  # Line 8 Algorithm 2
                desired_horizon -= 1  # Line 9 Algorithm 2
                desired_horizon = np.maximum(desired_horizon, 1)

            self.memory.add_sample(states, actions, rewards)
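The warm-up loop follows Algorithm 2 of upside-down RL: each step stores the state, samples an action from the behaviour function conditioned on the (return, horizon) command, then decays the command. `get_action` is not shown; a plausible sketch, assuming `self.model` is a Keras behaviour network taking `[history, command]` and returning softmax action probabilities (an assumption, since the example never shows the network):

import numpy as np

def get_action(self, history, command):
    # Sample (rather than argmax) from the behaviour function so the
    # warm-up episodes stay exploratory. self.model is hypothetical.
    action_probs = self.model.predict([history, command])[0]
    return np.random.choice(len(action_probs), p=action_probs)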
Example 3
def correlation_based_implicit_neighbourhood_model(mat, mat_file, l_reg=0.002, gamma=0.005, l_reg2=100.0, k=250):
    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0]//128, 0:mat.shape[1]//128]
    mat = mat[mat.getnnz(1)>0][:, mat.getnnz(0)>0]

    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    #baseline_bu, baseline_bi = baseline_estimator(mat)
    # We should call baseline_estimator, but random init is fine for testing
    baseline_bu, baseline_bi = np.random.rand(no_users, 1) * 2 - 1, np.random.rand(1, no_movies) * 2 - 1

    bu_index, bi_index = pre_processing(mat, mat_file)
    
    # Init parameters
    bu = np.random.rand(no_users, 1)  * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1

    mu = mat.data[:].mean()

    # Compute similarity matrix
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()        
    for it in range(n_iter):
        t0 = time()
        for u,i,v in zip(cx.row, cx.col, cx.data):
            #Rk_iu = Nk_iu = bi_index[u]
            Rk_iu = Nk_iu = np.flip(np.argsort(S[i,].toarray()))[:k].ravel()
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi)

            bu[u] += gamma * (e_ui - l_reg * bu[u])
            bi[0, i] += gamma * (e_ui - l_reg * bi[0, i])

            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma * ( 1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg * wij[i][Rk_iu] )
            cij[i][Nk_iu] += gamma * ( 1 / sqrt(len(Nk_iu)) * e_ui - l_reg * cij[i][Nk_iu] )
        gamma *= 0.99

        if it % 10 == 0:
          t1 = time()
          print(it, "/", n_iter, "(%.2g sec)" % (t1 - t0))
          print("compute loss...")
          print(compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, l_reg=l_reg))

    return bu, bi, wij, cij
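The SGD updates above match Koren's implicit-neighbourhood prediction rule: the error e_ui is the observed rating minus mu + b_u + b_i plus the weighted-neighbourhood and implicit-feedback terms. `compute_e_ui` is not shown in the example; a sketch consistent with those updates (the exact body is an assumption):

from math import sqrt

def compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij,
                 baseline_bu, baseline_bi):
    # r_hat = mu + b_u + b_i
    #         + |Rk|^(-1/2) * sum_j (r_uj - b_uj) * w_ij
    #         + |Nk|^(-1/2) * sum_j c_ij
    buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
    r_hat = (mu + bu[u] + bi[0, i]
             + 1 / sqrt(len(Rk_iu))
             * ((mat[u, Rk_iu].toarray().ravel() - buj) * wij[i][Rk_iu]).sum()
             + 1 / sqrt(len(Nk_iu)) * cij[i][Nk_iu].sum())
    return mat[u, i] - r_hat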
Example 4
    def render(self, display):
        if self.image is not None:
            array = np.frombuffer(self.image.raw_data, dtype=np.dtype("uint8"))
            array = np.reshape(array, (self.image.height, self.image.width, 4))
            array = array[:, :, :3]
            # drop information we do not need
            array = pre_processing(array)

            #array = array[:, ::-1]
            surface = pygame.surfarray.make_surface(array.swapaxes(0, 1))
            display.blit(surface, (0, 0))
Example 5
def baseline_estimator(mat, mat_file, l_reg=0.02, learning_rate=0.0000025):
    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    bu_index, bi_index = pre_processing(mat, mat_file)

    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    #bu = np.zeros((no_users,1))
    #bi = np.zeros((1,no_movies))

    mu = mat.data[:].mean()
    mat_sum1 = mat.sum(1)
    mat_sum0 = mat.sum(0)
    n = mat.data[:].shape[0]

    no_users_entries = np.array((mat != 0).sum(1))
    no_movies_entries = np.array((mat != 0).sum(0))

    # Train
    print("Train...")
    n_iter = 200
    for it in range(n_iter):

        #bi_sum = bi[bi_index].sum(1).reshape((no_users,1))
        #bu_sum = bu.ravel()[bu_index].sum(0).reshape((1,no_movies))

        bi_sum = np.array(list(map(lambda x: bi.ravel()[x].sum(),
                                   bi_index))).reshape((no_users, 1))
        bu_sum = np.array(list(map(lambda x: bu.ravel()[x].sum(),
                                   bu_index))).reshape((1, no_movies))

        # Vectorized operations
        bu_gradient = -2.0 * (mat_sum1 - no_users_entries * mu -
                              no_users_entries * bu -
                              bi_sum) + 2.0 * l_reg * bu
        bu -= learning_rate * bu_gradient

        bi_gradient = -2.0 * (mat_sum0 - no_movies_entries * mu -
                              no_movies_entries * bi -
                              bu_sum) + 2.0 * l_reg * bi
        bi -= learning_rate * bi_gradient

        if it % 10 == 0:
            print("compute loss...")
            print(compute_loss(mat, mu, bu, bi, l_reg=l_reg))

    return bu, bi
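The two gradient expressions above descend the regularized least-squares objective: the sum over observed ratings of (r_ui - mu - b_u - b_i)^2 plus l_reg times the squared norms of the biases. A sketch of a matching `compute_loss`, assuming this signature (the example does not show the helper):

def compute_loss(mat, mu, bu, bi, l_reg=0.02):
    # Regularized squared error over the observed entries only.
    cx = mat.tocoo()
    loss = 0.0
    for u, i, v in zip(cx.row, cx.col, cx.data):
        loss += (v - mu - bu[u, 0] - bi[0, i]) ** 2
    return loss + l_reg * ((bu ** 2).sum() + (bi ** 2).sum())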
Example 6
def integrated_gradients(inputs,
                         model,
                         target_label_idx,
                         predict_and_gradients,
                         baseline,
                         steps=50,
                         cuda=False):
    if baseline is None:
        baseline = 0 * inputs
    # scale inputs and compute gradients
    scaled_inputs = [
        baseline + (float(i) / steps) * (inputs - baseline)
        for i in range(0, steps + 1)
    ]
    grads, _ = predict_and_gradients(scaled_inputs, model, target_label_idx,
                                     cuda)
    avg_grads = np.average(grads[:-1], axis=0)
    avg_grads = np.transpose(avg_grads, (1, 2, 0))
    delta_X = (
        pre_processing(inputs, cuda) -
        pre_processing(baseline, cuda)).detach().squeeze(0).cpu().numpy()
    delta_X = np.transpose(delta_X, (1, 2, 0))
    integrated_grad = delta_X * avg_grads
    return integrated_grad
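A hypothetical call, attributing one image's prediction to its pixels. `img`, `model`, and `calculate_outputs_and_gradients` are placeholders for the caller's input tensor, network, and gradient helper; none of them appear in the example:

import torch

# img, model, calculate_outputs_and_gradients: hypothetical placeholders
attributions = integrated_gradients(inputs=img,
                                    model=model,
                                    target_label_idx=207,
                                    predict_and_gradients=calculate_outputs_and_gradients,
                                    baseline=None,  # falls back to an all-zero input
                                    steps=50,
                                    cuda=torch.cuda.is_available())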
Example 7
    def train_network(self):
        # init the memory buffer...
        brain_memory = []
        num_of_episode = 0
        global_step = 0
        update_step_counter = 0
        reward_mean = None
        epsilon = self.args.init_exploration
        loss = 0
        while True:
            state = self.env.reset()
            state = pre_processing(state)
            # for the first state we need to stack them together....
            state = np.stack((state, state, state, state), axis=0)
            # reset the pipe counter...
            pipe_num = 0
            # I haven't set a max step here, but you could set it...
            while True:
                state_tensor = torch.tensor(state).unsqueeze(0)
                if self.use_cuda:
                    state_tensor = state_tensor.cuda()
                with torch.no_grad():
                    _, _, actions = self.deep_q_network(state_tensor)
                action_selected = select_action(actions, epsilon,
                                                self.num_actions)
                # input the action into the environment...
                state_, reward, done, _ = self.env.step(
                    self.action_space[action_selected])
                # process the output state...
                state_ = pre_processing(state_)
                # concatenate them together...
                state_temp = state[0:3, :, :].copy()
                state_ = np.expand_dims(state_, 0)
                state_ = np.concatenate((state_, state_temp), axis=0)
                # wrap the reward....
                reward = reward_wrapper(reward)
                # increment the pipe count...
                if reward > 0:
                    pipe_num += 1
                global_step += 1
                # store the transition...
                brain_memory.append(
                    (state, state_, reward, done, action_selected))
                if len(brain_memory) > self.args.buffer_size:
                    brain_memory.pop(0)
                if global_step >= self.args.observate_time:
                    mini_batch = random.sample(brain_memory,
                                               self.args.batch_size)
                    loss = self._update_network(mini_batch)
                    update_step_counter += 1
                    # update the target network...
                    if update_step_counter % self.args.hard_update_step == 0:
                        #self._hard_update_target_network(self.deep_q_network, self.target_network)
                        self.target_network.load_state_dict(
                            self.deep_q_network.state_dict())
                # process the epsilon
                if global_step <= self.args.exploration_steps:
                    epsilon -= (self.args.init_exploration -
                                self.args.final_exploration
                                ) / self.args.exploration_steps
                if done:
                    break
                state = state_
            # exponential weighted average...
            reward_mean = pipe_num if reward_mean is None else reward_mean * 0.99 + pipe_num * 0.01
            if num_of_episode % self.args.display_interval == 0:
                print('[{}] Episode: {}, Reward: {}, Loss: {}'.format(
                    str(datetime.now()), num_of_episode, reward_mean, loss))

            if num_of_episode % self.args.save_interval == 0:
                save_path = self.args.save_dir + 'model.pt'
                torch.save(self.deep_q_network.state_dict(), save_path)
            num_of_episode += 1
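`select_action` supplies the epsilon-greedy exploration that `test_network` in Example 1 skips. Since the network's third output already holds the greedy action index (see how Example 1 uses it), a sketch consistent with this call site (the real helper is not shown):

import random

def select_action(actions, epsilon, num_actions):
    # Explore with probability epsilon, otherwise take the greedy
    # action index the network emitted.
    if random.random() <= epsilon:
        return random.randrange(num_actions)
    return int(actions.cpu().numpy()[0])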
Example 8
        noise_feature = keras.backend.squeeze(noise_feature_map[i], 0)  # tf.squeeze(noise_feature_map[i], axis=0)
        noise_feature = keras.backend.reshape(noise_feature, shape=(noise_feature.shape[0] * noise_feature.shape[1], noise_feature.shape[2]))
        gram_noise = keras.backend.dot(keras.backend.transpose(noise_feature), noise_feature)

        denominator = (4 * keras.backend.constant(texture_feature.shape[0], dtype=tf.float32)**2) * keras.backend.constant(texture_feature.shape[1], dtype=tf.float32)**2

        total_loss += weights[i][0] * (keras.backend.sum(keras.backend.square(tf.subtract(gram_texture, gram_noise))) / keras.backend.cast(denominator, tf.float32))

    return total_loss



if __name__ == '__main__':
    # generate original feature maps
    img_array = utils.pre_processing(input_img, height, width)
    feature_map = utils.compute_vgg_output(img_array)

    # generate initial noise image
    random_ = keras.backend.random_uniform(img_array.shape, minval=0, maxval=0.2)
    noise_img = keras.backend.variable(value=random_, dtype=tf.float32, name="noise_input")

    # compute feature maps of initial noise map
    vgg = vgg_16.VGG16()
    vgg.build(noise_img)

    noise_layers_list = dict({0: vgg.conv1_1, 1: vgg.conv1_2, 2: vgg.pool1,
                              3: vgg.conv2_1, 4: vgg.conv2_2, 5: vgg.pool2,
                              6: vgg.conv3_1, 7: vgg.conv3_2, 8: vgg.conv3_3, 9: vgg.pool3,
                              10: vgg.conv4_1, 11: vgg.conv4_2, 12: vgg.conv4_3, 13: vgg.pool4,
                              14: vgg.conv5_1, 15: vgg.conv5_2, 16: vgg.conv5_3, 17: vgg.pool5})
Example 9
# read data.txt
f = open('input/data.txt')
line = f.readline().strip('\n')
docs = []
n_docs = int(line)
line = f.readline().strip('\n')
while line:
    docs.append(line)
    # print line
    line = f.readline().strip('\n')
f.close()

# Collapsed Gibbs Sampling Derivation for LDA

new_docs = utils.pre_processing(docs)
lls = []
timecosts = []

n_iter = 100
max_topics = 21
# n_topics = 3
import datetime
for n_topics in range(3, max_topics):
    print('======================= n_topics: {} ============================'.format(n_topics))
    startime = datetime.datetime.now()

    # start
    mylda = lda.LDA(
        docs=new_docs,
        n_topics=n_topics,
Example 10
def svd_more_more(mat,
                  mat_file,
                  gamma1=0.007,
                  gamma2=0.007,
                  gamma3=0.001,
                  l_reg2=100,
                  l_reg6=0.005,
                  l_reg7=0.015,
                  f=50):
    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1

    mu = mat.data[:].mean()

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    for it in range(n_iter):
        for u, i, v in zip(cx.row, cx.col, cx.data):
            N_u = bi_index[u]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)

            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui *
                               (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) -
                               l_reg7 * qi[i])
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] -
                                 l_reg7 * yj[N_u])
        gamma1 *= 0.9
        gamma2 *= 0.9

        if it % 10 == 0:
            print(it, "/", n_iter)
            print("compute loss...")
            print(
                compute_loss(mat,
                             mu,
                             bu,
                             bi,
                             qi,
                             pu,
                             N_u,
                             yj,
                             l_reg6=l_reg6,
                             l_reg7=l_reg7))

    return bu, bi, qi, pu, yj
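These updates are the classic SVD++ step: the rating estimate augments the user factor p_u with the implicit-feedback term |N(u)|^(-1/2) times the sum of y_j. The example's `compute_e_ui` is not shown; a sketch consistent with the updates above (the body is an assumption):

from math import sqrt

def compute_e_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj):
    # r_hat = mu + b_u + b_i + q_i . (p_u + |N(u)|^(-1/2) * sum_j y_j)
    latent = pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)
    r_hat = mu + bu[u] + bi[0, i] + qi[i].dot(latent)
    return mat[u, i] - r_hat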
Example 11
def main(args):

    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Plots, Weights and CSV Path #
    paths = [
        args.samples_path, args.weights_path, args.csv_path,
        args.inference_path
    ]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    # Pre-processing #
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.constant,
                                       args.delta)

    train_X, train_Y, test_X, test_Y = prepare_data(data, preprocessed_data,
                                                    args)

    train_X = moving_windows(train_X, args.ts_dim)
    train_Y = moving_windows(train_Y, args.ts_dim)

    test_X = moving_windows(test_X, args.ts_dim)
    test_Y = moving_windows(test_Y, args.ts_dim)

    # Prepare Networks #
    if args.model == 'conv':
        D = ConvDiscriminator(args.ts_dim).to(device)
        G = ConvGenerator(args.latent_dim, args.ts_dim).to(device)

    elif args.model == 'lstm':
        D = LSTMDiscriminator(args.ts_dim).to(device)
        G = LSTMGenerator(args.latent_dim, args.ts_dim).to(device)

    else:
        raise NotImplementedError

    #########
    # Train #
    #########

    if args.mode == 'train':

        # Loss Function #
        if args.criterion == 'l2':
            criterion = nn.MSELoss()

        elif args.criterion == 'wgangp':
            pass

        else:
            raise NotImplementedError

        # Optimizers #
        if args.optim == 'sgd':
            D_optim = torch.optim.SGD(D.parameters(), lr=args.lr, momentum=0.9)
            G_optim = torch.optim.SGD(G.parameters(), lr=args.lr, momentum=0.9)

        elif args.optim == 'adam':
            D_optim = torch.optim.Adam(D.parameters(),
                                       lr=args.lr,
                                       betas=(0., 0.9))
            G_optim = torch.optim.Adam(G.parameters(),
                                       lr=args.lr,
                                       betas=(0., 0.9))

        else:
            raise NotImplementedError

        D_optim_scheduler = get_lr_scheduler(D_optim, args)
        G_optim_scheduler = get_lr_scheduler(G_optim, args)

        # Lists #
        D_losses, G_losses = list(), list()

        # Train #
        print(
            "Training Time Series GAN started with total epoch of {}.".format(
                args.num_epochs))

        for epoch in range(args.num_epochs):

            # Initialize Optimizers #
            G_optim.zero_grad()
            D_optim.zero_grad()

            #######################
            # Train Discriminator #
            #######################

            if args.criterion == 'l2':
                n_critics = 1
            elif args.criterion == 'wgangp':
                n_critics = 5

            for j in range(n_critics):
                series, start_dates = get_samples(train_X, train_Y,
                                                  args.batch_size)

                # Data Preparation #
                series = series.to(device)
                noise = torch.randn(args.batch_size, 1,
                                    args.latent_dim).to(device)

                # Adversarial Loss using Real Series #
                prob_real = D(series.float())

                if args.criterion == 'l2':
                    real_labels = torch.ones(prob_real.size()).to(device)
                    D_real_loss = criterion(prob_real, real_labels)

                elif args.criterion == 'wgangp':
                    D_real_loss = -torch.mean(prob_real)

                # Adversarial Loss using Fake Series #
                fake_series = G(noise)
                prob_fake = D(fake_series.detach())

                if args.criterion == 'l2':
                    fake_labels = torch.zeros(prob_fake.size()).to(device)
                    D_fake_loss = criterion(prob_fake, fake_labels)

                elif args.criterion == 'wgangp':
                    D_fake_loss = torch.mean(prob_fake)
                    D_gp_loss = args.lambda_gp * get_gradient_penalty(
                        D, series.float(), fake_series.float(), device)

                # Calculate Total Discriminator Loss #
                D_loss = D_fake_loss + D_real_loss

                if args.criterion == 'wgangp':
                    D_loss += D_gp_loss  # lambda_gp is already applied above

                # Back Propagation and Update #
                D_loss.backward()
                D_optim.step()

            ###################
            # Train Generator #
            ###################

            # Adversarial Loss #
            fake_series = G(noise)
            prob_fake = D(fake_series)

            # Calculate Total Generator Loss #
            if args.criterion == 'l2':
                real_labels = torch.ones(prob_fake.size()).to(device)
                G_loss = criterion(prob_fake, real_labels)

            elif args.criterion == 'wgangp':
                G_loss = -torch.mean(prob_fake)

            # Back Propagation and Update #
            G_loss.backward()
            G_optim.step()

            # Add items to Lists #
            D_losses.append(D_loss.item())
            G_losses.append(G_loss.item())

            # Adjust Learning Rate #
            D_optim_scheduler.step()
            G_optim_scheduler.step()

            # Print Statistics, Save Model Weights and Series #
            if (epoch + 1) % args.log_every == 0:

                # Print Statistics and Save Model #
                print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
                    epoch + 1, args.num_epochs, np.average(D_losses),
                    np.average(G_losses)))
                torch.save(
                    G.state_dict(),
                    os.path.join(
                        args.weights_path,
                        'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                            G.__class__.__name__, args.criterion.upper(),
                            epoch + 1)))

                # Generate Samples and Save Plots and CSVs #
                series, fake_series = generate_fake_samples(
                    test_X, test_Y, G, scaler_1, scaler_2, args, device)
                plot_series(series, fake_series, G, epoch, args,
                            args.samples_path)
                make_csv(series, fake_series, G, epoch, args, args.csv_path)

    ########
    # Test #
    ########

    elif args.mode == 'test':

        # Load Model Weights #
        G.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path, 'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                        G.__class__.__name__, args.criterion.upper(),
                        args.num_epochs))))

        # Lists #
        real, fake = list(), list()

        # Inference #
        for idx in range(0, test_X.shape[0], args.ts_dim):

            # Do not plot if the remaining data is less than time dimension #
            end_ix = idx + args.ts_dim

            if end_ix > len(test_X) - 1:
                break

            # Prepare Data #
            test_data = test_X[idx, :]
            test_data = np.expand_dims(test_data, axis=0)
            test_data = np.expand_dims(test_data, axis=1)
            test_data = torch.from_numpy(test_data).to(device)

            start = test_Y[idx, 0]

            noise = torch.randn(args.val_batch_size, 1,
                                args.latent_dim).to(device)

            # Generate Fake Data #
            with torch.no_grad():
                fake_series = G(noise)

            # Convert to Numpy format for Saving #
            test_data = np.squeeze(test_data.cpu().data.numpy())
            fake_series = np.squeeze(fake_series.cpu().data.numpy())

            test_data = post_processing(test_data, start, scaler_1, scaler_2,
                                        args.delta)
            fake_series = post_processing(fake_series, start, scaler_1,
                                          scaler_2, args.delta)

            real += test_data.tolist()
            fake += fake_series.tolist()

        # Plot, Save to CSV file and Derive Metrics #
        plot_series(real, fake, G, args.num_epochs - 1, args,
                    args.inference_path)
        make_csv(real, fake, G, args.num_epochs - 1, args, args.inference_path)
        derive_metrics(real, fake, args)

    else:
        raise NotImplementedError
Example 12
def generate_timeseries(args):

    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Inference Path #
    make_dirs(args.inference_path)

    # Prepare Generator #
    if args.model == 'skip':
        G = SkipGenerator(args.latent_dim, args.ts_dim,
                          args.conditional_dim).to(device)
        G.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path,
                    'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
                        args.criterion.upper(), args.num_epochs))))

    else:
        raise NotImplementedError

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()

    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    # Lists #
    real, fake = list(), list()

    # Inference #
    for idx in range(0, data.shape[0], args.ts_dim):

        end_ix = idx + args.ts_dim

        if end_ix > len(data) - 1:
            break

        samples = X[idx, :]
        samples = np.expand_dims(samples, axis=0)
        samples = np.expand_dims(samples, axis=1)

        samples = torch.from_numpy(samples).to(device)
        start_dates = label[idx, 0]

        noise = torch.randn(args.val_batch_size, 1, args.latent_dim).to(device)

        with torch.no_grad():
            fake_series = G(noise)
        fake_series = torch.cat((samples[:, :, :args.conditional_dim].float(),
                                 fake_series.float()),
                                dim=2)

        samples = np.squeeze(samples.cpu().data.numpy())
        fake_series = np.squeeze(fake_series.cpu().data.numpy())

        samples = post_processing(samples, start_dates, scaler_1, scaler_2,
                                  args.delta)
        fake_series = post_processing(fake_series, start_dates, scaler_1,
                                      scaler_2, args.delta)

        real += samples.tolist()
        fake += fake_series.tolist()

    plot_sample(real, fake, args.num_epochs - 1, args)
    make_csv(real, fake, args.num_epochs - 1, args)
Example 13
    def generate_episode(self, environment, e, desired_return, desired_horizon,
                         testing):

        if environment == "Catch-v0":
            env = catch.CatchEnv()
        elif environment == "Catch-v2":
            env = catch_v2.CatchEnv()
        elif environment == "Catch-v3":
            env = catch_v3.CatchEnv()
        elif environment == "Catch-v4":
            env = catch_v4.CatchEnv()

        tot_rewards = []

        done = False
        dead = False

        scores = []
        states = []
        actions = []
        rewards = []

        step, score, start_life = 0, 0, 5

        observe = env.reset()
        observe, _, _ = env.step(1)

        state = utils.pre_processing(observe)
        history = np.stack((state, state, state, state), axis=2)
        history = np.reshape([history], (1, 84, 84, 4))

        while not done:
            states.append(history)

            command = np.asarray([
                desired_return * self.return_scale,
                desired_horizon * self.horizon_scale
            ])
            command = np.reshape(command, [1, len(command)])

            if not testing:
                action = self.get_action(history, command)
                actions.append(action)
            else:
                action = self.get_greedy_action(history, command)

            next_state, reward, done = env.step(action)
            next_state = utils.pre_processing(next_state)  # process the new frame, not the stale observation
            next_state = np.reshape([next_state], (1, 84, 84, 1))
            next_history = np.append(next_state, history[:, :, :, :3], axis=3)

            rewards.append(reward)
            score += reward
            history = next_history

            desired_return -= reward  # Line 8 Algorithm 2
            desired_horizon -= 1  # Line 9 Algorithm 2
            desired_horizon = np.maximum(desired_horizon, 1)

        self.memory.add_sample(states, actions, rewards)
        self.testing_rewards.append(score)

        if testing:
            print('Querying the model ...')
            print('Testing score: {}'.format(score))

            return score
Example 14
# -*- coding:utf-8 -*-

import os
import utils
import jieba_cut
import pandas as pd
import time
import model_utils

if not os.path.exists('./results'):
    os.makedirs('./results')
t0 = time.time()
print('============================ Text pre-processing started ============================')
data = pd.read_csv(
    '/Users/shen-pc/Desktop/WORK/ITS/KR2/LSD_data/problem_0528.csv')
data_proc = utils.pre_processing(data)
data_proc.to_csv('./results/problem_0528_preprocessing.csv')
t1 = time.time()
print('Text pre-processing took:', (t1 - t0) / 60, 'min')
print('============================ Text pre-processing finished ============================',
      '\n\n')
'''
------------------------------------------------------------------------------------------------------------------------
'''

print('============================ Word segmentation started ============================')
data_cut = jieba_cut.cut(data_proc)
data_cut.to_csv('./results/problem_0528_jieba.csv')
t2 = time.time()
print('Word segmentation took:', (t2 - t1) / 60, 'min')
print('============================ Word segmentation finished ============================', '\n\n')
Example 15
def train(data_conf, model_conf, **kwargs):

    try:
        print("-----------------------------------")
        print("Starting Cashflow DL Model Training")
        print("-----------------------------------")
        print()

        # ==============================
        # 0. Main parameters definitions
        # ==============================

        # Size of X and y arrays definition
        N_days_X, N_days_y = int(data_conf['number_of_historical_days']), int(
            data_conf['number_of_predicted_days'])  #365, 92
        print('Number of days used for prediction (X): ', N_days_X)
        print('Number of days predicted (y): ', N_days_y)
        print()

        # Date range definition
        start_date, end_date = data_conf['start_date'], data_conf['end_date']
        import utils as utils
        start_date_dt, end_date_dt, start_date_prediction, end_date_prediction, end_date_plusOneDay, end_date_minus_6month = utils.dates_definitions(
            start_date, end_date, N_days_X, N_days_y)
        print('Date range: ', start_date, end_date)
        print()

        model_name = model_conf['model_name']

    except Exception as e:
        print("Errored on initialization")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e

    try:
        # ========================================
        # T.1 Pre-processing before model training
        # ========================================

        # Loading dataset
        table_in = data_conf[environment]['table_to_train_on']
        #ts_balance = spark.read.parquet("/mnt/test/{0}.parquet".format(table_in)).cache()
        ts_balance = spark.read.format("delta").load(
            "/mnt/delta/{0}".format(table_in))

        # Cleaning of the time series
        ts_balance = ts_balance.withColumn(
            'balance', ts_balance.balance.cast("array<float>"))

        # DOES NOT WORK WITH DATABRICKS CONNECT THAT WAY (maybe I need to register the UDF!)
        #ts_balance = ts_balance.withColumn('keep_ts', F.udf(lambda x,y: utils.time_series_cleaning(x,y), "int")('balance', F.lit(20)))  #at least 10 transactions in the ts, to be used in the training
        #ts_balance = ts_balance.where('keep_ts == 1')

        # Creating the dataset on which we train (and test and validate) the model
        ts_balance_model = ts_balance.sample(
            False, 0.7,
            seed=0)  #now 0.7, but in real case would be 0.1 at best... or 0.05
        print('ts_balance_model.count()', ts_balance_model.count())

        # Pre-processing before model training
        import utils as utils
        ts_balance_model = utils.pre_processing(ts_balance_model,
                                                end_date,
                                                spark,
                                                serving=False)
        ts_balance_model.show(3)

        print('ts_balance_model.rdd.getNumPartitions()',
              ts_balance_model.rdd.getNumPartitions())
        ts_balance_model.show(3)

        # Saving prepared dataset
        table_out = 'cashflow_training_step1'
        #ts_balance_model.write.format("parquet").mode("overwrite").save("/mnt/test/{0}.parquet".format(table_out))
        ts_balance_model.write.format("delta").mode("overwrite").save(
            "/mnt/delta/{0}".format(table_out))

    except Exception as e:
        print("Errored on step T.1: pre-processing before model training")
        print("Exception Trace: {0}".format(e))
        print(traceback.format_exc())
        raise e
Example 16
import model_utils
import utils
import jieba_cut
import pandas as pd

data = pd.read_csv(
    '/Users/shen-pc/Desktop/WORK/ITS/My method/results/problem_0528_jieba.csv',
    index_col=0)
sim1 = model_utils.cal_cos_sim(data.iloc[5322]['id'], data.iloc[5323]['id'])
print('Original model result [5322 vs 5323] =', sim1)
# Try a few new problems
data_new = pd.read_csv('/Users/shen-pc/Desktop/WORK/ITS/data/real_item.csv',
                       index_col=0)
data_new.rename(columns={'problem_id': 'id'}, inplace=True)
data_new = data_new.loc[:20]
data_new = utils.pre_processing(data_new)
data_new = jieba_cut.cut(data_new)
# Train the model with the new data added:
model_new = model_utils.train(data_new)
sim2 = model_utils.cal_cos_sim(data.iloc[5322]['id'], data.iloc[5323]['id'])
sim3 = model_utils.cal_cos_sim(data.iloc[100]['id'], data.iloc[1000]['id'])
print('New model result [5322 vs 5323] =', sim2)
print('New model result [100 vs 1000] =', sim3)
# A problem that was not in the original data:
sim4 = model_utils.cal_cos_sim(data.iloc[0]['id'], data_new.iloc[0]['id'])
print('New model result [old 0 vs new 0] =', sim4)

# Most similar:
most1 = model_utils.most_similar(data.iloc[0]['id'])
most2 = model_utils.most_similar(data_new.iloc[0]['id'])
print('\n\n\n\n', data.loc[0, 'cut'], '\n', most1, '\n\n')
Example 17
    
    save_path = 'crnn_overratio_%1.1f_'%OERT_RATIO+utils.time_for_saving()
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    
    # get the original data, including segmenting signals and forming the labels
    
    if os.path.exists(excel_path):
        df = pd.read_excel(excel_path, dtype={'Name': str, 'Value': float})
        
    else:
        raise FileNotFoundError('Please contact the authors for the dataset')
        
    recording_list = df.to_dict('index')
    recording_list,max_len, max_sig_len = utils.pre_processing(recording_list)
    
    fold_data = utils.get_fold_info(recording_list, OERT_RATIO,experiment = 'seq')
    fold_results = {}
    
    for fold_number in range(10):
        
        model_para = {'over_ratio': OERT_RATIO,
                  'total_epoch': TOTAL_EPOCH,
                  'CNN_channel': CNN_CH,
                  'RNN_channel': RNN_CH,
                  'filter_size': FILTER_SIZE,
                  'pooling_size': POOLING_SIZE,
                  'batch_size': BATCH_SIZE,
                  'fc_channel':FC_CH,
                  'l2_c':L2_LAMBDA,
Example 18
def test(rank, params, shared_model, count, lock):
    logging.basicConfig(filename='./2blocks_rew.log', level=logging.INFO)
    ptitle('Test Process: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]

    env = Env(True, 1, down_period=2)

    # model = A3C()
    model = A3C_LSTM()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    agent = run_agent(model, gpu_id)

    episode = 0
    while episode <= params.episode_test:
        env.reset()
        with lock:
            n_update = count.value
        agent.synchronize(shared_model)

        num_steps = 0
        accumulated_reward = 0
        nAction = 0
        line1 = 0
        line2 = 0
        line3 = 0
        line4 = 0
        nMove = 0
        rew_height = 0
        rew_move = 0

        while True:
            num_steps += 1

            obs = pre_processing(env.shadow_map,
                                 env._get_curr_block_pos())  # env.map
            action = agent.action_test(obs)
            if action == 5:
                action = 100000
            rew, shadow_reward, done, putting, height = env.step(
                action)  # what is the 'is_new_block'?
            if rew == 0.0 and action != 3 and action != 4:
                nMove += 1
                if nMove < 6:
                    rew_move += 0.2
                if putting:
                    rew_height += -(height / 20.0)
                    nMove = 0

            if rew == 1.0:
                line1 += 1
            elif rew == 8.0:
                line2 += 1
            elif rew == 27.0:
                line3 += 1
            elif rew == 64:
                line4 += 1
            '''
            if nAction < 9:
                obs = pre_processing(env.map, env._get_curr_block_pos())
                action = agent.action_test(obs)
                rew, shadow_reward, is_new_block = env.step(action)     # what is the 'is_new_block'?
                nAction += 1

            else:
                rew, is_new_block = env.step(100000)  # falling
                nAction = 0
            '''

            accumulated_reward = rew + rew_move + rew_height

            if env.is_game_end():
                episode += 1
                print(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                logging.info(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                break

            if env.score > 1000:
                episode += 1
                print(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                with torch.cuda.device(gpu_id):
                    torch.save(agent.model.state_dict(),
                               './weight/model' + str(n_update) + '.ckpt')
                logging.info(" ".join([
                    "-------------episode stats-------------\n",
                    "nUpdate: {}\n".format(n_update),
                    "line1: {}\n".format(line1), "line2: {}\n".format(line2),
                    "line3: {}\n".format(line3),
                    "line4: {}\n".format(line4), "all_lines: {}\n".format(
                        str(line1 + line2 + line3 + line4)),
                    "score: {}\n".format(env.score),
                    "rew_move: {}\n".format(rew_move),
                    "rew_height: {}\n".format(rew_height),
                    "steps: {}\n".format(num_steps)
                ]))
                break
Example 19
def correlation_based_implicit_neighbourhood_model_vectorized(mat, mat_file, l_reg=0.002, gamma=0.005, l_reg2=100.0, k=250):
    gamma /= 100

    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0]//128, 0:mat.shape[1]//128]
    mat = mat[mat.getnnz(1)>0][:, mat.getnnz(0)>0]

    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]
    no_users_entries = np.array((mat != 0).sum(1))
    no_movies_entries = np.array((mat != 0).sum(0))    

    #baseline_bu, baseline_bi = baseline_estimator(mat)
    # We should call baseline_estimator but we can init at random for testing
    baseline_bu, baseline_bi = np.random.rand(no_users, 1)  * 2 - 1, np.random.rand(1, no_movies) * 2 - 1    

    bu_index, bi_index = pre_processing(mat, mat_file)
    
    bu = np.random.rand(no_users, 1)  * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1

    mu = mat.data[:].mean()

    # Compute similarity matrix
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)

    Rk = []
    cx = mat.tocoo()
    for u,i,v in zip(cx.row, cx.col, cx.data):
        Rk.append((u, i, np.flip(np.argsort(S[i,].toarray()))[:k].ravel()))

    # Train
    print("Train...")
    n_iter = 200
    for it in range(n_iter):
        t0 = time() 

        e = compute_e_vectorized(mat, mu, bu, bi, Rk, wij, Rk, cij, baseline_bu, baseline_bi)
        # Vectorized operations
        bu += gamma * (e.sum(1) - no_users_entries * l_reg * bu)
        bi += gamma * (e.sum(0) - no_movies_entries * l_reg * bi)

        # TODO: vectorize the following
        for u, i, Rk_iu in Rk:
            Nk_iu = Rk_iu
            e_ui = e[u, i]
            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma * ( 1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg * wij[i][Rk_iu] )
            cij[i][Nk_iu] += gamma * ( 1 / sqrt(len(Nk_iu)) * e_ui - l_reg * cij[i][Nk_iu] )
        gamma *= 0.99

        if it % 10 == 0:
          t1 = time()
          print(it, "/", n_iter, "(%.2g sec)" % (t1 - t0))
          print("compute loss...")
          print(compute_loss_vectorized(mat, mu, bu, bi, Rk, wij, Rk, cij, baseline_bu, baseline_bi, l_reg=l_reg))  

    return bu, bi, wij, cij
Example 20
def train(args):

    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Plots, Weights and CSV Path #
    paths = [
        args.samples_path, args.plots_path, args.weights_path, args.csv_path
    ]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    # Pre-processing #
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    # Prepare Networks #
    D = Discriminator(args.ts_dim).to(device)
    G = Generator(args.latent_dim, args.ts_dim,
                  args.conditional_dim).to(device)

    # Loss Function #
    if args.criterion == 'l2':
        criterion = nn.MSELoss()
    elif args.criterion == 'wgangp':
        pass
    else:
        raise NotImplementedError

    # Optimizers #
    D_optim = torch.optim.Adam(D.parameters(), lr=args.lr, betas=(0.5, 0.9))
    G_optim = torch.optim.Adam(G.parameters(), lr=args.lr, betas=(0.5, 0.9))

    D_optim_scheduler = get_lr_scheduler(D_optim, args)
    G_optim_scheduler = get_lr_scheduler(G_optim, args)

    # Lists #
    D_losses, G_losses = list(), list()

    # Train #
    print("Training Time Series GAN started with total epoch of {}.".format(
        args.num_epochs))

    for epoch in range(args.num_epochs):

        # Initialize Optimizers #
        G_optim.zero_grad()
        D_optim.zero_grad()

        if args.criterion == 'l2':
            n_critics = 1
        elif args.criterion == 'wgangp':
            n_critics = 5

        #######################
        # Train Discriminator #
        #######################

        for j in range(n_critics):
            series, start_dates = get_samples(X, label, args.batch_size)

            # Data Preparation #
            series = series.to(device)
            noise = torch.randn(args.batch_size, 1, args.latent_dim).to(device)

            # Adversarial Loss using Real Series #
            prob_real = D(series.float())

            if args.criterion == 'l2':
                real_labels = torch.ones(prob_real.size()).to(device)
                D_real_loss = criterion(prob_real, real_labels)

            elif args.criterion == 'wgangp':
                D_real_loss = -torch.mean(prob_real)

            # Adversarial Loss using Fake Series #
            fake_series = G(noise)
            fake_series = torch.cat(
                (series[:, :, :args.conditional_dim].float(),
                 fake_series.float()),
                dim=2)

            prob_fake = D(fake_series.detach())

            if args.criterion == 'l2':
                fake_labels = torch.zeros(prob_fake.size()).to(device)
                D_fake_loss = criterion(prob_fake, fake_labels)

            elif args.criterion == 'wgangp':
                D_fake_loss = torch.mean(prob_fake)
                D_gp_loss = args.lambda_gp * get_gradient_penalty(
                    D, series.float(), fake_series.float(), device)

            # Calculate Total Discriminator Loss #
            D_loss = D_fake_loss + D_real_loss

            if args.criterion == 'wgangp':
                D_loss += D_gp_loss  # lambda_gp is already applied above

            # Back Propagation and Update #
            D_loss.backward()
            D_optim.step()

        ###################
        # Train Generator #
        ###################

        # Adversarial Loss #
        fake_series = G(noise)
        fake_series = torch.cat(
            (series[:, :, :args.conditional_dim].float(), fake_series.float()),
            dim=2)
        prob_fake = D(fake_series)

        # Calculate Total Generator Loss #
        if args.criterion == 'l2':
            real_labels = torch.ones(prob_fake.size()).to(device)
            G_loss = criterion(prob_fake, real_labels)

        elif args.criterion == 'wgangp':
            G_loss = -torch.mean(prob_fake)

        # Back Propagation and Update #
        G_loss.backward()
        G_optim.step()

        # Add items to Lists #
        D_losses.append(D_loss.item())
        G_losses.append(G_loss.item())

        ####################
        # Print Statistics #
        ####################

        print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
            epoch + 1, args.num_epochs, np.average(D_losses),
            np.average(G_losses)))

        # Adjust Learning Rate #
        D_optim_scheduler.step()
        G_optim_scheduler.step()

        # Save Model Weights and Series #
        if (epoch + 1) % args.save_every == 0:
            torch.save(
                G.state_dict(),
                os.path.join(
                    args.weights_path,
                    'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
                        args.criterion.upper(), epoch + 1)))

            series, fake_series = generate_fake_samples(
                X, label, G, scaler_1, scaler_2, args, device)
            plot_sample(series, fake_series, epoch, args)
            make_csv(series, fake_series, epoch, args)

    print("Training finished.")
Example 21
def integrated_model(mat,
                     mat_file,
                     gamma1=0.007,
                     gamma2=0.007,
                     gamma3=0.001,
                     l_reg2=100,
                     l_reg6=0.005,
                     l_reg7=0.015,
                     l_reg8=0.015,
                     k=300,
                     f=50):
    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    #baseline_bu, baseline_bi = baseline_estimator(mat)
    # We should call baseline_estimator, but random init is fine for testing
    baseline_bu, baseline_bi = np.random.rand(
        no_users, 1) * 2 - 1, np.random.rand(1, no_movies) * 2 - 1

    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1

    mu = mat.data[:].mean()
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    for it in range(n_iter):
        for u, i, v in zip(cx.row, cx.col, cx.data):
            #Rk_iu = Nk_iu = bi_index[u]
            N_u = bi_index[u]
            Rk_iu = Nk_iu = np.flip(np.argsort(S[i, ].toarray()))[:k].ravel()
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij,
                                baseline_bu, baseline_bi, qi, pu, N_u, yj)

            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui *
                               (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) -
                               l_reg7 * qi[i])
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] -
                                 l_reg7 * yj[N_u])
            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma3 * (1 / sqrt(len(Rk_iu)) * e_ui *
                                       (mat[u, Rk_iu].toarray().ravel() - buj)
                                       - l_reg8 * wij[i][Rk_iu])
            cij[i][Nk_iu] += gamma3 * (1 / sqrt(len(Nk_iu)) * e_ui -
                                       l_reg8 * cij[i][Nk_iu])
        gamma1 *= 0.9
        gamma2 *= 0.9
        gamma3 *= 0.9

        if it % 10 == 0:
            print(it, "/", n_iter)
            print("compute loss...")
            print(
                compute_loss(mat,
                             mu,
                             bu,
                             bi,
                             Rk_iu,
                             wij,
                             Nk_iu,
                             cij,
                             baseline_bu,
                             baseline_bi,
                             qi,
                             pu,
                             N_u,
                             yj,
                             l_reg6=l_reg6,
                             l_reg7=l_reg7,
                             l_reg8=l_reg8))

    return bu, bi, qi, pu, yj, wij, cij
Example 22
    def generate_episode(self, environment, e, desired_return, desired_horizon,
                         testing):

        env = gym.make(environment)

        tot_rewards = []

        done = False
        dead = False

        scores = []
        states = []
        actions = []
        rewards = []

        step, score, start_life = 0, 0, 5

        observe = env.reset()
        for _ in range(random.randint(1, 30)):
            observe, _, _, _ = env.step(1)

        state = utils.pre_processing(observe)
        history = np.stack((state, state, state, state), axis=2)
        history = np.reshape([history], (1, 84, 84, 4))

        while not done:
            states.append(history)

            command = np.asarray([
                desired_return * self.return_scale,
                desired_horizon * self.horizon_scale
            ])
            command = np.reshape(command, [1, len(command)])

            if not testing:
                action = self.get_action(history, command)
                actions.append(action)
            else:
                action = self.get_greedy_action(history, command)

            if action == 0:
                real_action = 1
            elif action == 1:
                real_action = 2
            else:
                real_action = 3

            next_state, reward, done, info = env.step(real_action)
            next_state = utils.pre_processing(next_state)  # process the new frame, not the stale observation
            next_state = np.reshape([next_state], (1, 84, 84, 1))
            next_history = np.append(next_state, history[:, :, :, :3], axis=3)

            clipped_reward = np.clip(reward, -1, 1)
            rewards.append(clipped_reward)

            score += reward

            if start_life > info['ale.lives']:
                dead = True
                start_life = info['ale.lives']

            if dead:
                dead = False
            else:
                history = next_history

            desired_return -= reward  # Line 8 Algorithm 2
            desired_horizon -= 1  # Line 9 Algorithm 2
            desired_horizon = np.maximum(desired_horizon, 1)

        self.memory.add_sample(states, actions, rewards)

        self.testing_rewards.append(score)

        if testing:
            print('Querying the model ...')
            print('Testing score: {}'.format(score))

        return score
Example 23
    make_sentences_vectors, make_similarity_matrix, apply_pagerank, ask_top_n_sentences_to_extract, extract_sentences

pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)

dataset_path = Path.cwd() / "data" / "Reviews.csv"
if __name__ == '__main__':
    dataset = pd.read_csv(dataset_path, nrows=100)
    dataset.drop_duplicates(subset=['Text'], inplace=True)
    dataset.dropna(axis=0, inplace=True)

    sentences_list = split_in_sentences(dataset['Text'])
    sentences_list = remove_html_tag(sentences_list)

    pre_processed_sentences = pre_processing(sentences_list)

    embedding_dimensionality = ask_embedding_dim()
    embeddings = get_word_embeddings(embedding_dimensionality)

    sents_vects = make_sentences_vectors(pre_processed_sentences, embeddings, int(embedding_dimensionality))

    similarity_matrix = make_similarity_matrix(sentences_list, sents_vects, int(embedding_dimensionality))

    pagerank_scores = apply_pagerank(similarity_matrix)

    number_sentences_to_extract = ask_top_n_sentences_to_extract()

    for ex_sent in extract_sentences(number_sentences_to_extract, sentences_list, pagerank_scores):
        print(ex_sent, "\n")
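The pipeline is a standard TextRank-style extractive summarizer: sentence vectors feed a similarity matrix, PageRank ranks the sentences, and the top ones are extracted. A sketch of what `make_sentences_vectors` plausibly does, assuming `embeddings` is a dict mapping words to GloVe-style vectors (the helper's body is not shown):

import numpy as np

def make_sentences_vectors(sentences, embeddings, dim):
    # Average the word vectors of each pre-processed sentence;
    # out-of-vocabulary words fall back to a zero vector.
    vectors = []
    for sentence in sentences:
        words = sentence.split()
        if words:
            v = sum(embeddings.get(w, np.zeros(dim)) for w in words) / len(words)
        else:
            v = np.zeros(dim)
        vectors.append(v)
    return vectors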
Example 24
saved_path = 'trained_models'
n_action = 2

torch.manual_seed(123)

N = 45000

estimator = DQN(n_action)
if N > 0:
    estimator.model = torch.load(f"{saved_path}/model_{N}.pth")

memory = deque(maxlen=memory_size)

env = FlappyBird()
image, reward, is_done = env.next_step(0)
image = pre_processing(image[:screen_width, :int(env.base_y)], image_size,
                       image_size)
image = torch.from_numpy(image)
state = torch.cat(tuple(image for _ in range(4)))[None, :, :, :]

for iter in tqdm(range(N, n_iter), initial=N, total=n_iter):
    epsilon = final_epsilon + (n_iter - iter) * (init_epsilon -
                                                 final_epsilon) / n_iter
    policy = gen_epsilon_greedy_policy(estimator, epsilon, n_action)
    action = policy(state)
    next_image, reward, is_done = env.next_step(action)
    next_image = pre_processing(next_image[:screen_width, :int(env.base_y)],
                                image_size, image_size)
    next_image = torch.from_numpy(next_image)
    next_state = torch.cat((state[0, 1:, :, :], next_image))[None, :, :, :]
    memory.append([state, action, next_state, reward, is_done])
    loss = estimator.replay(memory, batch_size, gamma)
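`gen_epsilon_greedy_policy` wires the annealed epsilon into action selection. A sketch consistent with how the loop above calls it, assuming `estimator.predict` maps a state tensor to per-action Q-values (the helper is not shown in the example):

import random
import torch

def gen_epsilon_greedy_policy(estimator, epsilon, n_action):
    def policy(state):
        # Explore uniformly with probability epsilon, otherwise
        # follow the argmax of the predicted Q-values.
        if random.random() < epsilon:
            return random.randint(0, n_action - 1)
        q_values = estimator.predict(state)
        return int(torch.argmax(q_values).item())
    return policy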
Example 25
def run_loop(rank, params, shared_model, shared_optimizer, count, lock):
    ptitle('Training Process: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]

    env = Env(False, 1, down_period=2)

    # model = A3C()
    model = A3C_LSTM()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    agent = run_agent(model, gpu_id)

    episode = 0
    while episode <= params.episode:
        env.reset()
        agent.done = False
        num_steps = 0

        agent.synchronize(shared_model)
        nAction = 0

        nMove = 0

        while True:
            num_steps += 1
            # random_action = random.randrange(0, 5)
            '''
            if nAction < 9:
                obs = pre_processing(env.map, env._get_curr_block_pos())
                action, value, log_prob, entropy = agent.action_train(obs)
                rew, is_new_block = env.step(action)     # what is the 'is_new_block'?
                nAction += 1
                if nAction != 9:
                    rew = np.clip(rew, 0.0, 64.0)
                    agent.put_reward(rew, value, log_prob, entropy)
            else:
                rew, is_new_block = env.step(100000)  # falling
                rew = np.clip(rew, 0.0, 64.0)
                agent.put_reward(rew, value, log_prob, entropy)
                nAction = 0
            '''
            obs = pre_processing(env.shadow_map, env._get_curr_block_pos())   # env.map
            action, value, log_prob, entropy = agent.action_train(obs)
            if action == 5:
                action = 100000
            rew, shadow_rew, done, putting, height = env.step(action)  # what is the 'is_new_block'?
            rew = np.clip(rew, -1.0, 64.0)
            if rew == 0.0 and action != 3 and action != 4:
                nMove += 1
                if nMove < 6:
                    rew = 0.2
                if putting:
                    rew = - (height / 20.0)
                    nMove = 0
            agent.put_reward(rew, value, log_prob, entropy)

            # pdb.set_trace()
            if env.is_game_end():
                episode += 1
                agent.done = True

            # if num_steps % params.num_steps == 0:
            # if env.is_game_end() or rew >= 1.0:
            if env.is_game_end():
                next_obs = pre_processing(env.map, env._get_curr_block_pos())
                agent.training(next_obs, shared_model, shared_optimizer, params)
                with lock:  # synchronize the counter value across all processes
                    count.value += 1

            if env.is_game_end():
                break