def test_network(self):
    """Replay the saved Q-network greedily, one rendered episode after another.

    Loads ``model.pt`` from ``self.args.save_dir``, puts the network in eval
    mode and then loops forever. Every step is rendered; when an episode ends
    the number of positive-reward steps is printed. The method never returns
    on its own.
    """
    checkpoint = self.args.save_dir + 'model.pt'
    # Returning the storage unchanged from map_location leaves the tensors
    # where torch.load deserialized them.
    weights = torch.load(checkpoint,
                         map_location=lambda storage, loc: storage)
    self.deep_q_network.load_state_dict(weights)
    self.deep_q_network.eval()

    while True:
        first_frame = pre_processing(self.env.reset())
        # The very first state is the first frame repeated four times.
        state = np.stack((first_frame,) * 4, axis=0)
        pipe_sum = 0
        done = False
        # No step cap: the episode lasts until the environment says done.
        while not done:
            self.env.render()
            net_input = torch.tensor(state).unsqueeze(0)
            with torch.no_grad():
                _, _, actions = self.deep_q_network(net_input)
            # Deterministic play: use the action index the network returns.
            action_selected = int(actions.data.numpy()[0])
            frame, reward, done, _ = self.env.step(
                self.action_space[action_selected])
            if reward > 0:
                pipe_sum += 1
            newest = np.expand_dims(pre_processing(frame), 0)
            # Newest frame first, followed by the first three frames of the
            # previous stack.
            state = np.concatenate((newest, state[0:3, :, :].copy()), axis=0)
        print('In this episode, the bird totally pass ' + str(pipe_sum) + ' pipes!')
def warm_up_buffer(self):
    """Fill the replay memory with ``self.warm_up_episodes`` episodes.

    Each episode starts with a desired return and desired horizon of 1,
    queries ``self.get_action`` at every step, and finally stores the
    collected ``(states, actions, rewards)`` lists through
    ``self.memory.add_sample``.

    The frame history is reshaped to ``(1, 84, 84, 4)`` with the newest
    frame in channel 0.
    """
    print('Warming up')
    for _ in range(self.warm_up_episodes):
        states = []
        rewards = []
        actions = []
        done = False
        desired_return = 1
        desired_horizon = 1

        self.environment.reset()
        # One step with action 1 supplies the first observation.
        observe, _, _ = self.environment.step(1)
        state = utils.pre_processing(observe)
        history = np.stack((state, state, state, state), axis=2)
        history = np.reshape([history], (1, 84, 84, 4))

        while not done:
            states.append(history)
            command = np.asarray([
                desired_return * self.return_scale,
                desired_horizon * self.horizon_scale
            ])
            command = np.reshape(command, [1, len(command)])
            action = self.get_action(history, command)
            actions.append(action)

            observe, reward, done = self.environment.step(action)
            # Pre-process the frame this step just returned. The previous
            # code re-used the initial observation here, so the history
            # never changed during an episode.
            next_state = utils.pre_processing(observe)
            next_state = np.reshape([next_state], (1, 84, 84, 1))
            # Push the new frame in front and drop the oldest one.
            history = np.append(next_state, history[:, :, :, :3], axis=3)
            rewards.append(reward)

            desired_return -= reward  # Line 8 Algorithm 2
            desired_horizon -= 1  # Line 9 Algorithm 2
            desired_horizon = np.maximum(desired_horizon, 1)

        self.memory.add_sample(states, actions, rewards)
def correlation_based_implicit_neighbourhood_model(mat, mat_file, l_reg=0.002, gamma=0.005, l_reg2=100.0, k=250):
    """Train the neighbourhood model with stochastic gradient descent.

    Args:
        mat: sparse rating matrix (users x movies); only the top-left
            1/128 x 1/128 corner is used to keep the run fast.
        mat_file: forwarded unchanged to ``pre_processing``.
        l_reg: regularisation weight for every learned parameter.
        gamma: initial learning rate; multiplied by 0.99 after each pass.
        l_reg2: shrinkage constant applied to the co-rating counts.
        k: number of most-similar items used as neighbours of each item.

    Returns:
        Tuple ``(bu, bi, wij, cij)`` of the learned user biases, item
        biases and the two item-item weight matrices.
    """
    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    # baseline_estimator should provide these; random values are used for
    # testing instead.
    baseline_bu, baseline_bi = np.random.rand(no_users, 1) * 2 - 1, np.random.rand(1, no_movies) * 2 - 1

    # The returned indices are not used below; the call is kept because
    # pre_processing may have effects not visible from this function.
    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1

    mu = mat.data[:].mean()

    # Compute similarity matrix: shrunk co-rating counts times correlation.
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    # NOTE(review): for scipy sparse matrices `*` is a matrix product; if an
    # element-wise product was intended this should be S.multiply(...).
    # Left unchanged - confirm against compute_sparse_correlation_matrix.
    S = S * compute_sparse_correlation_matrix(mat)

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    # S never changes during training, so the neighbour ranking of each item
    # is computed once and reused for every rating and every iteration.
    neighbours = {}
    for it in range(n_iter):
        t0 = time()
        for u, i, v in zip(cx.row, cx.col, cx.data):
            if i not in neighbours:
                # Flatten BEFORE slicing: slicing the (1, n) row with [:k]
                # cut the length-1 axis and therefore kept every item.
                similarity_row = S[i,].toarray().ravel()
                neighbours[i] = np.argsort(similarity_row)[::-1][:k]
            Rk_iu = Nk_iu = neighbours[i]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi)

            bu[u] += gamma * (e_ui - l_reg * bu[u])
            bi[0, i] += gamma * (e_ui - l_reg * bi[0, i])

            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma * (1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg * wij[i][Rk_iu])
            cij[i][Nk_iu] += gamma * (1 / sqrt(len(Nk_iu)) * e_ui - l_reg * cij[i][Nk_iu])
        gamma *= 0.99

        if it % 10 == 0:
            t1 = time()
            # "\\ " prints the same backslash + space as before without
            # relying on an invalid escape sequence.
            print(it, "\\ ", n_iter, "(%.2g sec)" % (t1 - t0))
            print("compute loss...")
            # Uses the neighbour set of the last rating processed above.
            print(compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij, baseline_bu, baseline_bi, l_reg=l_reg))

    return bu, bi, wij, cij
def render(self, display):
    """Draw the most recent camera image onto ``display``.

    Does nothing while ``self.image`` is ``None``. Otherwise the raw byte
    buffer is viewed as a (height, width, 4) uint8 array, the fourth
    channel is dropped, the result goes through ``pre_processing`` and is
    blitted at the top-left corner of ``display``.
    """
    if self.image is None:
        return

    image = self.image
    pixels = np.frombuffer(image.raw_data, dtype=np.dtype("uint8"))
    pixels = np.reshape(pixels, (image.height, image.width, 4))
    # Keep only the first three channels; the fourth is not needed.
    frame = pre_processing(pixels[:, :, :3])
    # make_surface is handed the array with its first two axes swapped.
    surface = pygame.surfarray.make_surface(frame.swapaxes(0, 1))
    display.blit(surface, (0, 0))
def baseline_estimator(mat, mat_file, l_reg=0.02, learning_rate=0.0000025):
    """Fit user and movie rating biases with batch gradient descent.

    Args:
        mat: sparse rating matrix (users x movies); only the top-left
            1/128 x 1/128 corner is used.
        mat_file: forwarded unchanged to ``pre_processing``.
        l_reg: L2 regularisation weight.
        learning_rate: step size of each gradient update.

    Returns:
        Tuple ``(bu, bi)``: user biases of shape (no_users, 1) and movie
        biases of shape (1, no_movies).
    """
    # Shrink the problem, then drop users/movies left without any rating.
    row_cap = mat.shape[0] // 128
    col_cap = mat.shape[1] // 128
    mat = mat[0:row_cap, 0:col_cap]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users, no_movies = mat.shape

    # presumably bi_index[u] lists the movies of user u and bu_index[i] the
    # users of movie i (they are used that way below) - verify in
    # pre_processing.
    bu_index, bi_index = pre_processing(mat, mat_file)

    # Random start in [-1, 1); the user biases are drawn first.
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1

    mu = mat.data[:].mean()
    mat_sum1 = mat.sum(1)
    mat_sum0 = mat.sum(0)
    no_users_entries = np.array((mat != 0).sum(1))
    no_movies_entries = np.array((mat != 0).sum(0))

    print("Train...")
    for it in range(200):
        # Both neighbour sums are taken before either bias is updated.
        flat_bi = bi.ravel()
        flat_bu = bu.ravel()
        bi_sum = np.array([flat_bi[idx].sum() for idx in bi_index]).reshape((no_users, 1))
        bu_sum = np.array([flat_bu[idx].sum() for idx in bu_index]).reshape((1, no_movies))

        user_residual = mat_sum1 - no_users_entries * mu - no_users_entries * bu - bi_sum
        bu_gradient = -2.0 * user_residual + 2.0 * l_reg * bu
        bu -= learning_rate * bu_gradient

        # The movie update already sees the freshly updated user biases.
        movie_residual = mat_sum0 - no_movies_entries * mu - no_movies_entries * bi - bu_sum
        bi_gradient = -2.0 * movie_residual + 2.0 * l_reg * bi
        bi -= learning_rate * bi_gradient

        if it % 10 == 0:
            print("compute loss...")
            print(compute_loss(mat, mu, bu, bi, l_reg=l_reg))

    return bu, bi
def integrated_gradients(inputs, model, target_label_idx, predict_and_gradients, baseline, steps=50, cuda=False):
    """Approximate integrated gradients of ``inputs`` relative to ``baseline``.

    Args:
        inputs: the input to explain.
        model: forwarded to ``predict_and_gradients``.
        target_label_idx: forwarded to ``predict_and_gradients``.
        predict_and_gradients: callable
            ``(scaled_inputs, model, target_label_idx, cuda)`` whose first
            return value holds one gradient per scaled input.
        baseline: reference input; ``None`` means ``0 * inputs``.
        steps: number of interpolation intervals (``steps + 1`` points).
        cuda: forwarded to ``predict_and_gradients`` and ``pre_processing``.

    Returns:
        The averaged gradients multiplied element-wise by the pre-processed
        difference between input and baseline, both with axes reordered to
        ``(1, 2, 0)``.
    """
    if baseline is None:
        baseline = 0 * inputs

    # Interpolate linearly from the baseline (i = 0) to the input (i = steps).
    path = inputs - baseline
    scaled_inputs = []
    for i in range(0, steps + 1):
        fraction = float(i) / steps
        scaled_inputs.append(baseline + fraction * path)

    grads, _ = predict_and_gradients(scaled_inputs, model, target_label_idx, cuda)

    # The gradient at the final point (the input itself) is left out of the
    # average.
    mean_grads = np.transpose(np.average(grads[:-1], axis=0), (1, 2, 0))

    difference = pre_processing(inputs, cuda) - pre_processing(baseline, cuda)
    difference = difference.detach().squeeze(0).cpu().numpy()
    difference = np.transpose(difference, (1, 2, 0))

    return difference * mean_grads
def train_network(self):
    """Train the deep Q-network with epsilon-greedy play and replay sampling.

    Runs forever, one episode per outer iteration. Transitions are kept in
    a list capped at ``self.args.buffer_size``. Once
    ``self.args.observate_time`` steps have been taken, every step also
    performs one update on a random mini-batch, and the target network is
    overwritten every ``self.args.hard_update_step`` updates. Progress is
    printed and the weights are saved at the configured episode intervals.
    """
    replay = []
    episode_idx = 0
    total_steps = 0
    n_updates = 0
    smoothed_pipes = None
    epsilon = self.args.init_exploration
    loss = 0

    while True:
        first_frame = pre_processing(self.env.reset())
        # The very first state is the first frame repeated four times.
        state = np.stack((first_frame,) * 4, axis=0)
        pipe_num = 0
        done = False
        # No step cap: the episode lasts until the environment says done.
        while not done:
            net_input = torch.tensor(state).unsqueeze(0)
            if self.use_cuda:
                net_input = net_input.cuda()
            with torch.no_grad():
                _, _, actions = self.deep_q_network(net_input)
            action_selected = select_action(actions, epsilon,
                                            self.num_actions)

            frame, reward, done, _ = self.env.step(
                self.action_space[action_selected])
            newest = np.expand_dims(pre_processing(frame), 0)
            # Newest frame first, followed by the first three frames of the
            # previous stack.
            next_state = np.concatenate(
                (newest, state[0:3, :, :].copy()), axis=0)

            reward = reward_wrapper(reward)
            if reward > 0:
                pipe_num += 1
            total_steps += 1

            # Store the transition and evict the oldest one when full.
            replay.append((state, next_state, reward, done, action_selected))
            if len(replay) > self.args.buffer_size:
                replay.pop(0)

            if total_steps >= self.args.observate_time:
                mini_batch = random.sample(replay, self.args.batch_size)
                loss = self._update_network(mini_batch)
                n_updates += 1
                # Hard update: copy the online weights into the target net.
                if n_updates % self.args.hard_update_step == 0:
                    self.target_network.load_state_dict(
                        self.deep_q_network.state_dict())

            # Linear epsilon decay over the first exploration_steps steps.
            if total_steps <= self.args.exploration_steps:
                epsilon -= (self.args.init_exploration -
                            self.args.final_exploration
                            ) / self.args.exploration_steps

            state = next_state

        # Exponentially weighted average of pipes passed per episode.
        if smoothed_pipes is None:
            smoothed_pipes = pipe_num
        else:
            smoothed_pipes = smoothed_pipes * 0.99 + pipe_num * 0.01

        if episode_idx % self.args.display_interval == 0:
            print('[{}] Episode: {}, Reward: {}, Loss: {}'.format(
                str(datetime.now()), episode_idx, smoothed_pipes, loss))
        if episode_idx % self.args.save_interval == 0:
            save_path = self.args.save_dir + 'model.pt'
            torch.save(self.deep_q_network.state_dict(), save_path)
        episode_idx += 1
noise_feature = keras.backend.squeeze(noise_feature_map[[i][0]], 0) # tf.squeeze(noise_feature_map[[i][0]], axis=0) # noise_feature = keras.backend.reshape(noise_feature, shape=(noise_feature.shape[0] * noise_feature.shape[1], noise_feature.shape[2])) gram_noise = keras.backend.dot(keras.backend.transpose(noise_feature), noise_feature) denominator = (4 * keras.backend.constant(texture_feature.shape[0], dtype=tf.float32)**2) * keras.backend.constant(texture_feature.shape[1], dtype=tf.float32)**2 total_loss += weights[i][0] * (keras.backend.sum(keras.backend.square(tf.subtract(gram_texture, gram_noise))) / keras.backend.cast(denominator, tf.float32)) return total_loss if __name__ == '__main__': # generate original feature maps img_array = utils.pre_processing(input_img, height, width) feature_map = utils.compute_vgg_output(img_array) # generate initial noise image random_ = keras.backend.random_uniform(img_array.shape, minval=0, maxval=0.2) noise_img = keras.backend.variable(value=random_, dtype=tf.float32, name="noise_input") # compute feature maps of initial noise map vgg = vgg_16.VGG16() vgg.build(noise_img) noise_layers_list = dict({0: vgg.conv1_1, 1: vgg.conv1_2, 2: vgg.pool1, 3: vgg.conv2_1, 4: vgg.conv2_2, 5: vgg.pool2, 6: vgg.conv3_1, 7: vgg.conv3_2, 8: vgg.conv3_3, 9: vgg.pool3, 10: vgg.conv4_1, 11: vgg.conv4_2, 12: vgg.conv4_3, 13: vgg.pool4, 14: vgg.conv5_1, 15: vgg.conv5_2, 16: vgg.conv5_3, 17: vgg.pool5})
#read data.txt f = open('input/data.txt') line = f.readline().strip('\n') docs = [] n_docs = int(line) line = f.readline().strip('\n') while line: docs.append(line) # print line line = f.readline().strip('\n') f.close() # Collapsed Gibbs Sampling Derivation for LDA new_docs = utils.pre_processing(docs) lls = [] timecosts = [] n_iter = 100 max_topics = 21 # n_topics = 3 import datetime for n_topics in range(3, max_topics): print '======================= n_topics: {} ============================'.format(n_topics) startime = datetime.datetime.now() # start mylda = lda.LDA( docs=new_docs, n_topics=n_topics,
def svd_more_more(mat, mat_file, gamma1=0.007, gamma2=0.007, gamma3=0.001, l_reg2=100, l_reg6=0.005, l_reg7=0.015, f=50):
    """Train a latent-factor model with implicit feedback by SGD.

    Args:
        mat: sparse rating matrix (users x movies); only the top-left
            1/128 x 1/128 corner is used.
        mat_file: forwarded unchanged to ``pre_processing``.
        gamma1: learning rate of the bias terms (x0.9 after each pass).
        gamma2: learning rate of the factor terms (x0.9 after each pass).
        gamma3: accepted for interface compatibility; not used here.
        l_reg2: accepted for interface compatibility; not used here.
        l_reg6: regularisation weight of the bias terms.
        l_reg7: regularisation weight of the factor terms.
        f: number of latent factors.

    Returns:
        Tuple ``(bu, bi, qi, pu, yj)`` of the learned parameters.
    """
    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]
    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    # bi_index[u] is used below as the item set N(u) of user u.
    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1

    mu = mat.data[:].mean()

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    for it in range(n_iter):
        for u, i, v in zip(cx.row, cx.col, cx.data):
            N_u = bi_index[u]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, qi, pu, N_u, yj)

            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui * (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) - l_reg7 * qi[i])
            # pu and yj are updated with the qi[i] that was just refreshed.
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] - l_reg7 * yj[N_u])
        gamma1 *= 0.9
        gamma2 *= 0.9

        if it % 10 == 0:
            # "\\ " prints the same backslash + space as before without
            # relying on an invalid escape sequence.
            print(it, "\\ ", n_iter)
            print("compute loss...")
            # N_u here is the item set of the last rating processed above.
            print(compute_loss(mat, mu, bu, bi, qi, pu, N_u, yj, l_reg6=l_reg6, l_reg7=l_reg7))

    return bu, bi, qi, pu, yj
def main(args):
    """Train or evaluate a time-series GAN as configured by ``args``.

    In ``train`` mode the discriminator/critic and the generator are
    optimised alternately (1 critic step per epoch for ``l2``, 5 for
    ``wgangp``); every ``args.log_every`` epochs the generator weights,
    sample plots and CSV files are written. In ``test`` mode the generator
    weights saved for epoch ``args.num_epochs`` are loaded and fake series
    are generated window by window, then plotted, saved and scored.

    Raises:
        NotImplementedError: for an unknown ``args.model``,
            ``args.criterion``, ``args.optim`` or ``args.mode``.
    """
    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Plots, Weights and CSV Path #
    paths = [
        args.samples_path, args.weights_path, args.csv_path,
        args.inference_path
    ]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2,
                                       args.constant, args.delta)

    train_X, train_Y, test_X, test_Y = prepare_data(data, preprocessed_data,
                                                    args)

    train_X = moving_windows(train_X, args.ts_dim)
    train_Y = moving_windows(train_Y, args.ts_dim)
    test_X = moving_windows(test_X, args.ts_dim)
    test_Y = moving_windows(test_Y, args.ts_dim)

    # Prepare Networks #
    if args.model == 'conv':
        D = ConvDiscriminator(args.ts_dim).to(device)
        G = ConvGenerator(args.latent_dim, args.ts_dim).to(device)
    elif args.model == 'lstm':
        D = LSTMDiscriminator(args.ts_dim).to(device)
        G = LSTMGenerator(args.latent_dim, args.ts_dim).to(device)
    else:
        raise NotImplementedError

    #########
    # Train #
    #########
    if args.mode == 'train':

        # Loss Function #
        if args.criterion == 'l2':
            criterion = nn.MSELoss()
            n_critics = 1
        elif args.criterion == 'wgangp':
            # The WGAN-GP losses are computed directly from critic scores.
            criterion = None
            n_critics = 5
        else:
            raise NotImplementedError

        # Optimizers #
        if args.optim == 'sgd':
            D_optim = torch.optim.SGD(D.parameters(), lr=args.lr,
                                      momentum=0.9)
            G_optim = torch.optim.SGD(G.parameters(), lr=args.lr,
                                      momentum=0.9)
        elif args.optim == 'adam':
            D_optim = torch.optim.Adam(D.parameters(), lr=args.lr,
                                       betas=(0., 0.9))
            G_optim = torch.optim.Adam(G.parameters(), lr=args.lr,
                                       betas=(0., 0.9))
        else:
            raise NotImplementedError

        D_optim_scheduler = get_lr_scheduler(D_optim, args)
        G_optim_scheduler = get_lr_scheduler(G_optim, args)

        # Lists #
        D_losses, G_losses = list(), list()

        # Train #
        print(
            "Training Time Series GAN started with total epoch of {}.".format(
                args.num_epochs))

        for epoch in range(args.num_epochs):

            #######################
            # Train Discriminator #
            #######################
            for j in range(n_critics):
                # Clear gradients before EVERY critic step. Zeroing only
                # once per epoch let the gradients of the 5 WGAN-GP critic
                # steps (and of the previous generator step) pile up.
                D_optim.zero_grad()

                series, start_dates = get_samples(train_X, train_Y,
                                                  args.batch_size)

                # Data Preparation #
                series = series.to(device)
                noise = torch.randn(args.batch_size, 1,
                                    args.latent_dim).to(device)

                # Adversarial Loss using Real Image #
                prob_real = D(series.float())

                if args.criterion == 'l2':
                    real_labels = torch.ones(prob_real.size()).to(device)
                    D_real_loss = criterion(prob_real, real_labels)
                elif args.criterion == 'wgangp':
                    D_real_loss = -torch.mean(prob_real)

                # Adversarial Loss using Fake Image #
                fake_series = G(noise)
                # detach(): the critic step must not reach the generator.
                prob_fake = D(fake_series.detach())

                if args.criterion == 'l2':
                    fake_labels = torch.zeros(prob_fake.size()).to(device)
                    D_fake_loss = criterion(prob_fake, fake_labels)
                elif args.criterion == 'wgangp':
                    D_fake_loss = torch.mean(prob_fake)
                    D_gp_loss = args.lambda_gp * get_gradient_penalty(
                        D, series.float(), fake_series.float(), device)

                # Calculate Total Discriminator Loss #
                D_loss = D_fake_loss + D_real_loss

                if args.criterion == 'wgangp':
                    # D_gp_loss is already scaled by lambda_gp above; it
                    # used to be multiplied by lambda_gp a second time.
                    D_loss += D_gp_loss

                # Back Propagation and Update #
                D_loss.backward()
                D_optim.step()

            ###################
            # Train Generator #
            ###################
            G_optim.zero_grad()

            # Adversarial Loss (re-uses the noise of the last critic step) #
            fake_series = G(noise)
            prob_fake = D(fake_series)

            # Calculate Total Generator Loss #
            if args.criterion == 'l2':
                real_labels = torch.ones(prob_fake.size()).to(device)
                G_loss = criterion(prob_fake, real_labels)
            elif args.criterion == 'wgangp':
                G_loss = -torch.mean(prob_fake)

            # Back Propagation and Update #
            G_loss.backward()
            G_optim.step()

            # Add items to Lists #
            D_losses.append(D_loss.item())
            G_losses.append(G_loss.item())

            # Adjust Learning Rate #
            D_optim_scheduler.step()
            G_optim_scheduler.step()

            # Print Statistics, Save Model Weights and Series #
            if (epoch + 1) % args.log_every == 0:

                # The printed losses are averages over all epochs so far.
                print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
                    epoch + 1, args.num_epochs, np.average(D_losses),
                    np.average(G_losses)))
                torch.save(
                    G.state_dict(),
                    os.path.join(
                        args.weights_path,
                        'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                            G.__class__.__name__, args.criterion.upper(),
                            epoch + 1)))

                # Generate Samples and Save Plots and CSVs #
                series, fake_series = generate_fake_samples(
                    test_X, test_Y, G, scaler_1, scaler_2, args, device)
                plot_series(series, fake_series, G, epoch, args,
                            args.samples_path)
                make_csv(series, fake_series, G, epoch, args, args.csv_path)

    ########
    # Test #
    ########
    elif args.mode == 'test':

        # Load Model Weights #
        # map_location lets weights saved on a GPU load on a CPU-only host.
        G.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path,
                    'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                        G.__class__.__name__, args.criterion.upper(),
                        args.num_epochs)),
                map_location=device))

        # Lists #
        real, fake = list(), list()

        # Inference #
        for idx in range(0, test_X.shape[0], args.ts_dim):

            # Do not plot if the remaining data is less than time dimension #
            end_ix = idx + args.ts_dim

            if end_ix > len(test_X) - 1:
                break

            # Prepare Data #
            test_data = test_X[idx, :]
            test_data = np.expand_dims(test_data, axis=0)
            test_data = np.expand_dims(test_data, axis=1)
            test_data = torch.from_numpy(test_data).to(device)

            start = test_Y[idx, 0]

            noise = torch.randn(args.val_batch_size, 1,
                                args.latent_dim).to(device)

            # Generate Fake Data #
            with torch.no_grad():
                fake_series = G(noise)

            # Convert to Numpy format for Saving #
            test_data = np.squeeze(test_data.cpu().data.numpy())
            fake_series = np.squeeze(fake_series.cpu().data.numpy())

            test_data = post_processing(test_data, start, scaler_1, scaler_2,
                                        args.delta)
            fake_series = post_processing(fake_series, start, scaler_1,
                                          scaler_2, args.delta)

            real += test_data.tolist()
            fake += fake_series.tolist()

        # Plot, Save to CSV file and Derive Metrics #
        plot_series(real, fake, G, args.num_epochs - 1, args,
                    args.inference_path)
        make_csv(real, fake, G, args.num_epochs - 1, args,
                 args.inference_path)
        derive_metrics(real, fake, args)

    else:
        raise NotImplementedError
def generate_timeseries(args):
    """Generate fake series with a trained generator and save the results.

    Loads the ``skip`` generator weights saved for epoch
    ``args.num_epochs``, walks over the data in steps of ``args.ts_dim``
    and, for each window, prepends the first ``args.conditional_dim``
    values of the real window to the generated series. Real and fake
    values are post-processed, collected, then plotted and written to CSV.

    Raises:
        NotImplementedError: if ``args.model`` is not ``'skip'``.
    """
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    make_dirs(args.inference_path)

    # Only the skip generator is supported here.
    if args.model != 'skip':
        raise NotImplementedError
    G = SkipGenerator(args.latent_dim, args.ts_dim,
                      args.conditional_dim).to(device)
    weights_file = os.path.join(
        args.weights_path,
        'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
            args.criterion.upper(), args.num_epochs))
    G.load_state_dict(torch.load(weights_file))

    data = pd.read_csv(args.data_path)[args.column]
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    real = []
    fake = []
    last_epoch = args.num_epochs - 1

    for idx in range(0, data.shape[0], args.ts_dim):
        # Stop once a full window no longer fits inside the data.
        if idx + args.ts_dim > len(data) - 1:
            break

        # Add two leading axes of length 1 in front of the window.
        window = X[idx, :][np.newaxis, np.newaxis, ...]
        samples = torch.from_numpy(window).to(device)
        start_dates = label[idx, 0]

        noise = torch.randn(args.val_batch_size, 1,
                            args.latent_dim).to(device)
        with torch.no_grad():
            generated = G(noise)

        # Real conditioning prefix followed by the generated part.
        prefix = samples[:, :, :args.conditional_dim].float()
        fake_series = torch.cat((prefix, generated.float()), dim=2)

        samples = np.squeeze(samples.cpu().data.numpy())
        fake_series = np.squeeze(fake_series.cpu().data.numpy())

        samples = post_processing(samples, start_dates, scaler_1, scaler_2,
                                  args.delta)
        fake_series = post_processing(fake_series, start_dates, scaler_1,
                                      scaler_2, args.delta)

        real.extend(samples.tolist())
        fake.extend(fake_series.tolist())

    plot_sample(real, fake, last_epoch, args)
    make_csv(real, fake, last_epoch, args)
def generate_episode(self, environment, e, desired_return, desired_horizon, testing):
    """Play one episode conditioned on a command and store it in memory.

    Args:
        environment: one of "Catch-v0", "Catch-v2", "Catch-v3", "Catch-v4".
        e: unused; kept so existing callers keep working.
        desired_return: return asked of the agent at the first step.
        desired_horizon: horizon asked of the agent at the first step.
        testing: when true the greedy action is used and no action is
            recorded; otherwise ``self.get_action`` is used and recorded.

    Returns:
        The sum of the rewards collected during the episode.

    Raises:
        ValueError: if ``environment`` is not one of the names above.
    """
    # Bind `env` in every branch; it used to be set only for "Catch-v0", so
    # the other names failed with UnboundLocalError at env.reset().
    # self.environment is still assigned for v2-v4 exactly as before.
    if environment == "Catch-v0":
        env = catch.CatchEnv()
    elif environment == "Catch-v2":
        env = self.environment = catch_v2.CatchEnv()
    elif environment == "Catch-v3":
        env = self.environment = catch_v3.CatchEnv()
    elif environment == "Catch-v4":
        env = self.environment = catch_v4.CatchEnv()
    else:
        raise ValueError("Unknown environment: {}".format(environment))

    done = False
    states = []
    actions = []
    rewards = []
    score = 0

    env.reset()
    # One step with action 1 supplies the first observation.
    observe, _, _ = env.step(1)
    state = utils.pre_processing(observe)
    history = np.stack((state, state, state, state), axis=2)
    history = np.reshape([history], (1, 84, 84, 4))

    while not done:
        states.append(history)
        command = np.asarray([
            desired_return * self.return_scale,
            desired_horizon * self.horizon_scale
        ])
        command = np.reshape(command, [1, len(command)])

        if not testing:
            action = self.get_action(history, command)
            actions.append(action)
        else:
            action = self.get_greedy_action(history, command)

        observe, reward, done = env.step(action)
        # Pre-process the frame this step just returned. The previous code
        # re-used the initial observation, so the history never changed.
        next_state = utils.pre_processing(observe)
        next_state = np.reshape([next_state], (1, 84, 84, 1))
        # Push the new frame in front and drop the oldest one.
        history = np.append(next_state, history[:, :, :, :3], axis=3)

        # Record the reward; the list used to stay empty although it is
        # handed to memory.add_sample below (warm_up_buffer records it too).
        rewards.append(reward)
        score += reward

        desired_return -= reward  # Line 8 Algorithm 2
        desired_horizon -= 1  # Line 9 Algorithm 2
        desired_horizon = np.maximum(desired_horizon, 1)

    self.memory.add_sample(states, actions, rewards)
    self.testing_rewards.append(score)

    if testing:
        print('Querying the model ...')
        print('Testing score: {}'.format(score))

    return score
# -*- coding:utf-8 -*- import os import utils import jieba_cut import pandas as pd import time import model_utils if not os.path.exists('./results'): os.makedirs('./results') t0 = time.time() print('============================文本前处理开始============================') data = pd.read_csv( '/Users/shen-pc/Desktop/WORK/ITS/KR2/LSD_data/problem_0528.csv') data_proc = utils.pre_processing(data) data_proc.to_csv('./results/problem_0528_preprocessing.csv') t1 = time.time() print('文本前处理耗时:', (t1 - t0) / 60, 'min') print('============================文本前处理over============================', '\n\n') ''' ------------------------------------------------------------------------------------------------------------------------ ''' print('============================分词开始============================') data_cut = jieba_cut.cut(data_proc) data_cut.to_csv('./results/problem_0528_jieba.csv') t2 = time.time() print('分词耗时:', (t2 - t1) / 60, 'min') print('============================分词over============================', '\n\n')
def train(data_conf, model_conf, **kwargs):
    """Prepare the cashflow training dataset (step T.1 of model training).

    First reads the window sizes, the date range and the model name from
    the configuration. Then loads the balance time series from the Delta
    table named in ``data_conf``, samples 70% of it, runs
    ``utils.pre_processing`` and overwrites the Delta table
    ``cashflow_training_step1`` with the result.

    Any exception is reported on stdout together with its traceback and
    then re-raised.
    """

    def _report(headline, err):
        # Shared failure report; must be called from inside an except block
        # so that format_exc() sees the active exception.
        print(headline)
        print("Exception Trace: {0}".format(err))
        print(traceback.format_exc())

    try:
        for banner_line in ("-----------------------------------",
                            "Starting Cashflow DL Model Training",
                            "-----------------------------------"):
            print(banner_line)
        print()

        # ==============================
        # 0. Main parameters definitions
        # ==============================

        # Sizes of the X (history) and y (prediction) windows, in days.
        n_days_x = int(data_conf['number_of_historical_days'])
        n_days_y = int(data_conf['number_of_predicted_days'])  # 365, 92
        print('Number of days used for prediction (X): ', n_days_x)
        print('Number of days predicted (y): ', n_days_y)
        print()

        # Date range definition
        start_date = data_conf['start_date']
        end_date = data_conf['end_date']
        import utils
        # The six derived dates are not used further in this step.
        (start_dt, end_dt, prediction_start, prediction_end, end_plus_one_day,
         end_minus_six_months) = utils.dates_definitions(
             start_date, end_date, n_days_x, n_days_y)
        print('Date range: ', start_date, end_date)
        print()

        model_name = model_conf['model_name']

    except Exception as err:
        _report("Errored on initialization", err)
        raise err

    try:
        # ========================================
        # T.1 Pre-processing before model training
        # ========================================

        # Loading dataset
        source_table = data_conf[environment]['table_to_train_on']
        source_path = "/mnt/delta/{0}".format(source_table)
        ts_balance = spark.read.format("delta").load(source_path)

        # Cleaning of the time series
        balance_as_floats = ts_balance.balance.cast("array<float>")
        ts_balance = ts_balance.withColumn('balance', balance_as_floats)
        # A UDF-based filter keeping only series with enough transactions is
        # disabled here: it did not work with Databricks Connect.

        # Dataset on which the model is trained, tested and validated.
        # 0.7 for now; a real run would use 0.1 at best, or 0.05.
        ts_balance_model = ts_balance.sample(False, 0.7, seed=0)
        print('ts_balance_model.count()', ts_balance_model.count())

        # Pre-processing before model training
        ts_balance_model = utils.pre_processing(ts_balance_model,
                                                end_date,
                                                spark,
                                                serving=False)
        ts_balance_model.show(3)

        print('ts_balance_model.rdd.getNumPartitions()',
              ts_balance_model.rdd.getNumPartitions())
        ts_balance_model.show(3)

        # Saving prepared dataset
        target_table = 'cashflow_training_step1'
        target_path = "/mnt/delta/{0}".format(target_table)
        ts_balance_model.write.format("delta").mode("overwrite").save(
            target_path)

    except Exception as err:
        _report("Errored on step T.1: pre-processing before model training",
                err)
        raise err
# Compares cosine similarities reported by model_utils before and after
# calling model_utils.train on a handful of new problems.
import model_utils
import utils
import jieba_cut
import pandas as pd

# Problems that were already segmented by the earlier pipeline run.
data = pd.read_csv(
    '/Users/shen-pc/Desktop/WORK/ITS/My method/results/problem_0528_jieba.csv',
    index_col=0)
# Similarity of rows 5322 and 5323 as reported before the new training call.
sim1 = model_utils.cal_cos_sim(data.iloc[5322]['id'], data.iloc[5323]['id'])
print('原model的结果[5322 vs 5323]=', sim1)

# Try a few new problems.
data_new = pd.read_csv('/Users/shen-pc/Desktop/WORK/ITS/data/real_item.csv',
                       index_col=0)
# Align the id column name with the one used in `data`.
data_new.rename(columns={'problem_id': 'id'}, inplace=True)
# Label-based slice: keeps the rows whose index label is at most 20.
data_new = data_new.loc[:20]
data_new = utils.pre_processing(data_new)
data_new = jieba_cut.cut(data_new)

# Train with the new data added:
model_new = model_utils.train(data_new)
# Same pair as sim1, plus a second pair, queried after the training call.
sim2 = model_utils.cal_cos_sim(data.iloc[5322]['id'], data.iloc[5323]['id'])
sim3 = model_utils.cal_cos_sim(data.iloc[100]['id'], data.iloc[1000]['id'])
print('新model的结果[5322 vs 5323]=', sim2)
print('新model的结果[100 vs 1000]=', sim3)

# Involving a problem that was not in the original data:
sim4 = model_utils.cal_cos_sim(data.iloc[0]['id'], data_new.iloc[0]['id'])
print('新model的结果[old 0 vs new 0]=', sim4)

# Most similar:
most1 = model_utils.most_similar(data.iloc[0]['id'])
most2 = model_utils.most_similar(data_new.iloc[0]['id'])
# Only most1 is printed; most2 is computed but not shown.
print('\n\n\n\n', data.loc[0, 'cut'], '\n', most1, '\n\n')
save_path = 'crnn_overratio_%1.1f_'%OERT_RATIO+utils.time_for_saving() if not os.path.exists(save_path): os.mkdir(save_path) #get the original data, including segmenting signals,forming the labels if os.path.exists(excel_path): df=pd.read_excel(excel_path,dtype={'Name':str, 'Value':float}) else: raise FileNotFoundError('Please contact the authorts for the dataset') recording_list = df.to_dict('index') recording_list,max_len, max_sig_len = utils.pre_processing(recording_list) fold_data = utils.get_fold_info(recording_list, OERT_RATIO,experiment = 'seq') fold_results = {} for fold_number in range(10): model_para = {'over_ratio': OERT_RATIO, 'total_epoch': TOTAL_EPOCH, 'CNN_channel': CNN_CH, 'RNN_channel': RNN_CH, 'filter_size': FILTER_SIZE, 'pooling_size': POOLING_SIZE, 'batch_size': BATCH_SIZE, 'fc_channel':FC_CH, 'l2_c':L2_LAMBDA,
def test(rank, params, shared_model, count, lock):
    """Evaluate the shared model in a separate test process.

    Plays episodes until ``episode`` exceeds ``params.episode_test``. Before
    each episode the update counter is read under ``lock`` and the local
    agent is synchronised with ``shared_model``. An episode ends when the
    game ends or when the score goes above 1000; in the latter case the
    model weights are also saved. Per-episode statistics are printed and
    written to ``./2blocks_rew.log``.
    """
    logging.basicConfig(filename='./2blocks_rew.log', level=logging.INFO)
    ptitle('Test Process: {}'.format(rank))
    # Spread the test processes over the configured GPUs.
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]
    env = Env(True, 1, down_period=2)
    model = A3C_LSTM()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    agent = run_agent(model, gpu_id)

    episode = 0
    while episode <= params.episode_test:
        env.reset()
        with lock:
            n_update = count.value
        agent.synchronize(shared_model)

        num_steps = 0
        line1 = line2 = line3 = line4 = 0
        nMove = 0
        rew_height = 0
        rew_move = 0

        while True:
            num_steps += 1
            obs = pre_processing(env.shadow_map, env._get_curr_block_pos())
            action = agent.action_test(obs)
            # Agent action 5 is passed to the environment as 100000.
            if action == 5:
                action = 100000
            rew, shadow_reward, done, putting, height = env.step(action)

            # Zero-reward steps with an action other than 3 or 4 earn a
            # small bonus, for at most five such steps per placed block.
            if rew == 0.0 and action != 3 and action != 4:
                nMove += 1
                if nMove < 6:
                    rew_move += 0.2
            if putting:
                # Placement is penalised in proportion to the height.
                rew_height += -(height / 20.0)
                nMove = 0

            # The reward value decides which line counter is incremented.
            if rew == 1.0:
                line1 += 1
            elif rew == 8.0:
                line2 += 1
            elif rew == 27.0:
                line3 += 1
            elif rew == 64:
                line4 += 1

            game_over = env.is_game_end()
            if not game_over and not env.score > 1000:
                continue

            # Episode finished: report once on stdout and once in the log.
            episode += 1
            stats = " ".join([
                "-------------episode stats-------------\n",
                "nUpdate: {}\n".format(n_update),
                "line1: {}\n".format(line1),
                "line2: {}\n".format(line2),
                "line3: {}\n".format(line3),
                "line4: {}\n".format(line4),
                "all_lines: {}\n".format(
                    str(line1 + line2 + line3 + line4)),
                "score: {}\n".format(env.score),
                "rew_move: {}\n".format(rew_move),
                "rew_height: {}\n".format(rew_height),
                "steps: {}\n".format(num_steps)
            ])
            print(stats)
            if not game_over:
                # Score above 1000: keep the weights of this model version.
                with torch.cuda.device(gpu_id):
                    torch.save(agent.model.state_dict(),
                               './weight/model' + str(n_update) + '.ckpt')
            logging.info(stats)
            break
def correlation_based_implicit_neighbourhood_model_vectorized(mat, mat_file, l_reg=0.002, gamma=0.005, l_reg2=100.0, k=250):
    """Fit user/item biases and item-item weights on a subsample of ``mat``.

    The function keeps only the top-left ``1/128 x 1/128`` corner of ``mat``,
    drops empty rows/columns, builds a shrunk item-item similarity matrix and
    then runs 200 gradient iterations, printing timing and loss every 10th
    iteration.

    Args:
        mat: sparse matrix (must support ``getnnz``, ``tocoo`` and ``.data``);
            rows are treated as users and columns as movies.
        mat_file: forwarded unchanged to ``pre_processing``.
        l_reg: regularisation factor used in every parameter update.
        gamma: learning rate; divided by 100 on entry and multiplied by 0.99
            after every iteration.
        l_reg2: shrinkage constant; co-occurrence counts ``n`` are mapped to
            ``n / (n + l_reg2)`` before being combined with the correlation
            matrix.
        k: maximum number of most-similar items kept per item.

    Returns:
        Tuple ``(bu, bi, wij, cij)`` of the trained arrays.
    """
    gamma /= 100

    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]
    no_users_entries = np.array((mat != 0).sum(1))
    no_movies_entries = np.array((mat != 0).sum(0))

    # baseline_bu, baseline_bi = baseline_estimator(mat)
    # We should call baseline_estimator but we can init at random for testing
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1

    # The returned indices are not used below; the call is kept because
    # pre_processing is defined elsewhere and may have side effects.
    bu_index, bi_index = pre_processing(mat, mat_file)

    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1

    mu = mat.data[:].mean()

    # Compute similarity matrix
    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)

    # For every observed (user, item) pair store the k items most similar to
    # `item`.  The row is flattened *before* slicing: S[i, ].toarray() has
    # shape (1, n), so slicing it with [:k] would cut the length-1 axis and
    # silently keep all n items.  The ranking depends on the item only, so it
    # is computed once per item and reused.
    neighbours = {}
    Rk = []
    cx = mat.tocoo()
    for u, i in zip(cx.row, cx.col):
        if i not in neighbours:
            neighbours[i] = np.flip(np.argsort(S[i, ].toarray().ravel()))[:k]
        Rk.append((u, i, neighbours[i]))

    # Train
    print("Train...")
    n_iter = 200
    for it in range(n_iter):
        t0 = time()
        e = compute_e_vectorized(mat, mu, bu, bi, Rk, wij, Rk, cij, baseline_bu, baseline_bi)

        # Vectorized operations
        bu += gamma * (e.sum(1) - no_users_entries * l_reg * bu)
        bi += gamma * (e.sum(0) - no_movies_entries * l_reg * bi)

        # TODO: vectorize the following
        for u, i, Rk_iu in Rk:
            Nk_iu = Rk_iu
            e_ui = e[u, i]
            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma * (
                1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj)
                - l_reg * wij[i][Rk_iu]
            )
            cij[i][Nk_iu] += gamma * (
                1 / sqrt(len(Nk_iu)) * e_ui - l_reg * cij[i][Nk_iu]
            )
        gamma *= 0.99

        if it % 10 == 0:
            t1 = time()
            # "\\ " is the same two characters the original "\ " produced,
            # written without the invalid escape sequence.
            print(it, "\\ ", n_iter, "(%.2g sec)" % (t1 - t0))
            print("compute loss...")
            print(compute_loss_vectorized(mat, mu, bu, bi, Rk, wij, Rk, cij, baseline_bu, baseline_bi, l_reg=l_reg))

    return bu, bi, wij, cij
def train(args):
    """Train the time-series GAN described by ``args`` and save periodic snapshots.

    Reads one column of a CSV file, pre-processes it, slices it into moving
    windows and alternates discriminator / generator updates for
    ``args.num_epochs`` epochs.  Every ``args.save_every`` epochs the generator
    weights are written to ``args.weights_path`` and a sample is plotted and
    exported via ``plot_sample`` / ``make_csv``.

    Args:
        args: namespace providing at least ``gpu_num``, ``seed``,
            ``samples_path``, ``plots_path``, ``weights_path``, ``csv_path``,
            ``data_path``, ``column``, ``delta``, ``ts_dim``, ``latent_dim``,
            ``conditional_dim``, ``criterion`` (``'l2'`` or ``'wgangp'``),
            ``lr``, ``num_epochs``, ``batch_size``, ``save_every`` and, for
            ``'wgangp'``, ``lambda_gp``.

    Raises:
        NotImplementedError: if ``args.criterion`` is neither ``'l2'`` nor
            ``'wgangp'``.
    """
    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Plots, Weights and CSV Path #
    paths = [
        args.samples_path, args.plots_path, args.weights_path, args.csv_path
    ]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    # Pre-processing #
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    # Prepare Networks #
    D = Discriminator(args.ts_dim).to(device)
    G = Generator(args.latent_dim, args.ts_dim,
                  args.conditional_dim).to(device)

    # Loss Function and number of critic steps per generator step #
    if args.criterion == 'l2':
        criterion = nn.MSELoss()
        n_critics = 1
    elif args.criterion == 'wgangp':
        criterion = None  # the Wasserstein losses are computed inline
        n_critics = 5
    else:
        raise NotImplementedError
    use_wgangp = args.criterion == 'wgangp'

    # Optimizers #
    D_optim = torch.optim.Adam(D.parameters(), lr=args.lr, betas=(0.5, 0.9))
    G_optim = torch.optim.Adam(G.parameters(), lr=args.lr, betas=(0.5, 0.9))

    D_optim_scheduler = get_lr_scheduler(D_optim, args)
    G_optim_scheduler = get_lr_scheduler(G_optim, args)

    # Lists #
    D_losses, G_losses = list(), list()

    # Train #
    print("Training Time Series GAN started with total epoch of {}.".format(
        args.num_epochs))

    for epoch in range(args.num_epochs):

        #######################
        # Train Discriminator #
        #######################

        for j in range(n_critics):
            # Zero the critic gradients for every critic step.  Zeroing only
            # once per epoch made each later step reuse the gradients of the
            # earlier ones (and of the previous generator pass through D).
            D_optim.zero_grad()

            series, start_dates = get_samples(X, label, args.batch_size)

            # Data Preparation #
            series = series.to(device)
            noise = torch.randn(args.batch_size, 1,
                                args.latent_dim).to(device)

            # Adversarial Loss using Real Image #
            prob_real = D(series.float())

            if use_wgangp:
                D_real_loss = -torch.mean(prob_real)
            else:
                real_labels = torch.ones(prob_real.size()).to(device)
                D_real_loss = criterion(prob_real, real_labels)

            # Adversarial Loss using Fake Image #
            fake_series = G(noise)
            # The first `conditional_dim` steps of the real window are
            # prepended to the generated steps.
            fake_series = torch.cat(
                (series[:, :, :args.conditional_dim].float(),
                 fake_series.float()),
                dim=2)

            prob_fake = D(fake_series.detach())

            if use_wgangp:
                D_fake_loss = torch.mean(prob_fake)
                D_gp_loss = args.lambda_gp * get_gradient_penalty(
                    D, series.float(), fake_series.float(), device)
            else:
                fake_labels = torch.zeros(prob_fake.size()).to(device)
                D_fake_loss = criterion(prob_fake, fake_labels)

            # Calculate Total Discriminator Loss #
            D_loss = D_fake_loss + D_real_loss

            if use_wgangp:
                # D_gp_loss is already scaled by lambda_gp above; multiplying
                # again here would weight the penalty by lambda_gp ** 2.
                D_loss += D_gp_loss

            # Back Propagation and Update #
            D_loss.backward()
            D_optim.step()

        ###################
        # Train Generator #
        ###################

        # Clear anything the critic phase may have written into G's gradients
        # (e.g. through the gradient-penalty term) before the generator step.
        G_optim.zero_grad()

        # Adversarial Loss #
        fake_series = G(noise)
        fake_series = torch.cat(
            (series[:, :, :args.conditional_dim].float(),
             fake_series.float()),
            dim=2)
        prob_fake = D(fake_series)

        # Calculate Total Generator Loss #
        if use_wgangp:
            G_loss = -torch.mean(prob_fake)
        else:
            real_labels = torch.ones(prob_fake.size()).to(device)
            G_loss = criterion(prob_fake, real_labels)

        # Back Propagation and Update #
        G_loss.backward()
        G_optim.step()

        # Add items to Lists #
        D_losses.append(D_loss.item())
        G_losses.append(G_loss.item())

        ####################
        # Print Statistics #
        ####################

        # The printed values are running averages over all epochs so far.
        print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
            epoch + 1, args.num_epochs, np.average(D_losses),
            np.average(G_losses)))

        # Adjust Learning Rate #
        D_optim_scheduler.step()
        G_optim_scheduler.step()

        # Save Model Weights and Series #
        if (epoch + 1) % args.save_every == 0:
            torch.save(
                G.state_dict(),
                os.path.join(
                    args.weights_path,
                    'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
                        args.criterion.upper(), epoch + 1)))

            series, fake_series = generate_fake_samples(
                X, label, G, scaler_1, scaler_2, args, device)
            plot_sample(series, fake_series, epoch, args)
            make_csv(series, fake_series, epoch, args)

    print("Training finished.")
def integrated_model(mat, mat_file, gamma1=0.007, gamma2=0.007, gamma3=0.001, l_reg2=100, l_reg6=0.005, l_reg7=0.015, l_reg8=0.015, k=300, f=50):
    """Fit biases, latent factors and item-item weights on a subsample of ``mat``.

    The function keeps only the top-left ``1/128 x 1/128`` corner of ``mat``,
    drops empty rows/columns, builds a shrunk item-item similarity matrix and
    runs 200 passes of per-rating gradient updates, printing the loss every
    10th pass.

    Args:
        mat: sparse matrix (must support ``getnnz``, ``tocoo`` and ``.data``);
            rows are treated as users and columns as movies.
        mat_file: forwarded unchanged to ``pre_processing``.
        gamma1: learning rate for the bias terms ``bu`` / ``bi``.
        gamma2: learning rate for the factor arrays ``qi`` / ``pu`` / ``yj``.
        gamma3: learning rate for the neighbourhood weights ``wij`` / ``cij``.
            All three rates are multiplied by 0.9 after every pass.
        l_reg2: shrinkage constant; co-occurrence counts ``n`` are mapped to
            ``n / (n + l_reg2)``.
        l_reg6: regularisation factor for the bias updates.
        l_reg7: regularisation factor for the factor updates.
        l_reg8: regularisation factor for the neighbourhood-weight updates.
        k: maximum number of most-similar items used per item.
        f: number of latent factors (second dimension of ``qi``/``pu``/``yj``).

    Returns:
        Tuple ``(bu, bi, qi, pu, yj, wij, cij)`` of the trained arrays.
    """
    # subsample the matrix to make computation faster
    mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
    mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

    print(mat.shape)
    no_users = mat.shape[0]
    no_movies = mat.shape[1]

    # baseline_bu, baseline_bi = baseline_estimator(mat)
    # We should call baseline_estimator but we can init at random for test
    baseline_bu = np.random.rand(no_users, 1) * 2 - 1
    baseline_bi = np.random.rand(1, no_movies) * 2 - 1

    bu_index, bi_index = pre_processing(mat, mat_file)

    # Init parameters
    bu = np.random.rand(no_users, 1) * 2 - 1
    bi = np.random.rand(1, no_movies) * 2 - 1
    wij = np.random.rand(no_movies, no_movies) * 2 - 1
    cij = np.random.rand(no_movies, no_movies) * 2 - 1
    qi = np.random.rand(no_movies, f) * 2 - 1
    pu = np.random.rand(no_users, f) * 2 - 1
    yj = np.random.rand(no_movies, f) * 2 - 1

    mu = mat.data[:].mean()

    N = sparse.csr_matrix(mat).copy()
    N.data[:] = 1
    S = sparse.csr_matrix.dot(N.T, N)
    S.data[:] = S.data[:] / (S.data[:] + l_reg2)
    S = S * compute_sparse_correlation_matrix(mat)

    # Train
    print("Train...")
    n_iter = 200
    cx = mat.tocoo()
    # S is fixed during training, so the ranking of similar items is computed
    # at most once per item instead of once per rating per pass.
    neighbours = {}
    for it in range(n_iter):
        for u, i in zip(cx.row, cx.col):
            # Rk_iu = Nk_iu = bi_index[u]
            N_u = bi_index[u]
            if i not in neighbours:
                # Flatten before slicing: S[i, ].toarray() has shape (1, n),
                # so [:k] on the 2-D array would cut the length-1 axis and
                # keep all n items instead of the k most similar ones.
                neighbours[i] = np.flip(
                    np.argsort(S[i, ].toarray().ravel()))[:k]
            Rk_iu = Nk_iu = neighbours[i]
            e_ui = compute_e_ui(mat, u, i, mu, bu, bi, Rk_iu, wij, Nk_iu, cij,
                                baseline_bu, baseline_bi, qi, pu, N_u, yj)

            bu[u] += gamma1 * (e_ui - l_reg6 * bu[u])
            bi[0, i] += gamma1 * (e_ui - l_reg6 * bi[0, i])
            qi[i] += gamma2 * (e_ui * (pu[u] + 1 / sqrt(len(N_u)) * yj[N_u].sum(0)) - l_reg7 * qi[i])
            pu[u] += gamma2 * (e_ui * qi[i] - l_reg7 * pu[u])
            yj[N_u] += gamma2 * (e_ui * 1 / sqrt(len(N_u)) * qi[i] - l_reg7 * yj[N_u])
            buj = mu + baseline_bu[u] + baseline_bi[0, Rk_iu]
            wij[i][Rk_iu] += gamma3 * (1 / sqrt(len(Rk_iu)) * e_ui * (mat[u, Rk_iu].toarray().ravel() - buj) - l_reg8 * wij[i][Rk_iu])
            cij[i][Nk_iu] += gamma3 * (1 / sqrt(len(Nk_iu)) * e_ui - l_reg8 * cij[i][Nk_iu])
        gamma1 *= 0.9
        gamma2 *= 0.9
        gamma3 *= 0.9

        if it % 10 == 0:
            # "\\ " is the same two characters the original "\ " produced,
            # written without the invalid escape sequence.
            print(it, "\\ ", n_iter)
            print("compute loss...")
            # NOTE(review): Rk_iu, Nk_iu and N_u are the values left over from
            # the last rating of the pass above; confirm that compute_loss is
            # meant to receive them.
            print(
                compute_loss(mat, mu, bu, bi, Rk_iu, wij, Nk_iu, cij,
                             baseline_bu, baseline_bi, qi, pu, N_u, yj,
                             l_reg6=l_reg6, l_reg7=l_reg7, l_reg8=l_reg8))

    return bu, bi, qi, pu, yj, wij, cij
def generate_episode(self, environment, e, desired_return, desired_horizon, testing):
    """Play one episode conditioned on a return/horizon command and record it.

    Args:
        environment: environment id passed to ``gym.make``.
        e: not used by this method; kept for interface compatibility.
        desired_return: initial target return; reduced by every raw reward.
        desired_horizon: initial target horizon; reduced by one per step and
            never allowed below 1.
        testing: when true, actions come from ``self.get_greedy_action`` and
            are not recorded; otherwise from ``self.get_action``.

    Returns:
        The sum of the raw (unclipped) rewards collected during the episode.
    """
    env = gym.make(environment)
    done = False
    dead = False
    states = []
    actions = []
    rewards = []
    score, start_life = 0, 5

    observe = env.reset()
    # Take a random number (1-30) of action-1 steps before the episode starts
    # so that the starting frame varies.
    for _ in range(random.randint(1, 30)):
        observe, _, _, _ = env.step(1)

    # The initial history is the first processed frame repeated four times.
    state = utils.pre_processing(observe)
    history = np.stack((state, state, state, state), axis=2)
    history = np.reshape([history], (1, 84, 84, 4))

    while not done:
        states.append(history)

        command = np.asarray([
            desired_return * self.return_scale,
            desired_horizon * self.horizon_scale
        ])
        command = np.reshape(command, [1, len(command)])

        if not testing:
            action = self.get_action(history, command)
            actions.append(action)
        else:
            action = self.get_greedy_action(history, command)

        # Map the policy's action index to the environment's action id.
        if action == 0:
            real_action = 1
        elif action == 1:
            real_action = 2
        else:
            real_action = 3

        next_observe, reward, done, info = env.step(real_action)

        # Pre-process the frame just returned by the environment.  The
        # previous code pre-processed the stale `observe` from before the
        # loop, so the history never reflected any new frame.
        next_state = utils.pre_processing(next_observe)
        next_state = np.reshape([next_state], (1, 84, 84, 1))
        next_history = np.append(next_state, history[:, :, :, :3], axis=3)

        clipped_reward = np.clip(reward, -1, 1)
        rewards.append(clipped_reward)

        score += reward

        if start_life > info['ale.lives']:
            dead = True
            start_life = info['ale.lives']

        # On the step where a life was lost the history is not advanced.
        if dead:
            dead = False
        else:
            history = next_history

        desired_return -= reward  # Line 8 Algorithm 2
        desired_horizon -= 1  # Line 9 Algorithm 2
        desired_horizon = np.maximum(desired_horizon, 1)

    # NOTE(review): in testing mode `actions` is empty while `states` and
    # `rewards` are not -- confirm that add_sample should still be called.
    self.memory.add_sample(states, actions, rewards)
    self.testing_rewards.append(score)

    if testing:
        print('Querying the model ...')
        print('Testing score: {}'.format(score))

    return score
make_sentences_vectors, make_similarity_matrix, apply_pagerank, ask_top_n_sentences_to_extract, extract_sentences
# NOTE(review): the line above is the tail of an import statement whose
# beginning lies outside this chunk; it is kept verbatim.

# Show every column and never wrap or truncate when printing DataFrames.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# None means "no limit".  The former value -1 has been deprecated since
# pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

dataset_path = Path.cwd() / "data" / "Reviews.csv"

if __name__ == '__main__':
    # Load the first 100 reviews and drop duplicated texts and incomplete rows.
    dataset = pd.read_csv(dataset_path, nrows=100)
    dataset.drop_duplicates(subset=['Text'], inplace=True)
    dataset.dropna(axis=0, inplace=True)

    sentences_list = split_in_sentences(dataset['Text'])
    sentences_list = remove_html_tag(sentences_list)

    pre_processed_sentences = pre_processing(sentences_list)

    # The raw answer is passed to get_word_embeddings as-is; the helpers
    # below receive it converted to int.
    embedding_dimensionality = ask_embedding_dim()
    embeddings = get_word_embeddings(embedding_dimensionality)
    embedding_dim = int(embedding_dimensionality)

    sents_vects = make_sentences_vectors(pre_processed_sentences, embeddings,
                                         embedding_dim)

    similarity_matrix = make_similarity_matrix(sentences_list, sents_vects,
                                               embedding_dim)

    pagerank_scores = apply_pagerank(similarity_matrix)

    number_sentences_to_extract = ask_top_n_sentences_to_extract()

    for ex_sent in extract_sentences(number_sentences_to_extract,
                                     sentences_list, pagerank_scores):
        print(ex_sent, "\n")
# Resume-and-train script for the DQN agent.  Hyper-parameters such as
# memory_size, screen_width, image_size, n_iter, init_epsilon, final_epsilon,
# batch_size and gamma are expected to be defined earlier in the module.
saved_path = 'trained_models'

n_action = 2

torch.manual_seed(123)

# Iteration to resume from; when positive, the matching checkpoint is loaded.
N = 45000
estimator = DQN(n_action)
if N > 0:
    estimator.model = torch.load(f"{saved_path}/model_{N}.pth")

memory = deque(maxlen=memory_size)

env = FlappyBird()
image, reward, is_done = env.next_step(0)
# Crop the frame to screen_width x base_y before resizing.
image = pre_processing(image[:screen_width, :int(env.base_y)], image_size,
                       image_size)
image = torch.from_numpy(image)
# The initial state is the first frame repeated four times, plus a leading
# batch axis.
state = torch.cat(tuple(image for _ in range(4)))[None, :, :, :]

for iter in tqdm(range(N, n_iter), initial=N, total=n_iter):
    # Linearly interpolate epsilon from init_epsilon towards final_epsilon.
    epsilon = final_epsilon + (n_iter - iter) * (init_epsilon -
                                                 final_epsilon) / n_iter
    policy = gen_epsilon_greedy_policy(estimator, epsilon, n_action)
    action = policy(state)

    next_image, reward, is_done = env.next_step(action)
    next_image = pre_processing(
        next_image[:screen_width, :int(env.base_y)], image_size, image_size)
    next_image = torch.from_numpy(next_image)
    # Drop the oldest frame of the stack and append the newest one.
    next_state = torch.cat((state[0, 1:, :, :], next_image))[None, :, :, :]

    memory.append([state, action, next_state, reward, is_done])
    loss = estimator.replay(memory, batch_size, gamma)

    # Advance to the new state.  Without this every action was chosen from,
    # and every stored transition started at, the initial frame stack.
    state = next_state
def run_loop(rank, params, shared_model, shared_optimizer, count, lock):
    """Worker loop of one training process: play episodes and train at episode end.

    Args:
        rank: index of this worker; used in the process title and to pick a
            GPU id from ``params.gpu_ids_train`` (index taken modulo its length).
        params: configuration object; this function reads ``gpu_ids_train``
            and ``episode`` and forwards the object to ``agent.training``.
        shared_model: model the local agent synchronises from at the start of
            every episode; also forwarded to ``agent.training``.
        shared_optimizer: optimizer forwarded to ``agent.training``.
        count: shared counter exposing ``.value``; incremented once each time
            an episode ends.
        lock: context manager held while ``count`` is incremented.
    """
    ptitle('Training Process: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    env = Env(False, 1, down_period=2)

    # model = A3C()
    model = A3C_LSTM()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    agent = run_agent(model, gpu_id)

    episode = 0
    # The comparison is inclusive, so the loop also runs when
    # episode == params.episode.
    while episode <= params.episode:
        env.reset()
        agent.done = False
        num_steps = 0
        agent.synchronize(shared_model)
        nAction = 0
        nMove = 0

        while True:
            num_steps += 1
            # random_action = random.randrange(0, 5)
            # The string literal below is an earlier version of the step
            # logic that has been disabled; it is never executed.
            '''
            if nAction < 9:
                obs = pre_processing(env.map, env._get_curr_block_pos())
                action, value, log_prob, entropy = agent.action_train(obs)
                rew, is_new_block = env.step(action)  # what is the 'is_new_block'?
                nAction += 1
                if nAction != 9:
                    rew = np.clip(rew, 0.0, 64.0)
                    agent.put_reward(rew, value, log_prob, entropy)
                else:
                    rew, is_new_block = env.step(100000)  # falling
                    rew = np.clip(rew, 0.0, 64.0)
                    agent.put_reward(rew, value, log_prob, entropy)
                    nAction = 0
            '''
            obs = pre_processing(env.shadow_map, env._get_curr_block_pos())  # env.map
            action, value, log_prob, entropy = agent.action_train(obs)
            # Policy action 5 is remapped to 100000 before being sent to the
            # env (presumably the "falling" action, judging by the comment in
            # the disabled block above -- TODO confirm).
            if action == 5:
                action = 100000
            rew, shadow_rew, done, putting, height = env.step(action)  # what is the 'is_new_block'?
            rew = np.clip(rew, -1.0, 64.0)
            # Reward shaping: a zero-reward step whose action is neither 3 nor
            # 4 counts as a "move"; the first five such moves since the last
            # placement earn 0.2.
            # NOTE(review): the nesting of the two branches below was
            # reconstructed from flattened source -- verify against the
            # original file.
            if rew == 0.0 and action != 3 and action != 4:
                nMove += 1
                if nMove < 6:
                    rew = 0.2
            # When the env reports `putting`, the reward is replaced by a
            # penalty proportional to `height` and the move counter restarts.
            if putting:
                rew = - (height / 20.0)
                nMove = 0
            agent.put_reward(rew, value, log_prob, entropy)

            # pdb.set_trace()
            if env.is_game_end():
                episode += 1
                agent.done = True

            # if num_steps % params.num_steps == 0:
            # if env.is_game_end() or rew >= 1.0:
            # Training happens only at the end of an episode.
            if env.is_game_end():
                next_obs = pre_processing(env.map, env._get_curr_block_pos())
                agent.training(next_obs, shared_model, shared_optimizer, params)
                with lock:  # synchronize value of all processes
                    count.value += 1

            if env.is_game_end():
                break