    def __init__(self,
                 init_joints_idxs,
                 trans_estimation=None,
                 reduction='sum',
                 data_weight=1.0,
                 depth_loss_weight=1e2,
                 dtype=torch.float32,
                 **kwargs):
        super(SMPLifyCameraInitLoss, self).__init__()
        self.dtype = dtype

        if trans_estimation is not None:
            self.register_buffer(
                'trans_estimation',
                utils.to_tensor(trans_estimation, dtype=dtype))
        else:
            self.trans_estimation = trans_estimation

        self.register_buffer('data_weight',
                             torch.tensor(data_weight, dtype=dtype))
        self.register_buffer(
            'init_joints_idxs',
            utils.to_tensor(init_joints_idxs, dtype=torch.long))
        self.register_buffer('depth_loss_weight',
                             torch.tensor(depth_loss_weight, dtype=dtype))
Example n. 2
def rollout(self, is_render=False, is_action_noise=False, store_transition=True):
    total_reward = 0.0
    state = self.env.reset()
    state = utils.to_tensor(state).unsqueeze(0)
    if self.args.is_cuda:
        state = state.cuda()
    done = False

    while not done:
        if store_transition:
            self.num_frames += 1
            self.gen_frames += 1
        if is_render:
            self.env.render()
        action = self.pop.forward(state)
        action = action.clamp(-1, 1)  # clamp is not in-place; keep the clipped tensor
        action = utils.to_numpy(action.cpu())
        if is_action_noise:
            action += self.ounoise.noise()
        next_state, reward, done, info = self.env.step(
            action.flatten())  # Simulate one step in the environment
        next_state = utils.to_tensor(next_state).unsqueeze(0)
        if self.args.is_cuda:
            next_state = next_state.cuda()
        total_reward += reward

        if store_transition:
            self.add_experience(state, action, next_state, reward, done)
        state = next_state
    if store_transition:
        self.num_games += 1
    return total_reward
Example n. 3
    def update_policy(self):
        state_batch, action_batch, reward_batch, next_state_batch, terminal_batch = self.memory.sample_batch(self.batch_size)
        state = to_tensor(np.array(state_batch), device=device)
        action = to_tensor(np.array(action_batch), device=device)
        next_state = to_tensor(np.array(next_state_batch), device=device)

        # compute target Q value
        next_q_value = self.critic_target([next_state, self.actor_target(next_state)])
        target_q_value = to_tensor(reward_batch, device=device) \
                         + self.discount * to_tensor((1 - terminal_batch.astype(np.float32)), device=device) * next_q_value

        # Critic and Actor update
        self.critic.zero_grad()
        with torch.set_grad_enabled(True):
            q_values = self.critic([state, action])
            critic_loss = criterion(q_values, target_q_value.detach())
            critic_loss.backward()
            self.critic_optim.step()

        self.actor.zero_grad()
        with torch.set_grad_enabled(True):
            policy_loss = -self.critic([state.detach(), self.actor(state)]).mean()
            policy_loss.backward()
            self.actor_optim.step()

        # Target update
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

        return to_numpy(-policy_loss), to_numpy(critic_loss), to_numpy(q_values.mean())
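`soft_update` is called here but not defined in the snippet; in DDPG-style code it is conventionally a Polyak average of the online parameters into the target network. A minimal sketch under that assumption:

import torch

def soft_update(target: torch.nn.Module, source: torch.nn.Module, tau: float):
    """theta_target <- tau * theta_source + (1 - tau) * theta_target (sketch)."""
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.data.mul_(1.0 - tau).add_(tau * s_param.data)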
Example n. 4
    def __init__(self,
                 C=10,
                 H=128,
                 W=128,
                 sampling='S',
                 dtype=torch.cuda.FloatTensor):
        super(OperatorBatch, self).__init__()
        self.C, self.H, self.W, self.dtype = C, H, W, dtype

        # subspace dimension reduction
        pca_dic_data = scio.loadmat(
            './matfile/pytorch_Ramp2D_200reps_guido_trainingset.mat')

        self.V = to_tensor(pca_dic_data['V'])
        self.V_conj = to_tensor(pca_dic_data['V_conj'])
        if dtype is not None:
            self.V, self.V_conj = self.V.type(dtype), self.V_conj.type(dtype)
        assert self.V.shape[1] == self.C, 'Channels Error!'

        # init mask
        mask_data = scio.loadmat('./matfile/train_dataXS11_s.mat')
        if sampling == 'C':
            mask = mask_data['samplemask_s']
        if sampling == 'S':
            mask = mask_data['samplemask_s']
        self.mask = np.squeeze(np.asarray(mask - 1))
        print('mask.shape', self.mask.shape)
Example n. 5
def critic_loss(model, model_prime, data_batch, args):
    obs, options, rewards, next_obs, dones = data_batch
    batch_idx = torch.arange(len(options)).long()
    options = torch.LongTensor(options).to(model.device)
    rewards = torch.FloatTensor(rewards).to(model.device)
    masks = 1 - torch.FloatTensor(dones).to(model.device)

    # The loss is the TD loss of Q and the update target, so we need to calculate Q
    states = model.get_state(to_tensor(obs)).squeeze(0)
    Q = model.get_Q(states)

    # the update target contains Q_next, but for stable learning we use prime network for this
    next_states_prime = model_prime.get_state(to_tensor(next_obs)).squeeze(0)
    next_Q_prime = model_prime.get_Q(next_states_prime)  # detach?

    # Additionally, we need the beta probabilities of the next state
    next_states = model.get_state(to_tensor(next_obs)).squeeze(0)
    next_termination_probs = model.get_terminations(next_states).detach()
    next_options_term_prob = next_termination_probs[batch_idx, options]


    # Now we can calculate the update target gt
    gt = rewards + masks * args.gamma * \
        ((1 - next_options_term_prob) * next_Q_prime[batch_idx, options] + next_options_term_prob  * next_Q_prime.max(dim=-1)[0])

    # to update Q we want to use the actual network, not the prime
    td_err = (Q[batch_idx, options] - gt.detach()).pow(2).mul(0.5).mean()
    return td_err
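The `Q[batch_idx, options]` and `next_Q_prime[batch_idx, options]` expressions use advanced indexing to pick one Q-value per sampled option. A toy illustration with made-up tensors:

import torch

Q = torch.arange(12, dtype=torch.float32).view(4, 3)  # (batch=4, n_options=3)
batch_idx = torch.arange(4)
options = torch.tensor([0, 2, 1, 2])
print(Q[batch_idx, options])  # tensor([ 0.,  5.,  7., 11.]) -- one value per sample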
Example n. 6
    def test_goal2(self):
        """
        This method tests goal 2 i.e. whether f(x, z) is monotonically increasing in z for any
        given x.
        """

        x_goal = to_tensor(
            self.x_test[np.random.choice(self.x_test.shape[0], 10)],
            self.device)
        # x_goal = to_tensor(
        #    sample_random(self.x_range_test, 10, self.x_space_size), self.device,
        # )
        z_goal = torch.sort(
            to_tensor(sample_random(self.z_range, 15, self.z_space_size),
                      self.device),
            dim=0,
        )[0]
        # Get the z-sample predictions for every test data point.
        y_predict_mat = self.model.get_z_sample_preds(
            x_pt=x_goal,
            z_samples=z_goal,
        )

        y_predict_mat_d = y_predict_mat.cpu().detach().numpy()

        ascending = np.all(y_predict_mat_d[:-1] <= y_predict_mat_d[1:])

        return ascending
Example n. 7
def actor_loss(obs, option, logp, entropy, reward, done, next_obs, model,
               model_prime, args):
    state = model.get_state(to_tensor(obs))
    next_state = model.get_state(to_tensor(next_obs))
    next_state_prime = model_prime.get_state(to_tensor(next_obs))

    option_term_prob = model.get_terminations(state)[:, option]
    next_option_term_prob = model.get_terminations(
        next_state)[:, option].detach()

    Q = model.get_Q(state).detach().squeeze()
    next_Q_prime = model_prime.get_Q(next_state_prime).detach().squeeze()

    # Target update gt
    gt = reward + (1 - done) * args.gamma * \
        ((1 - next_option_term_prob) * next_Q_prime[option] + next_option_term_prob  * next_Q_prime.max(dim=-1)[0])

    # The termination loss
    termination_loss = option_term_prob * (Q[option].detach() - Q.max(
        dim=-1)[0].detach() + args.termination_reg) * (1 - done)

    # actor-critic policy gradient with entropy regularization
    policy_loss = -logp * (gt.detach() -
                           Q[option]) - args.entropy_reg * entropy
    actor_loss = termination_loss + policy_loss
    return actor_loss
Example n. 8
def synthesize(model, gan_type, code):
    """Synthesizes an image with the given code."""
    if gan_type == 'pggan':
        image = model(to_tensor(code))['image']
    elif gan_type in ['stylegan', 'stylegan2']:
        image = model.synthesis(to_tensor(code))['image']
    else:
        raise ValueError('Unsupported gan_type: {}'.format(gan_type))
    image = postprocess(image)[0]
    return image
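`postprocess` is not shown in this snippet; in GAN inference code it commonly maps the NCHW float output from [-1, 1] to an NHWC uint8 array. A sketch assuming that behavior (it may differ from the actual helper):

import numpy as np
import torch

def postprocess(images: torch.Tensor) -> np.ndarray:
    """Map NCHW float images in [-1, 1] to NHWC uint8 arrays (assumed behavior)."""
    images = images.detach().cpu().clamp(-1.0, 1.0)
    images = (images + 1.0) * 255.0 / 2.0
    return images.permute(0, 2, 3, 1).to(torch.uint8).numpy()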
Example n. 9
    def __init__(self, states: np.ndarray, actions: np.ndarray, old_log_probs: np.ndarray, rewards: np.ndarray):
        self.states = states
        self.actions = actions
        self.old_log_probs = old_log_probs
        self.rewards = rewards

        self.states_t = to_tensor(self.states)
        self.actions_t = to_tensor(self.actions)
        self.old_log_probs_t = to_tensor(self.old_log_probs)
        self.rewards_t = to_tensor(self.rewards)
Example n. 10
 def train(self):
     self.epi_rewards = []
     self.epi_losses = []
     
     for epi in range(self.conf['num_episode']):  # episodes
         print("--- episode %s ---"%(epi))
         epi_reward = 0.0
         state = self.env.reset()                    # [2*num_plant, 1]
         state_ts = to_tensor(state).unsqueeze(0)    # [1, 2*num_plant, 1] # unsqueeze(0) on 'state' is necessary for the replay memory
         dataPlot = dataPlotter_v2(self.conf)
         
         t = P.t_start
         while t < P.t_end:        # one episode (simulation)
             t_next_plot = t + P.t_plot
             while t < t_next_plot:  # data plot period
                 if round(t,3)*1000 % 10 == 0: # every 10 ms, schedule update
                     
                     action = self.agent.select_action(state_ts)     # action type: tensor [1X1]
                     next_state, reward, done, info = self.env.step(action.item(), t) # shape of next_state : [(2*num_plant) X 1]
                     epi_reward += reward
                     # self.env.step(0, t)    # test for env.step() function
                     
                     if done: 
                         next_state_ts = None
                         break
                     else:
                         next_state_ts = to_tensor(next_state).unsqueeze(0)  # [1, 2*num_plant, 1]
                     reward_ts = to_tensor(np.asarray(reward).reshape(-1))   # its size should be [1] for the replay buffer
                     
                     # memory push
                     self.memory.push_transition(state_ts, action, next_state_ts, reward_ts)
                     
                     state_ts = next_state_ts
                     
                     # model optimization step
                     currLoss = self.agent.optimization_model(self.memory)
                 else:   # every 1 ms
                     self.env.update_plant_state(t) # plant status update
                 t = t + P.Ts
             # self.update_dataPlot(dataPlot, t) # update data plot
             if next_state_ts is None:   # episode terminates
                 dataPlot.close()
                 break
         
         # episode done
         self.epi_rewards.append(epi_reward)
         self.epi_losses.append(currLoss)
         # The target network has its weights kept frozen most of the time
         if epi % self.conf['target_update'] == 0:
             self.agent.scheduler_target.load_state_dict(self.agent.scheduler.state_dict())
 
     # Save state_dict
     torch.save(self.agent.scheduler.state_dict(), MODEL_PATH)
     self.save_log()
     self.load_log()
Example n. 11
    def __init__(self, z_samples, datasets, device):

        self.less_than_ratios = z_samples.less_than_ratios

        self.y_test = datasets.y_test
        self.x_test = datasets.x_test

        self.y_test_pt = to_tensor(datasets.y_test, device)
        self.x_test_pt = to_tensor(datasets.x_test, device)
        self.x_orderings_pt = [
            torch.sort(self.x_test_pt[:, i])[1]
            for i in range(self.x_test_pt.shape[1])
        ]
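`torch.sort(...)[1]` keeps only the indices half of the (values, indices) pair that torch.sort returns, i.e. the permutation that orders each column. For example:

import torch

col = torch.tensor([0.3, 0.1, 0.2])
values, indices = torch.sort(col)
print(values)   # tensor([0.1000, 0.2000, 0.3000])
print(indices)  # tensor([1, 2, 0]) -- positions that order the column ascending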
Example n. 12
    def _compute_advantage_estimates(self, trajectories):
        states = []
        actions = []
        returns = []
        log_probs = []
        advantages = []

        for traj in trajectories:
            advantage = 0
            return_ = traj[-1].value

            for i in reversed(range(len(traj) - 1)):
                exp = traj[i]
                reward = np.asarray([exp.reward])
                done = np.asarray([exp.done])
                return_ = reward + self._gamma * (1 - done) * return_
                next_value = traj[i + 1].value

                # Generalized Advantage Estimation
                # δᵗ = rᵗ + γ * V(sᵗ⁺¹) − V(sᵗ)
                # Â(∞) = ∑ γˡ * δᵗ⁺ˡ
                delta = reward + self._gamma * (1 -
                                                done) * next_value - exp.value
                advantage = advantage * (self._gae_lambda *
                                         self._gamma) * (1 - done) + delta

                states.append(exp.state)
                actions.append(exp.action)
                returns.append(return_)
                log_probs.append(exp.log_prob)
                advantages.append(advantage)

        # Required only for debugging
        states = list(reversed(states))
        actions = list(reversed(actions))
        returns = list(reversed(returns))
        log_probs = list(reversed(log_probs))
        advantages = list(reversed(advantages))

        states = to_tensor(states)
        actions = to_tensor(actions)
        returns = to_tensor(returns)
        log_probs = to_tensor(log_probs)
        advantages = to_tensor(advantages)

        # Normalize the advantages
        advantages = (advantages -
                      torch.mean(advantages)) / torch.std(advantages)

        return states, actions, returns, log_probs, advantages
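The backward loop implements the usual GAE recursion, δₜ = rₜ + γ(1 − dₜ)V(sₜ₊₁) − V(sₜ) followed by Âₜ = δₜ + γλ(1 − dₜ)Âₜ₊₁. A self-contained toy version of that recursion, with illustrative values only:

import numpy as np

gamma, lam = 0.99, 0.95
rewards = np.array([1.0, 0.0, 1.0])
values = np.array([0.5, 0.4, 0.3, 0.0])  # V(s_0)..V(s_3); the last entry bootstraps
dones = np.array([0.0, 0.0, 1.0])

advantage, advantages = 0.0, []
for t in reversed(range(len(rewards))):
    delta = rewards[t] + gamma * (1 - dones[t]) * values[t + 1] - values[t]
    advantage = delta + gamma * lam * (1 - dones[t]) * advantage
    advantages.insert(0, advantage)
print(advantages)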
Example n. 13
def main():
	global words
	global word2idx
	test_file = restaurant_test_file
	
	# Load Model
	path = "Saved_Models/PD8/10epoch_5"
	model = torch.load(path)
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	model = model.to(device)

	print("\nTest Set :")
	#test_vocab = file_stats(test_file, test_file=True)

	with open("test_pd_data.pickle", "rb") as f:
		test_vocab = pickle.load(f)
	file_stats(test_vocab, return_vocab=False)

	words, word2idx = create_word_vocab(test_vocab, words, word2idx)
	weights_matrix = create_weights(words, word2idx)

	test_vocab  = to_tensor(test_vocab, word2idx)
	test_set  = vocabDataset(test_vocab)
	test_batch  = DataLoader(test_set, batch_size=32, shuffle=True)

	acc, cm = model.test(test_batch, weights_matrix)
	print(cm)	 
	print(f'Test Acc: {acc*100:.2f}%')
Example n. 14
def optimize_style(source_image, model, model_name, gan_type, dlatent,
                   iteration):
    resolution = parse_resolution(model_name)

    img = image_reader(source_image, resize=resolution)  # (1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # image used for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / resolution,
                                   mode="bilinear")  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    w = to_tensor(dlatent).requires_grad_()
    optimizer = optim.Adam([w], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    for i in progress_bar(range(iteration)):
        optimizer.zero_grad()
        synth_img = forward(model, gan_type, w)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                   perceptual_net, img_p,
                                                   MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()
        optimizer.step()

    return w.detach().cpu().numpy()
Example n. 15
    def forward(self, x):
        if isinstance(x, tuple):
            im, gt = x
        else:
            im = x
            gt = None

        assert im.size(0) == 1, 'only single element batches supported'
        feats = self.features(to_tensor(im))
        roi_boxes, rpn_prob, rpn_loss = self.rpn(im, feats, gt)

        if gt is not None:
            # append gt boxes and sample fg / bg boxes
            # proposal_target-layer.py
            all_rois, frcnn_labels, roi_boxes, frcnn_bbox_targets = self.frcnn_targets(
                roi_boxes, im, gt)
        # r-cnn
        regions = self.roi_pooling(feats, roi_boxes)
        scores, bbox_pred = self.classifier(regions)
        boxes = self.bbox_reg(roi_boxes, bbox_pred, im)
        # apply cls + bbox reg loss here
        if gt is not None:
            frcnn_loss = self.frcnn_loss(scores, bbox_pred, frcnn_labels,
                                         frcnn_bbox_targets)
            loss = frcnn_loss + rpn_loss
            return loss, scores, boxes
        return scores, boxes
Example n. 16
    def __init__(self, args):
        super().__init__()
        self.lr = args.learning_rate
        self.z_dim = 100
        self.eval_interval = 100
        self.eval_size = 16
        self.data_dir = args.data_dir
        self.device = "cpu" if args.no_cuda else "cuda"
        # evaluate the generator on fixed noise during training
        self.fixed_z = to_tensor(
            np.random.normal(0, 1, size=(self.eval_size, self.z_dim)),
            False).to(self.device)

        self.label_smooth = args.label_smooth
        self.G_loss = []
        self.D_loss = []

        self.path = args.path
        self.batch_size = args.batch_size
        self.checkpoint_path = os.path.join(self.path, "checkpoint")
        self.images_path = os.path.join(self.path, "images")
        self.train_images_path = os.path.join(self.images_path, "train_images")
        self.val_images_path = os.path.join(self.images_path, "val_images")
        make_dirs(self.checkpoint_path, self.train_images_path,
                  self.val_images_path)
Example n. 17
 def predict(self, x):
     #self.load_model('nn_model.chp')
     x = x.to_numpy()
     x_tensor = to_tensor(x)
     ypred = self.model(x_tensor)
     ypred = to_numpy(ypred)
     return ypred.reshape(-1)
Example n. 18
def run_cmu_test_instance(model, HouseImg, HouseData, idx, sep, use_cuda=True):
    NofNodes = 30
    PT1 = np.copy(HouseData[idx]).astype(np.float32)
    PT2 = np.copy(HouseData[idx + sep]).astype(np.float32)
    gTruth = np.random.permutation(NofNodes)
    PT1 = PT1[gTruth, :]

    pt1 = pc_normalize(PT1)
    pt2 = pc_normalize(PT2)
    pt1 = rotate_pt(pt1)

    nn_idx1 = knn(pt1, 8)
    nn_idx2 = knn(pt2, 8)
    mask = np.asarray([1.0] * pt1.shape[0]).astype(np.float32)

    with torch.no_grad():
        pt1, pt2, nn_idx1, nn_idx2, mask = to_tensor(
            [pt1, pt2, nn_idx1, nn_idx2, mask], use_cuda)
        feature1 = model(pt1.permute(0, 2, 1), nn_idx1.contiguous(), mask)
        feature2 = model(pt2.permute(0, 2, 1), nn_idx2.contiguous(), mask)
        sim = torch.bmm(feature1.permute(0, 2, 1), feature2)

    cost = -sim[0].cpu().numpy()
    row_ind, col_ind = linear_sum_assignment(cost)
    acc = ComputeAccuracyPas(col_ind, gTruth, NofNodes)
    return acc
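Negating the similarity matrix turns the maximum-similarity matching into the minimum-cost problem that scipy's linear_sum_assignment solves. A tiny standalone example:

import numpy as np
from scipy.optimize import linear_sum_assignment

sim = np.array([[0.9, 0.1],
                [0.2, 0.8]])
row_ind, col_ind = linear_sum_assignment(-sim)  # minimize -sim == maximize sim
print(col_ind)  # [0 1] -- each row matched to its most similar column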
Example n. 19
    def quantify_model(self):
        modules = list(self.model.named_modules())
        for i, ind in enumerate(self.compressible_idx):
            layer_name = modules[ind][0]
            layer = modules[ind][1]
            for name, param in layer.named_parameters():
                param_name = '{}.{}_{}bit'.format(layer_name, name,
                                                  self.strategy_bits[i])
                # self.log_writer.write(param_name + '\n')
                # self.log_writer.flush()

                if not self.compressed_params.__contains__(
                        param_name):  # quantify parameters
                    num = 2**self.strategy_bits[i]
                    # if not type(num) == int:
                    # print("num is not integer")
                    # print(type(num))

                    if self.quantify_kind == 'linear':
                        min = param.data.min()
                        unit = (param.data.max() - min) / num
                        index = (param.data - min) / unit
                        index = index.int()
                        index[index == num] -= 1
                    else:
                        tmp_param = param.data.view(-1)
                        if num >= len(tmp_param):  # do nothing
                            index = to_tensor(np.array(range(
                                len(tmp_param)))).int()
                            index = index.view(param.data.shape)
                        else:  # k-means
                            estimator = KMeans(n_clusters=num)
                            estimator.fit(tmp_param.reshape(-1, 1).cpu())
                            index = torch.Tensor(estimator.labels_).int()
                            index = index.view(param.data.shape)

                    center = [
                        param.data[index == i].mean().item()
                        for i in range(num)
                    ]
                    self.compressed_params[param_name +
                                           'c'] = to_tensor(center)
                    self.compressed_params[param_name + 'i'] = index

                idx = self.compressed_params[param_name + 'i'].long()
                param.data.copy_(self.compressed_params[param_name + 'c'][idx])
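The final two lines rebuild each weight tensor from its per-layer codebook (key suffix 'c', one float per cluster) and index tensor (suffix 'i', one cluster id per weight). A toy version of that lookup:

import torch

center = torch.tensor([-0.5, 0.0, 0.5])   # toy codebook of cluster centers
index = torch.tensor([[0, 2], [1, 1]])    # cluster id assigned to each weight
weights = center[index.long()]            # tensor([[-0.5, 0.5], [0.0, 0.0]])
print(weights)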
Example n. 20
    def episode_init(self, state):
        state = utils.to_tensor(state).view((1, ) + state.shape)

        action = self.policy(state)
        self.next_action = action
        self.next_state = state

        return action.cpu().numpy()
Example n. 21
    def __getitem__(self, index):
        """
        returns one Phoneme (1, 1, 40, #f
        """

        article_torch = utils.to_tensor((X_data[index]))

        dict_ = {'article': article_torch}

        return dict_
Example n. 22
    def update_end(self, reward):
        current_action = self.next_action
        current_state = self.next_state

        current_state_value = self.critic.estimate_state(current_state)
        td_target = utils.to_tensor([[float(reward)]])
        td_error = td_target - current_state_value

        self.actor.update(current_state, current_action, td_error)
        self.critic.update(current_state_value, td_target)
Example n. 23
 def select_action(self, state, apply_noise=False):
     self.eval()
     action = to_numpy(self.actor(to_tensor(np.array([state]), device=device))).squeeze(0)
     self.train()
     if apply_noise:
         action = action + self.noise.sample()
     action = np.clip(action, -1., 1.)
     self.last_action = action
     #print('action:', action, 'output:', action.argmax())
     return action.argmax() if self.discrete else action
Example n. 24
    def update(self, state, reward, done):

        state = utils.to_tensor(state).view((1, ) + state.shape)
        next_action = -1
        if not done:
            next_action = self.update_step(state, reward)
        if done:
            self.update_end(reward)

        return next_action
Example n. 25
def main():
    runs = 8
    # ds = RandomDataset()
    # ds = ImageDataset()
    ds = TextDataset()
    # ds = RecommendationDataset()
    k = ds.k()

    objs = [0] * 5
    times = [0] * 5
    names = [None] * 5
    for run in range(runs):
        step = ds.step(run)
        # M = torch.from_numpy(ds.generate(run)).float().to(device)
        M, E = ds.generate(run)
        M = utils.to_tensor(M)
        E = utils.to_tensor(E)
        # M = ds.generate(run)

        print(M.shape)

        algorithms: List[Optimizer] = [
            SPGD(M, k, step, True, 0.1),
            MultUpdate(M, k),
            BlockCoordinate(M, k, step, False),
            AO_ADMM(M, k),
            LeastSquares(M, k)
        ]
        for i, opt in enumerate(algorithms):
            print(opt.name())
            names[i] = opt.name()
            start_time = time.time()
            W, H = opt.optimize(E, ds.iterations())
            print()
            objs[i] += utils.objective(M, W, H, E)
            elapsed = time.time() - start_time
            times[i] += elapsed
            print("time", elapsed)
            ds.postprocess(W, H, run, opt.short_name())

    print(names)
    print(objs)
    print(times)
Example n. 26
    def __getitem__(self, idx):
        # image = utils.load_image(self.images[idx])
        image = self.images[idx]
        label = utils.to_tensor(self.labels[idx], torch.long)

        if self.transforms is not None:
            t_image = self.transforms(image)
            image = resize_transform(image)
        else:
            t_image = image  # fall back so t_image is always defined

        return t_image, label, np.array(image)
Example n. 27
    def predict_action(self, state):  # return an action
        action_values = self(state)
        actions_chosen = list()

        for idx in range(0, self.action_dim * 3, self.action_dim):
            probabilities = self.softmax_dim_0(action_values[idx:idx +
                                                             self.action_dim])
            dis = torch.distributions.Categorical(probs=probabilities)
            actions_chosen.append(dis.sample())

        return utils.to_tensor(actions_chosen).long()
Example n. 28
    def select_action(self, state):
        action_prob = to_numpy(self.actor(to_tensor(state.reshape(
            1, -1)))).squeeze(0)
        dice = stats.rv_discrete(
            values=(range(self.action_start, self.action_end + 1),
                    action_prob))
        action = dice.rvs(size=1)

        # print(action_prob)
        # print('select action: {}'.format(action[0]))
        return action[0]
Example n. 29
 def feature_extraction(self, x: List[List]):
     # Batch_iter sorts the sentences.
     sents_tensor = to_tensor(x, device=self.device) # (bs, max_seq_len)
     seq_lengths = torch.tensor([len(sent) for sent in x])
     masks = generate_sent_masks(sents_tensor, seq_lengths).to(self.device)
     x = self.embedding(sents_tensor)
     x = self.dropout(x)
     x = pack_padded_sequence(x, lengths=seq_lengths, batch_first=True)
     x, _ = self.lstm(x)
     x, _ = pad_packed_sequence(x, batch_first=True)
     return x, masks
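As the first comment notes, pack_padded_sequence with the default enforce_sorted=True expects the batch already sorted by decreasing length (guaranteed here by Batch_iter). When that is not the case, enforce_sorted=False lets PyTorch handle the permutation, as in this small sketch:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

x = torch.randn(3, 5, 8)                     # (batch, max_seq_len, emb_dim)
lengths = torch.tensor([5, 2, 4])            # not sorted by length
packed = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
lstm = torch.nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
out, _ = lstm(packed)
out, _ = pad_packed_sequence(out, batch_first=True)  # back to (3, 5, 16)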
Example n. 30
    def update_policy(self):
        # Sample batch
        state_batch, action_batch, reward_batch, \
        next_state_batch, terminal_batch = self.memory.sample_and_split(self.batch_size)

        # normalize the reward
        batch_mean_reward = np.mean(reward_batch)
        if self.moving_average is None:
            self.moving_average = batch_mean_reward
        else:
            self.moving_average += self.moving_alpha * (batch_mean_reward -
                                                        self.moving_average)
        reward_batch -= self.moving_average
        # if reward_batch.std() > 0:
        #     reward_batch /= reward_batch.std()

        # Prepare for the target q batch
        with torch.no_grad():
            next_q_values = self.critic_target([
                to_tensor(next_state_batch),
                self.actor_target(to_tensor(next_state_batch)),
            ])

        target_q_batch = to_tensor(reward_batch) + \
                         self.discount * to_tensor(terminal_batch.astype(np.float32)) * next_q_values

        # Critic update
        self.critic.zero_grad()

        q_batch = self.critic(
            [to_tensor(state_batch),
             to_tensor(action_batch)])

        value_loss = criterion(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()

        # Actor update
        self.actor.zero_grad()

        policy_loss = -self.critic(
            [to_tensor(state_batch),
             self.actor(to_tensor(state_batch))])

        policy_loss = policy_loss.mean()
        policy_loss.backward()
        self.actor_optim.step()

        # Target update
        self.soft_update(self.actor_target, self.actor)
        self.soft_update(self.critic_target, self.critic)
Example n. 31
    def __getitem__(self, idx):
        image = utils.load_image(self.images[idx])
        # image = self.images[idx]
        label = utils.to_tensor(self.labels[idx], torch.long)

        if self.transforms is not None:
            image = self.transforms(image)

        if self.output_name:
            return image, label, self.images[idx]

        return image, label
Example n. 32
def main():
	global words
	global word2idx
	print("ABSA ATAE-LSTM")
	#train_file = restaurant_train_file
	train_file = pd_data
	test_file = restaurant_test_file

	print("Overall Train Data :")
	vocab = file_stats(train_file, pd_data=True) # Return data without 'conflict' and print stats
	# Shuffle the data
	p = np.random.permutation(len(vocab))
	vocab = np.array(vocab)[p]

	#test_vocab = file_stats(test_file, pd_data=False)

	# Create word vocab and weights for embedding
	words, word2idx = create_word_vocab(vocab, words, word2idx)
	#words, word2idx = create_word_vocab(test_vocab, words, word2idx)

	#################################################################################
	######################### Train/ Val Split ######################################
	#################################################################################

	print("Splitting training data in 80/20 for train/valid")
	train_len = int(0.8 * len(vocab))
	valid_len = int(1 * len(vocab))
	train_vocab = vocab[:train_len]
	valid_vocab = vocab[train_len:valid_len]
	#test_vocab = vocab[valid_len:]

	"""
	with open("train_pd_data.pickle", "wb") as f:
		pickle.dump(train_vocab, f)
	with open("valid_pd_data.pickle", "wb") as f:
		pickle.dump(valid_vocab, f)
	with open("test_pd_data.pickle", "wb") as f:
		pickle.dump(test_vocab, f)"""

	print("\nTraining set :")
	file_stats(train_vocab, return_vocab=False)

	print("\nValidation set :")
	file_stats(valid_vocab, return_vocab=False)

	# Weight matrix

	weights_matrix = create_weights(words, word2idx)
	#print(weights_matrix.shape)

	#train_vocab = vocab
	#valid_vocab = test_vocab

	train_vocab = to_tensor(train_vocab, word2idx)
	valid_vocab = to_tensor(valid_vocab, word2idx)

	train_set = vocabDataset(train_vocab)
	valid_set = vocabDataset(valid_vocab)

	labels = []
	for data in train_set:
		labels.append(data['sentiment'])
	
	labels = torch.stack(labels)

	valid_batch = DataLoader(valid_set, batch_size=32, shuffle=True)

	#b_sampler = ATAE_BatchSampler(labels, batch_size=32)
	weights=np.array([1.5e-05,0.0002,1.8e-05])
	label_weights = weights[labels.numpy()]
	sampler = torch.utils.data.WeightedRandomSampler(weights= label_weights, num_samples=len(label_weights),replacement=True)
	train_batch = DataLoader(train_set, batch_size=32, shuffle=False, sampler=sampler,batch_sampler=None)
	#train_batch = DataLoader(train_set, batch_sampler=b_sampler)

	hidden_dim = 300
	output_dim = 3 # positive, negative, neutral
	dropout = 0.5

	model = ATAE_LSTM(weights_matrix, hidden_dim, output_dim, dropout, words, word2idx)
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	model = model.to(device)

	optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, weight_decay=1e-3)
	#optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr = 0.1, momentum=0.9, weight_decay=1e-5)
	
	##########################################################################################
	#####################  L2 only on weights   ##############################################
	##########################################################################################

	"""
	L2_params_id = [id(p) for p in model.Wp] + [id(p) for p in model.Wx] + [id(p) for p in model.fc.weight] + [id(p) for p in model.attn.weight]
	L2_params = [p for p in model.parameters() if id(p) in L2_params_id and p.requires_grad]
	base_params = [p for p in model.parameters() if id(p) not in L2_params_id and p.requires_grad]

	optimizer = optim.Adam([
			{'params': base_params},
			{'params': L2_params, 'weight_decay': 1e-3},
			], lr=0.01)
	"""

	weighted_loss = torch.tensor([1,1,1]).float()
	criterion = nn.CrossEntropyLoss(weight=weighted_loss.cuda())

	for epoch in range(100):		
		t_loss, t_acc, cm1 = model.train_(train_batch, optimizer, criterion)
		v_loss, v_acc, cm2 = model.eval_(valid_batch, criterion)

		log_value('loss/train_loss', t_loss, epoch)
		log_value('loss/valid_loss', v_loss, epoch)
		log_value('acc/train_acc', t_acc, epoch)
		log_value('acc/valid_acc', v_acc, epoch)

		print(cm1)
		print(cm2)
		print(f'Epoch: {epoch+1:02}, Train Loss: {t_loss:.8f}, Train Acc: {t_acc*100:.2f}%, \
			Val. Loss: {v_loss:.8f}, Val. Acc: {v_acc*100:.2f}%')

		path = "Saved_Models/PD8/10epoch_" + str(epoch)
		torch.save(model, path)


	path = "Saved_Models/pd_10"
	torch.save(model, path)
	print("Training done")
	print("Model saved at ", path)