Example 1
def RewardSave(request, vendor_id):
	vendor = get_object_or_404(Vendor, pk=vendor_id)
	new_reward = Reward(
		owner=request.user,
		vendor=vendor,
		title=request.POST['reward_title'],
		description=request.POST['reward_description'],
		image_url=request.POST['reward_image_url'],
		website_url=request.POST['reward_website_url'],
		cost=float(request.POST['reward_cost']),
	)
	new_reward.save()
	return redirect('rewards-home')
Example 2
def reward(id):
	if request.method == 'GET':
		rewards = Reward.objects(itemid=id)
		return get_items(request, rewards)
	if request.method == 'POST':
		item = Reward.objects(itemid=id).first()
		print 'Fetched Item'
		if request_has_json():
			item.description = request.json['description']
			item.save()
			return 'Updated'
		else:
			form = RewardForm(request.form)
			form.populate_obj(item)
			print 'Populated Item from Form'
			print item.description
			item.save()
			return render_template('list_items.html', items=[item])
Example 3
 def cache_Reward(self, rid):
     # Fetch the reward by id and add it to memcache under a per-reward key.
     r = Reward.by_rid(rid)
     key = 'reward_%s' % rid
     memcache.add(key, r)
     # Return the cached reward object.
     return r
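
For context, a minimal sketch of the matching read path (not part of the snippet above), assuming the same 'reward_%s' key scheme and the App Engine memcache API; the helper name get_cached_reward is hypothetical:

 def get_cached_reward(self, rid):
     # Hypothetical read-through helper: try the cache first, fall back to
     # the datastore and repopulate the cache on a miss.
     key = 'reward_%s' % rid
     r = memcache.get(key)
     if r is None:
         r = Reward.by_rid(rid)
         memcache.add(key, r)
     return r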
Example 4
def user_rewards(user_id):
    # Get the user account relevant to the user_id parameter
    user = User.query.get(user_id)
    # Verify the logged in user is actually the user or an attached user.
    verify_user(user)

    if request.method == 'GET':
        # Return the user's rewards in JSON format.
        return jsonify(rewards=[r.serialize() for r in user.rewards])
    elif request.method == 'POST':
        required_json = ['name', 'cost']
        json = request.json

        if not valid_json(json, required_json):
            abort(400)

        reward = Reward(name=json.get('name'), cost=json.get('cost'), user_id=user_id)

        db.session.add(reward)
        db.session.commit()

        notify_if_partner("A new reward is available in the store!")

        return jsonify(reward.serialize()), 201
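
A hedged sketch of exercising this handler with Flask's test client; the client fixture, the '/users/<user_id>/rewards' route, and the authentication enforced by verify_user() are assumptions, not part of the snippet above:

def test_user_rewards(client):
    # POST a new reward for user 1; 'name' and 'cost' are the keys required
    # by valid_json() in the handler.
    resp = client.post('/users/1/rewards', json={'name': 'Coffee', 'cost': 10})
    assert resp.status_code == 201

    # The GET branch should now include the created reward (assumes
    # serialize() exposes the 'name' field).
    resp = client.get('/users/1/rewards')
    assert resp.status_code == 200
    assert any(r['name'] == 'Coffee' for r in resp.get_json()['rewards'])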
Example 5
 def post(self, uid, rid):
     #link reward to user
     #    get rid data
     #    create linkedReward object
     #    write object to CACHE
     #    return 200, ok to client
     
     #get reward info from DB...Change to cache hit
     
     #use rid to pull up reward info
     reward = Reward.by_rid(rid)
     logging.error("got reward from db")
     for r in reward:
         mid = r.mid
         type = r.type
         summary = r.summary
         details = r.details
         fineprint = r.fineprint
         discount = r.discount
         credit = r.credit
         l_amount = r.l_amount
         l_visits = r.l_visits
         l_maxNumber = r.l_maxNumber
         exp = r.exp
     #insert active reward into datastore    
     r = ActiveReward.register(uid, rid, mid, type, summary, details, fineprint, discount, credit, l_amount, l_visits, l_maxNumber, exp)
     # On success, report 200 and invalidate the user's cached reward list.
     if r:
         message = {
             'status': 200,
             'message': 'linked reward and updated cache',
         }
         resp = jsonify(message)
         resp.status_code = 200

         # If the active-rewards list is cached, delete it so the next GET
         # request falls through to the datastore.
         key = "activeRewards_%s" % uid
         if memcache.get(key):
             memcache.delete(key)
         return resp
     else:
         # Registration returned nothing, so linking failed.
         message = {
             'status': 404,
             'message': 'could not link reward',
         }
         resp = jsonify(message)
         resp.status_code = 404
         return resp
Example 6
def rewards():
	if request.method == 'POST':
		form = RewardForm(request.form)
		if request_has_json():
			try:
				json_data = json.dumps(request.json, default=json_util.default)
				model = Reward.from_json(json_data)
				model.save()
			except ValidationError as e:
				return jsonify(item=str(e))
		else:
			if form.validate():
				form.save()
			else:
				return render_template('add_reward.html', form=form)
		return list_rewards()
	else:
		return list_rewards()
Example 7
 def post(self, mid):
     # creates a reward
     # @param mid: merchant id 
     # create rid
     
     data = request.json
     rid = new_id()
     type = data['type']
     summary = data['summary']
     details = data['details']
     fineprint = data['fineprint']
     discount = data['discount']
     credit = data['credit']
     l_amount = data['l_amount']
     l_visits = data['l_visits']
     l_maxNumber = data['l_maxNumber']
     exp = data['exp']
     
     r = Reward.register(rid, mid, type, summary, details, fineprint, discount, credit, l_amount, l_visits, l_maxNumber, exp)
     
     if r:
         message = {
             'status': 200,
             'message': "reward created",
         }
         resp = jsonify(message)
         resp.status_code = 200
     
         return resp
     else:
         message = {
             'status': 404,
             'message': 'bad data',
         }
         resp = jsonify(message)
         resp.status_code = 404
     
         return resp
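
The POST body for this handler has to supply every key read from request.json above; a hypothetical example payload (all values are invented, for illustration only):

example_reward_payload = {
    'type': 'discount',
    'summary': '10% off lunch',
    'details': 'Valid on weekdays only',
    'fineprint': 'One use per visit',
    'discount': 10,
    'credit': 0,
    'l_amount': 0,
    'l_visits': 5,
    'l_maxNumber': 100,
    'exp': '2015-12-31',
}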
Example 8
def stages(jobid):
    if request.method == 'GET':
        job = Job.objects(jobid=int(jobid)).first()
        stages = Stage.objects(job=job)
        itemLst = [dict(stage.to_mongo()) for stage in stages]
        return mongodoc_jsonify(items=itemLst)
    if request.method == 'POST':
        if request_has_json():
            try:
                job = Job.objects(jobid=int(jobid)).first()
                reward = Reward.objects(itemid=request.json['reward']).first()
                model = Stage()
                model.job = job
                model.reward = reward
                model.rewardDesc = reward.itemid
                model.jobDesc = str(job.jobid)
                model.stage = request.json['stage']
                model.save()
                model.reload()
                return mongodoc_jsonify(item=model.to_mongo())
            except ValidationError as e:
                return jsonify(item=str(e))
Example 9
def rewards():
    if 'userid' in session:
        user = User.query.get_or_404(session['userid'])

        if request.method == 'POST':
            form = RewardForm(request.form)
            if form.validate():
                print form.cost.data
                newreward = Reward(form.title.data, user, None, form.cost.data)
                db.session.add(newreward)
                db.session.commit()
                return redirect(url_for('rewards'))
            else:
                rewards = user.rewards.filter(
                    Reward.redeemed == False).order_by(desc(
                        Reward.dateadded)).all()
                return render_template('rewards.html',
                                       username=user.username,
                                       points=user.points,
                                       rewards=rewards,
                                       form=form,
                                       idform=RewardIdForm(),
                                       updateform=RewardUpdateForm(),
                                       containerclass="rewardcontainer")

        else:
            rewards = user.rewards.filter(Reward.redeemed == False).order_by(
                desc(Reward.dateadded)).all()
            return render_template('rewards.html',
                                   username=user.username,
                                   points=user.points,
                                   rewards=rewards,
                                   form=RewardForm(),
                                   idform=RewardIdForm(),
                                   updateform=RewardUpdateForm(),
                                   containerclass="rewardcontainer")
    # if no session, display default home page. TODO: convert to splash page
    else:
        return redirect(url_for('front'))
Example 10
def create_new_reward(current_user):
    # Create an empty reward record for the given user.
    reward = Reward(author=current_user, coins=0)
    db.session.add(reward)
    db.session.commit()
Example 11
class InfoGAIL(BaseGAIL):
    def __init__(self,
                 args,
                 logger,
                 state_size=2,
                 action_size=4,
                 context_size=1,
                 num_goals=4,
                 history_size=1,
                 dtype=torch.FloatTensor):
        super(InfoGAIL, self).__init__(args,
                                       logger,
                                       state_size=state_size,
                                       action_size=action_size,
                                       context_size=context_size,
                                       num_goals=num_goals,
                                       history_size=history_size,
                                       dtype=dtype)

        # Create networks
        self.policy_net = Policy(state_size=state_size * history_size,
                                 action_size=0,
                                 latent_size=context_size,
                                 output_size=action_size,
                                 hidden_size=64,
                                 output_activation='sigmoid')
        self.old_policy_net = Policy(state_size=state_size * history_size,
                                     action_size=0,
                                     latent_size=context_size,
                                     output_size=action_size,
                                     hidden_size=64,
                                     output_activation='sigmoid')

        # Use value network for calculating GAE. We should use this for
        # training the policy network.
        if args.use_value_net:
            # context_size contains num_goals
            self.value_net = Value(state_size * history_size + context_size,
                                   hidden_size=64)

        # Reward net is the discriminator network. Discriminator does not
        # receive the latent vector in InfoGAIL.
        self.reward_net = Reward(
            state_size * history_size,
            action_size,  # action size
            0,  # latent size
            hidden_size=64)

        self.posterior_net = DiscretePosterior(
            state_size=state_size * history_size,  # state
            action_size=0,  # action
            latent_size=0,  # context
            hidden_size=64,
            output_size=num_goals)

        self.opt_policy = optim.Adam(self.policy_net.parameters(), lr=0.0003)
        self.opt_reward = optim.Adam(self.reward_net.parameters(), lr=0.0003)
        self.opt_value = optim.Adam(self.value_net.parameters(), lr=0.0003)
        self.opt_posterior = optim.Adam(self.posterior_net.parameters(),
                                        lr=0.0003)

        # Create loss functions
        self.criterion = nn.BCELoss()
        self.criterion_posterior = nn.CrossEntropyLoss()

        self.create_environment()

    def checkpoint_data_to_save(self):
        return {
            'policy': self.policy_net,
            'value': self.value_net,
            'reward': self.reward_net,
            'posterior': self.posterior_net,
        }

    def load_checkpoint_data(self, checkpoint_path):
        assert os.path.exists(checkpoint_path), \
            'Checkpoint path does not exists {}'.format(checkpoint_path)
        checkpoint_data = torch.load(checkpoint_path)
        self.policy_net = checkpoint_data['policy']
        self.value_net = checkpoint_data['value']
        self.reward_net = checkpoint_data['reward']
        self.posterior_net = checkpoint_data['posterior']

    def update_params_for_batch(self, states, actions, latent_c, targets,
                                advantages, expert_states, expert_actions,
                                optim_batch_size, optim_batch_size_exp,
                                optim_iters):
        '''Update parameters for one batch of data.

        Update the policy network, discriminator (reward) network and the
        posterior network here.
        '''
        args, dtype = self.args, self.dtype
        curr_id, curr_id_exp = 0, 0
        for _ in range(optim_iters):
            curr_batch_size = min(optim_batch_size, actions.size(0) - curr_id)
            curr_batch_size_exp = min(optim_batch_size_exp,
                                      expert_actions.size(0) - curr_id_exp)
            start_idx, end_idx = curr_id, curr_id + curr_batch_size

            state_var = Variable(states[start_idx:end_idx])
            action_var = Variable(actions[start_idx:end_idx])
            latent_c_var = Variable(latent_c[start_idx:end_idx])
            advantages_var = Variable(advantages[start_idx:end_idx])

            start_idx, end_idx = curr_id_exp, curr_id_exp + curr_batch_size_exp
            expert_state_var = Variable(expert_states[start_idx:end_idx])
            expert_action_var = Variable(expert_actions[start_idx:end_idx])

            # Update reward net
            self.opt_reward.zero_grad()

            # Backprop with expert demonstrations
            expert_output = self.reward_net(
                torch.cat((expert_state_var, expert_action_var), 1))
            expert_disc_loss = self.criterion(
                expert_output,
                Variable(
                    torch.zeros(expert_action_var.size(0), 1).type(dtype)))
            expert_disc_loss.backward()

            # Backprop with generated demonstrations
            gen_output = self.reward_net(torch.cat((state_var, action_var), 1))
            gen_disc_loss = self.criterion(
                gen_output,
                Variable(torch.ones(action_var.size(0), 1)).type(dtype))
            gen_disc_loss.backward()

            # Add loss scalars.
            self.logger.summary_writer.add_scalars(
                'loss/discriminator', {
                    'total': expert_disc_loss.data[0] + gen_disc_loss.data[0],
                    'expert': expert_disc_loss.data[0],
                    'gen': gen_disc_loss.data[0],
                }, self.gail_step_count)
            self.opt_reward.step()

            reward_l2_norm, reward_grad_l2_norm = \
                    get_weight_norm_for_network(self.reward_net)
            self.logger.summary_writer.add_scalar('weight/discriminator/param',
                                                  reward_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/discriminator/grad',
                                                  reward_grad_l2_norm,
                                                  self.gail_step_count)

            # Update posterior net. We need to do this by reparameterization
            # trick.
            predicted_posterior = self.posterior_net(state_var)
            # There is no GOAL info in latent_c_var here.
            # TODO: This 0 and -1 stuff is not needed here. Confirm?
            _, true_posterior = torch.max(latent_c_var.data, dim=1)
            posterior_loss = self.criterion_posterior(predicted_posterior,
                                                      Variable(true_posterior))
            posterior_loss.backward()
            self.logger.summary_writer.add_scalar('loss/posterior',
                                                  posterior_loss.data[0],
                                                  self.gail_step_count)

            # compute old and new action probabilities
            action_means, action_log_stds, action_stds = self.policy_net(
                torch.cat((state_var, latent_c_var), 1))
            log_prob_cur = normal_log_density(action_var, action_means,
                                              action_log_stds, action_stds)

            action_means_old, action_log_stds_old, action_stds_old = \
                    self.old_policy_net(torch.cat(
                        (state_var, latent_c_var), 1))
            log_prob_old = normal_log_density(action_var, action_means_old,
                                              action_log_stds_old,
                                              action_stds_old)

            if args.use_value_net:
                # update value net
                self.opt_value.zero_grad()
                value_var = self.value_net(
                    torch.cat((state_var, latent_c_var), 1))
                value_loss = (value_var - \
                        targets[curr_id:curr_id+curr_batch_size]).pow(2.).mean()
                value_loss.backward()
                self.opt_value.step()

            # Update policy net (PPO step)
            self.opt_policy.zero_grad()
            ratio = torch.exp(log_prob_cur - log_prob_old)  # pnew / pold
            surr1 = ratio * advantages_var[:, 0]
            surr2 = torch.clamp(ratio, 1.0 - self.args.clip_epsilon, 1.0 +
                                self.args.clip_epsilon) * advantages_var[:, 0]
            policy_surr = -torch.min(surr1, surr2).mean()
            policy_surr.backward()
            # torch.nn.utils.clip_grad_norm(self.policy_net.parameters(), 40)
            self.opt_policy.step()
            self.logger.summary_writer.add_scalar('loss/policy',
                                                  policy_surr.data[0],
                                                  self.gail_step_count)

            policy_l2_norm, policy_grad_l2_norm = \
                    get_weight_norm_for_network(self.policy_net)
            self.logger.summary_writer.add_scalar('weight/policy/param',
                                                  policy_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/policy/grad',
                                                  policy_grad_l2_norm,
                                                  self.gail_step_count)

            # set new starting point for batch
            curr_id += curr_batch_size
            curr_id_exp += curr_batch_size_exp

            self.gail_step_count += 1

    def update_params(self, gen_batch, expert_batch, episode_idx, optim_epochs,
                      optim_batch_size):
        '''Update params for Policy (G), Reward (D) and Posterior (q) networks.
        '''
        args, dtype = self.args, self.dtype

        # Linearly anneal the policy learning rate. Assigning to optimizer.lr
        # has no effect in PyTorch, so update the param groups instead.
        annealed_lr = self.args.learning_rate \
            * max(1.0 - float(episode_idx)/args.num_epochs, 0)
        for param_group in self.opt_policy.param_groups:
            param_group['lr'] = annealed_lr
        clip_epsilon = self.args.clip_epsilon \
            * max(1.0 - float(episode_idx)/args.num_epochs, 0)

        # generated trajectories
        states = torch.Tensor(np.array(gen_batch.state)).type(dtype)
        actions = torch.Tensor(np.array(gen_batch.action)).type(dtype)
        rewards = torch.Tensor(np.array(gen_batch.reward)).type(dtype)
        masks = torch.Tensor(np.array(gen_batch.mask)).type(dtype)

        ## Expand states to include history ##
        # Generated trajectories already have history in them.

        latent_c = torch.Tensor(np.array(gen_batch.c)).type(dtype)
        values = None
        if args.use_value_net:
            values = self.value_net(Variable(torch.cat((states, latent_c), 1)))

        # expert trajectories
        list_of_expert_states, list_of_expert_actions = [], []
        list_of_masks = []
        for i in range(len(expert_batch.state)):
            ## Expand expert states ##
            expanded_states = self.expand_states_numpy(expert_batch.state[i],
                                                       self.history_size)
            list_of_expert_states.append(torch.Tensor(expanded_states))
            list_of_expert_actions.append(torch.Tensor(expert_batch.action[i]))
            list_of_masks.append(torch.Tensor(expert_batch.mask[i]))

        expert_states = torch.cat(list_of_expert_states, 0).type(dtype)
        expert_actions = torch.cat(list_of_expert_actions, 0).type(dtype)
        expert_masks = torch.cat(list_of_masks, 0).type(dtype)

        assert expert_states.size(0) == expert_actions.size(0), \
                "Expert transition size do not match"
        assert expert_states.size(0) == expert_masks.size(0), \
                "Expert transition size do not match"

        # compute advantages
        returns, advantages = get_advantage_for_rewards(rewards,
                                                        masks,
                                                        self.args.gamma,
                                                        values,
                                                        dtype=dtype)
        targets = Variable(returns)
        advantages = (advantages - advantages.mean()) / advantages.std()

        # Backup params after computing probs but before updating new params
        for old_policy_param, policy_param in zip(
                self.old_policy_net.parameters(),
                self.policy_net.parameters()):
            old_policy_param.data.copy_(policy_param.data)

        # update value, reward and policy networks
        optim_iters = self.args.batch_size // optim_batch_size
        optim_batch_size_exp = expert_actions.size(0) // optim_iters

        # Remove extra 1 array shape from actions, since actions were added as
        # 1-hot vector of shape (1, A).
        actions = np.squeeze(actions)
        expert_actions = np.squeeze(expert_actions)

        for _ in range(optim_epochs):
            perm = np.random.permutation(np.arange(actions.size(0)))
            perm_exp = np.random.permutation(np.arange(expert_actions.size(0)))
            if args.cuda:
                perm = torch.cuda.LongTensor(perm)
                perm_exp = torch.cuda.LongTensor(perm_exp)
            else:
                perm, perm_exp = torch.LongTensor(perm), torch.LongTensor(
                    perm_exp)

            self.update_params_for_batch(
                states[perm], actions[perm], latent_c[perm], targets[perm],
                advantages[perm], expert_states[perm_exp],
                expert_actions[perm_exp], optim_batch_size,
                optim_batch_size_exp, optim_iters)

    def train_gail(self, expert):
        '''Train Info-GAIL.'''
        args, dtype = self.args, self.dtype
        results = {
            'average_reward': [],
            'episode_reward': [],
            'true_traj': {},
            'pred_traj': {}
        }
        self.train_step_count, self.gail_step_count = 0, 0

        for ep_idx in range(args.num_epochs):
            memory = Memory()

            num_steps = 0
            reward_batch, true_reward_batch = [], []
            expert_true_reward_batch = []
            true_traj_curr_episode, gen_traj_curr_episode = [], []

            while num_steps < args.batch_size:
                traj_expert = expert.sample(size=1)
                state_expert, action_expert, _, _ = traj_expert

                # Expert state and actions
                state_expert = state_expert[0]
                action_expert = action_expert[0]
                expert_episode_len = len(state_expert)

                # Sample start state or should we just choose the start state
                # from the expert trajectory sampled above.
                # curr_state_obj = self.sample_start_state()
                curr_state_obj = State(state_expert[0], self.obstacles)
                curr_state_feat = self.get_state_features(
                    curr_state_obj, self.args.use_state_features)

                # Add history to state
                if args.history_size > 1:
                    curr_state = -1 * np.ones(
                        (args.history_size * curr_state_feat.shape[0]),
                        dtype=np.float32)
                    curr_state[(args.history_size-1) \
                            * curr_state_feat.shape[0]:] = curr_state_feat
                else:
                    curr_state = curr_state_feat

                # TODO: Make this a separate function. Can be parallelized.
                ep_reward, ep_true_reward, expert_true_reward = 0, 0, 0
                true_traj, gen_traj = [], []
                gen_traj_dict = {
                    'features': [],
                    'actions': [],
                    'c': [],
                    'mask': []
                }
                disc_reward, posterior_reward = 0.0, 0.0
                # Use a hard-coded list for memory to gather experience since we
                # need to mutate it before finally creating a memory object.

                c_sampled = np.zeros((self.num_goals), dtype=np.float32)
                c_sampled[np.random.randint(0, self.num_goals)] = 1.0
                c_sampled_tensor = torch.zeros((1)).type(torch.LongTensor)
                c_sampled_tensor[0] = int(np.argmax(c_sampled))
                if self.args.cuda:
                    c_sampled_tensor = torch.cuda.LongTensor(c_sampled_tensor)

                memory_list = []
                for t in range(expert_episode_len):
                    action = self.select_action(
                        np.concatenate((curr_state, c_sampled)))
                    action_numpy = action.data.cpu().numpy()

                    # Save generated and true trajectories
                    true_traj.append((state_expert[t], action_expert[t]))
                    gen_traj.append((curr_state_obj.coordinates, action_numpy))
                    gen_traj_dict['features'].append(
                        self.get_state_features(curr_state_obj,
                                                self.args.use_state_features))
                    gen_traj_dict['actions'].append(action_numpy)
                    gen_traj_dict['c'].append(c_sampled)

                    action = epsilon_greedy_linear_decay(action_numpy,
                                                         args.num_epochs * 0.5,
                                                         ep_idx,
                                                         self.action_size,
                                                         low=0.05,
                                                         high=0.3)

                    # Get the discriminator reward
                    disc_reward_t = float(
                        self.reward_net(
                            torch.cat((Variable(
                                torch.from_numpy(curr_state).unsqueeze(
                                    0)).type(dtype),
                                       Variable(
                                           torch.from_numpy(
                                               oned_to_onehot(
                                                   action, self.action_size)).
                                           unsqueeze(0)).type(dtype)),
                                      1)).data.cpu().numpy()[0, 0])

                    if args.use_log_rewards and disc_reward_t < 1e-6:
                        disc_reward_t += 1e-6

                    disc_reward_t = -math.log(disc_reward_t) \
                            if args.use_log_rewards else -disc_reward_t
                    disc_reward += disc_reward_t

                    # Predict c given (x_t)
                    predicted_posterior = self.posterior_net(
                        Variable(torch.from_numpy(curr_state).unsqueeze(
                            0)).type(dtype))
                    posterior_reward_t = self.criterion_posterior(
                        predicted_posterior,
                        Variable(c_sampled_tensor)).data.cpu().numpy()[0]

                    posterior_reward += (self.args.lambda_posterior *
                                         posterior_reward_t)

                    # Update Rewards
                    ep_reward += (disc_reward_t + posterior_reward_t)
                    true_goal_state = [
                        int(x) for x in state_expert[-1].tolist()
                    ]
                    if self.args.flag_true_reward == 'grid_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            curr_state_obj.coordinates,
                            goals=[true_goal_state])
                        expert_true_reward += self.true_reward.reward_at_location(
                            state_expert[t], goals=[true_goal_state])
                    elif self.args.flag_true_reward == 'action_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            np.argmax(action_expert[t]), action)
                        expert_true_reward += self.true_reward.corret_action_reward
                    else:
                        raise ValueError("Incorrect true reward type")

                    # Update next state
                    next_state_obj = self.transition_func(
                        curr_state_obj, Action(action), 0)
                    next_state_feat = self.get_state_features(
                        next_state_obj, self.args.use_state_features)
                    #next_state = running_state(next_state)

                    mask = 0 if t == expert_episode_len - 1 else 1

                    # Push to memory
                    memory_list.append([
                        curr_state,
                        np.array([oned_to_onehot(action,
                                                 self.action_size)]), mask,
                        next_state_feat, disc_reward_t + posterior_reward_t,
                        c_sampled, c_sampled
                    ])

                    if args.render:
                        env.render()

                    if not mask:
                        break

                    curr_state_obj = next_state_obj
                    curr_state_feat = next_state_feat

                    if args.history_size > 1:
                        curr_state[:(args.history_size-1) \
                                * curr_state_feat.shape[0]] = \
                                curr_state[curr_state_feat.shape[0]:]
                        curr_state[(args.history_size-1) \
                                * curr_state_feat.shape[0]:] = curr_state_feat
                    else:
                        curr_state = curr_state_feat



                assert memory_list[-1][2] == 0, \
                        "Mask for final end state is not 0."
                for memory_t in memory_list:
                    memory.push(*memory_t)

                self.logger.summary_writer.add_scalars(
                    'gen_traj/gen_reward', {
                        'discriminator': disc_reward,
                        'posterior': posterior_reward,
                    }, self.train_step_count)

                num_steps += (t - 1)
                reward_batch.append(ep_reward)
                true_reward_batch.append(ep_true_reward)
                expert_true_reward_batch.append(expert_true_reward)
                results['episode_reward'].append(ep_reward)

                # Append trajectories
                true_traj_curr_episode.append(true_traj)
                gen_traj_curr_episode.append(gen_traj)

            results['average_reward'].append(np.mean(reward_batch))

            # Add to tensorboard
            self.logger.summary_writer.add_scalars(
                'gen_traj/reward', {
                    'average': np.mean(reward_batch),
                    'max': np.max(reward_batch),
                    'min': np.min(reward_batch)
                }, self.train_step_count)
            self.logger.summary_writer.add_scalars(
                'gen_traj/true_reward', {
                    'average': np.mean(true_reward_batch),
                    'max': np.max(true_reward_batch),
                    'min': np.min(true_reward_batch),
                    'expert_true': np.mean(expert_true_reward_batch)
                }, self.train_step_count)

            # Add predicted and generated trajectories to results
            if ep_idx % self.args.save_interval == 0:
                results['true_traj'][ep_idx] = copy.deepcopy(
                    true_traj_curr_episode)
                results['pred_traj'][ep_idx] = copy.deepcopy(
                    gen_traj_curr_episode)

            # Update parameters
            gen_batch = memory.sample()

            # We do not get the context variable from expert trajectories.
            # Hence we need to fill it in later.
            expert_batch = expert.sample(size=args.num_expert_trajs)

            self.update_params(gen_batch, expert_batch, ep_idx,
                               args.optim_epochs, args.optim_batch_size)

            self.train_step_count += 1

            if ep_idx > 0 and ep_idx % args.log_interval == 0:
                print('Episode [{}/{}]  Avg R: {:.2f}   Max R: {:.2f} \t' \
                      'True Avg {:.2f}   True Max R: {:.2f}   ' \
                      'Expert (Avg): {:.2f}'.format(
                          ep_idx, args.num_epochs, np.mean(reward_batch),
                          np.max(reward_batch), np.mean(true_reward_batch),
                          np.max(true_reward_batch),
                          np.mean(expert_true_reward_batch)))

            results_path = os.path.join(args.results_dir, 'results.pkl')
            with open(results_path, 'wb') as results_f:
                pickle.dump((results), results_f, protocol=2)
                # print("Did save results to {}".format(results_path))

            if ep_idx % args.save_interval == 0:
                checkpoint_filepath = self.model_checkpoint_filepath(ep_idx)
                torch.save(self.checkpoint_data_to_save(), checkpoint_filepath)
                print("Did save checkpoint: {}".format(checkpoint_filepath))
Example 12
    def save_draft_to_campaign(self, campaign, draft_data):

        form_campaign_info = CampaignInfoForm.from_json(draft_data["form_campaign_info"])
        form_basics = CampaignBasicForm.from_json(draft_data["form_basics"])
        form_paid = CampaignPaidForm.from_json(draft_data["form_paid"])
        form_bonus_reward = CampaignBonusRewardForm.from_json(draft_data["form_bonus_reward"])
        form_confirmation_message = CampaignConfirmationMessageForm.from_json(draft_data["form_confirmation_message"])

        campaign.campaign_type_id = form_basics.campaign_type_id.data
        campaign.category_id = form_basics.category_id.data
        campaign.campaign_receiver_id = form_basics.campaign_receiver_id.data
        campaign.funding_goal = float(str(form_basics.dollar_amount.data))
        campaign.expiration_date = form_basics.deadline.data
        campaign.fulfillment_service = form_basics.fulfillment_service.data
        campaign.evergreen_campaign_page = form_basics.evergreen_campaign_page.data
        campaign.fulfillment_service = form_basics.fulfillment_service.data
        campaign.campaign_management = form_basics.campaign_management.data

        campaign.description = form_campaign_info.full_description.data
        campaign.short_description = form_campaign_info.description.data
        campaign.title = form_campaign_info.campaign_title.data
        campaign.thumbnail_url = form_campaign_info.thumbnail_url.data

        campaign.confirmation_message = form_confirmation_message.confirmation_message.data

        campaign.vanity_url = urlify(campaign.title)

        for i_reward in campaign.rewards:
            i_reward.is_active = False

        for idx, reward_forms in enumerate(draft_data["form_rewards"]):
            form_reward = CampaignRewardForm.from_json(reward_forms)
            current_reward = None
            for i_reward in campaign.rewards:
                if str(i_reward.id).lower() == str(form_reward.id.data).lower():
                    current_reward = i_reward
                    i_reward.is_active = True
                    break

            if current_reward is None:
                current_reward = Reward()
                current_reward.claimed = 0
                current_reward.id = form_reward.id.data
                campaign.rewards.append(current_reward)

            current_reward.ordinal = idx
            current_reward.is_featured = form_reward.is_featured.data
            current_reward.title = form_reward.name.data
            current_reward.inventory = form_reward.quantity.data if form_reward.quantity.data else 0
            current_reward.cost = form_reward.dollar_amount.data
            current_reward.thumbnail_url = form_reward.thumbnail_url.data
            current_reward.description = form_reward.description.data
            current_reward.delivery_date = form_reward.delivery_date.data
            current_reward.is_shipping_required = form_reward.shipping_required.data
            current_reward.is_limited_quantity = form_reward.limited_quantity.data
            current_reward.international_shipping_fee = form_reward.international_shipping_fee.data if form_reward.international_shipping_fee.data else 0
            current_reward.shipping_fee = form_reward.shipping_fee.data if form_reward.shipping_fee.data else 0


            if campaign.bonus_reward is None:
                campaign.bonus_reward = Reward()
                campaign.bonus_reward.campaign = campaign

            campaign.bonus_reward.claimed = 0
            campaign.bonus_reward.referrals_needed = form_bonus_reward.referrals_needed.data
            campaign.bonus_reward.title = form_bonus_reward.name.data
            campaign.bonus_reward.inventory = form_bonus_reward.quantity.data if form_bonus_reward.quantity.data else 0
            campaign.bonus_reward.cost = 0
            campaign.bonus_reward.thumbnail_url = form_bonus_reward.thumbnail_url.data
            campaign.bonus_reward.description = form_bonus_reward.description.data
            campaign.bonus_reward.delivery_date = form_bonus_reward.delivery_date.data
            campaign.bonus_reward.is_shipping_required = form_bonus_reward.shipping_required.data
            campaign.bonus_reward.is_limited_quantity = form_bonus_reward.limited_quantity.data
            campaign.bonus_reward.international_shipping_fee = form_bonus_reward.international_shipping_fee.data if form_bonus_reward.international_shipping_fee.data else 0
            campaign.bonus_reward.shipping_fee = form_bonus_reward.shipping_fee.data if form_bonus_reward.shipping_fee.data else 0

        return campaign
Example 13
# Initialise training environment and experience replay memory
env = GymEnv(cfg)
replay = Replay(cfg, env.action_size)
# Initialise dataset replay with S random seed episodes
for s in range(cfg['seed_episodes']):
    observation = env.reset()
    done = False
    while not done:
        next_observation, action, reward, done = env.step()
        replay.append(observation, action, reward, done)
        observation = next_observation

# Init PlaNet
transition_model = Transition(cfg)
observation_model = Observation(cfg)
reward_model = Reward(cfg)
encoder = Encoder(cfg)

optim = tf.train.AdamOptimizer(cfg['learning_rate'], epsilon=cfg['optim_eps'])
planner = MPCPlanner(cfg, env.action_size, transition_model, reward_model)
global_prior = tf.distributions.Normal(
    tf.zeros([cfg['batch_size'], cfg['state_size']]),
    tf.ones([cfg['batch_size'], cfg['state_size']]))  # Global prior N(0, I)
free_nats = tf.fill(dims=[
    1,
], value=cfg['free_nats'])  # Allowed deviation in KL divergence
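# Hedged note (not part of the original snippet): free_nats is typically used
# inside the model-fitting step to clip the KL term from below, e.g. with
# hypothetical posterior_dist/prior_dist state distributions:
#   kl_loss = tf.reduce_mean(tf.maximum(
#       tf.distributions.kl_divergence(posterior_dist, prior_dist), free_nats))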

# Training
for episode in trange(cfg['train']['episodes']):
    # Model fitting
    losses = []
Example 14
    def save_draft_to_campaign(self, campaign, draft_data):

        form_campaign_info = CampaignInfoForm.from_json(
            draft_data["form_campaign_info"])
        form_basics = CampaignBasicForm.from_json(draft_data["form_basics"])
        form_paid = CampaignPaidForm.from_json(draft_data["form_paid"])
        form_bonus_reward = CampaignBonusRewardForm.from_json(
            draft_data["form_bonus_reward"])
        form_confirmation_message = CampaignConfirmationMessageForm.from_json(
            draft_data["form_confirmation_message"])

        campaign.campaign_type_id = form_basics.campaign_type_id.data
        campaign.category_id = form_basics.category_id.data
        campaign.campaign_receiver_id = form_basics.campaign_receiver_id.data
        campaign.funding_goal = float(str(form_basics.dollar_amount.data))
        campaign.expiration_date = form_basics.deadline.data
        campaign.fulfillment_service = form_basics.fulfillment_service.data
        campaign.evergreen_campaign_page = form_basics.evergreen_campaign_page.data
        campaign.fulfillment_service = form_basics.fulfillment_service.data
        campaign.campaign_management = form_basics.campaign_management.data

        campaign.description = form_campaign_info.full_description.data
        campaign.short_description = form_campaign_info.description.data
        campaign.title = form_campaign_info.campaign_title.data
        campaign.thumbnail_url = form_campaign_info.thumbnail_url.data

        campaign.confirmation_message = form_confirmation_message.confirmation_message.data

        campaign.vanity_url = urlify(campaign.title)

        for i_reward in campaign.rewards:
            i_reward.is_active = False

        for idx, reward_forms in enumerate(draft_data["form_rewards"]):
            form_reward = CampaignRewardForm.from_json(reward_forms)
            current_reward = None
            for i_reward in campaign.rewards:
                if str(i_reward.id).lower() == str(
                        form_reward.id.data).lower():
                    current_reward = i_reward
                    i_reward.is_active = True
                    break

            if current_reward is None:
                current_reward = Reward()
                current_reward.claimed = 0
                current_reward.id = form_reward.id.data
                campaign.rewards.append(current_reward)

            current_reward.ordinal = idx
            current_reward.is_featured = form_reward.is_featured.data
            current_reward.title = form_reward.name.data
            current_reward.inventory = form_reward.quantity.data if form_reward.quantity.data else 0
            current_reward.cost = form_reward.dollar_amount.data
            current_reward.thumbnail_url = form_reward.thumbnail_url.data
            current_reward.description = form_reward.description.data
            current_reward.delivery_date = form_reward.delivery_date.data
            current_reward.is_shipping_required = form_reward.shipping_required.data
            current_reward.is_limited_quantity = form_reward.limited_quantity.data
            current_reward.international_shipping_fee = form_reward.international_shipping_fee.data if form_reward.international_shipping_fee.data else 0
            current_reward.shipping_fee = form_reward.shipping_fee.data if form_reward.shipping_fee.data else 0

            if campaign.bonus_reward is None:
                campaign.bonus_reward = Reward()
                campaign.bonus_reward.campaign = campaign

            campaign.bonus_reward.claimed = 0
            campaign.bonus_reward.referrals_needed = form_bonus_reward.referrals_needed.data
            campaign.bonus_reward.title = form_bonus_reward.name.data
            campaign.bonus_reward.inventory = form_bonus_reward.quantity.data if form_bonus_reward.quantity.data else 0
            campaign.bonus_reward.cost = 0
            campaign.bonus_reward.thumbnail_url = form_bonus_reward.thumbnail_url.data
            campaign.bonus_reward.description = form_bonus_reward.description.data
            campaign.bonus_reward.delivery_date = form_bonus_reward.delivery_date.data
            campaign.bonus_reward.is_shipping_required = form_bonus_reward.shipping_required.data
            campaign.bonus_reward.is_limited_quantity = form_bonus_reward.limited_quantity.data
            campaign.bonus_reward.international_shipping_fee = form_bonus_reward.international_shipping_fee.data if form_bonus_reward.international_shipping_fee.data else 0
            campaign.bonus_reward.shipping_fee = form_bonus_reward.shipping_fee.data if form_bonus_reward.shipping_fee.data else 0

        return campaign
Example 15
def list_rewards():
	rewards = Reward.objects()
	return get_items(request, rewards)
torch.manual_seed(args.seed)

policy_net = Policy(num_inputs,
                    0,
                    num_c,
                    num_actions,
                    hidden_size=64,
                    output_activation='sigmoid').type(dtype)
old_policy_net = Policy(num_inputs,
                        0,
                        num_c,
                        num_actions,
                        hidden_size=64,
                        output_activation='sigmoid').type(dtype)
#value_net = Value(num_inputs+num_c, hidden_size=64).type(dtype)
reward_net = Reward(num_inputs, num_actions, num_c, hidden_size=64).type(dtype)
posterior_net = Posterior(num_inputs, num_actions, num_c,
                          hidden_size=64).type(dtype)

opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
#opt_value = optim.Adam(value_net.parameters(), lr=0.0003)
opt_reward = optim.Adam(reward_net.parameters(), lr=0.0003)
opt_posterior = optim.Adam(posterior_net.parameters(), lr=0.0003)


def epsilon_greedy_linear_decay(action_vector,
                                n_episodes,
                                n,
                                low=0.1,
                                high=0.9):
    if n <= n_episodes:
Example 17
def edit_reward_form(id):
    item = Reward.objects(itemid=id).first()
    form = RewardForm(request.form, obj=item)
    return render_template('add_reward.html', form=form, edit=True)
Example 18
def register():
    # if post data exists
    if request.method == 'POST':
        form = RegistrationForm(request.form)
        if form.validate():
            usernm = form.username.data
            pw = form.password.data
            verify = form.verify.data
            email = form.email.data
            userquery = User.query.filter_by(username=usernm.lower()).first()
            emailquery = User.query.filter_by(email=email.lower()).first()

            # if query returns a user or email address from database, check if it matches post data
            if userquery is not None:
                # convert name data from POST to lowercase and check vs database (name also in lowercase)
                if usernm.lower() == userquery.username:
                    #flash("Username is already in use. Please choose another.")
                    form.username.errors.append(
                        "Username is already in use. Please choose another.")
                    return render_template('front.html',
                                           regform=form,
                                           loginform=LoginForm(),
                                           containerclass="frontcontainer")
            if emailquery is not None:
                if email.lower() == emailquery.email:
                    form.email.errors.append(
                        "Email address is already in use. Please use another.")
                    return render_template('front.html',
                                           regform=form,
                                           loginform=LoginForm(),
                                           containerclass="frontcontainer")

            # if query does not return a name, check if form passwords match
            elif pw != verify:
                #flash("Password doesn't match verification. Please try again.")
                form.password.errors.append(
                    "Password doesn't match verification. Please try again.")
                return render_template('front.html',
                                       regform=form,
                                       loginform=LoginForm(),
                                       containerclass="frontcontainer")

            # if passwords match, hash the password and store the user in database, username in lowercase
            elif pw == verify:
                pwhash = generate_password_hash(pw)
                newuser = User(usernm.lower(), email.lower(), pwhash)
                db.session.add(newuser)

                newtask = Task(
                    "Welcome to List and Reward. Have a free point!", newuser,
                    None, 1)
                db.session.add(newtask)

                newreward = Reward("Pat yourself on the back. You deserve it.",
                                   newuser, None, 1)
                db.session.add(newreward)

                db.session.commit()

                msg = Message("L+R Registration Successful",
                              sender=("List and Reward",
                                      "*****@*****.**"),
                              recipients=[newuser.email])
                msg.body = "Thanks for registering!"
                mail.send(msg)

                session['userid'] = newuser.id
                return redirect(url_for('tasks'))

        else:
            return render_template('front.html',
                                   regform=form,
                                   loginform=LoginForm(),
                                   containerclass="frontcontainer")

    # form view when no POST data
    else:
        return render_template('front.html',
                               loginform=LoginForm(),
                               regform=RegistrationForm(),
                               containerclass="frontcontainer")
Example 20
env = gym.make(args.env_name)

num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    policy_net = Policy(num_inputs, num_actions)
    old_policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    reward_net = Reward(num_inputs, num_actions)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)
    opt_reward = optim.Adam(reward_net.parameters(), lr=0.0003)


def select_action(state):
    state = torch.from_numpy(state).unsqueeze(0)
    action_mean, _, action_std = policy_net(Variable(state))
    action = torch.normal(action_mean, action_std)
    return action


def select_action_actor_critic(state):
    state = torch.from_numpy(state).unsqueeze(0)
    action_mean, _, action_std, v = ac_net(Variable(state))
Example 21
def manage_reward(request, key):
	form = None
	riddle_form = None
	condition_form = None
	
	try:
		#loading existing reward
		reward = Reward.objects.get(key=key)
	except Reward.DoesNotExist:
		#creating new reward, initialize variables
		reward = Reward()
		reward.key = key
	
	riddle = reward.riddle_set.all()
	if riddle.count() == 0:
		riddle = Riddle()
	else:
		riddle = riddle[0]
	
	condition = reward.condition_set.all()
	if condition.count() == 0:
		condition = Condition()
	else:
		condition = condition[0]

	if request.method == 'POST':
		form = RewardManageForm(instance=reward, data=request.POST)
		if form.is_valid():
			reward = form.save()
			blob_infos = get_blobinfo_from_post(request)
			for blob_info in blob_infos:
				#print "blobkey: " + str(blob_info.key())
				image = Image()
				image.blob_key = str(blob_info.key())
				image.reward = reward
				image.save()
		riddle.reward = reward
		riddle_form = RiddleManageForm(instance=riddle, data=request.POST) 
		if riddle_form.is_valid():
			riddle_form.save()
		condition.reward = reward
		condition_form = ConditionManageForm(instance=condition, data=request.POST) 
		if condition_form.is_valid():
			condition_form.save()
		
	images = list(reward.image_set.all())
	images.append(Image())
	
	if form is None:
		form = RewardManageForm(instance=reward)
	
	image_forms = []
	for image in images:
		image_forms.append(ImageManageForm(instance=image))
	
	if riddle_form is None:
		riddle_form = RiddleManageForm(instance=riddle)
	
	if condition_form is None:
		condition_form = ConditionManageForm(instance=condition)

	url = reverse('manage_reward', kwargs={'key': key})
	upload_url = blobstore.create_upload_url(url)

	return render_to_response(
		'reward/manage.html',
		{
			'upload_url': upload_url,
			'form': form,
			'reward': reward,
			'image_forms': image_forms,
			'riddle_form': riddle_form,
			'condition_form': condition_form,
		},
		context_instance=RequestContext(request))
Example 22
        id=uuid.uuid4(),
        text=dummy_text,
        user_id=ted.id,
    )
    c.updates.append(ci)

for i in range(6):
    inventory_count = randint(1, 500)
    r = Reward(
        id=uuid.uuid4(),
        title='Something Awesome%s' % i,
        description=
        "This is the description and lets see it a little longer then normal.  This is the description and lets see it a little longer then normal.  This is the description and lets see it a little longer then normal.  ",
        thumbnail_url=
        "http://res.cloudinary.com/hzdmrhkl4/image/upload/v1398927057/hs9gf9a9lbvjenzpsomo.jpg",
        is_active=True,
        is_available=True,
        cost=randint(50, 500),
        delivery_date="2014-05-22 00:00:00",
        inventory=inventory_count,
        is_limited_quantity=True,
        is_shipping_required=True,
        claimed=randint(1, inventory_count))
    c.rewards.append(r)

db.session.add(c)
db.session.commit()

for i in range(30):
    b = User(first_name=str('Joe%s' % i),
             last_name=str('Backer%s' % i),