def RewardSave(request, vendor_id):
    vendor = get_object_or_404(Vendor, pk=vendor_id)
    new_reward = Reward(
        owner=request.user,
        vendor=vendor,
        title=request.POST['reward_title'],
        description=request.POST['reward_description'],
        image_url=request.POST['reward_image_url'],
        website_url=request.POST['reward_website_url'],
        cost=float(request.POST['reward_cost']))
    new_reward.save()
    return redirect('rewards-home')
def reward(id):
    if request.method == 'GET':
        rewards = Reward.objects(itemid=id)
        return get_items(request, rewards)
    if request.method == 'POST':
        item = Reward.objects(itemid=id).first()
        print 'Fetched Item'
        if request_has_json():
            item.description = request.json['description']
            item.save()
            return 'Updated'
        else:
            form = RewardForm(request.form)
            form.populate_obj(item)
            print 'Populated Item from Form'
            print item.description
            item.save()
            return render_template('list_items.html', items=[item])
def cache_Reward(self, rid):
    # Look up the reward by id and add it to memcache under a per-reward key.
    r = Reward.by_rid(rid)
    key = 'reward_%s' % (rid)
    memcache.add(key, r)
    # Return the reward object itself.
    return r
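# The helper above only writes to the cache. Below is a minimal read-through
# companion, sketched under the assumption that the same App Engine memcache
# API and the Reward.by_rid lookup used above are available; the name
# get_cached_reward is illustrative and not part of the original code.
from google.appengine.api import memcache

def get_cached_reward(rid):
    key = 'reward_%s' % rid
    reward = memcache.get(key)
    if reward is None:
        # Cache miss: fall back to the datastore and repopulate the cache.
        reward = Reward.by_rid(rid)
        memcache.add(key, reward)
    return reward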
def user_rewards(user_id):
    # Get the user account relevant to the user_id parameter
    user = User.query.get(user_id)
    # Verify the logged in user is actually the user or an attached user.
    verify_user(user)
    if request.method == 'GET':
        # Return the user's rewards in JSON format.
        return jsonify(rewards=[r.serialize() for r in user.rewards])
    elif request.method == 'POST':
        required_json = ['name', 'cost']
        json = request.json
        if not valid_json(json, required_json):
            abort(400)
        reward = Reward(name=json.get('name'),
                        cost=json.get('cost'),
                        user_id=user_id)
        db.session.add(reward)
        db.session.commit()
        notify_if_partner("A new reward is available in the store!")
        return jsonify(reward.serialize()), 201
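# user_rewards above delegates to a valid_json helper that is not shown in
# these snippets. A minimal sketch consistent with how it is called; beyond
# "every required key must be present" the exact behaviour is an assumption.
def valid_json(json_data, required_keys):
    # Reject missing bodies and bodies that lack any required key.
    if json_data is None:
        return False
    return all(key in json_data for key in required_keys)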
def post(self, uid, rid):
    # Link reward to user:
    #   - get rid data
    #   - create linkedReward object
    #   - write object to CACHE
    #   - return 200, ok to client

    # Get reward info from DB... change to cache hit.
    # Use rid to pull up reward info.
    reward = Reward.by_rid(rid)
    logging.error("got reward from db")
    for r in reward:
        mid = r.mid
        type = r.type
        summary = r.summary
        details = r.details
        fineprint = r.fineprint
        discount = r.discount
        credit = r.credit
        l_amount = r.l_amount
        l_visits = r.l_visits
        l_maxNumber = r.l_maxNumber
        exp = r.exp

    # Insert active reward into datastore.
    r = ActiveReward.register(uid, rid, mid, type, summary, details,
                              fineprint, discount, credit, l_amount,
                              l_visits, l_maxNumber, exp)
    # Update cache.
    if r:
        message = {
            'status': 200,
            'message': 'linked reward and updated cache',
        }
        resp = jsonify(message)
        resp.status_code = 200
        # Check for the object in cache; if found, delete it to force a DB
        # read on the next GET request.
        key = "activeRewards_%s" % (uid)
        activeRewards_cache = memcache.get(key)
        if activeRewards_cache:
            memcache.delete(key)
        return resp
    else:
        # If no r, linking failed.
        pass
def rewards():
    if request.method == 'POST':
        form = RewardForm(request.form)
        if request_has_json():
            try:
                json_data = json.dumps(request.json, default=json_util.default)
                model = Reward.from_json(json_data)
                model.save()
            except ValidationError as e:
                return jsonify(item=str(e))
        else:
            if form.validate():
                form.save()
            else:
                return render_template('add_reward.html', form=form)
        return list_rewards()
    else:
        return list_rewards()
def post(self, mid):
    # Creates a reward.
    # @param mid: merchant id
    # create rid
    data = request.json
    rid = new_id()
    mid = mid
    type = data['type']
    summary = data['summary']
    details = data['details']
    fineprint = data['fineprint']
    discount = data['discount']
    credit = data['credit']
    l_amount = data['l_amount']
    l_visits = data['l_visits']
    l_maxNumber = data['l_maxNumber']
    exp = data['exp']
    r = Reward.register(rid, mid, type, summary, details, fineprint,
                        discount, credit, l_amount, l_visits,
                        l_maxNumber, exp)
    if r:
        message = {
            'status': 200,
            'message': "reward created",
        }
        resp = jsonify(message)
        resp.status_code = 200
        return resp
    else:
        message = {
            'status': 404,
            'message': 'bad data',
        }
        resp = jsonify(message)
        resp.status_code = 404
        return resp
def stages(jobid):
    if request.method == 'GET':
        job = Job.objects(jobid=int(jobid)).first()
        stages = Stage.objects(job=job)
        itemLst = [dict(stage.to_mongo()) for stage in stages]
        return mongodoc_jsonify(items=itemLst)
    if request.method == 'POST':
        if request_has_json():
            try:
                job = Job.objects(jobid=int(jobid)).first()
                reward = Reward.objects(itemid=request.json['reward']).first()
                model = Stage()
                model.job = job
                model.reward = reward
                model.rewardDesc = reward.itemid
                model.jobDesc = str(job.jobid)
                model.stage = request.json['stage']
                model.save()
                model.reload()
                return mongodoc_jsonify(item=model.to_mongo())
            except ValidationError as e:
                return jsonify(item=str(e))
def rewards():
    if 'userid' in session:
        user = User.query.get_or_404(session['userid'])
        if request.method == 'POST':
            form = RewardForm(request.form)
            if form.validate():
                print form.cost.data
                newreward = Reward(form.title.data, user, None, form.cost.data)
                db.session.add(newreward)
                db.session.commit()
                return redirect(url_for('rewards'))
            else:
                rewards = user.rewards.filter(
                    Reward.redeemed == False).order_by(
                        desc(Reward.dateadded)).all()
                return render_template('rewards.html',
                                       username=user.username,
                                       points=user.points,
                                       rewards=rewards,
                                       form=form,
                                       idform=RewardIdForm(),
                                       updateform=RewardUpdateForm(),
                                       containerclass="rewardcontainer")
        else:
            rewards = user.rewards.filter(Reward.redeemed == False).order_by(
                desc(Reward.dateadded)).all()
            return render_template('rewards.html',
                                   username=user.username,
                                   points=user.points,
                                   rewards=rewards,
                                   form=RewardForm(),
                                   idform=RewardIdForm(),
                                   updateform=RewardUpdateForm(),
                                   containerclass="rewardcontainer")
    # if no session, display default home page. TODO: convert to splash page
    else:
        return redirect(url_for('front'))
def create_new_reward(current_user):
    create_table = Reward(author=current_user, coins=0)
    db.session.add(create_table)
    db.session.commit()
class InfoGAIL(BaseGAIL):
    def __init__(self,
                 args,
                 logger,
                 state_size=2,
                 action_size=4,
                 context_size=1,
                 num_goals=4,
                 history_size=1,
                 dtype=torch.FloatTensor):
        super(InfoGAIL, self).__init__(args,
                                       logger,
                                       state_size=state_size,
                                       action_size=action_size,
                                       context_size=context_size,
                                       num_goals=num_goals,
                                       history_size=history_size,
                                       dtype=dtype)

        # Create networks
        self.policy_net = Policy(state_size=state_size * history_size,
                                 action_size=0,
                                 latent_size=context_size,
                                 output_size=action_size,
                                 hidden_size=64,
                                 output_activation='sigmoid')
        self.old_policy_net = Policy(state_size=state_size * history_size,
                                     action_size=0,
                                     latent_size=context_size,
                                     output_size=action_size,
                                     hidden_size=64,
                                     output_activation='sigmoid')

        # Use value network for calculating GAE. We should use this for
        # training the policy network.
        if args.use_value_net:
            # context_size contains num_goals
            self.value_net = Value(state_size * history_size + context_size,
                                   hidden_size=64)

        # Reward net is the discriminator network. Discriminator does not
        # receive the latent vector in InfoGAIL.
        self.reward_net = Reward(
            state_size * history_size,
            action_size,  # action size
            0,            # latent size
            hidden_size=64)

        self.posterior_net = DiscretePosterior(
            state_size=state_size * history_size,  # state
            action_size=0,                         # action
            latent_size=0,                         # context
            hidden_size=64,
            output_size=num_goals)

        self.opt_policy = optim.Adam(self.policy_net.parameters(), lr=0.0003)
        self.opt_reward = optim.Adam(self.reward_net.parameters(), lr=0.0003)
        self.opt_value = optim.Adam(self.value_net.parameters(), lr=0.0003)
        self.opt_posterior = optim.Adam(self.posterior_net.parameters(),
                                        lr=0.0003)

        # Create loss functions
        self.criterion = nn.BCELoss()
        self.criterion_posterior = nn.CrossEntropyLoss()

        self.create_environment()

    def checkpoint_data_to_save(self):
        return {
            'policy': self.policy_net,
            'value': self.value_net,
            'reward': self.reward_net,
            'posterior': self.posterior_net,
        }

    def load_checkpoint_data(self, checkpoint_path):
        assert os.path.exists(checkpoint_path), \
            'Checkpoint path does not exists {}'.format(checkpoint_path)
        checkpoint_data = torch.load(checkpoint_path)
        self.policy_net = checkpoint_data['policy']
        self.value_net = checkpoint_data['value']
        self.reward_net = checkpoint_data['reward']
        self.posterior_net = checkpoint_data['posterior']

    def update_params_for_batch(self,
                                states,
                                actions,
                                latent_c,
                                targets,
                                advantages,
                                expert_states,
                                expert_actions,
                                optim_batch_size,
                                optim_batch_size_exp,
                                optim_iters):
        '''Update parameters for one batch of data.

        Update the policy network, discriminator (reward) network and the
        posterior network here.
        '''
        args, dtype = self.args, self.dtype
        curr_id, curr_id_exp = 0, 0
        for _ in range(optim_iters):
            curr_batch_size = min(optim_batch_size, actions.size(0) - curr_id)
            curr_batch_size_exp = min(optim_batch_size_exp,
                                      expert_actions.size(0) - curr_id_exp)
            start_idx, end_idx = curr_id, curr_id + curr_batch_size

            state_var = Variable(states[start_idx:end_idx])
            action_var = Variable(actions[start_idx:end_idx])
            latent_c_var = Variable(latent_c[start_idx:end_idx])
            advantages_var = Variable(advantages[start_idx:end_idx])

            start_idx, end_idx = curr_id_exp, curr_id_exp + curr_batch_size_exp
            expert_state_var = Variable(expert_states[start_idx:end_idx])
            expert_action_var = Variable(expert_actions[start_idx:end_idx])

            # Update reward net
            self.opt_reward.zero_grad()

            # Backprop with expert demonstrations
            expert_output = self.reward_net(
                torch.cat((expert_state_var, expert_action_var), 1))
            expert_disc_loss = self.criterion(
                expert_output,
                Variable(
                    torch.zeros(expert_action_var.size(0), 1).type(dtype)))
            expert_disc_loss.backward()

            # Backprop with generated demonstrations
            gen_output = self.reward_net(torch.cat((state_var, action_var), 1))
            gen_disc_loss = self.criterion(
                gen_output,
                Variable(torch.ones(action_var.size(0), 1)).type(dtype))
            gen_disc_loss.backward()

            # Add loss scalars.
            self.logger.summary_writer.add_scalars(
                'loss/discriminator',
                {
                    'total': expert_disc_loss.data[0] + gen_disc_loss.data[0],
                    'expert': expert_disc_loss.data[0],
                    'gen': gen_disc_loss.data[0],
                },
                self.gail_step_count)
            self.opt_reward.step()

            reward_l2_norm, reward_grad_l2_norm = \
                get_weight_norm_for_network(self.reward_net)
            self.logger.summary_writer.add_scalar('weight/discriminator/param',
                                                  reward_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/discriminator/grad',
                                                  reward_grad_l2_norm,
                                                  self.gail_step_count)

            # Update posterior net. We need to do this by reparameterization
            # trick.
            predicted_posterior = self.posterior_net(state_var)
            # There is no GOAL info in latent_c_var here.
            # TODO: This 0 and -1 stuff is not needed here. Confirm?
            _, true_posterior = torch.max(latent_c_var.data, dim=1)
            posterior_loss = self.criterion_posterior(predicted_posterior,
                                                      Variable(true_posterior))
            posterior_loss.backward()
            self.logger.summary_writer.add_scalar('loss/posterior',
                                                  posterior_loss.data[0],
                                                  self.gail_step_count)

            # compute old and new action probabilities
            action_means, action_log_stds, action_stds = self.policy_net(
                torch.cat((state_var, latent_c_var), 1))
            log_prob_cur = normal_log_density(action_var,
                                              action_means,
                                              action_log_stds,
                                              action_stds)

            action_means_old, action_log_stds_old, action_stds_old = \
                self.old_policy_net(torch.cat((state_var, latent_c_var), 1))
            log_prob_old = normal_log_density(action_var,
                                              action_means_old,
                                              action_log_stds_old,
                                              action_stds_old)

            if args.use_value_net:
                # update value net
                self.opt_value.zero_grad()
                value_var = self.value_net(
                    torch.cat((state_var, latent_c_var), 1))
                value_loss = (value_var - \
                    targets[curr_id:curr_id + curr_batch_size]).pow(2.).mean()
                value_loss.backward()
                self.opt_value.step()

            # Update policy net (PPO step)
            self.opt_policy.zero_grad()
            ratio = torch.exp(log_prob_cur - log_prob_old)  # pnew / pold
            surr1 = ratio * advantages_var[:, 0]
            surr2 = torch.clamp(
                ratio,
                1.0 - self.args.clip_epsilon,
                1.0 + self.args.clip_epsilon) * advantages_var[:, 0]
            policy_surr = -torch.min(surr1, surr2).mean()
            policy_surr.backward()
            # torch.nn.utils.clip_grad_norm(self.policy_net.parameters(), 40)
            self.opt_policy.step()

            self.logger.summary_writer.add_scalar('loss/policy',
                                                  policy_surr.data[0],
                                                  self.gail_step_count)

            policy_l2_norm, policy_grad_l2_norm = \
                get_weight_norm_for_network(self.policy_net)
            self.logger.summary_writer.add_scalar('weight/policy/param',
                                                  policy_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/policy/grad',
                                                  policy_grad_l2_norm,
                                                  self.gail_step_count)

            # set new starting point for batch
            curr_id += curr_batch_size
            curr_id_exp += curr_batch_size_exp

            self.gail_step_count += 1

    def update_params(self, gen_batch, expert_batch, episode_idx,
                      optim_epochs, optim_batch_size):
        '''Update params for Policy (G), Reward (D) and Posterior (q) networks.
        '''
        args, dtype = self.args, self.dtype

        self.opt_policy.lr = self.args.learning_rate \
            * max(1.0 - float(episode_idx) / args.num_epochs, 0)
        clip_epsilon = self.args.clip_epsilon \
            * max(1.0 - float(episode_idx) / args.num_epochs, 0)

        # generated trajectories
        states = torch.Tensor(np.array(gen_batch.state)).type(dtype)
        actions = torch.Tensor(np.array(gen_batch.action)).type(dtype)
        rewards = torch.Tensor(np.array(gen_batch.reward)).type(dtype)
        masks = torch.Tensor(np.array(gen_batch.mask)).type(dtype)

        ## Expand states to include history ##
        # Generated trajectories already have history in them.
        latent_c = torch.Tensor(np.array(gen_batch.c)).type(dtype)
        values = None
        if args.use_value_net:
            values = self.value_net(Variable(torch.cat((states, latent_c), 1)))

        # expert trajectories
        list_of_expert_states, list_of_expert_actions = [], []
        list_of_masks = []
        for i in range(len(expert_batch.state)):
            ## Expand expert states ##
            expanded_states = self.expand_states_numpy(expert_batch.state[i],
                                                       self.history_size)
            list_of_expert_states.append(torch.Tensor(expanded_states))
            list_of_expert_actions.append(torch.Tensor(expert_batch.action[i]))
            list_of_masks.append(torch.Tensor(expert_batch.mask[i]))

        expert_states = torch.cat(list_of_expert_states, 0).type(dtype)
        expert_actions = torch.cat(list_of_expert_actions, 0).type(dtype)
        expert_masks = torch.cat(list_of_masks, 0).type(dtype)

        assert expert_states.size(0) == expert_actions.size(0), \
            "Expert transition size do not match"
        assert expert_states.size(0) == expert_masks.size(0), \
            "Expert transition size do not match"

        # compute advantages
        returns, advantages = get_advantage_for_rewards(rewards,
                                                        masks,
                                                        self.args.gamma,
                                                        values,
                                                        dtype=dtype)
        targets = Variable(returns)
        advantages = (advantages - advantages.mean()) / advantages.std()

        # Backup params after computing probs but before updating new params
        for old_policy_param, policy_param in zip(
                self.old_policy_net.parameters(),
                self.policy_net.parameters()):
            old_policy_param.data.copy_(policy_param.data)

        # update value, reward and policy networks
        optim_iters = self.args.batch_size // optim_batch_size
        optim_batch_size_exp = expert_actions.size(0) // optim_iters

        # Remove extra 1 array shape from actions, since actions were added as
        # 1-hot vector of shape (1, A).
        actions = np.squeeze(actions)
        expert_actions = np.squeeze(expert_actions)

        for _ in range(optim_epochs):
            perm = np.random.permutation(np.arange(actions.size(0)))
            perm_exp = np.random.permutation(np.arange(expert_actions.size(0)))
            if args.cuda:
                perm = torch.cuda.LongTensor(perm)
                perm_exp = torch.cuda.LongTensor(perm_exp)
            else:
                perm, perm_exp = torch.LongTensor(perm), torch.LongTensor(
                    perm_exp)

            self.update_params_for_batch(
                states[perm],
                actions[perm],
                latent_c[perm],
                targets[perm],
                advantages[perm],
                expert_states[perm_exp],
                expert_actions[perm_exp],
                optim_batch_size,
                optim_batch_size_exp,
                optim_iters)

    def train_gail(self, expert):
        '''Train Info-GAIL.'''
        args, dtype = self.args, self.dtype
        results = {
            'average_reward': [],
            'episode_reward': [],
            'true_traj': {},
            'pred_traj': {}
        }

        self.train_step_count, self.gail_step_count = 0, 0

        for ep_idx in range(args.num_epochs):
            memory = Memory()

            num_steps = 0
            reward_batch, true_reward_batch = [], []
            expert_true_reward_batch = []
            true_traj_curr_episode, gen_traj_curr_episode = [], []

            while num_steps < args.batch_size:
                traj_expert = expert.sample(size=1)
                state_expert, action_expert, _, _ = traj_expert

                # Expert state and actions
                state_expert = state_expert[0]
                action_expert = action_expert[0]
                expert_episode_len = len(state_expert)

                # Sample start state or should we just choose the start state
                # from the expert trajectory sampled above.
                # curr_state_obj = self.sample_start_state()
                curr_state_obj = State(state_expert[0], self.obstacles)
                curr_state_feat = self.get_state_features(
                    curr_state_obj, self.args.use_state_features)

                # Add history to state
                if args.history_size > 1:
                    curr_state = -1 * np.ones(
                        (args.history_size * curr_state_feat.shape[0]),
                        dtype=np.float32)
                    curr_state[(args.history_size - 1) \
                        * curr_state_feat.shape[0]:] = curr_state_feat
                else:
                    curr_state = curr_state_feat

                # TODO: Make this a separate function.
                # Can be parallelized.
                ep_reward, ep_true_reward, expert_true_reward = 0, 0, 0
                true_traj, gen_traj = [], []
                gen_traj_dict = {
                    'features': [],
                    'actions': [],
                    'c': [],
                    'mask': []
                }
                disc_reward, posterior_reward = 0.0, 0.0

                # Use a hard-coded list for memory to gather experience since
                # we need to mutate it before finally creating a memory object.
                c_sampled = np.zeros((self.num_goals), dtype=np.float32)
                c_sampled[np.random.randint(0, self.num_goals)] = 1.0
                c_sampled_tensor = torch.zeros((1)).type(torch.LongTensor)
                c_sampled_tensor[0] = int(np.argmax(c_sampled))
                if self.args.cuda:
                    c_sampled_tensor = torch.cuda.LongTensor(c_sampled_tensor)

                memory_list = []
                for t in range(expert_episode_len):
                    action = self.select_action(
                        np.concatenate((curr_state, c_sampled)))
                    action_numpy = action.data.cpu().numpy()

                    # Save generated and true trajectories
                    true_traj.append((state_expert[t], action_expert[t]))
                    gen_traj.append((curr_state_obj.coordinates, action_numpy))
                    gen_traj_dict['features'].append(
                        self.get_state_features(curr_state_obj,
                                                self.args.use_state_features))
                    gen_traj_dict['actions'].append(action_numpy)
                    gen_traj_dict['c'].append(c_sampled)

                    action = epsilon_greedy_linear_decay(action_numpy,
                                                         args.num_epochs * 0.5,
                                                         ep_idx,
                                                         self.action_size,
                                                         low=0.05,
                                                         high=0.3)

                    # Get the discriminator reward
                    disc_reward_t = float(
                        self.reward_net(
                            torch.cat(
                                (Variable(
                                    torch.from_numpy(curr_state).unsqueeze(
                                        0)).type(dtype),
                                 Variable(
                                     torch.from_numpy(
                                         oned_to_onehot(
                                             action, self.action_size)).
                                     unsqueeze(0)).type(dtype)),
                                1)).data.cpu().numpy()[0, 0])

                    if args.use_log_rewards and disc_reward_t < 1e-6:
                        disc_reward_t += 1e-6

                    disc_reward_t = -math.log(disc_reward_t) \
                        if args.use_log_rewards else -disc_reward_t
                    disc_reward += disc_reward_t

                    # Predict c given (x_t)
                    predicted_posterior = self.posterior_net(
                        Variable(torch.from_numpy(curr_state).unsqueeze(
                            0)).type(dtype))
                    posterior_reward_t = self.criterion_posterior(
                        predicted_posterior,
                        Variable(c_sampled_tensor)).data.cpu().numpy()[0]
                    posterior_reward += (self.args.lambda_posterior *
                                         posterior_reward_t)

                    # Update Rewards
                    ep_reward += (disc_reward_t + posterior_reward_t)

                    true_goal_state = [
                        int(x) for x in state_expert[-1].tolist()
                    ]
                    if self.args.flag_true_reward == 'grid_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            curr_state_obj.coordinates,
                            goals=[true_goal_state])
                        expert_true_reward += \
                            self.true_reward.reward_at_location(
                                state_expert[t], goals=[true_goal_state])
                    elif self.args.flag_true_reward == 'action_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            np.argmax(action_expert[t]), action)
                        expert_true_reward += \
                            self.true_reward.corret_action_reward
                    else:
                        raise ValueError("Incorrect true reward type")

                    # Update next state
                    next_state_obj = self.transition_func(
                        curr_state_obj, Action(action), 0)
                    next_state_feat = self.get_state_features(
                        next_state_obj, self.args.use_state_features)
                    # next_state = running_state(next_state)

                    mask = 0 if t == expert_episode_len - 1 else 1

                    # Push to memory
                    memory_list.append([
                        curr_state,
                        np.array([oned_to_onehot(action, self.action_size)]),
                        mask,
                        next_state_feat,
                        disc_reward_t + posterior_reward_t,
                        c_sampled,
                        c_sampled
                    ])

                    if args.render:
                        env.render()

                    if not mask:
                        break

                    curr_state_obj = next_state_obj
                    curr_state_feat = next_state_feat

                    if args.history_size > 1:
                        curr_state[:(args.history_size - 1) \
                            * curr_state_feat.shape[0]] = \
                            curr_state[curr_state_feat.shape[0]:]
                        curr_state[(args.history_size - 1) \
                            * curr_state_feat.shape[0]:] = curr_state_feat
                    else:
                        curr_state = curr_state_feat

                assert memory_list[-1][2] == 0, \
"Mask for final end state is not 0." for memory_t in memory_list: memory.push(*memory_t) self.logger.summary_writer.add_scalars( 'gen_traj/gen_reward', { 'discriminator': disc_reward, 'posterior': posterior_reward, }, self.train_step_count) num_steps += (t - 1) reward_batch.append(ep_reward) true_reward_batch.append(ep_true_reward) expert_true_reward_batch.append(expert_true_reward) results['episode_reward'].append(ep_reward) # Append trajectories true_traj_curr_episode.append(true_traj) gen_traj_curr_episode.append(gen_traj) results['average_reward'].append(np.mean(reward_batch)) # Add to tensorboard self.logger.summary_writer.add_scalars( 'gen_traj/reward', { 'average': np.mean(reward_batch), 'max': np.max(reward_batch), 'min': np.min(reward_batch) }, self.train_step_count) self.logger.summary_writer.add_scalars( 'gen_traj/true_reward', { 'average': np.mean(true_reward_batch), 'max': np.max(true_reward_batch), 'min': np.min(true_reward_batch), 'expert_true': np.mean(expert_true_reward_batch) }, self.train_step_count) # Add predicted and generated trajectories to results if ep_idx % self.args.save_interval == 0: results['true_traj'][ep_idx] = copy.deepcopy( true_traj_curr_episode) results['pred_traj'][ep_idx] = copy.deepcopy( gen_traj_curr_episode) # Update parameters gen_batch = memory.sample() # We do not get the context variable from expert trajectories. # Hence we need to fill it in later. expert_batch = expert.sample(size=args.num_expert_trajs) self.update_params(gen_batch, expert_batch, ep_idx, args.optim_epochs, args.optim_batch_size) self.train_step_count += 1 if ep_idx > 0 and ep_idx % args.log_interval == 0: print('Episode [{}/{}] Avg R: {:.2f} Max R: {:.2f} \t' \ 'True Avg {:.2f} True Max R: {:.2f} ' \ 'Expert (Avg): {:.2f}'.format( ep_idx, args.num_epochs, np.mean(reward_batch), np.max(reward_batch), np.mean(true_reward_batch), np.max(true_reward_batch), np.mean(expert_true_reward_batch))) results_path = os.path.join(args.results_dir, 'results.pkl') with open(results_path, 'wb') as results_f: pickle.dump((results), results_f, protocol=2) # print("Did save results to {}".format(results_path)) if ep_idx % args.save_interval == 0: checkpoint_filepath = self.model_checkpoint_filepath(ep_idx) torch.save(self.checkpoint_data_to_save(), checkpoint_filepath) print("Did save checkpoint: {}".format(checkpoint_filepath))
def save_draft_to_campaign(self, campaign, draft_data):
    form_campaign_info = CampaignInfoForm.from_json(
        draft_data["form_campaign_info"])
    form_basics = CampaignBasicForm.from_json(draft_data["form_basics"])
    form_paid = CampaignPaidForm.from_json(draft_data["form_paid"])
    form_bonus_reward = CampaignBonusRewardForm.from_json(
        draft_data["form_bonus_reward"])
    form_confirmation_message = CampaignConfirmationMessageForm.from_json(
        draft_data["form_confirmation_message"])

    campaign.campaign_type_id = form_basics.campaign_type_id.data
    campaign.category_id = form_basics.category_id.data
    campaign.campaign_receiver_id = form_basics.campaign_receiver_id.data
    campaign.funding_goal = float(str(form_basics.dollar_amount.data))
    campaign.expiration_date = form_basics.deadline.data
    campaign.fulfillment_service = form_basics.fulfillment_service.data
    campaign.evergreen_campaign_page = form_basics.evergreen_campaign_page.data
    campaign.campaign_management = form_basics.campaign_management.data

    campaign.description = form_campaign_info.full_description.data
    campaign.short_description = form_campaign_info.description.data
    campaign.title = form_campaign_info.campaign_title.data
    campaign.thumbnail_url = form_campaign_info.thumbnail_url.data
    campaign.confirmation_message = form_confirmation_message.confirmation_message.data
    campaign.vanity_url = urlify(campaign.title)

    # Deactivate all existing rewards; the draft re-activates the ones it keeps.
    for i_reward in campaign.rewards:
        i_reward.is_active = False

    for idx, reward_forms in enumerate(draft_data["form_rewards"]):
        form_reward = CampaignRewardForm.from_json(reward_forms)

        # Reuse the existing reward with a matching id, if any.
        current_reward = None
        for i_reward in campaign.rewards:
            if str(i_reward.id).lower() == str(form_reward.id.data).lower():
                current_reward = i_reward
                i_reward.is_active = True
                break
        if current_reward is None:
            current_reward = Reward()
            current_reward.claimed = 0
            current_reward.id = form_reward.id.data
            campaign.rewards.append(current_reward)

        current_reward.ordinal = idx
        current_reward.is_featured = form_reward.is_featured.data
        current_reward.title = form_reward.name.data
        current_reward.inventory = form_reward.quantity.data if form_reward.quantity.data else 0
        current_reward.cost = form_reward.dollar_amount.data
        current_reward.thumbnail_url = form_reward.thumbnail_url.data
        current_reward.description = form_reward.description.data
        current_reward.delivery_date = form_reward.delivery_date.data
        current_reward.is_shipping_required = form_reward.shipping_required.data
        current_reward.is_limited_quantity = form_reward.limited_quantity.data
        current_reward.international_shipping_fee = form_reward.international_shipping_fee.data if form_reward.international_shipping_fee.data else 0
        current_reward.shipping_fee = form_reward.shipping_fee.data if form_reward.shipping_fee.data else 0

    if campaign.bonus_reward is None:
        campaign.bonus_reward = Reward()
        campaign.bonus_reward.campaign = campaign
        campaign.bonus_reward.claimed = 0
    campaign.bonus_reward.referrals_needed = form_bonus_reward.referrals_needed.data
    campaign.bonus_reward.title = form_bonus_reward.name.data
    campaign.bonus_reward.inventory = form_bonus_reward.quantity.data if form_bonus_reward.quantity.data else 0
    campaign.bonus_reward.cost = 0
    campaign.bonus_reward.thumbnail_url = form_bonus_reward.thumbnail_url.data
    campaign.bonus_reward.description = form_bonus_reward.description.data
    campaign.bonus_reward.delivery_date = form_bonus_reward.delivery_date.data
    campaign.bonus_reward.is_shipping_required = form_bonus_reward.shipping_required.data
    campaign.bonus_reward.is_limited_quantity = form_bonus_reward.limited_quantity.data
    campaign.bonus_reward.international_shipping_fee = form_bonus_reward.international_shipping_fee.data if form_bonus_reward.international_shipping_fee.data else 0
    campaign.bonus_reward.shipping_fee = form_bonus_reward.shipping_fee.data if form_bonus_reward.shipping_fee.data else 0

    return campaign
# Initialise training environment and experience replay memory
env = GymEnv(cfg)
replay = Replay(cfg, env.action_size)

# Initialise dataset replay with S random seed episodes
for s in range(cfg['seed_episodes']):
    observation = env.reset()
    done = False
    while not done:
        next_observation, action, reward, done = env.step()
        replay.append(observation, action, reward, done)
        observation = next_observation

# Init PlaNet
transition_model = Transition(cfg)
observation_model = Observation(cfg)
reward_model = Reward(cfg)
encoder = Encoder(cfg)
optim = tf.train.AdamOptimizer(cfg['learning_rate'], epsilon=cfg['optim_eps'])
planner = MPCPlanner(cfg, env.action_size, transition_model, reward_model)
global_prior = tf.distributions.Normal(
    tf.zeros([cfg['batch_size'], cfg['state_size']]),
    tf.ones([cfg['batch_size'], cfg['state_size']]))  # Global prior N(0, I)
free_nats = tf.fill(dims=[1, ],
                    value=cfg['free_nats'])  # Allowed deviation in KL divergence

# Training
for episode in trange(cfg['train']['episodes']):
    # Model fitting
    losses = []
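# free_nats above is typically used to clip the KL term of the latent-state
# loss so the model is not penalised below a fixed number of nats. A hedged,
# self-contained sketch of that pattern using the same TF1-style distribution
# API as above; the posterior and prior here are stand-ins, not the real
# model outputs.
import tensorflow as tf

posterior = tf.distributions.Normal(tf.zeros([50, 30]), tf.ones([50, 30]))
prior = tf.distributions.Normal(tf.zeros([50, 30]), tf.ones([50, 30]))
free_nats_example = tf.fill(dims=[1], value=3.0)
kl = tf.distributions.kl_divergence(posterior, prior)
# Clip the KL below the free-nats threshold before averaging.
kl_loss = tf.reduce_mean(tf.maximum(kl, free_nats_example))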
def list_rewards():
    rewards = Reward.objects()
    return get_items(request, rewards)
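# list_rewards (and the reward view further up) delegate to a get_items
# helper that is not included in these snippets. A minimal sketch of what
# such a helper might do with a MongoEngine queryset, assuming the documents
# expose to_mongo() as in the stages view above; only the call signature
# comes from the surrounding code, the body is an assumption.
from flask import jsonify

def get_items(request, items):
    # Serialize each document to a plain dict and return the list as JSON.
    return jsonify(items=[dict(item.to_mongo()) for item in items])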
torch.manual_seed(args.seed)

policy_net = Policy(num_inputs, 0, num_c, num_actions, hidden_size=64,
                    output_activation='sigmoid').type(dtype)
old_policy_net = Policy(num_inputs, 0, num_c, num_actions, hidden_size=64,
                        output_activation='sigmoid').type(dtype)
#value_net = Value(num_inputs+num_c, hidden_size=64).type(dtype)
reward_net = Reward(num_inputs, num_actions, num_c,
                    hidden_size=64).type(dtype)
posterior_net = Posterior(num_inputs, num_actions, num_c,
                          hidden_size=64).type(dtype)

opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
#opt_value = optim.Adam(value_net.parameters(), lr=0.0003)
opt_reward = optim.Adam(reward_net.parameters(), lr=0.0003)
opt_posterior = optim.Adam(posterior_net.parameters(), lr=0.0003)

def epsilon_greedy_linear_decay(action_vector, n_episodes, n,
                                low=0.1, high=0.9):
    if n <= n_episodes:
def edit_reward_form(id):
    item = Reward.objects(itemid=id).first()
    form = RewardForm(request.form, obj=item)
    return render_template('add_reward.html', form=form, edit=True)
def register():
    # if post data exists
    if request.method == 'POST':
        form = RegistrationForm(request.form)
        if form.validate():
            usernm = form.username.data
            pw = form.password.data
            verify = form.verify.data
            email = form.email.data
            userquery = User.query.filter_by(username=usernm.lower()).first()
            emailquery = User.query.filter_by(email=email.lower()).first()
            # if query returns a user or email address from database,
            # check if it matches post data
            if userquery != None:
                # convert name data from POST to lowercase and check vs
                # database (name also in lowercase)
                if usernm.lower() == userquery.username:
                    #flash("Username is already in use. Please choose another.")
                    form.username.errors.append(
                        "Username is already in use. Please choose another.")
                    return render_template('front.html',
                                           regform=form,
                                           loginform=LoginForm(),
                                           containerclass="frontcontainer")
            if emailquery != None:
                if email.lower() == emailquery.email:
                    form.email.errors.append(
                        "Email address is already in use. Please use another.")
                    return render_template('front.html',
                                           regform=form,
                                           loginform=LoginForm(),
                                           containerclass="frontcontainer")
            # if query does not return a name, check if form passwords match
            elif pw != verify:
                #flash("Password doesn't match verification. Please try again.")
                form.password.errors.append(
                    "Password doesn't match verification. Please try again.")
                return render_template('front.html',
                                       regform=form,
                                       loginform=LoginForm(),
                                       containerclass="frontcontainer")
            # if passwords match, hash the password and store the user in
            # database, username in lowercase
            elif pw == verify:
                pwhash = generate_password_hash(pw)
                newuser = User(usernm.lower(), email.lower(), pwhash)
                db.session.add(newuser)
                newtask = Task(
                    "Welcome to List and Reward. Have a free point!",
                    newuser, None, 1)
                db.session.add(newtask)
                newreward = Reward(
                    "Pat yourself on the back. You deserve it.",
                    newuser, None, 1)
                db.session.add(newreward)
                db.session.commit()
                msg = Message("L+R Registration Successful",
                              sender=("List and Reward", "*****@*****.**"),
                              recipients=[newuser.email])
                msg.body = "Thanks for registering!"
                mail.send(msg)
                session['userid'] = newuser.id
                return redirect(url_for('tasks'))
        else:
            return render_template('front.html',
                                   regform=form,
                                   loginform=LoginForm(),
                                   containerclass="frontcontainer")
    # form view when no POST data
    else:
        return render_template('front.html',
                               loginform=LoginForm(),
                               regform=RegistrationForm(),
                               containerclass="frontcontainer")
env = gym.make(args.env_name)
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    policy_net = Policy(num_inputs, num_actions)
    old_policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    reward_net = Reward(num_inputs, num_actions)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)
    opt_reward = optim.Adam(reward_net.parameters(), lr=0.0003)

def select_action(state):
    state = torch.from_numpy(state).unsqueeze(0)
    action_mean, _, action_std = policy_net(Variable(state))
    action = torch.normal(action_mean, action_std)
    return action

def select_action_actor_critic(state):
    state = torch.from_numpy(state).unsqueeze(0)
    action_mean, _, action_std, v = ac_net(Variable(state))
def manage_reward(request, key):
    form = None
    riddle_form = None
    condition_form = None
    try:
        # loading existing reward
        reward = Reward.objects.get(key=key)
    except Reward.DoesNotExist:
        # creating new reward, initialize variables
        reward = Reward()
        reward.key = key

    riddle = reward.riddle_set.all()
    if riddle.count() == 0:
        riddle = Riddle()
    else:
        riddle = riddle[0]

    condition = reward.condition_set.all()
    if condition.count() == 0:
        condition = Condition()
    else:
        condition = condition[0]

    if request.method == 'POST':
        form = RewardManageForm(instance=reward, data=request.POST)
        if form.is_valid():
            reward = form.save()
            blob_infos = get_blobinfo_from_post(request)
            for blob_info in blob_infos:
                #print "blobkey: " + str(blob_info.key())
                image = Image()
                image.blob_key = str(blob_info.key())
                image.reward = reward
                image.save()
            riddle.reward = reward
            riddle_form = RiddleManageForm(instance=riddle, data=request.POST)
            if riddle_form.is_valid():
                riddle_form.save()
            condition.reward = reward
            condition_form = ConditionManageForm(instance=condition,
                                                 data=request.POST)
            if condition_form.is_valid():
                condition_form.save()

    images = list(reward.image_set.all())
    images.append(Image())

    if form is None:
        form = RewardManageForm(instance=reward)
    image_forms = []
    for image in images:
        image_forms.append(ImageManageForm(instance=image))
    if riddle_form is None:
        riddle_form = RiddleManageForm(instance=riddle)
    if condition_form is None:
        condition_form = ConditionManageForm(instance=condition)

    url = reverse('manage_reward', kwargs={'key': key})
    upload_url = blobstore.create_upload_url(url)
    return render_to_response('reward/manage.html',
                              {'upload_url': upload_url,
                               'form': form,
                               'reward': reward,
                               'image_forms': image_forms,
                               'riddle_form': riddle_form,
                               'condition_form': condition_form},
                              context_instance=RequestContext(request))
    id=uuid.uuid4(),
    text=dummy_text,
    user_id=ted.id,
)
c.updates.append(ci)

for i in range(6):
    inventory_count = randint(1, 500)
    r = Reward(
        id=uuid.uuid4(),
        title='Something Awesome%s' % i,
        description="This is the description and lets see it a little longer then normal. This is the description and lets see it a little longer then normal. This is the description and lets see it a little longer then normal. ",
        thumbnail_url="http://res.cloudinary.com/hzdmrhkl4/image/upload/v1398927057/hs9gf9a9lbvjenzpsomo.jpg",
        is_active=True,
        is_available=True,
        cost=randint(50, 500),
        delivery_date="2014-05-22 00:00:00",
        inventory=inventory_count,
        is_limited_quantity=True,
        is_shipping_required=True,
        claimed=randint(1, inventory_count))
    c.rewards.append(r)

db.session.add(c)
db.session.commit()

for i in range(30):
    b = User(first_name=str('Joe%s' % i),
             last_name=str('Backer%s' % i),