def sms_maama_weekly(request, as_pdf=False):
    """Assemble the SMS Maama weekly statistics context.

    When ``as_pdf`` is True the raw context dict is returned so a PDF
    builder can reuse it; otherwise the HTML weekly report is rendered.
    """
    # Reporting window: the last seven days, ending now.
    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(days=7)
    today = now()
    context = {
        'groups': Group.get_sms_maama_groups(),
        'contacts': Contact.get_sms_maama_weekly_contacts(),
        'sms_maama_contacts': Contact.get_sms_maama_contacts(),
        'sent_messages': Message.get_sms_maama_sent_messages(),
        'delivered_messages': Message.get_sms_maama_delivered_messages(),
        'failed_messages': Message.get_sms_maama_failed_messages(),
        'failed_messages_count': Message.get_sms_maama_failed_messages_count(),
        'contacts_count': Contact.get_sms_maama_contacts_count(),
        'weekly_contacts_count': Contact.get_sms_maama_weekly_contacts_count(),
        'messages_count': Message.get_sms_maama_sent_messages_count(),
        'read_messages_count': Message.get_sms_maama_read_messages_count(),
        'hanging_messages_count': Message.get_sms_maama_hanging_messages_count(),
        'unread_messages': Message.get_sms_maama_unread_messages(),
        'flow_responses': Message.get_sms_maama_flow_responses(),
        'flow_responses_count': Message.get_sms_maama_flow_responses_count(),
        'baby_responses': Message.get_sms_maama_flow_responses_baby(),
        'baby_responses_count': Message.get_sms_maama_flow_responses_baby_count(),
        'stops': Message.get_sms_maama_opted_out(),
        'stops_count': Message.get_sms_maama_opted_out_count(),
        'flows': Value.sms_maama_contact_flows_values(),
        'antenatal_responses': Value.sms_maama_contact_flows_antenatal_values(),
        'enrollments': Message.get_sms_maama_flow_responses_enrollment(),
        'start_date': window_start,
        'end_date': window_end,
        'this_day': today,
    }
    if as_pdf:
        return context
    return render_to_response('qcreports/sms_maama_weekly_report.html',
                              context, RequestContext(request))
def insert_or_replace(self, session, key, value):
    """Upsert ``key`` -> ``value``.

    Reuses an existing Value row when one matches the blob; otherwise
    wraps the blob in a fresh Value. Existing entries are repointed in
    place, new ones are staged on the session.
    """
    stored_value = self.lookup_value(session, value) or Value(
        hash=self.hash_value(value), blob=value)
    entry = self.lookup_entry(session, key)
    if not entry:
        # No entry for this key yet -- create one and stage it for insert.
        session.add(Entry(key=key, value=stored_value))
    else:
        # Key already present -- just repoint it at the resolved value.
        entry.value = stored_value
def update_or_create(testcase, env, build, metric, value=None, comment=None, color=None):
    """Update testresults/settings if exist, otherwise create new ones.

    :return created True if created new results, otherwise False
    """
    settings, _ = Settings.objects.get_or_create(testcase=testcase, metric=metric)
    results, was_created = TestResults.objects.get_or_create(
        build=build, testcase=testcase, env=env, metric=metric,
        tag=gen_tag(build), settings=settings)
    # Stamp every call, whether the row is new or reused.
    results.timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    if value:
        results.value_set.add(Value(value=value))
    if comment:
        results.comment = comment
    if color:
        results.color = color
    results.save()
    return was_created
def sensor(request): sensor_name = request.GET.get("sensor", "") value = request.GET.get("value", "") time = request.GET.get("time", "") from django.utils.timezone import get_current_timezone from django.utils import timezone if len(time) == 0: time_p = timezone.now() else: time_p = datetime.datetime.fromtimestamp(int(time), tz=get_current_timezone()) print time_p objs = Sensor.objects.filter(name=sensor_name) sensor = None if len(objs) > 0: sensor = objs[0] else: sensor = Sensor() sensor.name = sensor_name sensor.save() try: latest = Value.objects.filter(sensor=sensor).order_by("-pub_date")[0] # print "latest",str(latest) if time_p < latest.pub_date: print "WARNNING: Sensor time should be greater than last in DB" if latest.value == float(value): result = "No need to update data for sensor %s" % sensor print result HttpResponse(result) except IndexError: pass v = Value(sensor=sensor, pub_date=time_p, value=float(value)) v.save() return HttpResponse(str(sensor.name) + ":" + str(value))
def post_evaluate(models_path, sigma, n_post_episodes=5, add_noise=False):
    """Load saved policy/value nets from ``models_path`` and evaluate.

    Rolls out ``n_post_episodes`` episodes (capped at 1000 steps each)
    and returns the mean episode reward. ``sigma``/``add_noise`` only
    apply when parameter noise is enabled.
    """
    # Pick the policy architecture matching how the checkpoint was trained.
    policy_cls = PolicyLayerNorm if args.use_parameter_noise else Policy
    eval_policy = policy_cls(num_inputs, num_actions)
    eval_value = Value(num_inputs)
    eval_value.load_state_dict(torch.load(models_path + "_value"))
    eval_policy.load_state_dict(torch.load(models_path + "_policy"))
    total_reward = 0
    for _ in range(n_post_episodes):
        state = env.reset()
        for t in range(1000):
            if args.use_parameter_noise and add_noise:
                action = select_action(eval_policy, state, sigma, add_noise=True)
            else:
                action = select_action(eval_policy, state)
            next_state, reward, done, _ = env.step(action.data[0].numpy())
            total_reward += reward
            if done:
                break
            state = next_state
    mean_reward = total_reward / n_post_episodes
    print('___Post evaluation reward___')
    print(mean_reward)
    return mean_reward
def merge_person_property(dbsession, person, property, value, source):
    """Merge the given ``property`` with ``value`` into the ``person``.

    Attribute the change to the ``source``. ``value`` can be a string or a
    dictionary with keys "label", "lang", and "value". ``source`` is a
    dictionary with keys "label", "source", and "timestamp".
    """
    if isinstance(value, dict):
        label = value.get('label')
        lang = value.get('lang')
        value = value['value']
    else:
        label, lang = None, None
    # Find an existing property row carrying this exact value/lang pair.
    db_property = dbsession.query(PersonProperty).join(Value).filter(
        and_(PersonProperty.person_id == person.id,
             PersonProperty.name == property,
             Value.value == value,
             Value.lang == lang)).first()
    if not db_property:
        # Reuse a matching Value row if one exists, else create it.
        db_value = dbsession.query(Value).filter(
            and_(Value.value == value, Value.lang == lang)).first()
        if not db_value:
            db_value = Value(label=label, value=value, lang=lang)
            dbsession.add(db_value)
        db_property = PersonProperty(person=person, name=property,
                                     value=db_value, status='unconfirmed')
        dbsession.add(db_property)
    # Attribute the property to the source, refreshing the timestamp when
    # this source already vouched for it.
    property_source = dbsession.query(PersonPropertySource).join(Source).filter(
        and_(PersonPropertySource.property == db_property,
             Source.url == source['url'])).first()
    if property_source:
        property_source.timestamp = source['timestamp']
        dbsession.add(property_source)
    else:
        db_source = dbsession.query(Source).filter(
            Source.url == source['url']).first()
        if not db_source:
            db_source = Source(label=source['label'], url=source['url'])
            dbsession.add(db_source)
        property_source = PersonPropertySource(property=db_property,
                                               source=db_source,
                                               timestamp=source['timestamp'])
        dbsession.add(property_source)
    dbsession.commit()
    return db_property
def main(gamma=0.995, env_name='Walker2d-v2', tau=0.97, seed=543, number_of_batches=500,
         batch_size=5000, maximum_steps=10000, render=False, log_interval=1, entropy_coeff=0.0,
         clip_epsilon=0.2, use_joint_pol_val=False):
    """Train a PPO agent on ``env_name`` and plot per-batch average reward.

    Collects ~``batch_size`` steps per batch, updates policy/value nets via
    ``update_params``, and writes an offline plotly chart to ``PPO.html``.
    ``tau``/``entropy_coeff``/``clip_epsilon``/``use_joint_pol_val`` are
    accepted for interface compatibility but not read in this body.
    """
    torch.set_default_tensor_type('torch.DoubleTensor')
    PI = torch.DoubleTensor([3.1415926])  # kept for parity; unused below
    env = gym.make(env_name)
    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]
    env.seed(seed)
    torch.manual_seed(seed)
    policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
    opt_value = optim.Adam(value_net.parameters(), lr=0.001)
    # Observation/reward normalizers.
    running_state = ZFilter((num_inputs,), clip=5)
    running_reward = ZFilter((1,), demean=False, clip=10)
    episode_lengths = []
    plot_rew = []
    for i_episode in range(number_of_batches):
        memory = Memory()
        num_steps = 0
        reward_batch = 0
        num_episodes = 0
        while num_steps < batch_size:
            state = env.reset()
            state = running_state(state)
            reward_sum = 0
            for t in range(maximum_steps):  # don't infinite-loop while learning
                action = select_action(state, policy_net)
                action = action.data[0].numpy()
                next_state, reward, done, _ = env.step(action)
                reward_sum += reward
                next_state = running_state(next_state)
                mask = 0 if done else 1
                memory.push(state, np.array([action]), mask, next_state, reward)
                if render:
                    env.render()
                if done:
                    break
                state = next_state
            # NOTE(review): (t - 1) under-counts by 2 (t is the last loop
            # index, so t + 1 steps ran); preserved to keep training
            # behavior identical to the original.
            num_steps += (t - 1)
            num_episodes += 1
            reward_batch += reward_sum
        reward_batch /= num_episodes
        batch = memory.sample()
        plot_rew.append(reward_batch)
        update_params(batch, policy_net, value_net, gamma, opt_policy, opt_value)
        # BUG FIX: honor the ``log_interval`` parameter instead of the
        # global ``args.log_interval`` (which raised NameError when no
        # module-level ``args`` existed and silently ignored the argument).
        if i_episode % log_interval == 0:
            print('Episode {}\tLast reward: {}\tAverage reward {:.2f}'.format(
                i_episode, reward_sum, reward_batch))
    plot_epi = list(range(number_of_batches))
    trace = go.Scatter(x=plot_epi, y=plot_rew)
    layout = go.Layout(
        title='PPO',
        xaxis=dict(title='Episodes',
                   titlefont=dict(family='Courier New, monospace', size=18,
                                  color='#7f7f7f')),
        yaxis=dict(title='Average Reward',
                   titlefont=dict(family='Courier New, monospace', size=18,
                                  color='#7f7f7f')))
    plotly.offline.plot({"data": [trace], "layout": layout},
                        filename='PPO.html', image='jpeg')
def __init__(self, args, logger, state_size=2, action_size=4, context_size=1, num_goals=4, history_size=1, dtype=torch.FloatTensor): super(InfoGAIL, self).__init__(args, logger, state_size=state_size, action_size=action_size, context_size=context_size, num_goals=num_goals, history_size=history_size, dtype=dtype) # Create networks self.policy_net = Policy(state_size=state_size * history_size, action_size=0, latent_size=context_size, output_size=action_size, hidden_size=64, output_activation='sigmoid') self.old_policy_net = Policy(state_size=state_size * history_size, action_size=0, latent_size=context_size, output_size=action_size, hidden_size=64, output_activation='sigmoid') # Use value network for calculating GAE. We should use this for # training the policy network. if args.use_value_net: # context_size contains num_goals self.value_net = Value(state_size * history_size + context_size, hidden_size=64) # Reward net is the discriminator network. Discriminator does not # receive the latent vector in InfoGAIL. self.reward_net = Reward( state_size * history_size, action_size, # action size 0, # latent size hidden_size=64) self.posterior_net = DiscretePosterior( state_size=state_size * history_size, # state action_size=0, # action latent_size=0, # context hidden_size=64, output_size=num_goals) self.opt_policy = optim.Adam(self.policy_net.parameters(), lr=0.0003) self.opt_reward = optim.Adam(self.reward_net.parameters(), lr=0.0003) self.opt_value = optim.Adam(self.value_net.parameters(), lr=0.0003) self.opt_posterior = optim.Adam(self.posterior_net.parameters(), lr=0.0003) # Create loss functions self.criterion = nn.BCELoss() self.criterion_posterior = nn.CrossEntropyLoss() self.create_environment()
class InfoGAIL(BaseGAIL):
    """InfoGAIL agent: GAIL with a discrete latent context and a posterior
    network q(c|s) that is rewarded for recovering the sampled context.

    Networks: policy/old-policy (PPO-style updates), discriminator
    ("reward" net), optional value net for GAE, and a posterior net.
    """

    def __init__(self, args, logger, state_size=2, action_size=4,
                 context_size=1, num_goals=4, history_size=1,
                 dtype=torch.FloatTensor):
        """Build networks, optimizers (Adam, lr=3e-4) and loss criteria."""
        super(InfoGAIL, self).__init__(args, logger, state_size=state_size,
                                       action_size=action_size,
                                       context_size=context_size,
                                       num_goals=num_goals,
                                       history_size=history_size, dtype=dtype)
        # Create networks
        self.policy_net = Policy(state_size=state_size * history_size,
                                 action_size=0, latent_size=context_size,
                                 output_size=action_size, hidden_size=64,
                                 output_activation='sigmoid')
        self.old_policy_net = Policy(state_size=state_size * history_size,
                                     action_size=0, latent_size=context_size,
                                     output_size=action_size, hidden_size=64,
                                     output_activation='sigmoid')
        # Use value network for calculating GAE. We should use this for
        # training the policy network.
        if args.use_value_net:
            # context_size contains num_goals
            self.value_net = Value(state_size * history_size + context_size,
                                   hidden_size=64)
        # Reward net is the discriminator network. Discriminator does not
        # receive the latent vector in InfoGAIL.
        self.reward_net = Reward(
            state_size * history_size,
            action_size,  # action size
            0,  # latent size
            hidden_size=64)
        self.posterior_net = DiscretePosterior(
            state_size=state_size * history_size,  # state
            action_size=0,  # action
            latent_size=0,  # context
            hidden_size=64,
            output_size=num_goals)
        self.opt_policy = optim.Adam(self.policy_net.parameters(), lr=0.0003)
        self.opt_reward = optim.Adam(self.reward_net.parameters(), lr=0.0003)
        self.opt_value = optim.Adam(self.value_net.parameters(), lr=0.0003)
        self.opt_posterior = optim.Adam(self.posterior_net.parameters(),
                                        lr=0.0003)
        # Create loss functions
        self.criterion = nn.BCELoss()
        self.criterion_posterior = nn.CrossEntropyLoss()
        self.create_environment()

    def checkpoint_data_to_save(self):
        """Return the dict of networks serialized into a checkpoint."""
        return {
            'policy': self.policy_net,
            'value': self.value_net,
            'reward': self.reward_net,
            'posterior': self.posterior_net,
        }

    def load_checkpoint_data(self, checkpoint_path):
        """Restore all four networks from a checkpoint file.

        Raises AssertionError if ``checkpoint_path`` does not exist.
        """
        assert os.path.exists(checkpoint_path), \
            'Checkpoint path does not exists {}'.format(checkpoint_path)
        checkpoint_data = torch.load(checkpoint_path)
        self.policy_net = checkpoint_data['policy']
        self.value_net = checkpoint_data['value']
        self.reward_net = checkpoint_data['reward']
        self.posterior_net = checkpoint_data['posterior']

    def update_params_for_batch(self, states, actions, latent_c, targets,
                                advantages, expert_states, expert_actions,
                                optim_batch_size, optim_batch_size_exp,
                                optim_iters):
        '''Update parameters for one batch of data.

        Update the policy network, discriminator (reward) network and the
        posterior network here. Walks ``optim_iters`` mini-batches through
        the generated and expert data in parallel.
        '''
        args, dtype = self.args, self.dtype
        curr_id, curr_id_exp = 0, 0
        for _ in range(optim_iters):
            curr_batch_size = min(optim_batch_size,
                                  actions.size(0) - curr_id)
            curr_batch_size_exp = min(optim_batch_size_exp,
                                      expert_actions.size(0) - curr_id_exp)
            start_idx, end_idx = curr_id, curr_id + curr_batch_size
            state_var = Variable(states[start_idx:end_idx])
            action_var = Variable(actions[start_idx:end_idx])
            latent_c_var = Variable(latent_c[start_idx:end_idx])
            advantages_var = Variable(advantages[start_idx:end_idx])
            start_idx, end_idx = curr_id_exp, curr_id_exp + curr_batch_size_exp
            expert_state_var = Variable(expert_states[start_idx:end_idx])
            expert_action_var = Variable(expert_actions[start_idx:end_idx])
            # Update reward net
            self.opt_reward.zero_grad()
            # Backprop with expert demonstrations
            # NOTE(review): expert samples get label 0 and generated samples
            # label 1 (the opposite of the common GAIL convention) -- this is
            # consistent with -log(D) being used as reward in train_gail, but
            # confirm against the Reward net's output semantics.
            expert_output = self.reward_net(
                torch.cat((expert_state_var, expert_action_var), 1))
            expert_disc_loss = self.criterion(
                expert_output,
                Variable(
                    torch.zeros(expert_action_var.size(0), 1).type(dtype)))
            expert_disc_loss.backward()
            # Backprop with generated demonstrations
            gen_output = self.reward_net(torch.cat((state_var, action_var), 1))
            gen_disc_loss = self.criterion(
                gen_output,
                Variable(torch.ones(action_var.size(0), 1)).type(dtype))
            gen_disc_loss.backward()
            # Add loss scalars.
            self.logger.summary_writer.add_scalars(
                'loss/discriminator',
                {
                    'total': expert_disc_loss.data[0] + gen_disc_loss.data[0],
                    'expert': expert_disc_loss.data[0],
                    'gen': gen_disc_loss.data[0],
                },
                self.gail_step_count)
            self.opt_reward.step()
            reward_l2_norm, reward_grad_l2_norm = \
                get_weight_norm_for_network(self.reward_net)
            self.logger.summary_writer.add_scalar('weight/discriminator/param',
                                                  reward_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/discriminator/grad',
                                                  reward_grad_l2_norm,
                                                  self.gail_step_count)
            # Update posterior net. We need to do this by reparameterization
            # trick.
            predicted_posterior = self.posterior_net(state_var)
            # There is no GOAL info in latent_c_var here.
            # TODO: This 0 and -1 stuff is not needed here. Confirm?
            _, true_posterior = torch.max(latent_c_var.data, dim=1)
            posterior_loss = self.criterion_posterior(predicted_posterior,
                                                      Variable(true_posterior))
            posterior_loss.backward()
            self.logger.summary_writer.add_scalar('loss/posterior',
                                                  posterior_loss.data[0],
                                                  self.gail_step_count)
            # compute old and new action probabilities
            action_means, action_log_stds, action_stds = self.policy_net(
                torch.cat((state_var, latent_c_var), 1))
            log_prob_cur = normal_log_density(action_var, action_means,
                                              action_log_stds, action_stds)
            action_means_old, action_log_stds_old, action_stds_old = \
                self.old_policy_net(torch.cat(
                    (state_var, latent_c_var), 1))
            log_prob_old = normal_log_density(action_var, action_means_old,
                                              action_log_stds_old,
                                              action_stds_old)
            if args.use_value_net:
                # update value net
                self.opt_value.zero_grad()
                value_var = self.value_net(
                    torch.cat((state_var, latent_c_var), 1))
                value_loss = (value_var - \
                    targets[curr_id:curr_id+curr_batch_size]).pow(2.).mean()
                value_loss.backward()
                self.opt_value.step()
            # Update policy net (PPO step)
            self.opt_policy.zero_grad()
            ratio = torch.exp(log_prob_cur - log_prob_old)  # pnew / pold
            surr1 = ratio * advantages_var[:, 0]
            surr2 = torch.clamp(ratio, 1.0 - self.args.clip_epsilon,
                                1.0 + self.args.clip_epsilon) * \
                advantages_var[:, 0]
            policy_surr = -torch.min(surr1, surr2).mean()
            policy_surr.backward()
            # torch.nn.utils.clip_grad_norm(self.policy_net.parameters(), 40)
            self.opt_policy.step()
            self.logger.summary_writer.add_scalar('loss/policy',
                                                  policy_surr.data[0],
                                                  self.gail_step_count)
            policy_l2_norm, policy_grad_l2_norm = \
                get_weight_norm_for_network(self.policy_net)
            self.logger.summary_writer.add_scalar('weight/policy/param',
                                                  policy_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/policy/grad',
                                                  policy_grad_l2_norm,
                                                  self.gail_step_count)
            # set new starting point for batch
            curr_id += curr_batch_size
            curr_id_exp += curr_batch_size_exp
            self.gail_step_count += 1

    def update_params(self, gen_batch, expert_batch, episode_idx,
                      optim_epochs, optim_batch_size):
        '''Update params for Policy (G), Reward (D) and Posterior (q)
        networks.

        Prepares tensors from generated/expert batches, computes GAE
        advantages, snapshots the old policy, then runs
        ``update_params_for_batch`` over shuffled mini-batches for
        ``optim_epochs`` epochs.
        '''
        args, dtype = self.args, self.dtype
        # Linearly decay the policy learning rate over training.
        self.opt_policy.lr = self.args.learning_rate \
            * max(1.0 - float(episode_idx)/args.num_epochs, 0)
        # NOTE(review): this local is never used -- the PPO step below reads
        # self.args.clip_epsilon directly, so the decay has no effect.
        clip_epsilon = self.args.clip_epsilon \
            * max(1.0 - float(episode_idx)/args.num_epochs, 0)
        # generated trajectories
        states = torch.Tensor(np.array(gen_batch.state)).type(dtype)
        actions = torch.Tensor(np.array(gen_batch.action)).type(dtype)
        rewards = torch.Tensor(np.array(gen_batch.reward)).type(dtype)
        masks = torch.Tensor(np.array(gen_batch.mask)).type(dtype)
        ## Expand states to include history ##
        # Generated trajectories already have history in them.
        latent_c = torch.Tensor(np.array(gen_batch.c)).type(dtype)
        values = None
        if args.use_value_net:
            values = self.value_net(Variable(torch.cat((states, latent_c), 1)))
        # expert trajectories
        list_of_expert_states, list_of_expert_actions = [], []
        list_of_masks = []
        for i in range(len(expert_batch.state)):
            ## Expand expert states ##
            expanded_states = self.expand_states_numpy(expert_batch.state[i],
                                                       self.history_size)
            list_of_expert_states.append(torch.Tensor(expanded_states))
            list_of_expert_actions.append(torch.Tensor(expert_batch.action[i]))
            list_of_masks.append(torch.Tensor(expert_batch.mask[i]))
        expert_states = torch.cat(list_of_expert_states, 0).type(dtype)
        expert_actions = torch.cat(list_of_expert_actions, 0).type(dtype)
        expert_masks = torch.cat(list_of_masks, 0).type(dtype)
        assert expert_states.size(0) == expert_actions.size(0), \
            "Expert transition size do not match"
        assert expert_states.size(0) == expert_masks.size(0), \
            "Expert transition size do not match"
        # compute advantages
        returns, advantages = get_advantage_for_rewards(rewards, masks,
                                                        self.args.gamma,
                                                        values, dtype=dtype)
        targets = Variable(returns)
        advantages = (advantages - advantages.mean()) / advantages.std()
        # Backup params after computing probs but before updating new params
        for old_policy_param, policy_param in zip(
                self.old_policy_net.parameters(),
                self.policy_net.parameters()):
            old_policy_param.data.copy_(policy_param.data)
        # update value, reward and policy networks
        optim_iters = self.args.batch_size // optim_batch_size
        optim_batch_size_exp = expert_actions.size(0) // optim_iters
        # Remove extra 1 array shape from actions, since actions were added as
        # 1-hot vector of shape (1, A).
        actions = np.squeeze(actions)
        expert_actions = np.squeeze(expert_actions)
        for _ in range(optim_epochs):
            perm = np.random.permutation(np.arange(actions.size(0)))
            perm_exp = np.random.permutation(np.arange(expert_actions.size(0)))
            if args.cuda:
                perm = torch.cuda.LongTensor(perm)
                perm_exp = torch.cuda.LongTensor(perm_exp)
            else:
                perm, perm_exp = torch.LongTensor(perm), torch.LongTensor(
                    perm_exp)
            self.update_params_for_batch(
                states[perm],
                actions[perm],
                latent_c[perm],
                targets[perm],
                advantages[perm],
                expert_states[perm_exp],
                expert_actions[perm_exp],
                optim_batch_size,
                optim_batch_size_exp,
                optim_iters)

    def train_gail(self, expert):
        '''Train Info-GAIL.

        Each epoch: roll out generated trajectories seeded from expert
        start states with a randomly sampled one-hot context c, reward
        them with -log D plus a posterior (q(c|s)) bonus, then update all
        networks via ``update_params``. Results and checkpoints are saved
        periodically.
        '''
        args, dtype = self.args, self.dtype
        results = {
            'average_reward': [],
            'episode_reward': [],
            'true_traj': {},
            'pred_traj': {}
        }
        self.train_step_count, self.gail_step_count = 0, 0
        for ep_idx in range(args.num_epochs):
            memory = Memory()
            num_steps = 0
            reward_batch, true_reward_batch = [], []
            expert_true_reward_batch = []
            true_traj_curr_episode, gen_traj_curr_episode = [], []
            while num_steps < args.batch_size:
                traj_expert = expert.sample(size=1)
                state_expert, action_expert, _, _ = traj_expert
                # Expert state and actions
                state_expert = state_expert[0]
                action_expert = action_expert[0]
                expert_episode_len = len(state_expert)
                # Sample start state or should we just choose the start state
                # from the expert trajectory sampled above.
                # curr_state_obj = self.sample_start_state()
                curr_state_obj = State(state_expert[0], self.obstacles)
                curr_state_feat = self.get_state_features(
                    curr_state_obj, self.args.use_state_features)
                # Add history to state
                if args.history_size > 1:
                    curr_state = -1 * np.ones(
                        (args.history_size * curr_state_feat.shape[0]),
                        dtype=np.float32)
                    curr_state[(args.history_size-1) \
                        * curr_state_feat.shape[0]:] = curr_state_feat
                else:
                    curr_state = curr_state_feat
                # TODO: Make this a separate function. Can be parallelized.
                ep_reward, ep_true_reward, expert_true_reward = 0, 0, 0
                true_traj, gen_traj = [], []
                gen_traj_dict = {
                    'features': [],
                    'actions': [],
                    'c': [],
                    'mask': []
                }
                disc_reward, posterior_reward = 0.0, 0.0
                # Use a hard-coded list for memory to gather experience since we
                # need to mutate it before finally creating a memory object.
                c_sampled = np.zeros((self.num_goals), dtype=np.float32)
                c_sampled[np.random.randint(0, self.num_goals)] = 1.0
                c_sampled_tensor = torch.zeros((1)).type(torch.LongTensor)
                c_sampled_tensor[0] = int(np.argmax(c_sampled))
                if self.args.cuda:
                    c_sampled_tensor = torch.cuda.LongTensor(c_sampled_tensor)
                memory_list = []
                for t in range(expert_episode_len):
                    action = self.select_action(
                        np.concatenate((curr_state, c_sampled)))
                    action_numpy = action.data.cpu().numpy()
                    # Save generated and true trajectories
                    true_traj.append((state_expert[t], action_expert[t]))
                    gen_traj.append((curr_state_obj.coordinates, action_numpy))
                    gen_traj_dict['features'].append(
                        self.get_state_features(curr_state_obj,
                                                self.args.use_state_features))
                    gen_traj_dict['actions'].append(action_numpy)
                    gen_traj_dict['c'].append(c_sampled)
                    action = epsilon_greedy_linear_decay(action_numpy,
                                                         args.num_epochs * 0.5,
                                                         ep_idx,
                                                         self.action_size,
                                                         low=0.05,
                                                         high=0.3)
                    # Get the discriminator reward
                    disc_reward_t = float(
                        self.reward_net(
                            torch.cat((Variable(
                                torch.from_numpy(curr_state).unsqueeze(
                                    0)).type(dtype),
                                       Variable(
                                           torch.from_numpy(
                                               oned_to_onehot(
                                                   action, self.action_size)).
                                           unsqueeze(0)).type(dtype)),
                                      1)).data.cpu().numpy()[0, 0])
                    if args.use_log_rewards and disc_reward_t < 1e-6:
                        disc_reward_t += 1e-6
                    disc_reward_t = -math.log(disc_reward_t) \
                        if args.use_log_rewards else -disc_reward_t
                    disc_reward += disc_reward_t
                    # Predict c given (x_t)
                    predicted_posterior = self.posterior_net(
                        Variable(torch.from_numpy(curr_state).unsqueeze(
                            0)).type(dtype))
                    posterior_reward_t = self.criterion_posterior(
                        predicted_posterior,
                        Variable(c_sampled_tensor)).data.cpu().numpy()[0]
                    posterior_reward += (self.args.lambda_posterior *
                                         posterior_reward_t)
                    # Update Rewards
                    ep_reward += (disc_reward_t + posterior_reward_t)
                    true_goal_state = [
                        int(x) for x in state_expert[-1].tolist()
                    ]
                    if self.args.flag_true_reward == 'grid_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            curr_state_obj.coordinates,
                            goals=[true_goal_state])
                        expert_true_reward += \
                            self.true_reward.reward_at_location(
                                state_expert[t], goals=[true_goal_state])
                    elif self.args.flag_true_reward == 'action_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            np.argmax(action_expert[t]), action)
                        expert_true_reward += \
                            self.true_reward.corret_action_reward
                    else:
                        raise ValueError("Incorrect true reward type")
                    # Update next state
                    next_state_obj = self.transition_func(
                        curr_state_obj, Action(action), 0)
                    next_state_feat = self.get_state_features(
                        next_state_obj, self.args.use_state_features)
                    #next_state = running_state(next_state)
                    mask = 0 if t == expert_episode_len - 1 else 1
                    # Push to memory
                    memory_list.append([
                        curr_state,
                        np.array([oned_to_onehot(action, self.action_size)]),
                        mask, next_state_feat,
                        disc_reward_t + posterior_reward_t, c_sampled,
                        c_sampled
                    ])
                    if args.render:
                        env.render()
                    if not mask:
                        break
                    curr_state_obj = next_state_obj
                    curr_state_feat = next_state_feat
                    # Shift the rolling history window forward by one frame.
                    if args.history_size > 1:
                        curr_state[:(args.history_size-1) \
                            * curr_state_feat.shape[0]] = \
                            curr_state[curr_state_feat.shape[0]:]
                        curr_state[(args.history_size-1) \
                            * curr_state_feat.shape[0]:] = curr_state_feat
                    else:
                        curr_state = curr_state_feat
                assert memory_list[-1][2] == 0, \
                    "Mask for final end state is not 0."
                for memory_t in memory_list:
                    memory.push(*memory_t)
                self.logger.summary_writer.add_scalars(
                    'gen_traj/gen_reward', {
                        'discriminator': disc_reward,
                        'posterior': posterior_reward,
                    }, self.train_step_count)
                # NOTE(review): (t - 1) under-counts steps taken (t is the
                # last loop index); preserved as-is.
                num_steps += (t - 1)
                reward_batch.append(ep_reward)
                true_reward_batch.append(ep_true_reward)
                expert_true_reward_batch.append(expert_true_reward)
                results['episode_reward'].append(ep_reward)
                # Append trajectories
                true_traj_curr_episode.append(true_traj)
                gen_traj_curr_episode.append(gen_traj)
            results['average_reward'].append(np.mean(reward_batch))
            # Add to tensorboard
            self.logger.summary_writer.add_scalars(
                'gen_traj/reward', {
                    'average': np.mean(reward_batch),
                    'max': np.max(reward_batch),
                    'min': np.min(reward_batch)
                }, self.train_step_count)
            self.logger.summary_writer.add_scalars(
                'gen_traj/true_reward', {
                    'average': np.mean(true_reward_batch),
                    'max': np.max(true_reward_batch),
                    'min': np.min(true_reward_batch),
                    'expert_true': np.mean(expert_true_reward_batch)
                }, self.train_step_count)
            # Add predicted and generated trajectories to results
            if ep_idx % self.args.save_interval == 0:
                results['true_traj'][ep_idx] = copy.deepcopy(
                    true_traj_curr_episode)
                results['pred_traj'][ep_idx] = copy.deepcopy(
                    gen_traj_curr_episode)
            # Update parameters
            gen_batch = memory.sample()
            # We do not get the context variable from expert trajectories.
            # Hence we need to fill it in later.
            expert_batch = expert.sample(size=args.num_expert_trajs)
            self.update_params(gen_batch, expert_batch, ep_idx,
                               args.optim_epochs, args.optim_batch_size)
            self.train_step_count += 1
            if ep_idx > 0 and ep_idx % args.log_interval == 0:
                print('Episode [{}/{}] Avg R: {:.2f} Max R: {:.2f} \t' \
                      'True Avg {:.2f} True Max R: {:.2f} ' \
                      'Expert (Avg): {:.2f}'.format(
                          ep_idx, args.num_epochs, np.mean(reward_batch),
                          np.max(reward_batch), np.mean(true_reward_batch),
                          np.max(true_reward_batch),
                          np.mean(expert_true_reward_batch)))
            results_path = os.path.join(args.results_dir, 'results.pkl')
            with open(results_path, 'wb') as results_f:
                pickle.dump((results), results_f, protocol=2)
                # print("Did save results to {}".format(results_path))
            if ep_idx % args.save_interval == 0:
                checkpoint_filepath = self.model_checkpoint_filepath(ep_idx)
                torch.save(self.checkpoint_data_to_save(),
                           checkpoint_filepath)
                print("Did save checkpoint: {}".format(checkpoint_filepath))
parser.add_argument('--max-steps', type=int, default=1000000) parser.add_argument('--log-dir', type=str) args = parser.parse_args() env = bench.Monitor(gym.make(args.env_name), os.path.join(args.log_dir, '0'), allow_early_resets=False) num_inputs = env.observation_space.shape[0] num_actions = env.action_space.shape[0] env.seed(args.seed) torch.manual_seed(args.seed) policy_net = Policy(num_inputs, num_actions) value_net = Value(num_inputs) def select_action(state): state = torch.from_numpy(state).unsqueeze(0) action_mean, _, action_std = policy_net(Variable(state)) action = torch.normal(action_mean, action_std) return action def update_params(batch): rewards = torch.Tensor(batch.reward) masks = torch.Tensor(batch.mask) actions = torch.Tensor(np.concatenate(batch.action, 0)) states = torch.Tensor(batch.state) values = value_net(Variable(states))
def train(args):
    """Train a FOCOPS agent on ``args.env_id``.

    Builds the Gaussian policy plus separate value nets for reward and
    constraint cost, then alternates trajectory collection and parameter
    updates for ``args.max_iter_num`` iterations with linearly decayed
    learning rates.
    """
    # Initialize data type
    dtype = torch.float32
    torch.set_default_dtype(dtype)
    device = torch.device('cuda') if torch.cuda.is_available() \
        else torch.device('cpu')
    # Initialize environment
    env = gym.make(args.env_id)
    envname = env.spec.id
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    # Initialize random seeds
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)
    # Initialize neural nets: one value net for reward, one for cost.
    policy = GaussianPolicy(obs_dim, act_dim, args.hidden_size,
                            args.activation, args.logstd)
    value_net = Value(obs_dim, args.hidden_size, args.activation)
    cvalue_net = Value(obs_dim, args.hidden_size, args.activation)
    policy.to(device)
    value_net.to(device)
    cvalue_net.to(device)
    # Initialize optimizer
    pi_optimizer = torch.optim.Adam(policy.parameters(), args.pi_lr)
    vf_optimizer = torch.optim.Adam(value_net.parameters(), args.vf_lr)
    cvf_optimizer = torch.optim.Adam(cvalue_net.parameters(), args.cvf_lr)
    # Initialize learning rate scheduler: linear decay to 0 at max_iter_num.
    lr_lambda = lambda it: max(1.0 - it / args.max_iter_num, 0)
    pi_scheduler = torch.optim.lr_scheduler.LambdaLR(pi_optimizer,
                                                     lr_lambda=lr_lambda)
    vf_scheduler = torch.optim.lr_scheduler.LambdaLR(vf_optimizer,
                                                     lr_lambda=lr_lambda)
    cvf_scheduler = torch.optim.lr_scheduler.LambdaLR(cvf_optimizer,
                                                      lr_lambda=lr_lambda)
    # Store hyperparameters for log
    hyperparams = vars(args)
    # Initialize RunningStat for state normalization, score queue, logger
    running_stat = RunningStats(clip=5)
    score_queue = deque(maxlen=100)
    cscore_queue = deque(maxlen=100)
    logger = Logger(hyperparams)
    # Get constraint bounds
    cost_lim = get_threshold(envname, constraint=args.constraint)
    # Initialize and train FOCOPS agent
    agent = FOCOPS(env, policy, value_net, cvalue_net, pi_optimizer,
                   vf_optimizer, cvf_optimizer, args.num_epochs, args.mb_size,
                   args.c_gamma, args.lam, args.delta, args.eta, args.nu,
                   args.nu_lr, args.nu_max, cost_lim, args.l2_reg,
                   score_queue, cscore_queue, logger)
    start_time = time.time()
    # NOTE: ``iter`` shadows the builtin inside this loop.
    for iter in range(args.max_iter_num):
        # Update iteration for model
        agent.logger.save_model('iter', iter)
        # Collect trajectories
        data_generator = DataGenerator(obs_dim, act_dim, args.batch_size,
                                       args.max_eps_len)
        rollout = data_generator.run_traj(env, agent.policy, agent.value_net,
                                          agent.cvalue_net, running_stat,
                                          agent.score_queue,
                                          agent.cscore_queue, args.gamma,
                                          args.c_gamma, args.gae_lam,
                                          args.c_gae_lam, dtype, device,
                                          args.constraint)
        # Update FOCOPS parameters
        agent.update_params(rollout, dtype, device)
        # Update learning rates
        pi_scheduler.step()
        vf_scheduler.step()
        cvf_scheduler.step()
        # Update time and running stat
        agent.logger.update('time', time.time() - start_time)
        agent.logger.update('running_stat', running_stat)
        # Save and print values
        agent.logger.dump()
# Script-level setup: build the environment and the policy/value networks
# (with their Adam optimizers) according to the parsed CLI flags.
print(args.use_parameter_noise)
env = gym.make(args.env_name)
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]
env.seed(args.seed)
torch.manual_seed(args.seed)
if args.use_joint_pol_val:
    # Single shared network for both actor and critic.
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.001)
#Here if we are using parameter noise we should use modified policy network
elif args.use_parameter_noise:
    policy_net = PolicyLayerNorm(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
    opt_value = optim.Adam(value_net.parameters(), lr=0.001)
else:
    # Default: separate plain policy and value networks.
    policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
    opt_value = optim.Adam(value_net.parameters(), lr=0.001)
# def select_action(state,sigma):
#     state = torch.from_numpy(state).unsqueeze(0)
#     if args.use_parameter_noise:
#         action_mean, _, action_std = policy_net(Variable(state),sigma,param_noise=True)
#     else:
#         action_mean, _, action_std = policy_net(Variable(state))
#     action = torch.normal(action_mean, action_std)
def _grid_table(data, colwidths, body_top=False):
    """Build a bordered, gray-headed report table.

    ``body_top=True`` top-aligns every cell (used for tables whose message
    cells are multi-line Paragraphs); otherwise only the header row is
    middle-aligned, matching the original per-table styles.
    """
    valign = (('VALIGN', (0, 0), (-1, -1), 'TOP') if body_top
              else ('VALIGN', (0, 0), (-1, 0), 'MIDDLE'))
    return Table(data, colwidths, style=[
        ('INNERGRID', (0, 0), (-1, -1), 0.25, colors.black),
        ('BOX', (0, 0), (-1, -1), 0.5, colors.black),
        valign,
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, 0), colors.gray),
    ])


def _message_rows(messages, styles):
    """Table rows (phone, wrapped text, status, local send time) for messages."""
    return [[msg.urn,
             Paragraph(msg.text, styles["BodyText"]),
             msg.status,
             localtime(msg.sent_on).strftime('%Y-%m-%d %H:%M')]
            for msg in messages]


def _flow_value_rows(values, styles):
    """Table rows (phone, flow name, question time, response, response time)
    for flow-run Value records."""
    return [[v.run.contact.urns,
             Paragraph(v.run.flow.name, styles["BodyText"]),
             localtime(v.run.created_on).strftime('%Y-%m-%d %H:%M'),
             Paragraph(v.value, styles["BodyText"]),
             localtime(v.time).strftime('%Y-%m-%d %H:%M')]
            for v in values]


def sms_maama_report():
    """Generate the SMS Maama weekly PDF report on disk.

    Pulls contact/message/flow statistics from the Django models and lays
    them out with reportlab Platypus into
    ``qc/static/qc/reports/sms_maama_weekly_report_<end_date>.pdf``.

    BUG FIX: the output path used to be the plain string
    ``"...report_{end_date}.pdf"`` — the placeholder was never interpolated,
    so every weekly run overwrote one literally-named file. The report end
    date is now formatted into the file name.
    """
    # Report period: the 7 days ending yesterday, stamped in Kampala time.
    start_date = datetime.date.today() - datetime.timedelta(days=7)
    end_date = datetime.date.today() - datetime.timedelta(days=1)
    this_day = datetime.datetime.now(
        pytz.timezone('Africa/Kampala')).strftime('%Y-%m-%d %H:%M %Z')

    doc = SimpleDocTemplate(
        "qc/static/qc/reports/sms_maama_weekly_report_{end_date}.pdf".format(
            end_date=end_date),
        pagesize=letter, rightMargin=72, leftMargin=72, topMargin=72,
        bottomMargin=18)

    report = []
    logo = "qc/static/images/logo.jpg"
    logo2 = "qc/static/images/sms_maama_logo.jpg"
    report_title = "SMS Maama Weekly Report"
    prepared_by = "Faith Nassiwa"

    # Data pulls.
    # NOTE(review): sent_messages, enrollments and concerning are never
    # rendered below; kept in case the model getters have side effects —
    # confirm and drop if they don't.
    groups = Group.get_sms_maama_groups()
    contacts = Contact.get_sms_maama_weekly_contacts()
    sms_maama_contacts = Contact.get_sms_maama_contacts()
    sent_messages = Message.get_sms_maama_sent_messages()
    delivered_messages = Message.get_sms_maama_delivered_messages()
    failed_messages = Message.get_sms_maama_failed_messages()
    failed_messages_count = Message.get_sms_maama_failed_messages_count()
    contacts_count = Contact.get_sms_maama_contacts_count()
    weekly_contacts_count = Contact.get_sms_maama_weekly_contacts_count()
    messages_count = Message.get_sms_maama_sent_messages_count()
    read_messages_count = Message.get_sms_maama_read_messages_count()
    hanging_messages_count = Message.get_sms_maama_hanging_messages_count()
    hanging_messages = Message.get_sms_maama_hanging_messages()
    flow_responses_weekly = Message.get_sms_maama_weekly_flow_responses()
    flow_responses_count = Message.get_sms_maama_flow_responses_count()
    baby_responses = Message.get_sms_maama_flow_responses_baby()
    baby_responses_count = Message.get_sms_maama_flow_responses_baby_count()
    stops = Message.get_sms_maama_opted_out()
    stops_count = Message.get_sms_maama_opted_out_count()
    screening_responses = Value.sms_maama_contact_flows_screening_values()
    antenatal_responses = Value.sms_maama_contact_flows_antenatal_values()
    enrollments = Message.get_sms_maama_flow_responses_enrollment()
    concerning = Message.get_concerning_messages()

    # Header: the two logos side by side.
    im = Image(logo, 2 * inch, 1 * inch)
    im2 = Image(logo2, 2 * inch, 1 * inch)
    report.append(Table([[im, im2]]))
    report.append(Spacer(1, 12))

    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(name='Left', alignment=TA_LEFT))
    styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER))

    def para(text):
        # Append one normal-style paragraph to the report flow.
        report.append(Paragraph(text, styles["Normal"]))

    para('<font size=14><b>%s</b></font>' % report_title)
    report.append(Spacer(1, 12))
    para('<font size=12>Date: %s</font>' % this_day)
    report.append(Spacer(1, 12))
    para('<font size=12> Report Date: %s - %s</font>' % (start_date, end_date))
    para('<font size=12> Prepared By: %s</font>' % prepared_by)
    report.append(Spacer(1, 12))

    # --- All contacts ------------------------------------------------------
    para('<font size=12> <b>All SMS Maama Contacts.</b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Name', 'Points', 'Enrolled On', 'Week Enrolled']]
    for contact in sms_maama_contacts:
        data.append([contact.urns, contact.name, contact.points,
                     contact.sms_maama_enrollment_date, contact.number_of_weeks])
    report.append(_grid_table(data, (100, 120, 40, 120, 80)))
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Enrollment-week distribution --------------------------------------
    para('<font size=12> <b>SMS Maama Week of Pregnancy Upon Enrollment Status</b></font>')
    report.append(Spacer(1, 12))
    data = [['SMS Maama Week', 'Number of Participants']]
    for group in groups:
        data.append([group.name, group.count])
    report.append(_grid_table(data, (230, 230)))
    para('<font size=12> <center>Total Participants: %s</center></font>' % contacts_count)
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Contacts enrolled this week ---------------------------------------
    para('<font size=12> <b> Weekly Enrolled Contacts</b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Created On', 'Enrolled On', 'Language']]
    for weekly_contact in contacts:
        data.append([weekly_contact.urns,
                     localtime(weekly_contact.created_on).strftime('%Y-%m-%d %H:%M'),
                     weekly_contact.sms_maama_enrollment_date,
                     weekly_contact.language])
    report.append(_grid_table(data, (100, 120, 120, 100)))
    para('<font size=12> <center>Total Weekly Participants: %s</center></font>'
         % weekly_contacts_count)
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Message count summary ---------------------------------------------
    para('<font size=12> <b>Weekly Message Count Summary</b></font>')
    report.append(Spacer(1, 12))
    para('<font size=12> <center>Total Messages Sent: %s</center></font>' % messages_count)
    # NOTE(review): "Delivered" reports the read-message count, as in the
    # original — confirm this is intentional.
    para('<font size=12> <center>Total Messages Delivered: %s</center></font>'
         % read_messages_count)
    para('<font size=12> <center>Total Messages Hanging(No delivery receipt): %s</center></font>'
         % hanging_messages_count)
    para('<font size=12> <center>Total Failed to Send Messages: %s</center></font>'
         % failed_messages_count)
    para('<font size=12> <center>Total Weekly Responses: %s</center></font>'
         % flow_responses_count)
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Baby / post-partum initiations ------------------------------------
    para('<font size=12> <b> Weekly Baby, Post-Partum Initiations </b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Message', 'Status', 'Sent On']] + _message_rows(baby_responses, styles)
    report.append(_grid_table(data, (100, 130, 100, 130), body_top=True))
    para('<font size=12> <center>Total Weekly Baby Responses: %s</center></font>'
         % baby_responses_count)
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Opt-outs ----------------------------------------------------------
    para('<font size=12> <b> Weekly Terminations </b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Message', 'Status', 'Sent On']] + _message_rows(stops, styles)
    report.append(_grid_table(data, (100, 130, 100, 130), body_top=True))
    para('<font size=12> <center>Total Weekly Terminations: %s</center></font>' % stops_count)
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Screening responses -----------------------------------------------
    para('<font size=12><b>Responses to Screening Questions</b></font>')
    report.append(Spacer(1, 12))
    data = ([['Phone Number', 'Screening', 'Question Sent On', 'Response', 'Response Sent On']]
            + _flow_value_rows(screening_responses, styles))
    report.append(_grid_table(data, (100, 100, 100, 60, 100), body_top=True))
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Antenatal reminder responses --------------------------------------
    para('<font size=12><b>Responses to Antenatal Reminders</b></font>')
    report.append(Spacer(1, 12))
    if antenatal_responses.count() >= 1:
        data = ([['Phone Number', 'Appointment Reminder', 'Reminder Sent On', 'Response',
                  'Response Sent On']]
                + _flow_value_rows(antenatal_responses, styles))
        report.append(_grid_table(data, (85, 130, 95, 55, 95), body_top=True))
    else:
        para('<font size=12>No responses to Antenatal Reminders yet. </font>')
        report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- TMCG call interactions (placeholder section) ----------------------
    para('<font size=12><b>TMCG Call Interactions</b></font>')
    report.append(Spacer(1, 12))
    para('<font size=12>No TMCG voice call interactions yet. </font>')
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Weekly flow responses ---------------------------------------------
    para('<font size=12> <b> Weekly Responses </b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Message', 'Status', 'Sent On']] + _message_rows(flow_responses_weekly, styles)
    report.append(_grid_table(data, (100, 130, 100, 130), body_top=True))
    report.append(Spacer(1, 12))

    # --- Failed-to-send messages -------------------------------------------
    para('<font size=12> <b> Weekly failed to send messages </b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Message', 'Status', 'Sent On']] + _message_rows(failed_messages, styles)
    report.append(_grid_table(data, (100, 160, 100, 100), body_top=True))
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Hanging messages (no delivery receipt) ----------------------------
    para('<font size=12> <b> Weekly hanging messages </b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Message', 'Status', 'Sent On']] + _message_rows(hanging_messages, styles)
    report.append(_grid_table(data, (100, 160, 100, 100)))
    report.append(Spacer(1, 12))
    report.append(Spacer(1, 12))

    # --- Read/delivered messages -------------------------------------------
    para('<font size=12> <b> Weekly read/delivered messages </b></font>')
    report.append(Spacer(1, 12))
    data = [['Phone Number', 'Message', 'Status', 'Sent On']] + _message_rows(delivered_messages, styles)
    report.append(_grid_table(data, (100, 160, 100, 100)))
    report.append(Spacer(1, 12))

    doc.build(report)
def save(self, *args, **kwargs):
    """Persist the form's cleaned data as EAV ``Value`` rows for this EObject.

    Replaces any previously-stored values for (self._eobject, self._group)
    with the newly-submitted ones, then returns the EObject.
    """
    # If the EObject is not yet stored in the database, save it first so it
    # has a primary key for the Value foreign keys below.
    if self._eobject.pk == None:
        self._eobject.save()
    # IMPORTANT: when a project has already filled in a given field on one
    # page, do NOT let the user fill the same field again on another page!
    if self._group == None:
        # New group: assign the next group number, max(existing groups) + 1.
        try:
            self._group = max(self.eobject_eform_groups) + 1
        except ValueError:
            # No existing groups — start at 1.
            self._group = 1
    new_values = []
    for key, value in self.cleaned_data.items():
        if value == None:
            continue  # optional field left blank — skip it
        efield = self._key_field_dict[key]
        if efield.field_type in EField.MULT_CHOICES_FIELD:
            # Multi-choice: one Value row per selected item.
            for item in value:
                new_values.append(
                    Value(eobject=self._eobject, efield=efield,
                          value=efield.get_db_value(item), group=self._group))
        elif efield.field_type == u'SimpleModelChoiceField':
            # Value encodes a generic FK as "<content_type_id>-<object_id>".
            content_type_id, object_id = value.split("-")[0], value.split("-")[1]
            new_values.append(
                Value(eobject=self._eobject, efield=efield,
                      content_type=ContentType.objects.get(pk=content_type_id),
                      object_id=object_id, group=self._group))
        elif efield.field_type in [u"VideoField", u'FileField']:
            # Uploaded file stored on the Value's vfile field.
            new_values.append(
                Value(eobject=self._eobject, efield=efield,
                      vfile=value, group=self._group))
        elif efield.field_type == u"ImageField":
            # Fresh uploads are wrapped in a CommonImage; existing values are
            # assumed to already be CommonImage instances.
            commonImage = CommonImage.objects.create(
                image=value) if isinstance(value, UploadedFile) else value
            new_values.append(
                Value(eobject=self._eobject, efield=efield,
                      content_type=ContentType.objects.get_for_model(commonImage),
                      object_id=commonImage.id, group=self._group))
        else:
            # Plain scalar field.
            new_values.append(
                Value(eobject=self._eobject, efield=efield,
                      value=efield.get_db_value(value), group=self._group))
    # The initial values may belong to a different eobject, so this delete
    # MUST also filter on eobject=self._eobject — otherwise it could delete
    # someone else's values.
    self.eobject_values.filter(
        group=self._group, eobject=self._eobject
    ).delete(
    )
    Value.objects.bulk_create(new_values)
    return self._eobject
# Top-level script setup for a GRU-based policy with a separate "old" policy
# copy (presumably for a PPO/TRPO-style ratio — TODO confirm against trainer).
env = gym.make(args.env_name)
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]
env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    # Shared actor-critic network.
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    # Separate recurrent policy (plus an old copy) and value network.
    policy_net = GRU(num_inputs, num_actions)
    old_policy_net = GRU(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)

def create_batch_inputs(batch_states_list, batch_actions_list, batch_advantages_list, batch_targets_list):
    # Zero-pad variable-length trajectories to the longest one so they can be
    # batched for the recurrent nets.
    lengths = []
    for states in batch_states_list:
        lengths.append(states.size(0))
    max_length = max(lengths)
    batch_states = torch.zeros(len(batch_states_list), max_length, num_inputs)
    batch_actions = torch.zeros(len(batch_actions_list), max_length, num_actions)
    batch_advantages = torch.zeros(len(batch_advantages_list), max_length)
    # NOTE(review): the rest of this function (filling the padded tensors and
    # handling batch_targets_list) is truncated in this chunk of the file.
# NOTE(review): the following self.* assignments are the tail of a
# Params-style config __init__ whose header lies outside this chunk.
self.num_steps = 20
self.max_episode_length = 10000
self.seed = 1
self.env_name = 'Pendulum-v0'

if __name__ == '__main__':
    # Limit intra-op threads so the worker processes don't oversubscribe cores.
    os.environ['OMP_NUM_THREADS'] = '1'
    params = Params()
    torch.manual_seed(params.seed)
    env = gym.make(params.env_name)
    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]
    # Shared policy/value networks living in shared memory so every worker
    # process reads and writes the same parameters.
    shared_p = Policy(num_inputs, num_outputs)
    shared_v = Value(num_inputs)
    shared_p.share_memory()
    shared_v.share_memory()
    optimizer_p = my_optim.SharedAdam(shared_p.parameters(), lr=params.lr)
    optimizer_v = my_optim.SharedAdam(shared_v.parameters(), lr=params.lr)
    processes = []
    # One evaluation process...
    p = mp.Process(target=test, args=(params.num_processes, params, shared_p))
    p.start()
    processes.append(p)
    # ...plus num_processes training workers.
    for rank in range(0, params.num_processes):
        p = mp.Process(target=train, args=(rank, params, shared_p, shared_v,
                                           optimizer_p, optimizer_v))
        p.start()
        processes.append(p)
def train(rank, params, shared_p, shared_v, optimizer_p, optimizer_v):
    """Worker training loop: collect rollouts and update the shared nets.

    Each worker seeds torch with ``params.seed + rank``, gathers ~batch_size
    transitions, then performs KL-regularized policy updates and MSE value
    updates, writing gradients into the shared networks asynchronously.
    NOTE(review): uses project helpers (Policy, Value, ReplayMemory, normal,
    ensure_shared_grads) defined elsewhere in the file.
    """
    torch.manual_seed(params.seed + rank)
    env = gym.make(params.env_name)
    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]
    policy = Policy(num_inputs, num_outputs)
    value = Value(num_inputs)
    memory = ReplayMemory(1e6)
    batch_size = 10000
    state = env.reset()
    state = Variable(torch.Tensor(state).unsqueeze(0))
    done = True
    episode_length = 0
    while True:
        episode_length += 1
        # Sync local copies with the shared parameters before collecting.
        policy.load_state_dict(shared_p.state_dict())
        value.load_state_dict(shared_v.state_dict())
        w = -1
        while w < batch_size:
            states = []
            actions = []
            rewards = []
            values = []
            returns = []
            advantages = []
            # Perform K steps
            for step in range(params.num_steps):
                w += 1
                states.append(state)
                # Sample a Gaussian action from the policy's (mu, sigma^2).
                mu, sigma_sq = policy(state)
                eps = torch.randn(mu.size())
                action = (mu + sigma_sq.sqrt()*Variable(eps))
                actions.append(action)
                v = value(state)
                values.append(v)
                env_action = action.data.squeeze().numpy()
                state, reward, done, _ = env.step(env_action)
                done = (done or episode_length >= params.max_episode_length)
                # Clip reward to [-1, 1].
                reward = max(min(reward, 1), -1)
                rewards.append(reward)
                if done:
                    episode_length = 0
                    state = env.reset()
                    state = Variable(torch.Tensor(state).unsqueeze(0))
                if done:
                    break
            # Bootstrap from the value net unless the episode terminated.
            R = torch.zeros(1, 1)
            if not done:
                v = value(state)
                R = v.data
            # Compute discounted returns and advantages:
            values.append(Variable(R))
            R = Variable(R)
            for i in reversed(range(len(rewards))):
                R = params.gamma * R + rewards[i]
                returns.insert(0, R)
                A = R - values[i]
                advantages.insert(0, A)
            # Store useful info:
            memory.push([states, actions, returns, advantages])
        batch_states, batch_actions, batch_returns, batch_advantages = memory.sample(batch_size)
        # Policy gradient updates with an adaptive KL penalty:
        mu_old, sigma_sq_old = policy(batch_states)
        probs_old = normal(batch_actions, mu_old, sigma_sq_old)
        policy_new = Policy(num_inputs, num_outputs)
        kl = 0.
        kl_coef = 1.
        kl_target = Variable(torch.Tensor([params.kl_target]))
        for m in range(100):
            policy_new.load_state_dict(shared_p.state_dict())
            mu_new, sigma_sq_new = policy_new(batch_states)
            probs_new = normal(batch_actions, mu_new, sigma_sq_new)
            policy_loss = torch.mean(batch_advantages * torch.sum(probs_new/probs_old,1))
            kl = torch.mean(probs_old * torch.log(probs_old/probs_new))
            # KL penalty plus a hinge term that kicks in past 2*kl_target.
            kl_loss = kl_coef * kl + \
                params.ksi * torch.clamp(kl-2*kl_target, max=0)**2
            total_policy_loss = - policy_loss + kl_loss
            # Early-stop this round of updates if KL blew past 4x target.
            if kl > 4*kl_target:
                break
            # Asynchronous update into the shared policy:
            optimizer_p.zero_grad()
            total_policy_loss.backward()
            ensure_shared_grads(policy_new, shared_p)
            optimizer_p.step()
        # Value updates (MSE against the computed returns):
        for b in range(100):
            value.load_state_dict(shared_v.state_dict())
            v = value(batch_states)
            value_loss = torch.mean((batch_returns - v)**2)
            # Asynchronous update into the shared value net:
            optimizer_v.zero_grad()
            value_loss.backward()
            ensure_shared_grads(value, shared_v)
            optimizer_v.step()
        # Adapt the KL coefficient toward the target range.
        if kl > params.beta_hight*kl_target:
            kl_coef *= params.alpha
        if kl < params.beta_low*kl_target:
            kl_coef /= params.alpha
        print("update done !")
#env.seed(args.seed) torch.manual_seed(args.seed) if args.resume: print("=> loading checkpoint ") checkpoint = torch.load('../models/ss/3.t7') #args.start_epoch = checkpoint['epoch'] #best_prec1 = checkpoint['best_prec1'] ac_net.load_state_dict(checkpoint['state_dict']) opt_ac.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch'])) else: if args.use_sep_pol_val: policy_net = Policy(num_inputs, num_actions) value_net = Value(num_inputs) opt_policy = optim.Adam(policy_net.parameters(), lr=args.lr) opt_value = optim.Adam(value_net.parameters(), lr=args.lr) else: ac_net = ActorCritic(num_inputs, num_actions) opt_ac = optim.Adam(ac_net.parameters(), lr=args.lr) def select_action(state): state = torch.from_numpy(state).unsqueeze(0) action_mean, _, action_std = policy_net(Variable(state)) action = torch.normal(action_mean, action_std) return action def select_action_actor_critic(state):