Beispiel #1
0
def sms_maama_weekly(request, as_pdf=False):
    """Assemble the context for the SMS Maama weekly report.

    Every entry is fetched through the ``Group``, ``Contact``, ``Message``
    and ``Value`` helper methods and stored under a fixed context key.

    :param request: current HTTP request; used to build the template context.
    :param as_pdf: when true, return the context dict itself instead of a
        rendered response (presumably for a PDF renderer -- confirm against
        the caller).
    :return: the context dict if ``as_pdf`` is true, otherwise the rendered
        ``qcreports/sms_maama_weekly_report.html`` response.
    """
    payload = {
        'groups': Group.get_sms_maama_groups(),
        'contacts': Contact.get_sms_maama_weekly_contacts(),
        'sms_maama_contacts': Contact.get_sms_maama_contacts(),
        'sent_messages': Message.get_sms_maama_sent_messages(),
        'delivered_messages': Message.get_sms_maama_delivered_messages(),
        'failed_messages': Message.get_sms_maama_failed_messages(),
        'failed_messages_count': Message.get_sms_maama_failed_messages_count(),
        'contacts_count': Contact.get_sms_maama_contacts_count(),
        'weekly_contacts_count': Contact.get_sms_maama_weekly_contacts_count(),
        'messages_count': Message.get_sms_maama_sent_messages_count(),
        'read_messages_count': Message.get_sms_maama_read_messages_count(),
        'hanging_messages_count': Message.get_sms_maama_hanging_messages_count(),
        'unread_messages': Message.get_sms_maama_unread_messages(),
        'flow_responses': Message.get_sms_maama_flow_responses(),
        'flow_responses_count': Message.get_sms_maama_flow_responses_count(),
        'baby_responses': Message.get_sms_maama_flow_responses_baby(),
        'baby_responses_count': Message.get_sms_maama_flow_responses_baby_count(),
        'stops': Message.get_sms_maama_opted_out(),
        'stops_count': Message.get_sms_maama_opted_out_count(),
        'enrollments': Message.get_sms_maama_flow_responses_enrollment(),
        'flows': Value.sms_maama_contact_flows_values(),
        'antenatal_responses': Value.sms_maama_contact_flows_antenatal_values(),
    }

    # Read the clock once so the reporting window is exactly seven days wide
    # (two separate now() calls would drift apart by a few microseconds).
    end_date = datetime.datetime.now()
    payload['start_date'] = end_date - datetime.timedelta(days=7)
    payload['end_date'] = end_date
    payload['this_day'] = now()

    if as_pdf:
        return payload
    return render_to_response('qcreports/sms_maama_weekly_report.html',
                              payload, RequestContext(request))
Beispiel #2
0
    def insert_or_replace(self, session, key, value):
        """Point the entry stored under ``key`` at ``value``.

        The value object returned by ``lookup_value`` is reused when it is
        truthy; otherwise a new ``Value`` is built from ``hash_value(value)``
        and the raw ``value`` blob. If no entry exists for ``key`` a new
        ``Entry`` is created and added to ``session``.
        """
        entry = self.lookup_entry(session, key)

        # Resolve the value object once; both outcomes below need it.
        stored = self.lookup_value(session, value)
        if not stored:
            stored = Value(hash=self.hash_value(value), blob=value)

        if entry:
            # Existing entry: just repoint it.
            entry.value = stored
            return

        # No entry yet: create and register one.
        session.add(Entry(key=key, value=stored))
Beispiel #3
0
def update_or_create(testcase,
                     env,
                     build,
                     metric,
                     value=None,
                     comment=None,
                     color=None):
    """Update testresults/settings if exist, otherwise create new ones.

    The ``Settings`` row for (testcase, metric) and the ``TestResults`` row
    for (build, testcase, env, metric) are fetched or created, the result's
    timestamp is refreshed to the current local time, and the optional
    ``value``, ``comment`` and ``color`` are applied before saving.

    :param value:       measurement to attach; ``None`` or an empty string
                        means "no measurement". A value of ``0`` is recorded.
    :param comment:     stored on the result when non-empty
    :param color:       stored on the result when non-empty
    :return created     True if created new results, otherwise False
    """

    settings, _ = Settings.objects.get_or_create(testcase=testcase,
                                                 metric=metric)

    testresults, created = TestResults.objects.get_or_create(
        build=build,
        testcase=testcase,
        env=env,
        metric=metric,
        tag=gen_tag(build),
        settings=settings)
    testresults.timestamp = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime())
    # A plain truthiness test would silently drop a legitimate result of 0,
    # so only "nothing supplied" (None / empty string) is skipped.
    if value is not None and value != '':
        testresults.value_set.add(Value(value=value))
    if comment:
        testresults.comment = comment
    if color:
        testresults.color = color
    testresults.save()

    return created
Beispiel #4
0
def sensor(request):
    """Record a sensor reading supplied through GET parameters.

    Reads ``sensor`` (name), ``value`` and an optional ``time`` (integer
    timestamp) from ``request.GET``. The named ``Sensor`` is created when it
    does not exist yet. If the most recent stored value for that sensor
    equals the submitted one, nothing is saved and a "no need to update"
    response is returned; otherwise a new ``Value`` is stored.

    :return: ``HttpResponse`` with either the "no need to update" message or
        ``"<sensor name>:<value>"``.
    :raises ValueError: if ``value`` is not a valid float or ``time`` is not
        a valid integer.
    """
    from django.utils.timezone import get_current_timezone
    from django.utils import timezone

    sensor_name = request.GET.get("sensor", "")
    value = request.GET.get("value", "")
    # Named ``timestamp`` so the local does not shadow the ``time`` module name.
    timestamp = request.GET.get("time", "")

    if not timestamp:
        time_p = timezone.now()
    else:
        time_p = datetime.datetime.fromtimestamp(int(timestamp),
                                                 tz=get_current_timezone())

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(time_p)

    objs = Sensor.objects.filter(name=sensor_name)
    if len(objs) > 0:
        sensor_obj = objs[0]
    else:
        sensor_obj = Sensor()
        sensor_obj.name = sensor_name
        sensor_obj.save()

    try:
        latest = Value.objects.filter(sensor=sensor_obj).order_by("-pub_date")[0]
        if time_p < latest.pub_date:
            print("WARNNING: Sensor time should be greater than last in DB")
        if latest.value == float(value):
            result = "No need to update data for sensor %s" % sensor_obj
            print(result)
            # The response must be returned; previously it was built and
            # discarded, so the unchanged reading was stored anyway.
            return HttpResponse(result)
    except IndexError:
        # No stored value yet for this sensor: fall through and save the first.
        pass

    v = Value(sensor=sensor_obj, pub_date=time_p, value=float(value))
    v.save()
    return HttpResponse(str(sensor_obj.name) + ":" + str(value))
Beispiel #5
0
def post_evaluate(models_path, sigma, n_post_episodes=5, add_noise=False):
    """Roll out saved policy weights and report the mean episode reward.

    State dicts are read from ``models_path + "_policy"`` and
    ``models_path + "_value"``. The function relies on the module-level
    ``args``, ``env``, ``num_inputs`` and ``num_actions``.

    :param models_path: common path prefix of the two saved state dicts.
    :param sigma: forwarded to ``select_action`` when noisy selection is used.
    :param n_post_episodes: number of episodes to run (each capped at 1000
        steps).
    :param add_noise: ask for noisy action selection; only honoured when
        ``args.use_parameter_noise`` is set.
    :return: total collected reward divided by ``n_post_episodes``.
    """
    # The policy class depends on the parameter-noise setting; the value
    # network is the same either way.
    policy_cls = PolicyLayerNorm if args.use_parameter_noise else Policy
    eval_policy = policy_cls(num_inputs, num_actions)
    eval_value = Value(num_inputs)

    eval_value.load_state_dict(torch.load(models_path + "_value"))
    eval_policy.load_state_dict(torch.load(models_path + "_policy"))

    total_reward = 0
    for _episode in range(n_post_episodes):
        state = env.reset()

        for _step in range(1000):
            if args.use_parameter_noise and add_noise:
                chosen = select_action(eval_policy,
                                       state,
                                       sigma,
                                       add_noise=True)
            else:
                chosen = select_action(eval_policy, state)

            state, reward, done, _info = env.step(chosen.data[0].numpy())
            total_reward += reward

            if done:
                break

    print('___Post evaluation reward___')
    mean_reward = total_reward / n_post_episodes
    print(mean_reward)

    return mean_reward
Beispiel #6
0
def merge_person_property(dbsession, person, property, value, source):
    """Attach ``value`` to ``person`` under ``property`` and record its ``source``.

    ``value`` is either a plain value or a dictionary holding "value" plus
    optional "label" and "lang" entries. ``source`` is a dictionary from which
    the "url", "label" and "timestamp" entries are read.

    Matching ``PersonProperty``, ``Value``, ``Source`` and
    ``PersonPropertySource`` objects are looked up first and reused; anything
    missing is created (new properties get the status ``'unconfirmed'``). An
    existing property/source link only has its timestamp refreshed. The
    session is committed before the ``PersonProperty`` is returned.
    """
    label = lang = None
    if isinstance(value, dict):
        label = value.get('label')
        lang = value.get('lang')
        value = value['value']

    property_query = dbsession.query(PersonProperty).join(Value)
    db_property = property_query.filter(and_(PersonProperty.person_id == person.id,
                                             PersonProperty.name == property,
                                             Value.value == value,
                                             Value.lang == lang)).first()
    if not db_property:
        # Reuse an identical stored value (same text and language) if one exists.
        db_value = dbsession.query(Value).filter(and_(Value.value == value,
                                                      Value.lang == lang)).first()
        if not db_value:
            db_value = Value(label=label, value=value, lang=lang)
            dbsession.add(db_value)
        db_property = PersonProperty(person=person, name=property, value=db_value, status='unconfirmed')
        dbsession.add(db_property)

    source_url = source['url']
    link_query = dbsession.query(PersonPropertySource).join(Source)
    property_source = link_query.filter(and_(PersonPropertySource.property == db_property,
                                             Source.url == source_url)).first()
    if property_source:
        # Already attributed to this source: only refresh the timestamp.
        property_source.timestamp = source['timestamp']
    else:
        db_source = dbsession.query(Source).filter(Source.url == source_url).first()
        if not db_source:
            db_source = Source(label=source['label'], url=source_url)
            dbsession.add(db_source)
        property_source = PersonPropertySource(property=db_property, source=db_source, timestamp=source['timestamp'])
    dbsession.add(property_source)
    dbsession.commit()
    return db_property
Beispiel #7
0
def main(gamma=0.995, env_name='Walker2d-v2', tau=0.97, seed=543, number_of_batches=500,
        batch_size=5000, maximum_steps=10000, render=False, log_interval=1, entropy_coeff=0.0,
        clip_epsilon=0.2, use_joint_pol_val=False):
    """Train a policy/value pair on a gym environment and plot the rewards.

    For each of ``number_of_batches`` iterations, episodes are collected
    until at least ``batch_size`` environment steps have been taken, then
    ``update_params`` is called on the collected memory. The per-batch
    average episode reward is written to ``PPO.html`` via plotly.

    :param gamma: discount factor forwarded to ``update_params``.
    :param env_name: gym environment id.
    :param seed: seed for the environment and for torch.
    :param number_of_batches: number of collect/update iterations.
    :param batch_size: minimum number of environment steps per iteration.
    :param maximum_steps: cap on the length of a single episode.
    :param render: render the environment after every step.
    :param log_interval: print progress every this many iterations.
    :param tau, entropy_coeff, clip_epsilon, use_joint_pol_val: accepted for
        interface compatibility; not used inside this function.
    """
    torch.set_default_tensor_type('torch.DoubleTensor')

    env = gym.make(env_name)

    num_inputs = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]

    env.seed(seed)
    torch.manual_seed(seed)

    policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
    opt_value = optim.Adam(value_net.parameters(), lr=0.001)

    running_state = ZFilter((num_inputs,), clip=5)
    # NOTE(review): not used below; kept in case constructing the filter
    # matters elsewhere -- confirm and remove.
    running_reward = ZFilter((1,), demean=False, clip=10)
    plot_rew = []
    for i_episode in range(number_of_batches):
        memory = Memory()

        num_steps = 0
        reward_batch = 0
        num_episodes = 0
        while num_steps < batch_size:
            state = env.reset()
            state = running_state(state)

            reward_sum = 0
            for t in range(maximum_steps):  # Don't infinite loop while learning
                action = select_action(state, policy_net)
                action = action.data[0].numpy()
                next_state, reward, done, _ = env.step(action)
                reward_sum += reward

                next_state = running_state(next_state)

                # mask is 0 on the terminal transition of an episode, else 1.
                mask = 0 if done else 1

                memory.push(state, np.array([action]), mask, next_state, reward)

                if render:
                    env.render()
                if done:
                    break

                state = next_state
            # ``t`` is the zero-based index of the last step taken, so the
            # episode contributed t + 1 steps (the old ``t - 1`` undercounted
            # every episode by two).
            num_steps += t + 1
            num_episodes += 1
            reward_batch += reward_sum

        reward_batch /= num_episodes
        batch = memory.sample()
        plot_rew.append(reward_batch)
        update_params(batch, policy_net, value_net, gamma, opt_policy, opt_value)

        # Use the ``log_interval`` argument; ``args`` is not defined in this
        # function.
        if i_episode % log_interval == 0:
            print('Episode {}\tLast reward: {}\tAverage reward {:.2f}'.format(
                i_episode, reward_sum, reward_batch))

    plot_epi = list(range(number_of_batches))
    trace = go.Scatter(x=plot_epi, y=plot_rew)
    layout = go.Layout(title='PPO',xaxis=dict(title='Episodes', titlefont=dict(family='Courier New, monospace',size=18,color='#7f7f7f')),
    yaxis=dict(title='Average Reward', titlefont=dict(family='Courier New, monospace',size=18,color='#7f7f7f')))

    plotly.offline.plot({"data": [trace], "layout": layout},filename='PPO.html',image='jpeg')
Beispiel #8
0
    def __init__(self,
                 args,
                 logger,
                 state_size=2,
                 action_size=4,
                 context_size=1,
                 num_goals=4,
                 history_size=1,
                 dtype=torch.FloatTensor):
        '''Build the InfoGAIL networks, optimizers and loss functions.

        All size arguments are forwarded unchanged to the base class. The
        value network (and its optimizer) is only created when
        `args.use_value_net` is set; otherwise `self.value_net` and
        `self.opt_value` are left as None unless the base class already
        provided a value network.
        '''
        super(InfoGAIL, self).__init__(args,
                                       logger,
                                       state_size=state_size,
                                       action_size=action_size,
                                       context_size=context_size,
                                       num_goals=num_goals,
                                       history_size=history_size,
                                       dtype=dtype)

        # Create networks. The current and the "old" policy share one
        # architecture, so they are built from the same arguments.
        policy_kwargs = dict(state_size=state_size * history_size,
                             action_size=0,
                             latent_size=context_size,
                             output_size=action_size,
                             hidden_size=64,
                             output_activation='sigmoid')
        self.policy_net = Policy(**policy_kwargs)
        self.old_policy_net = Policy(**policy_kwargs)

        # Use value network for calculating GAE. We should use this for
        # training the policy network.
        if args.use_value_net:
            # context_size contains num_goals
            self.value_net = Value(state_size * history_size + context_size,
                                   hidden_size=64)
        elif not hasattr(self, 'value_net'):
            # Keep the attribute defined so later reads do not raise.
            self.value_net = None

        # Reward net is the discriminator network. Discriminator does not
        # receive the latent vector in InfoGAIL.
        self.reward_net = Reward(
            state_size * history_size,
            action_size,  # action size
            0,  # latent size
            hidden_size=64)

        self.posterior_net = DiscretePosterior(
            state_size=state_size * history_size,  # state
            action_size=0,  # action
            latent_size=0,  # context
            hidden_size=64,
            output_size=num_goals)

        self.opt_policy = optim.Adam(self.policy_net.parameters(), lr=0.0003)
        self.opt_reward = optim.Adam(self.reward_net.parameters(), lr=0.0003)
        # Only build the value optimizer when there is a value network;
        # building it unconditionally raised when use_value_net was off.
        self.opt_value = None
        if self.value_net is not None:
            self.opt_value = optim.Adam(self.value_net.parameters(),
                                        lr=0.0003)
        self.opt_posterior = optim.Adam(self.posterior_net.parameters(),
                                        lr=0.0003)

        # Create loss functions
        self.criterion = nn.BCELoss()
        self.criterion_posterior = nn.CrossEntropyLoss()

        self.create_environment()
Beispiel #9
0
class InfoGAIL(BaseGAIL):
    def __init__(self,
                 args,
                 logger,
                 state_size=2,
                 action_size=4,
                 context_size=1,
                 num_goals=4,
                 history_size=1,
                 dtype=torch.FloatTensor):
        '''Build the InfoGAIL networks, optimizers and loss functions.

        All size arguments are forwarded unchanged to the base class. The
        value network (and its optimizer) is only created when
        `args.use_value_net` is set; otherwise `self.value_net` and
        `self.opt_value` are left as None unless the base class already
        provided a value network.
        '''
        super(InfoGAIL, self).__init__(args,
                                       logger,
                                       state_size=state_size,
                                       action_size=action_size,
                                       context_size=context_size,
                                       num_goals=num_goals,
                                       history_size=history_size,
                                       dtype=dtype)

        # Create networks. The current and the "old" policy share one
        # architecture, so they are built from the same arguments.
        policy_kwargs = dict(state_size=state_size * history_size,
                             action_size=0,
                             latent_size=context_size,
                             output_size=action_size,
                             hidden_size=64,
                             output_activation='sigmoid')
        self.policy_net = Policy(**policy_kwargs)
        self.old_policy_net = Policy(**policy_kwargs)

        # Use value network for calculating GAE. We should use this for
        # training the policy network.
        if args.use_value_net:
            # context_size contains num_goals
            self.value_net = Value(state_size * history_size + context_size,
                                   hidden_size=64)
        elif not hasattr(self, 'value_net'):
            # Keep the attribute defined so later reads do not raise.
            self.value_net = None

        # Reward net is the discriminator network. Discriminator does not
        # receive the latent vector in InfoGAIL.
        self.reward_net = Reward(
            state_size * history_size,
            action_size,  # action size
            0,  # latent size
            hidden_size=64)

        self.posterior_net = DiscretePosterior(
            state_size=state_size * history_size,  # state
            action_size=0,  # action
            latent_size=0,  # context
            hidden_size=64,
            output_size=num_goals)

        self.opt_policy = optim.Adam(self.policy_net.parameters(), lr=0.0003)
        self.opt_reward = optim.Adam(self.reward_net.parameters(), lr=0.0003)
        # Only build the value optimizer when there is a value network;
        # building it unconditionally raised when use_value_net was off.
        self.opt_value = None
        if self.value_net is not None:
            self.opt_value = optim.Adam(self.value_net.parameters(),
                                        lr=0.0003)
        self.opt_posterior = optim.Adam(self.posterior_net.parameters(),
                                        lr=0.0003)

        # Create loss functions
        self.criterion = nn.BCELoss()
        self.criterion_posterior = nn.CrossEntropyLoss()

        self.create_environment()

    def checkpoint_data_to_save(self):
        '''Return the networks to checkpoint, keyed by their role.

        The keys are 'policy', 'value', 'reward' and 'posterior'; the values
        are the network objects currently held by this instance.
        '''
        return dict(
            policy=self.policy_net,
            value=self.value_net,
            reward=self.reward_net,
            posterior=self.posterior_net,
        )

    def load_checkpoint_data(self, checkpoint_path):
        '''Replace the networks with the ones stored at `checkpoint_path`.

        The file is expected to hold a dict with the keys 'policy', 'value',
        'reward' and 'posterior'.

        Raises:
            AssertionError: if `checkpoint_path` does not exist.
        '''
        # Raised explicitly instead of via `assert` so the check still runs
        # under `python -O`; the exception type and message are unchanged.
        if not os.path.exists(checkpoint_path):
            raise AssertionError(
                'Checkpoint path does not exists {}'.format(checkpoint_path))

        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        checkpoint_data = torch.load(checkpoint_path)
        # NOTE(review): the attributes are rebound to the loaded objects; any
        # optimizer created earlier from the previous networks' parameters
        # would still point at those old parameters -- confirm whether
        # training is ever resumed after loading.
        self.policy_net = checkpoint_data['policy']
        self.value_net = checkpoint_data['value']
        self.reward_net = checkpoint_data['reward']
        self.posterior_net = checkpoint_data['posterior']

    def update_params_for_batch(self, states, actions, latent_c, targets,
                                advantages, expert_states, expert_actions,
                                optim_batch_size, optim_batch_size_exp,
                                optim_iters):
        '''Update parameters for one batch of data.

        Update the policy network, discriminator (reward) network and the
        posterior network here. The value network is updated as well when
        `args.use_value_net` is set.

        The generated data (`states`, `actions`, `latent_c`, `targets`,
        `advantages`) and the expert data (`expert_states`, `expert_actions`)
        are consumed in consecutive slices of at most `optim_batch_size` and
        `optim_batch_size_exp` rows respectively, for `optim_iters`
        iterations. Losses and weight/gradient norms are logged to the
        summary writer at `self.gail_step_count`, which is incremented once
        per iteration.
        '''
        args, dtype = self.args, self.dtype
        curr_id, curr_id_exp = 0, 0
        for _ in range(optim_iters):
            curr_batch_size = min(optim_batch_size, actions.size(0) - curr_id)
            curr_batch_size_exp = min(optim_batch_size_exp,
                                      expert_actions.size(0) - curr_id_exp)
            start_idx, end_idx = curr_id, curr_id + curr_batch_size

            state_var = Variable(states[start_idx:end_idx])
            action_var = Variable(actions[start_idx:end_idx])
            latent_c_var = Variable(latent_c[start_idx:end_idx])
            advantages_var = Variable(advantages[start_idx:end_idx])

            start_idx, end_idx = curr_id_exp, curr_id_exp + curr_batch_size_exp
            expert_state_var = Variable(expert_states[start_idx:end_idx])
            expert_action_var = Variable(expert_actions[start_idx:end_idx])

            # Update reward net
            self.opt_reward.zero_grad()

            # Backprop with expert demonstrations (target label 0)
            expert_output = self.reward_net(
                torch.cat((expert_state_var, expert_action_var), 1))
            expert_disc_loss = self.criterion(
                expert_output,
                Variable(
                    torch.zeros(expert_action_var.size(0), 1).type(dtype)))
            expert_disc_loss.backward()

            # Backprop with generated demonstrations (target label 1)
            gen_output = self.reward_net(torch.cat((state_var, action_var), 1))
            gen_disc_loss = self.criterion(
                gen_output,
                Variable(torch.ones(action_var.size(0), 1)).type(dtype))
            gen_disc_loss.backward()

            # Add loss scalars.
            self.logger.summary_writer.add_scalars(
                'loss/discriminator', {
                    'total': expert_disc_loss.data[0] + gen_disc_loss.data[0],
                    'expert': expert_disc_loss.data[0],
                    'gen': gen_disc_loss.data[0],
                }, self.gail_step_count)
            self.opt_reward.step()

            reward_l2_norm, reward_grad_l2_norm = \
                    get_weight_norm_for_network(self.reward_net)
            self.logger.summary_writer.add_scalar('weight/discriminator/param',
                                                  reward_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/discriminator/grad',
                                                  reward_grad_l2_norm,
                                                  self.gail_step_count)

            # Update posterior net. We need to do this by reparameterization
            # trick.
            # The optimizer has to be zeroed before and stepped after the
            # backward pass; without that the posterior net never changes
            # and its gradients pile up across iterations.
            self.opt_posterior.zero_grad()
            predicted_posterior = self.posterior_net(state_var)
            # There is no GOAL info in latent_c_var here.
            # TODO: This 0 and -1 stuff is not needed here. Confirm?
            # The class target is the index of the largest entry of each
            # latent row.
            _, true_posterior = torch.max(latent_c_var.data, dim=1)
            posterior_loss = self.criterion_posterior(predicted_posterior,
                                                      Variable(true_posterior))
            posterior_loss.backward()
            self.opt_posterior.step()
            self.logger.summary_writer.add_scalar('loss/posterior',
                                                  posterior_loss.data[0],
                                                  self.gail_step_count)

            # compute old and new action probabilities
            action_means, action_log_stds, action_stds = self.policy_net(
                torch.cat((state_var, latent_c_var), 1))
            log_prob_cur = normal_log_density(action_var, action_means,
                                              action_log_stds, action_stds)

            action_means_old, action_log_stds_old, action_stds_old = \
                    self.old_policy_net(torch.cat(
                        (state_var, latent_c_var), 1))
            log_prob_old = normal_log_density(action_var, action_means_old,
                                              action_log_stds_old,
                                              action_stds_old)

            if args.use_value_net:
                # update value net (mean squared error against the targets)
                self.opt_value.zero_grad()
                value_var = self.value_net(
                    torch.cat((state_var, latent_c_var), 1))
                value_loss = (value_var - \
                        targets[curr_id:curr_id+curr_batch_size]).pow(2.).mean()
                value_loss.backward()
                self.opt_value.step()

            # Update policy net (PPO step)
            self.opt_policy.zero_grad()
            ratio = torch.exp(log_prob_cur - log_prob_old)  # pnew / pold
            surr1 = ratio * advantages_var[:, 0]
            surr2 = torch.clamp(ratio, 1.0 - self.args.clip_epsilon, 1.0 +
                                self.args.clip_epsilon) * advantages_var[:, 0]
            policy_surr = -torch.min(surr1, surr2).mean()
            policy_surr.backward()
            # torch.nn.utils.clip_grad_norm(self.policy_net.parameters(), 40)
            self.opt_policy.step()
            self.logger.summary_writer.add_scalar('loss/policy',
                                                  policy_surr.data[0],
                                                  self.gail_step_count)

            policy_l2_norm, policy_grad_l2_norm = \
                    get_weight_norm_for_network(self.policy_net)
            self.logger.summary_writer.add_scalar('weight/policy/param',
                                                  policy_l2_norm,
                                                  self.gail_step_count)
            self.logger.summary_writer.add_scalar('weight/policy/grad',
                                                  policy_grad_l2_norm,
                                                  self.gail_step_count)

            # set new starting point for batch
            curr_id += curr_batch_size
            curr_id_exp += curr_batch_size_exp

            self.gail_step_count += 1

    def update_params(self, gen_batch, expert_batch, episode_idx, optim_epochs,
                      optim_batch_size):
        '''Update params for Policy (G), Reward (D) and Posterior (q) networks.

        Converts the generated and expert batches to tensors, computes
        returns and normalized advantages, snapshots the current policy into
        `old_policy_net`, and then runs `optim_epochs` passes of
        `update_params_for_batch` over freshly shuffled data.

        `episode_idx` drives a linear decay (down to 0 at `args.num_epochs`)
        of the policy learning rate.
        '''
        args, dtype = self.args, self.dtype

        decay = max(1.0 - float(episode_idx)/args.num_epochs, 0)
        policy_lr = self.args.learning_rate * decay
        # A torch optimizer reads its learning rate from param_groups;
        # assigning an `lr` attribute on the optimizer object has no effect.
        for param_group in self.opt_policy.param_groups:
            param_group['lr'] = policy_lr
        # Attribute kept for any code that may read it back.
        self.opt_policy.lr = policy_lr
        # NOTE(review): the decayed clip value is computed but not passed on
        # anywhere in this method -- confirm whether it should be.
        clip_epsilon = self.args.clip_epsilon * decay

        # generated trajectories
        states = torch.Tensor(np.array(gen_batch.state)).type(dtype)
        actions = torch.Tensor(np.array(gen_batch.action)).type(dtype)
        rewards = torch.Tensor(np.array(gen_batch.reward)).type(dtype)
        masks = torch.Tensor(np.array(gen_batch.mask)).type(dtype)

        ## Expand states to include history ##
        # Generated trajectories already have history in them.

        latent_c = torch.Tensor(np.array(gen_batch.c)).type(dtype)
        values = None
        if args.use_value_net:
            values = self.value_net(Variable(torch.cat((states, latent_c), 1)))

        # expert trajectories
        list_of_expert_states, list_of_expert_actions = [], []
        list_of_masks = []
        for exp_state, exp_action, exp_mask in zip(expert_batch.state,
                                                   expert_batch.action,
                                                   expert_batch.mask):
            ## Expand expert states ##
            expanded_states = self.expand_states_numpy(exp_state,
                                                       self.history_size)
            list_of_expert_states.append(torch.Tensor(expanded_states))
            list_of_expert_actions.append(torch.Tensor(exp_action))
            list_of_masks.append(torch.Tensor(exp_mask))

        expert_states = torch.cat(list_of_expert_states, 0).type(dtype)
        expert_actions = torch.cat(list_of_expert_actions, 0).type(dtype)
        expert_masks = torch.cat(list_of_masks, 0).type(dtype)

        assert expert_states.size(0) == expert_actions.size(0), \
                "Expert transition size do not match"
        assert expert_states.size(0) == expert_masks.size(0), \
                "Expert transition size do not match"

        # compute advantages
        returns, advantages = get_advantage_for_rewards(rewards,
                                                        masks,
                                                        self.args.gamma,
                                                        values,
                                                        dtype=dtype)
        targets = Variable(returns)
        advantages = (advantages - advantages.mean()) / advantages.std()

        # Backup params after computing probs but before updating new params
        for old_policy_param, policy_param in zip(
                self.old_policy_net.parameters(),
                self.policy_net.parameters()):
            old_policy_param.data.copy_(policy_param.data)

        # update value, reward and policy networks
        optim_iters = self.args.batch_size // optim_batch_size
        optim_batch_size_exp = expert_actions.size(0) // optim_iters

        # Remove extra 1 array shape from actions, since actions were added as
        # 1-hot vector of shape (1, A). These are torch tensors, so use the
        # tensor method rather than routing through numpy.
        actions = actions.squeeze()
        expert_actions = expert_actions.squeeze()

        for _ in range(optim_epochs):
            perm = np.random.permutation(np.arange(actions.size(0)))
            perm_exp = np.random.permutation(np.arange(expert_actions.size(0)))
            if args.cuda:
                perm = torch.cuda.LongTensor(perm)
                perm_exp = torch.cuda.LongTensor(perm_exp)
            else:
                perm, perm_exp = torch.LongTensor(perm), torch.LongTensor(
                    perm_exp)

            self.update_params_for_batch(
                states[perm], actions[perm], latent_c[perm], targets[perm],
                advantages[perm], expert_states[perm_exp],
                expert_actions[perm_exp], optim_batch_size,
                optim_batch_size_exp, optim_iters)

    def train_gail(self, expert):
        '''Train Info-GAIL.'''
        args, dtype = self.args, self.dtype
        results = {
            'average_reward': [],
            'episode_reward': [],
            'true_traj': {},
            'pred_traj': {}
        }
        self.train_step_count, self.gail_step_count = 0, 0

        for ep_idx in range(args.num_epochs):
            memory = Memory()

            num_steps = 0
            reward_batch, true_reward_batch = [], []
            expert_true_reward_batch = []
            true_traj_curr_episode, gen_traj_curr_episode = [], []

            while num_steps < args.batch_size:
                traj_expert = expert.sample(size=1)
                state_expert, action_expert, _, _ = traj_expert

                # Expert state and actions
                state_expert = state_expert[0]
                action_expert = action_expert[0]
                expert_episode_len = len(state_expert)

                # Sample start state or should we just choose the start state
                # from the expert trajectory sampled above.
                # curr_state_obj = self.sample_start_state()
                curr_state_obj = State(state_expert[0], self.obstacles)
                curr_state_feat = self.get_state_features(
                    curr_state_obj, self.args.use_state_features)

                # Add history to state
                if args.history_size > 1:
                    curr_state = -1 * np.ones(
                        (args.history_size * curr_state_feat.shape[0]),
                        dtype=np.float32)
                    curr_state[(args.history_size-1) \
                            * curr_state_feat.shape[0]:] = curr_state_feat
                else:
                    curr_state = curr_state_feat

                # TODO: Make this a separate function. Can be parallelized.
                ep_reward, ep_true_reward, expert_true_reward = 0, 0, 0
                true_traj, gen_traj = [], []
                gen_traj_dict = {
                    'features': [],
                    'actions': [],
                    'c': [],
                    'mask': []
                }
                disc_reward, posterior_reward = 0.0, 0.0
                # Use a hard-coded list for memory to gather experience since we
                # need to mutate it before finally creating a memory object.

                c_sampled = np.zeros((self.num_goals), dtype=np.float32)
                c_sampled[np.random.randint(0, self.num_goals)] = 1.0
                c_sampled_tensor = torch.zeros((1)).type(torch.LongTensor)
                c_sampled_tensor[0] = int(np.argmax(c_sampled))
                if self.args.cuda:
                    c_sampled_tensor = torch.cuda.LongTensor(c_sampled_tensor)

                memory_list = []
                for t in range(expert_episode_len):
                    action = self.select_action(
                        np.concatenate((curr_state, c_sampled)))
                    action_numpy = action.data.cpu().numpy()

                    # Save generated and true trajectories
                    true_traj.append((state_expert[t], action_expert[t]))
                    gen_traj.append((curr_state_obj.coordinates, action_numpy))
                    gen_traj_dict['features'].append(
                        self.get_state_features(curr_state_obj,
                                                self.args.use_state_features))
                    gen_traj_dict['actions'].append(action_numpy)
                    gen_traj_dict['c'].append(c_sampled)

                    action = epsilon_greedy_linear_decay(action_numpy,
                                                         args.num_epochs * 0.5,
                                                         ep_idx,
                                                         self.action_size,
                                                         low=0.05,
                                                         high=0.3)

                    # Get the discriminator reward
                    disc_reward_t = float(
                        self.reward_net(
                            torch.cat((Variable(
                                torch.from_numpy(curr_state).unsqueeze(
                                    0)).type(dtype),
                                       Variable(
                                           torch.from_numpy(
                                               oned_to_onehot(
                                                   action, self.action_size)).
                                           unsqueeze(0)).type(dtype)),
                                      1)).data.cpu().numpy()[0, 0])

                    if args.use_log_rewards and disc_reward_t < 1e-6:
                        disc_reward_t += 1e-6

                    disc_reward_t = -math.log(disc_reward_t) \
                            if args.use_log_rewards else -disc_reward_t
                    disc_reward += disc_reward_t

                    # Predict c given (x_t)
                    predicted_posterior = self.posterior_net(
                        Variable(torch.from_numpy(curr_state).unsqueeze(
                            0)).type(dtype))
                    posterior_reward_t = self.criterion_posterior(
                        predicted_posterior,
                        Variable(c_sampled_tensor)).data.cpu().numpy()[0]

                    posterior_reward += (self.args.lambda_posterior *
                                         posterior_reward_t)

                    # Update Rewards
                    ep_reward += (disc_reward_t + posterior_reward_t)
                    true_goal_state = [
                        int(x) for x in state_expert[-1].tolist()
                    ]
                    if self.args.flag_true_reward == 'grid_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            curr_state_obj.coordinates,
                            goals=[true_goal_state])
                        expert_true_reward += self.true_reward.reward_at_location(
                            state_expert[t], goals=[true_goal_state])
                    elif self.args.flag_true_reward == 'action_reward':
                        ep_true_reward += self.true_reward.reward_at_location(
                            np.argmax(action_expert[t]), action)
                        expert_true_reward += self.true_reward.corret_action_reward
                    else:
                        raise ValueError("Incorrect true reward type")

                    # Update next state
                    next_state_obj = self.transition_func(
                        curr_state_obj, Action(action), 0)
                    next_state_feat = self.get_state_features(
                        next_state_obj, self.args.use_state_features)
                    #next_state = running_state(next_state)

                    mask = 0 if t == expert_episode_len - 1 else 1

                    # Push to memory
                    memory_list.append([
                        curr_state,
                        np.array([oned_to_onehot(action,
                                                 self.action_size)]), mask,
                        next_state_feat, disc_reward_t + posterior_reward_t,
                        c_sampled, c_sampled
                    ])

                    if args.render:
                        env.render()

                    if not mask:
                        break

                    curr_state_obj = next_state_obj
                    curr_state_feat = next_state_feat

                    if args.history_size > 1:
                        curr_state[:(args.history_size-1) \
                                * curr_state_feat.shape[0]] = \
                                curr_state[curr_state_feat.shape[0]:]
                        curr_state[(args.history_size-1) \
                                * curr_state_feat.shape[0]:] = curr_state_feat
                    else:
                        curr_state = curr_state_feat



                assert memory_list[-1][2] == 0, \
                        "Mask for final end state is not 0."
                for memory_t in memory_list:
                    memory.push(*memory_t)

                self.logger.summary_writer.add_scalars(
                    'gen_traj/gen_reward', {
                        'discriminator': disc_reward,
                        'posterior': posterior_reward,
                    }, self.train_step_count)

                num_steps += (t - 1)
                reward_batch.append(ep_reward)
                true_reward_batch.append(ep_true_reward)
                expert_true_reward_batch.append(expert_true_reward)
                results['episode_reward'].append(ep_reward)

                # Append trajectories
                true_traj_curr_episode.append(true_traj)
                gen_traj_curr_episode.append(gen_traj)

            results['average_reward'].append(np.mean(reward_batch))

            # Add to tensorboard
            self.logger.summary_writer.add_scalars(
                'gen_traj/reward', {
                    'average': np.mean(reward_batch),
                    'max': np.max(reward_batch),
                    'min': np.min(reward_batch)
                }, self.train_step_count)
            self.logger.summary_writer.add_scalars(
                'gen_traj/true_reward', {
                    'average': np.mean(true_reward_batch),
                    'max': np.max(true_reward_batch),
                    'min': np.min(true_reward_batch),
                    'expert_true': np.mean(expert_true_reward_batch)
                }, self.train_step_count)

            # Add predicted and generated trajectories to results
            if ep_idx % self.args.save_interval == 0:
                results['true_traj'][ep_idx] = copy.deepcopy(
                    true_traj_curr_episode)
                results['pred_traj'][ep_idx] = copy.deepcopy(
                    gen_traj_curr_episode)

            # Update parameters
            gen_batch = memory.sample()

            # We do not get the context variable from expert trajectories.
            # Hence we need to fill it in later.
            expert_batch = expert.sample(size=args.num_expert_trajs)

            self.update_params(gen_batch, expert_batch, ep_idx,
                               args.optim_epochs, args.optim_batch_size)

            self.train_step_count += 1

            if ep_idx > 0 and ep_idx % args.log_interval == 0:
                print('Episode [{}/{}]  Avg R: {:.2f}   Max R: {:.2f} \t' \
                      'True Avg {:.2f}   True Max R: {:.2f}   ' \
                      'Expert (Avg): {:.2f}'.format(
                          ep_idx, args.num_epochs, np.mean(reward_batch),
                          np.max(reward_batch), np.mean(true_reward_batch),
                          np.max(true_reward_batch),
                          np.mean(expert_true_reward_batch)))

            results_path = os.path.join(args.results_dir, 'results.pkl')
            with open(results_path, 'wb') as results_f:
                pickle.dump((results), results_f, protocol=2)
                # print("Did save results to {}".format(results_path))

            if ep_idx % args.save_interval == 0:
                checkpoint_filepath = self.model_checkpoint_filepath(ep_idx)
                torch.save(self.checkpoint_data_to_save(), checkpoint_filepath)
                print("Did save checkpoint: {}".format(checkpoint_filepath))
Beispiel #10
0
parser.add_argument('--max-steps', type=int, default=1000000)
# --log-dir is joined into a path unconditionally below; without a value
# os.path.join() fails on None, so let argparse report the missing option.
parser.add_argument('--log-dir', type=str, required=True)
args = parser.parse_args()

# Wrap the environment in a Monitor that writes under "<log-dir>/0".
env = bench.Monitor(gym.make(args.env_name),
                    os.path.join(args.log_dir, '0'),
                    allow_early_resets=False)

# First dimension of the observation / action spaces sizes the networks.
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

# Seed the environment and torch from the same command-line seed.
env.seed(args.seed)
torch.manual_seed(args.seed)

policy_net = Policy(num_inputs, num_actions)
value_net = Value(num_inputs)

def select_action(state):
    """Sample an action for ``state`` from the policy's normal distribution.

    ``state`` is a NumPy array; a leading dimension of size one is added
    before it is wrapped in a ``Variable`` and passed to ``policy_net``.
    The policy's first and third outputs are used as the mean and standard
    deviation for ``torch.normal``; the second output is ignored.
    """
    batched_state = Variable(torch.from_numpy(state).unsqueeze(0))
    mean, _, std = policy_net(batched_state)
    return torch.normal(mean, std)


def update_params(batch):
    """Convert a sampled batch to tensors and evaluate ``value_net`` on its states.

    ``batch`` must expose ``reward``, ``mask``, ``action`` and ``state``
    attributes.

    NOTE(review): the body visible here stops right after the value-network
    call and nothing is returned or optimised; this looks like a truncated
    excerpt of a longer function -- confirm against the full source.
    """
    rewards = torch.Tensor(batch.reward)
    masks = torch.Tensor(batch.mask)
    # Per-step actions are concatenated along axis 0 into a single array.
    actions = torch.Tensor(np.concatenate(batch.action, 0))
    states = torch.Tensor(batch.state)
    # Value estimates for every state in the batch.
    values = value_net(Variable(states))
Beispiel #11
0
def train(args):
    """Set up a FOCOPS agent from ``args`` and run its training loop.

    ``args`` is an attribute namespace (it is passed to ``vars``) carrying
    the environment id, seed, network/optimizer settings and FOCOPS
    hyperparameters read below. Each of the ``args.max_iter_num`` iterations
    collects a rollout, updates the agent, steps the three learning-rate
    schedulers and dumps the logger. Nothing is returned.
    """
    # Default tensor dtype and compute device.
    dtype = torch.float32
    torch.set_default_dtype(dtype)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Environment and the sizes derived from it.
    env = gym.make(args.env_id)
    envname = env.spec.id
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    # Seed torch, NumPy and the environment with the same value.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)

    # Policy plus separate value networks for reward and cost.
    policy = GaussianPolicy(obs_dim, act_dim, args.hidden_size,
                            args.activation, args.logstd)
    value_net = Value(obs_dim, args.hidden_size, args.activation)
    cvalue_net = Value(obs_dim, args.hidden_size, args.activation)
    for net in (policy, value_net, cvalue_net):
        net.to(device)

    # One Adam optimizer per network, each with its own learning rate.
    pi_optimizer = torch.optim.Adam(policy.parameters(), args.pi_lr)
    vf_optimizer = torch.optim.Adam(value_net.parameters(), args.vf_lr)
    cvf_optimizer = torch.optim.Adam(cvalue_net.parameters(), args.cvf_lr)

    def linear_decay(it):
        # Multiplier falls linearly from 1 to 0 over max_iter_num steps.
        return max(1.0 - it / args.max_iter_num, 0)

    # Same decay schedule for all three optimizers, kept in a fixed order.
    schedulers = [
        torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=linear_decay)
        for optimizer in (pi_optimizer, vf_optimizer, cvf_optimizer)
    ]

    # State normalisation statistics, score queues and the logger
    # (which receives the hyperparameters as a dict).
    running_stat = RunningStats(clip=5)
    score_queue = deque(maxlen=100)
    cscore_queue = deque(maxlen=100)
    logger = Logger(vars(args))

    # Constraint bound for this environment.
    cost_lim = get_threshold(envname, constraint=args.constraint)

    agent = FOCOPS(env, policy, value_net, cvalue_net,
                   pi_optimizer, vf_optimizer, cvf_optimizer,
                   args.num_epochs, args.mb_size,
                   args.c_gamma, args.lam, args.delta, args.eta,
                   args.nu, args.nu_lr, args.nu_max, cost_lim,
                   args.l2_reg, score_queue, cscore_queue, logger)

    start_time = time.time()

    for iteration in range(args.max_iter_num):
        # Record the current iteration with the saved model.
        agent.logger.save_model('iter', iteration)

        # Collect a fresh batch of trajectories.
        data_generator = DataGenerator(obs_dim, act_dim,
                                       args.batch_size, args.max_eps_len)
        rollout = data_generator.run_traj(
            env, agent.policy, agent.value_net, agent.cvalue_net,
            running_stat, agent.score_queue, agent.cscore_queue,
            args.gamma, args.c_gamma, args.gae_lam, args.c_gae_lam,
            dtype, device, args.constraint)

        # Policy / value update on the collected rollout.
        agent.update_params(rollout, dtype, device)

        # Advance the learning-rate schedules.
        for scheduler in schedulers:
            scheduler.step()

        # Log elapsed time and normalisation stats, then flush the logger.
        agent.logger.update('time', time.time() - start_time)
        agent.logger.update('running_stat', running_stat)
        agent.logger.dump()
Beispiel #12
0
print(args.use_parameter_noise)
env = gym.make(args.env_name)

# First dimension of the observation / action spaces sizes the networks.
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    # Single joint actor-critic network with one optimizer.
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.001)
else:
    # Separate policy and value networks. With parameter noise enabled the
    # layer-normalised policy is used instead of the plain one.
    _policy_cls = PolicyLayerNorm if args.use_parameter_noise else Policy
    policy_net = _policy_cls(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
    opt_value = optim.Adam(value_net.parameters(), lr=0.001)

# def select_action(state,sigma):
#     state = torch.from_numpy(state).unsqueeze(0)
#     if args.use_parameter_noise:
#         action_mean, _, action_std = policy_net(Variable(state),sigma,param_noise=True)
#     else:
#         action_mean, _, action_std = policy_net(Variable(state))
#     action = torch.normal(action_mean, action_std)
Beispiel #13
0
def _sms_maama_cell(text, styles):
    """Wrap dynamic text in a "BodyText" paragraph for use as a table cell.

    The text is XML-escaped first so characters such as ``<`` and ``&`` in
    message bodies are rendered literally instead of being parsed as
    paragraph markup.
    """
    # Function-scope import: stdlib only, keeps the file's import block as is.
    from xml.sax.saxutils import escape
    return Paragraph(escape(text), styles["BodyText"])


def _sms_maama_table(rows, colwidths, top_aligned=True):
    """Build a gridded, left-aligned table whose first row has a gray background.

    With ``top_aligned`` every cell is vertically aligned to the top;
    otherwise only the header row is given 'MIDDLE' vertical alignment.
    """
    if top_aligned:
        valign = ('VALIGN', (0, 0), (-1, -1), 'TOP')
    else:
        valign = ('VALIGN', (0, 0), (-1, 0), 'MIDDLE')
    return Table(rows,
                 colwidths,
                 style=[
                     ('INNERGRID', (0, 0), (-1, -1), 0.25, colors.black),
                     ('BOX', (0, 0), (-1, -1), 0.5, colors.black),
                     valign,
                     ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                     ('BACKGROUND', (0, 0), (-1, 0), colors.gray),
                 ])


def _sms_maama_message_rows(messages, styles):
    """Return one [urn, text cell, status, local sent time] row per message."""
    return [[
        message.urn,
        _sms_maama_cell(message.text, styles),
        message.status,
        localtime(message.sent_on).strftime('%Y-%m-%d %H:%M')
    ] for message in messages]


def _sms_maama_flow_value_rows(values, styles):
    """Return one [urns, flow name, run created, value, value time] row per flow value."""
    return [[
        value.run.contact.urns,
        _sms_maama_cell(value.run.flow.name, styles),
        localtime(value.run.created_on).strftime('%Y-%m-%d %H:%M'),
        _sms_maama_cell(value.value, styles),
        localtime(value.time).strftime('%Y-%m-%d %H:%M')
    ] for value in values]


def sms_maama_report():
    """Build the SMS Maama weekly PDF report and write it to disk.

    Collects contacts, groups, messages and flow values through the model
    query helpers, lays them out as a sequence of headed tables and builds
    the document with ReportLab. Takes no arguments and returns nothing.
    """
    # NOTE(review): "{end_date}" in the path is never substituted (this is a
    # plain string and end_date is only computed further down), so the file
    # name contains the literal braces. Left unchanged because other code may
    # open this exact path -- confirm before formatting the date in.
    doc = SimpleDocTemplate(
        "qc/static/qc/reports/sms_maama_weekly_report_{end_date}.pdf",
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=18)
    logo = "qc/static/images/logo.jpg"
    logo2 = "qc/static/images/sms_maama_logo.jpg"
    report_title = "SMS Maama Weekly Report"
    prepared_by = "Faith Nassiwa"

    # Data shown in the report (fetched up front, in a fixed order).
    groups = Group.get_sms_maama_groups()
    contacts = Contact.get_sms_maama_weekly_contacts()
    sms_maama_contacts = Contact.get_sms_maama_contacts()
    delivered_messages = Message.get_sms_maama_delivered_messages()
    failed_messages = Message.get_sms_maama_failed_messages()
    failed_messages_count = Message.get_sms_maama_failed_messages_count()
    contacts_count = Contact.get_sms_maama_contacts_count()
    weekly_contacts_count = Contact.get_sms_maama_weekly_contacts_count()
    messages_count = Message.get_sms_maama_sent_messages_count()
    read_messages_count = Message.get_sms_maama_read_messages_count()
    hanging_messages_count = Message.get_sms_maama_hanging_messages_count()
    hanging_messages = Message.get_sms_maama_hanging_messages()
    flow_responses_weekly = Message.get_sms_maama_weekly_flow_responses()
    flow_responses_count = Message.get_sms_maama_flow_responses_count()
    baby_responses = Message.get_sms_maama_flow_responses_baby()
    baby_responses_count = Message.get_sms_maama_flow_responses_baby_count()
    stops = Message.get_sms_maama_opted_out()
    stops_count = Message.get_sms_maama_opted_out_count()
    screening_responses = Value.sms_maama_contact_flows_screening_values()
    antenatal_responses = Value.sms_maama_contact_flows_antenatal_values()

    # Reporting window: the seven days ending yesterday.
    start_date = datetime.date.today() - datetime.timedelta(days=7)
    end_date = datetime.date.today() - datetime.timedelta(days=1)
    this_day = datetime.datetime.now(
        pytz.timezone('Africa/Kampala')).strftime('%Y-%m-%d %H:%M %Z')

    styles = getSampleStyleSheet()
    story = []

    def paragraph(markup):
        # Append a paragraph in the "Normal" style.
        story.append(Paragraph(markup, styles["Normal"]))

    def gap(times=1):
        # Append `times` fixed-height vertical spacers.
        for _ in range(times):
            story.append(Spacer(1, 12))

    message_titles = ['Phone Number', 'Message', 'Status', 'Sent On']

    # --- Header: logos, title, dates, author --------------------------------
    story.append(
        Table([[
            Image(logo, 2 * inch, 1 * inch),
            Image(logo2, 2 * inch, 1 * inch)
        ]]))
    gap()
    paragraph('<font size=14><b>%s</b></font>' % report_title)
    gap()
    paragraph('<font size=12>Date: %s</font>' % this_day)
    gap()
    paragraph('<font size=12> Report Date: %s - %s</font>' % (start_date,
                                                              end_date))
    paragraph('<font size=12> Prepared By: %s</font>' % prepared_by)
    gap()

    # --- All contacts --------------------------------------------------------
    paragraph('<font size=12> <b>All SMS Maama Contacts.</b></font>')
    gap()
    rows = [[
        'Phone Number', 'Name', 'Points', 'Enrolled On', 'Week Enrolled'
    ]]
    rows.extend([
        contact.urns, contact.name, contact.points,
        contact.sms_maama_enrollment_date, contact.number_of_weeks
    ] for contact in sms_maama_contacts)
    story.append(
        _sms_maama_table(rows, (100, 120, 40, 120, 80), top_aligned=False))
    gap(2)

    # --- Participants per enrollment-week group ------------------------------
    paragraph(
        '<font size=12> <b>SMS Maama Week of Pregnancy Upon Enrollment Status</b></font>'
    )
    gap()
    rows = [['SMS Maama Week', 'Number of Participants']]
    rows.extend([group.name, group.count] for group in groups)
    story.append(_sms_maama_table(rows, (230, 230), top_aligned=False))
    paragraph(
        '<font size=12> <center>Total Participants: %s</center></font>' %
        contacts_count)
    gap(2)

    # --- Contacts enrolled this week -----------------------------------------
    paragraph('<font size=12> <b> Weekly Enrolled Contacts</b></font>')
    gap()
    rows = [['Phone Number', 'Created On', 'Enrolled On', 'Language']]
    rows.extend([
        weekly_contact.urns,
        localtime(weekly_contact.created_on).strftime("%Y-%m-%d %H:%M"),
        weekly_contact.sms_maama_enrollment_date, weekly_contact.language
    ] for weekly_contact in contacts)
    story.append(
        _sms_maama_table(rows, (100, 120, 120, 100), top_aligned=False))
    paragraph(
        '<font size=12> <center>Total Weekly Participants: %s</center></font>'
        % weekly_contacts_count)
    gap(2)

    # --- Message count summary -----------------------------------------------
    paragraph('<font size=12> <b>Weekly Message Count Summary</b></font>')
    gap()
    paragraph(
        '<font size=12> <center>Total Messages Sent: %s</center></font>' %
        messages_count)
    paragraph(
        '<font size=12> <center>Total Messages Delivered: %s</center></font>'
        % read_messages_count)
    paragraph(
        '<font size=12> <center>Total Messages Hanging(No delivery receipt): %s</center></font>'
        % hanging_messages_count)
    paragraph(
        '<font size=12> <center>Total Failed to Send Messages: %s</center></font>'
        % failed_messages_count)
    paragraph(
        '<font size=12> <center>Total Weekly Responses: %s</center></font>' %
        flow_responses_count)
    gap(2)

    # --- Baby / post-partum initiations --------------------------------------
    paragraph(
        '<font size=12> <b> Weekly Baby, Post-Partum Initiations </b></font>')
    gap()
    rows = [message_titles] + _sms_maama_message_rows(baby_responses, styles)
    story.append(_sms_maama_table(rows, (100, 130, 100, 130)))
    paragraph(
        '<font size=12> <center>Total Weekly Baby Responses: %s</center></font>'
        % baby_responses_count)
    gap(2)

    # --- Terminations (opt-outs) ---------------------------------------------
    paragraph('<font size=12> <b> Weekly Terminations </b></font>')
    gap()
    rows = [message_titles] + _sms_maama_message_rows(stops, styles)
    story.append(_sms_maama_table(rows, (100, 130, 100, 130)))
    paragraph(
        '<font size=12> <center>Total Weekly Terminations: %s</center></font>'
        % stops_count)
    gap(2)

    # --- Screening question responses ----------------------------------------
    paragraph('<font size=12><b>Responses to Screening Questions</b></font>')
    gap()
    rows = [[
        'Phone Number', 'Screening', 'Question Sent On', 'Response',
        'Response Sent On'
    ]] + _sms_maama_flow_value_rows(screening_responses, styles)
    story.append(_sms_maama_table(rows, (100, 100, 100, 60, 100)))
    gap(2)

    # --- Antenatal reminder responses (table only when there are any) --------
    paragraph('<font size=12><b>Responses to Antenatal Reminders</b></font>')
    gap()
    if antenatal_responses.count() >= 1:
        rows = [[
            'Phone Number', 'Appointment Reminder', 'Reminder Sent On',
            'Response', 'Response Sent On'
        ]] + _sms_maama_flow_value_rows(antenatal_responses, styles)
        story.append(_sms_maama_table(rows, (85, 130, 95, 55, 95)))
    else:
        paragraph(
            '<font size=12>No responses to Antenatal Reminders yet. </font>')
        gap()
    gap(2)

    # --- Voice call interactions (static placeholder text) -------------------
    paragraph('<font size=12><b>TMCG Call Interactions</b></font>')
    gap()
    paragraph('<font size=12>No TMCG voice call interactions yet. </font>')
    gap(2)

    # --- Weekly flow responses -----------------------------------------------
    paragraph('<font size=12> <b> Weekly Responses </b></font>')
    gap()
    rows = [message_titles] + _sms_maama_message_rows(flow_responses_weekly,
                                                      styles)
    story.append(_sms_maama_table(rows, (100, 130, 100, 130)))
    gap()

    # --- Failed messages -----------------------------------------------------
    paragraph('<font size=12> <b> Weekly failed to send messages </b></font>')
    gap()
    rows = [message_titles] + _sms_maama_message_rows(failed_messages, styles)
    story.append(_sms_maama_table(rows, (100, 160, 100, 100)))
    gap(2)

    # --- Hanging messages ----------------------------------------------------
    paragraph('<font size=12> <b> Weekly hanging messages </b></font>')
    gap()
    rows = [message_titles] + _sms_maama_message_rows(hanging_messages, styles)
    story.append(
        _sms_maama_table(rows, (100, 160, 100, 100), top_aligned=False))
    gap(2)

    # --- Read / delivered messages -------------------------------------------
    paragraph('<font size=12> <b> Weekly read/delivered messages </b></font>')
    gap()
    rows = [message_titles] + _sms_maama_message_rows(delivered_messages,
                                                      styles)
    story.append(
        _sms_maama_table(rows, (100, 160, 100, 100), top_aligned=False))
    gap()

    doc.build(story)
Beispiel #14
0
    def save(self, *args, **kwargs):
        """Store the cleaned form data as ``Value`` rows and return the EObject.

        The EObject is saved first if it has no primary key yet. When no
        group is set, a new group number (highest existing group + 1, or 1
        when there are none) is assigned. One ``Value`` per cleaned field
        (one per selected item for multiple-choice fields) is built, any
        existing values of this EObject in the same group are deleted, and
        the new values are bulk-created.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        # If the EObject is not stored in the database yet, save it first.
        if self._eobject.pk is None:
            self._eobject.save()

        # IMPORTANT: once an object has filled in a field on one page, the
        # same field must not be offered to it again on another page.
        if self._group is None:
            # New group: use max(existing groups) + 1; max() raises
            # ValueError when there are no existing groups.
            try:
                self._group = max(self.eobject_eform_groups) + 1
            except ValueError:
                self._group = 1

        # Keyword arguments shared by every Value created below.
        common = dict(eobject=self._eobject, group=self._group)

        new_values = []
        for key, value in self.cleaned_data.items():
            if value is None:
                continue  # optional field left empty: skip it
            efield = self._key_field_dict[key]
            field_type = efield.field_type

            if field_type in EField.MULT_CHOICES_FIELD:
                # One Value row per selected item.
                new_values.extend(
                    Value(efield=efield,
                          value=efield.get_db_value(item),
                          **common) for item in value)
            elif field_type == u'SimpleModelChoiceField':
                # The value is "<content_type_id>-<object_id>".
                parts = value.split("-")
                content_type_id, object_id = parts[0], parts[1]
                new_values.append(
                    Value(efield=efield,
                          content_type=ContentType.objects.get(
                              pk=content_type_id),
                          object_id=object_id,
                          **common))
            elif field_type in [u"VideoField", u'FileField']:
                new_values.append(Value(efield=efield, vfile=value, **common))
            elif field_type == u"ImageField":
                # A fresh upload is stored as a CommonImage; any other value
                # is used as the image object directly.
                if isinstance(value, UploadedFile):
                    common_image = CommonImage.objects.create(image=value)
                else:
                    common_image = value
                new_values.append(
                    Value(efield=efield,
                          content_type=ContentType.objects.get_for_model(
                              common_image),
                          object_id=common_image.id,
                          **common))
            else:
                new_values.append(
                    Value(efield=efield,
                          value=efield.get_db_value(value),
                          **common))

        # Initial values may belong to a different eobject, so the delete
        # must also filter on eobject=self._eobject; otherwise another
        # object's values could be removed.
        # NOTE(review): the delete and bulk_create are not wrapped in a
        # transaction here -- confirm the caller provides one.
        self.eobject_values.filter(group=self._group,
                                   eobject=self._eobject).delete()
        Value.objects.bulk_create(new_values)
        return self._eobject
Beispiel #15
0
env = gym.make(args.env_name)

# First dimension of the observation / action spaces sizes the networks.
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

# Seed the environment and torch from the same command-line seed.
env.seed(args.seed)
torch.manual_seed(args.seed)

if not args.use_joint_pol_val:
    # Separate recurrent policy (plus a second instance kept as the "old"
    # policy) and a value network, each trainable net with its own optimizer.
    policy_net = GRU(num_inputs, num_actions)
    old_policy_net = GRU(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)
else:
    # Single joint actor-critic network with one optimizer.
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)


def create_batch_inputs(batch_states_list, batch_actions_list,
                        batch_advantages_list, batch_targets_list):
    """Allocate zero-filled, padded batch tensors sized from the input lists.

    The padded length is the largest ``size(0)`` found among the entries of
    ``batch_states_list``. Three tensors are allocated, with one row per
    entry of the corresponding input list:

    * states:     ``(len(batch_states_list), max_len, num_inputs)``
    * actions:    ``(len(batch_actions_list), max_len, num_actions)``
    * advantages: ``(len(batch_advantages_list), max_len)``

    ``num_inputs`` and ``num_actions`` are read from module scope.

    NOTE(review): the body present here only allocates the buffers; it does
    not copy any data into them, does not use ``batch_targets_list`` and
    does not return anything. Confirm whether the rest of the function is
    missing.
    """
    seq_lengths = [seq.size(0) for seq in batch_states_list]
    padded_len = max(seq_lengths)

    padded_states = torch.zeros(
        len(batch_states_list), padded_len, num_inputs)
    padded_actions = torch.zeros(
        len(batch_actions_list), padded_len, num_actions)
    padded_advantages = torch.zeros(len(batch_advantages_list), padded_len)
Beispiel #16
0
        self.num_steps = 20
        self.max_episode_length = 10000
        self.seed = 1
        self.env_name = 'Pendulum-v0'


# Script entry point: build the shared policy/value models and launch one
# evaluation process plus `params.num_processes` training processes.
if __name__ == '__main__':
    # Set before any worker is spawned (presumably to keep each process
    # single-threaded in OpenMP-backed ops — TODO confirm).
    os.environ['OMP_NUM_THREADS'] = '1'
    params = Params()
    torch.manual_seed(params.seed)
    # This environment instance is only used to read the observation and
    # action vector sizes.
    env = gym.make(params.env_name)
    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]

    # Models whose parameters are moved to shared memory so that every
    # process receives the same underlying storage.
    shared_p = Policy(num_inputs, num_outputs)
    shared_v = Value(num_inputs)
    shared_p.share_memory()
    shared_v.share_memory()
    optimizer_p = my_optim.SharedAdam(shared_p.parameters(), lr=params.lr)
    optimizer_v = my_optim.SharedAdam(shared_v.parameters(), lr=params.lr)

    processes = []
    # The evaluation process is started first; its first argument is
    # params.num_processes (one past the last training rank used below).
    p = mp.Process(target=test, args=(params.num_processes, params, shared_p))
    p.start()
    processes.append(p)
    # One training process per rank in [0, params.num_processes).
    for rank in range(0, params.num_processes):
        p = mp.Process(target=train,
                       args=(rank, params, shared_p, shared_v, optimizer_p,
                             optimizer_v))
        p.start()
        processes.append(p)
    # NOTE(review): the started processes are collected in `processes` but
    # are not joined in the portion of the script visible here.
Beispiel #17
0
def train(rank, params, shared_p, shared_v, optimizer_p, optimizer_v):
    """Run one worker's endless collect-and-update loop.

    Every outer iteration:

    1. copies the weights of ``shared_p`` / ``shared_v`` into local networks;
    2. rolls the environment forward in chunks of at most
       ``params.num_steps`` steps, pushing states, actions, discounted
       returns and advantages to a replay memory until more than
       ``batch_size`` steps have been taken;
    3. samples a batch and performs up to 100 policy updates using a
       probability-ratio objective with a KL penalty, followed by 100 value
       regression updates. Gradients are computed on local copies and handed
       over through ``ensure_shared_grads`` before the optimizers step.

    Args:
        rank: Worker index; added to ``params.seed`` to seed torch.
        params: Hyper-parameter holder. This function reads ``seed``,
            ``env_name``, ``num_steps``, ``max_episode_length``, ``gamma``,
            ``kl_target``, ``ksi``, ``alpha``, ``beta_hight`` and
            ``beta_low``.
        shared_p: Policy model whose ``state_dict`` is loaded into the local
            policies and which receives the policy gradients.
        shared_v: Value model whose ``state_dict`` is loaded into the local
            value network and which receives the value gradients.
        optimizer_p: Optimizer stepped after each policy gradient transfer.
        optimizer_v: Optimizer stepped after each value gradient transfer.

    This function loops forever and never returns.
    """
    torch.manual_seed(params.seed + rank)
    env = gym.make(params.env_name)
    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]
    policy = Policy(num_inputs, num_outputs)
    value = Value(num_inputs)

    memory = ReplayMemory(1e6)
    batch_size = 10000

    state = env.reset()
    state = Variable(torch.Tensor(state).unsqueeze(0))
    done = True

    episode_length = 0
    # The adaptive KL coefficient has to survive from one update to the next;
    # re-initialising it inside the loop would discard every adjustment made
    # at the bottom of the loop.
    kl_coef = 1.
    while True:
        # Start every update from the latest shared weights.
        policy.load_state_dict(shared_p.state_dict())
        value.load_state_dict(shared_v.state_dict())

        w = -1
        while w < batch_size:
            states = []
            actions = []
            rewards = []
            values = []
            returns = []
            advantages = []

            # Perform K steps
            for step in range(params.num_steps):
                w += 1
                # Count environment steps (not updates) so that the
                # max_episode_length cut-off below can actually trigger.
                episode_length += 1
                states.append(state)

                # Sample an action: mean plus standard deviation times
                # standard normal noise.
                mu, sigma_sq = policy(state)
                eps = torch.randn(mu.size())
                action = (mu + sigma_sq.sqrt()*Variable(eps))
                actions.append(action)

                v = value(state)
                values.append(v)

                env_action = action.data.squeeze().numpy()
                state, reward, done, _ = env.step(env_action)
                done = (done or episode_length >= params.max_episode_length)
                # Clip the reward to [-1, 1].
                reward = max(min(reward, 1), -1)
                rewards.append(reward)

                if done:
                    episode_length = 0
                    state = env.reset()

                state = Variable(torch.Tensor(state).unsqueeze(0))

                if done:
                    break

            # Bootstrap from the value of the next state unless the episode
            # ended, in which case the tail return is zero.
            R = torch.zeros(1, 1)
            if not done:
                v = value(state)
                R = v.data

            # compute returns and advantages:
            values.append(Variable(R))
            R = Variable(R)
            for i in reversed(range(len(rewards))):
                R = params.gamma * R + rewards[i]
                returns.insert(0, R)
                A = R - values[i]
                advantages.insert(0, A)

            # store useful info:
            memory.push([states, actions, returns, advantages])

        batch_states, batch_actions, batch_returns, batch_advantages = memory.sample(batch_size)

        # policy grad updates:
        mu_old, sigma_sq_old = policy(batch_states)
        # The old-policy probabilities are constants of the objective; detach
        # them so that repeated backward() calls below do not traverse the
        # old policy's graph.
        probs_old = normal(batch_actions, mu_old, sigma_sq_old).detach()
        policy_new = Policy(num_inputs, num_outputs)
        kl = 0.
        kl_target = Variable(torch.Tensor([params.kl_target]))
        for m in range(100):
            policy_new.load_state_dict(shared_p.state_dict())
            mu_new, sigma_sq_new = policy_new(batch_states)
            probs_new = normal(batch_actions, mu_new, sigma_sq_new)
            policy_loss = torch.mean(batch_advantages * torch.sum(probs_new/probs_old,1))
            kl = torch.mean(probs_old * torch.log(probs_old/probs_new))
            # Linear KL penalty plus a squared hinge that is active only when
            # the KL exceeds twice the target: max(0, kl - 2*kl_target)**2.
            kl_loss = kl_coef * kl + \
                params.ksi * torch.clamp(kl-2*kl_target, min=0)**2
            total_policy_loss = - policy_loss + kl_loss
            # Stop updating the policy once it has drifted too far.
            if kl > 4*kl_target:
                break
            # asynchronous update:
            optimizer_p.zero_grad()
            total_policy_loss.backward()
            ensure_shared_grads(policy_new, shared_p)
            optimizer_p.step()

        # value grad updates:
        for b in range(100):
            value.load_state_dict(shared_v.state_dict())
            v = value(batch_states)
            value_loss = torch.mean((batch_returns - v)**2)
            # asynchronous update:
            optimizer_v.zero_grad()
            value_loss.backward()
            ensure_shared_grads(value, shared_v)
            optimizer_v.step()

        # Adapt the KL coefficient for the next update from the last KL seen.
        if kl > params.beta_hight*kl_target:
            kl_coef *= params.alpha
        if kl < params.beta_low*kl_target:
            kl_coef /= params.alpha

        print("update done !")
Beispiel #18
0
#env.seed(args.seed)
torch.manual_seed(args.seed)

# Either restore the joint actor-critic network and its optimizer from a
# checkpoint file, or build fresh networks and Adam optimizers using args.lr.
# NOTE(review): the resume branch calls methods on `ac_net` and `opt_ac`, but
# neither is created in the visible part of this branch — they must already
# exist before this point; confirm.
# NOTE(review): the checkpoint path is hard-coded, and torch.load unpickles
# the file, so only trusted checkpoints should be loaded.
if args.resume:
    print("=> loading checkpoint ")
    checkpoint = torch.load('../models/ss/3.t7')
    #args.start_epoch = checkpoint['epoch']
    #best_prec1 = checkpoint['best_prec1']
    ac_net.load_state_dict(checkpoint['state_dict'])
    opt_ac.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint  (epoch {})".format(checkpoint['epoch']))
else:
    if args.use_sep_pol_val:
        # Separate policy and value networks, one optimizer each.
        policy_net = Policy(num_inputs, num_actions)
        value_net = Value(num_inputs)
        opt_policy = optim.Adam(policy_net.parameters(), lr=args.lr)
        opt_value = optim.Adam(value_net.parameters(), lr=args.lr)
    else:
        # Single joint actor-critic network with one optimizer.
        ac_net = ActorCritic(num_inputs, num_actions)
        opt_ac = optim.Adam(ac_net.parameters(), lr=args.lr)


def select_action(state):
    """Sample an action for ``state`` from the module-level ``policy_net``.

    ``state`` is converted with ``torch.from_numpy`` and given a leading
    dimension of size 1 before being passed to the network. The first and
    third outputs of the network are used as the mean and standard
    deviation of the normal distribution the action is drawn from; the
    second output is ignored.

    Returns:
        The tensor sampled by ``torch.normal``.
    """
    batched_state = Variable(torch.from_numpy(state).unsqueeze(0))
    mean, _, std = policy_net(batched_state)
    return torch.normal(mean, std)


def select_action_actor_critic(state):