@classmethod
def update_risk_risk_update(cls, human, update_message, rng):
    """Apply a received risk-update message to the best-matching stored message."""
    # TODO: update a random message when there are ties in the scoring
    scores = cls.score_matches(human, update_message)
    # pick the stored (encoded) message with the highest match score
    m_enc = max(scores.items(), key=operator.itemgetter(1))[0]
    assignment = human.M[m_enc]
    # decode the matched message, overwrite its risk, and re-insert it under
    # its new encoding while keeping the original cluster assignment
    uid, risk, day, unobs_id = _decode_message(m_enc)
    updated_message = human.Message(uid, update_message.new_risk, day, unobs_id)
    del human.M[m_enc]
    human.M[_encode_message(updated_message)] = assignment
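
These snippets lean on a few shared helpers that are not shown on this page: a Message namedtuple and the _encode_message/_decode_message pair that round-trip a message to the string keys used in human.M. A minimal sketch of what those helpers might look like; the names come from the snippets, the bodies are assumptions:

from collections import namedtuple

# assumed shape of the message record used throughout the snippets
Message = namedtuple("Message", ["uid", "risk", "day", "unobs_id"])

def _encode_message(message):
    # assumption: messages are keyed in human.M by a simple delimited string
    return f"{message.uid}_{message.risk}_{message.day}_{message.unobs_id}"

def _decode_message(m_enc):
    # inverse of _encode_message; returns (uid, risk, day, unobs_id)
    uid, risk, day, unobs_id = m_enc.split("_")
    return uid, float(risk), int(day), unobs_id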
def messages_to_np(human):
    """Pack the stored messages into an (N, 3) array of [assignment, risk, day]."""
    ms_enc = np.zeros((len(human.M), 3))
    for idx, (m_enc, assignment) in enumerate(human.M.items()):
        obs_uid, risk, day, unobs_uid = _decode_message(m_enc)
        message = human.Message(obs_uid, risk, day, unobs_uid)
        ms_enc[idx] = np.array([assignment, message.risk, day])
    return ms_enc
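
A quick usage sketch (the `human` object and its M dict are assumed to be populated elsewhere): each row of the returned array is one stored message, so the columns can be split back out directly.

# hypothetical usage; `human` comes from the surrounding simulation code
arr = messages_to_np(human)          # shape: (num_messages, 3)
assignments = arr[:, 0]              # cluster assignment per message
risks = arr[:, 1]                    # risk value per message
days = arr[:, 2]                     # day each message was received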
@classmethod
def score_matches(cls, human, m_i):
    """Score each stored message for how well it matches the incoming message m_i."""
    scores = {}
    # walk the stored messages in reverse insertion order; an exact uid/day
    # match is definitive, so stop as soon as one is found
    for m_enc, _ in reversed(list(human.M.items())):
        obs_uid, risk, day, unobs_uid = _decode_message(m_enc)
        m = human.Message(obs_uid, risk, day, unobs_uid)
        if m_i.uid == m.uid and m_i.day == m.day:
            scores[m_enc] = 3
            break
        elif m_i.uid[:3] == m.uid[:3] and m_i.day - 1 == m.day:
            scores[m_enc] = 2
        elif m_i.uid[:2] == m.uid[:2] and m_i.day - 2 == m.day:
            scores[m_enc] = 1
        elif m_i.uid[:1] == m.uid[:1] and m_i.day - 2 == m.day:
            scores[m_enc] = 0
        else:
            scores[m_enc] = -1
    return scores
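
A small worked example of the scoring, built on the assumed Message/_encode_message helpers sketched earlier. RiskModel stands in for whatever class the classmethods above actually live on; the uids and values are made up.

from types import SimpleNamespace

# hypothetical data; uids are shown as 4-bit strings
human = SimpleNamespace(M={}, Message=Message)
old_exact = Message(uid="1011", risk=0.2, day=5, unobs_id="A")
old_partial = Message(uid="1010", risk=0.1, day=4, unobs_id="B")
human.M[_encode_message(old_exact)] = 0    # values are cluster assignments
human.M[_encode_message(old_partial)] = 1

incoming = Message(uid="1011", risk=0.9, day=5, unobs_id="A")
scores = RiskModel.score_matches(human, incoming)
# old_partial scores 2 (3-bit uid prefix match, one day older);
# old_exact scores 3 (same uid and day), at which point the loop breaks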
Example #4
    @classmethod
    def add_message_to_cluster(cls, human, m_i):
        """Cluster a new message by scoring it against stored messages (a naive nearest-neighbours approach)."""
        # TODO: include risk level in clustering, currently only uses quantized uid
        # TODO: refactor to compare multiple clustering schemes
        # TODO: check for mutually exclusive messages in order to break up a group and re-run nearest neighbors
        m_i_enc = _encode_message(m_i)
        m_risk = binary_to_float(
            "".join([str(x) for x in np.array(m_i[1].tolist()).astype(int)]),
            0, 4)

        # score the new message against previously stored messages
        scores = {}
        for m_enc, _ in human.M.items():
            m = _decode_message(m_enc)
            if m_i[0] == m[0] and m_i[2].day == m[2].day:
                scores[m_enc] = 3
            elif m_i[0][:3] == m[0][:3] and m_i[2].day - 1 == m[2].day:
                scores[m_enc] = 2
            elif m_i[0][:2] == m[0][:2] and m_i[2].day - 2 == m[2].day:
                scores[m_enc] = 1
            elif m_i[0][:1] == m[0][:1] and m_i[2].day - 2 == m[2].day:
                scores[m_enc] = 0

        if scores:
            max_score_message = max(scores.items(),
                                    key=operator.itemgetter(1))[0]
            human.M[m_i_enc] = {
                'assignment': human.M[max_score_message]['assignment'],
                'previous_risk': m_risk,
                'carry_over_transmission_proba': RISK_TRANSMISSION_PROBA
            }
        # if it's the first message ever stored
        elif len(human.M) == 0:
            human.M[m_i_enc] = {
                'assignment': 0,
                'previous_risk': m_risk,
                'carry_over_transmission_proba': RISK_TRANSMISSION_PROBA
            }
        # if there was no nearby neighbor
        else:
            new_group = max([v['assignment'] for k, v in human.M.items()]) + 1
            human.M[m_i_enc] = {
                'assignment': new_group,
                'previous_risk': m_risk,
                'carry_over_transmission_proba': RISK_TRANSMISSION_PROBA
            }
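
A hypothetical driver for the method above, showing the shape each stored record takes on. RiskModel again stands in for the enclosing class, and received_messages is assumed to come from the surrounding simulation.

# hypothetical driver: `human` starts with an empty M dict
for m_i in received_messages:
    RiskModel.add_message_to_cluster(human, m_i)

# each stored record now has the shape used above:
#   {"assignment": <int group id>,
#    "previous_risk": <float decoded from the message risk bits>,
#    "carry_over_transmission_proba": RISK_TRANSMISSION_PROBA}
num_groups = len({rec["assignment"] for rec in human.M.values()})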
Example #5
import json
import os
from collections import Counter, defaultdict

# CLUSTER_PATH / INDIVIDUAL_CLUSTER_PATH and _decode_message come from the
# surrounding project code
if not os.path.isdir(INDIVIDUAL_CLUSTER_PATH):
    os.mkdir(INDIVIDUAL_CLUSTER_PATH)

# load the cluster data
with open(CLUSTER_PATH, 'r') as f:
    everyones_clustered_messages = json.load(f)

# gather some high-level statistics about the clusters (number of groups, total and unique contacts)
all_groups = []
all_total_num_contacts = []
all_unique_people_contacted = []
for someones_clustered_messages in everyones_clustered_messages:
    groups = defaultdict(list)
    unique_people_contacted = set()
    total_num_contacts = 0
    for m_enc, assignment in someones_clustered_messages.items():
        obs_uid, obs_risk, m_sent, unobs_uid = _decode_message(m_enc)
        groups[assignment].append(unobs_uid)
        unique_people_contacted.add(unobs_uid)
        total_num_contacts += 1
    all_groups.append(dict(groups))
    all_unique_people_contacted.append(unique_people_contacted)
    all_total_num_contacts.append(total_num_contacts)
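
These per-person lists can then be reduced to summary numbers; a minimal sketch of one way to report them (the printed format is an assumption, not part of the original script):

import numpy as np

# hypothetical summary of the per-person statistics gathered above
print(f"people: {len(all_groups)}")
print(f"mean contacts per person: {np.mean(all_total_num_contacts):.1f}")
print(f"mean unique people contacted: "
      f"{np.mean([len(s) for s in all_unique_people_contacted]):.1f}")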

# count the number of people in each group
all_count_people_in_group = []
all_number_of_groups = [len(groups) for groups in all_groups]
for group in all_groups:
    count_people_in_group = []
    for g, ps in group.items():
        cnt = Counter()
        num_people_in_group = len(ps)