import operator
import os
import json
from collections import defaultdict, Counter

import numpy as np

# binary_to_float, RISK_TRANSMISSION_PROBA, CLUSTER_PATH, and
# INDIVIDUAL_CLUSTER_PATH are assumed to be provided by the surrounding
# project (its utils and config modules).


def update_risk_risk_update(cls, human, update_message, rng):
    # TODO: pick a random message when there are ties in the scoring
    #       (`rng` is accepted for that purpose but is not used yet)
    scores = cls.score_matches(human, update_message)
    m_enc = max(scores.items(), key=operator.itemgetter(1))[0]
    assignment = human.M[m_enc]
    uid, risk, day, unobs_id = _decode_message(m_enc)
    # re-encode the message with its updated risk, preserving its cluster assignment
    updated_message = human.Message(uid, update_message.new_risk, day, unobs_id)
    del human.M[m_enc]
    human.M[_encode_message(updated_message)] = assignment
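# The snippets in this file rely on a Message type and an encode/decode pair
# defined elsewhere in the project. Below is a minimal, hypothetical stand-in
# (the names and the encoding are assumptions, not the project's actual
# definitions) so the functions can be exercised in isolation: messages are
# namedtuples, and the codec only needs to round-trip a message through a
# stable, hashable dict key.
from collections import namedtuple
import pickle

Message = namedtuple("Message", ["uid", "risk", "day", "unobs_id"])

def _encode_message(message):
    # any stable, hashable encoding works as a key; hex keeps it JSON-friendly
    return pickle.dumps(tuple(message)).hex()

def _decode_message(m_enc):
    return tuple(pickle.loads(bytes.fromhex(m_enc)))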
def messages_to_np(human):
    # pack each clustered message into an [assignment, risk, day] row
    ms_enc = np.zeros((len(human.M), 3))
    for idx, (m_enc, assignment) in enumerate(human.M.items()):
        obs_uid, risk, day, unobs_uid = _decode_message(m_enc)
        message = human.Message(obs_uid, risk, day, unobs_uid)
        ms_enc[idx] = np.array([assignment, message.risk, day])
    return ms_enc
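# Usage sketch (hypothetical data, scalar cluster assignments as in
# update_risk_risk_update above): one row per stored message, e.g. as input
# features for a downstream model.
#
#   rows = messages_to_np(human)
#   assert rows.shape == (len(human.M), 3)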
def score_matches(cls, human, m_i):
    # score m_i against each stored message, newest first; the uid yields one
    # fewer usable prefix bit per day of separation, so older candidates can
    # only match on shorter prefixes
    scores = {}
    for m_enc, _ in reversed(list(human.M.items())):
        obs_uid, risk, day, unobs_uid = _decode_message(m_enc)
        m = human.Message(obs_uid, risk, day, unobs_uid)
        if m_i.uid == m.uid and m_i.day == m.day:
            scores[m_enc] = 3
            break
        elif m_i.uid[:3] == m.uid[:3] and m_i.day - 1 == m.day:
            scores[m_enc] = 2
        elif m_i.uid[:2] == m.uid[:2] and m_i.day - 2 == m.day:
            scores[m_enc] = 1
        elif m_i.uid[:1] == m.uid[:1] and m_i.day - 3 == m.day:
            # assumed fix: the original repeated `m_i.day - 2` here, but the
            # prefix-length/day-gap pattern implies a three-day gap for a
            # one-bit prefix match
            scores[m_enc] = 0
        else:
            scores[m_enc] = -1
    return scores
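# The scoring ladder, illustrated with hypothetical 4-bit uids: a candidate
# message with uid (1, 1, 0, 0) on day 5 scores
#   3 against uid (1, 1, 0, 0) on day 5 (exact uid, same day; scan stops here),
#   2 against uid (1, 1, 0, 1) on day 4 (3-bit prefix, one day older),
#   1 against uid (1, 1, 1, 0) on day 3 (2-bit prefix, two days older),
#   0 against uid (1, 0, 0, 0) on day 2 (1-bit prefix, three days older),
#  -1 against anything else.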
def add_message_to_cluster(cls, human, m_i):
    """Cluster a new message by scoring it against old messages, in a sort of
    naive nearest-neighbors approach."""
    # TODO: include the risk level in clustering; currently only the quantized uid is used
    # TODO: refactor to compare multiple clustering schemes
    # TODO: check for mutually exclusive messages in order to break up a group
    #       and re-run nearest neighbors
    m_i_enc = _encode_message(m_i)
    m_risk = binary_to_float(
        "".join(str(x) for x in np.array(m_i.risk.tolist()).astype(int)), 0, 4)

    # score the new message against previous messages; attribute access and
    # integer day arithmetic here match score_matches
    scores = {}
    for m_enc, _ in human.M.items():
        obs_uid, risk, day, unobs_uid = _decode_message(m_enc)
        m = human.Message(obs_uid, risk, day, unobs_uid)
        if m_i.uid == m.uid and m_i.day == m.day:
            scores[m_enc] = 3
        elif m_i.uid[:3] == m.uid[:3] and m_i.day - 1 == m.day:
            scores[m_enc] = 2
        elif m_i.uid[:2] == m.uid[:2] and m_i.day - 2 == m.day:
            scores[m_enc] = 1
        elif m_i.uid[:1] == m.uid[:1] and m_i.day - 3 == m.day:
            # assumed fix, mirroring the one in score_matches
            scores[m_enc] = 0

    if scores:
        # join the cluster of the best-scoring neighbor
        max_score_message = max(scores.items(), key=operator.itemgetter(1))[0]
        human.M[m_i_enc] = {
            'assignment': human.M[max_score_message]['assignment'],
            'previous_risk': m_risk,
            'carry_over_transmission_proba': RISK_TRANSMISSION_PROBA,
        }
    # if this is the first message
    elif len(human.M) == 0:
        human.M[m_i_enc] = {
            'assignment': 0,
            'previous_risk': m_risk,
            'carry_over_transmission_proba': RISK_TRANSMISSION_PROBA,
        }
    # if there was no nearby neighbor, start a new group
    else:
        new_group = max(v['assignment'] for v in human.M.values()) + 1
        human.M[m_i_enc] = {
            'assignment': new_group,
            'previous_risk': m_risk,
            'carry_over_transmission_proba': RISK_TRANSMISSION_PROBA,
        }
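# Usage sketch (hypothetical names): assuming these methods live on a
# `RiskModel` class as classmethods and `human.M` starts as an empty dict,
# a day's received messages would be clustered one at a time:
#
#   for m_i in todays_messages:
#       RiskModel.add_message_to_cluster(human, m_i)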
if not os.path.isdir(INDIVIDUAL_CLUSTER_PATH):
    os.mkdir(INDIVIDUAL_CLUSTER_PATH)

# load the cluster data
with open(CLUSTER_PATH, 'r') as f:
    everyones_clustered_messages = json.load(f)

# gather some high-level statistics about the clusters
# (how many groups, total and unique contacts)
all_groups = []
all_total_num_contacts = []
all_unique_people_contacted = []
for someones_clustered_messages in everyones_clustered_messages:
    groups = defaultdict(list)
    unique_people_contacted = set()
    total_num_contacts = 0
    for m_enc, assignment in someones_clustered_messages.items():
        obs_uid, obs_risk, m_sent, unobs_uid = _decode_message(m_enc)
        groups[assignment].append(unobs_uid)
        unique_people_contacted.add(unobs_uid)
        total_num_contacts += 1
    all_groups.append(dict(groups))
    all_unique_people_contacted.append(unique_people_contacted)
    all_total_num_contacts.append(total_num_contacts)

# count the number of people in each group
all_count_people_in_group = []
all_number_of_groups = [len(groups) for groups in all_groups]
for group in all_groups:
    count_people_in_group = []
    for g, ps in group.items():
        cnt = Counter()
        num_people_in_group = len(ps)
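# The per-group loop above is truncated in this excerpt. As a sketch only
# (not the original script's continuation), the high-level statistics
# gathered so far could be summarized from the lists built above:
#
#   print(f"mean groups per person: {np.mean(all_number_of_groups):.1f}")
#   print(f"mean contacts per person: {np.mean(all_total_num_contacts):.1f}")
#   print(f"mean unique contacts per person: "
#         f"{np.mean([len(s) for s in all_unique_people_contacted]):.1f}")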