def update_records(self, update_messages, human):
        """Replace stored encounter messages with copies carrying a new risk.

        Each update message is rebuilt into the encounter message it refers
        to, that message is located among the clusters stored for its contact
        day, and ``self.update_record`` is called with the old and the updated
        message.

        Args:
            update_messages: update messages exposing ``uid``, ``risk``,
                ``contact_day``, ``unobs_id``, ``has_app`` and ``new_risk``.
                Nothing is done when this is empty or ``None``.
            human: not used by this implementation; kept for interface
                compatibility.

        Returns:
            ``self``, so calls can be chained.
        """
        if not update_messages:
            return self

        batches = self.group_by_received_at(update_messages)
        # Use a distinct loop name so the ``update_messages`` parameter is
        # not rebound while iterating.
        for _received_at, batch in batches.items():
            for update_message in batch:
                contact_day = update_message.contact_day
                old_message_dec = Message(update_message.uid,
                                          update_message.risk,
                                          contact_day,
                                          update_message.unobs_id,
                                          update_message.has_app)
                old_message_enc = encode_message(old_message_dec)

                # Find the first cluster on that day holding the old message.
                # The match is recorded with an explicit break instead of a
                # truthiness test, so a cluster id of 0 also ends the search.
                old_cluster = None
                day_clusters = self.clusters_by_day[contact_day]
                for cluster, messages in day_clusters.items():
                    if any(message == old_message_enc
                           for message in messages):
                        old_cluster = cluster
                        break

                # Same encounter, new risk value.
                updated_message = Message(old_message_dec.uid,
                                          update_message.new_risk,
                                          old_message_dec.day,
                                          old_message_dec.unobs_id,
                                          old_message_dec.has_app)
                new_cluster = hash_to_cluster(updated_message)

                # old_cluster is still None when no stored message matched;
                # update_record receives it unchanged, as before.
                self.update_record(old_cluster, new_cluster, old_message_dec,
                                   updated_message)
        return self
Example #2
0
    def update_records(self, update_messages):
        """Update old encounter messages with a new risk.

        Each update message is rebuilt into the encounter message it refers
        to, that message is located among the clusters stored for its day,
        and ``self.update_record`` is called with the old and the updated
        message.

        Args:
            update_messages: update messages exposing ``uid``, ``risk``,
                ``day``, ``unobs_id`` and ``new_risk``. Nothing is done when
                this is empty or ``None``.

        Returns:
            ``self``, so calls can be chained.
        """
        if not update_messages:
            return self

        # TODO: implement a 24-hour cycle of message updates and batch the update messages by this.
        batches = self.group_by_received_at(update_messages)
        # Use a distinct loop name so the ``update_messages`` parameter is
        # not rebound while iterating.
        for _received_at, batch in batches.items():
            for update_message in batch:
                old_message_dec = Message(update_message.uid, update_message.risk,
                                          update_message.day, update_message.unobs_id)
                old_message_enc = encode_message(old_message_dec)

                # TODO: don't use the secret info when finding the message to update. Also, optimize this loop.
                # Find the first cluster on that day holding the old message.
                # The match is recorded with an explicit break instead of a
                # truthiness test, so a cluster id of 0 also ends the search.
                old_cluster = None
                for cluster, messages in self.clusters_by_day[update_message.day].items():
                    if any(message == old_message_enc for message in messages):
                        old_cluster = cluster
                        break

                # Build the encounter message carrying the updated risk and
                # replace the old one. old_cluster is still None when nothing
                # matched; update_record receives it unchanged, as before.
                updated_message = Message(old_message_dec.uid, update_message.new_risk,
                                          old_message_dec.day, old_message_dec.unobs_id)
                new_cluster = hash_to_cluster(updated_message)
                self.update_record(old_cluster, new_cluster, old_message_dec, updated_message)

        return self
    def add_messages(self, messages, current_day, rng=None):
        """Assign each incoming encoded message to a cluster and store it.

        Every message is decoded and scored against the stored messages via
        ``self.score_matches``. A non-negative score reuses the matched
        cluster; otherwise the cluster id comes from ``hash_to_cluster``.
        The encoded message is then appended to ``self.all_messages`` and
        ``self.clusters`` and registered through
        ``self.add_to_clusters_by_day``.

        Args:
            messages: iterable of encoded messages.
            current_day: day passed through to ``score_matches``.
            rng: passed through to ``score_matches``.
        """
        for encoded in messages:
            decoded = decode_message(encoded)
            matched_cluster, _matched_message, match_score = self.score_matches(
                decoded, current_day, rng=rng)

            # A negative score means nothing stored matched this message.
            target_cluster = (matched_cluster if match_score >= 0
                              else hash_to_cluster(decoded))

            self.all_messages.append(encoded)
            self.clusters[target_cluster].append(encoded)
            self.add_to_clusters_by_day(target_cluster, decoded.day, encoded)
    def score_matches(self, m_new, current_day, rng=None):
        """Find the stored message that best matches a new risk message.

        Messages stored for ``current_day`` and the three days before it are
        scored against ``m_new``; the highest score wins:

        * 3 - same uid and same day (the search stops immediately);
        * 2 - ``compare_uids(..., 1)`` holds, stored one day earlier, same risk;
        * 1 - ``compare_uids(..., 2)`` holds, stored two days earlier;
        * 0 - ``compare_uids(..., 3)`` holds, stored three days earlier.

        Args:
            m_new: decoded message exposing ``uid``, ``risk`` and ``day``.
            current_day: most recent day to search.
            rng: not used by this implementation; kept for interface
                compatibility.

        Returns:
            ``(best_cluster, best_message, best_score)``. When nothing
            matched, ``best_score`` is -1, ``best_message`` is ``None`` and
            ``best_cluster`` is ``hash_to_cluster(m_new)``. Otherwise
            ``best_message`` is the decoded matching message.
        """
        best_cluster = hash_to_cluster(m_new)
        best_message = None
        best_score = -1

        # Walk from current_day back to current_day - 3. The previous
        # range(current_day - 3, current_day + 1, -1) was always empty, so no
        # stored message was ever examined.
        for day_idx in range(current_day, current_day - 4, -1):
            # .get avoids a KeyError (or creating an entry) for days with no
            # stored messages, e.g. negative days early on.
            day_clusters = self.clusters_by_day.get(day_idx, {})
            for cluster_id, messages in day_clusters.items():
                for m_enc in messages:
                    obs_uid, risk, day, _unobs_uid, _has_app = decode_message(
                        m_enc)
                    if m_new.uid == obs_uid and m_new.day == day:
                        score = 3
                    elif compare_uids(
                            m_new.uid, obs_uid,
                            1) and m_new.day - 1 == day and m_new.risk == risk:
                        score = 2
                    elif compare_uids(m_new.uid, obs_uid,
                                      2) and m_new.day - 2 == day:
                        score = 1
                    elif compare_uids(m_new.uid, obs_uid,
                                      3) and m_new.day - 3 == day:
                        score = 0
                    else:
                        # A non-matching message must not overwrite a match
                        # found earlier (the old else branch reset it to -1).
                        continue

                    # Only ever upgrade: a weaker match never replaces a
                    # stronger one.
                    if score > best_score:
                        best_cluster = cluster_id
                        best_message = m_enc
                        best_score = score
                    if best_score == 3:
                        break
                if best_score == 3:
                    break
            if best_score == 3:
                break

        if best_message:
            best_message = decode_message(best_message)
        return best_cluster, best_message, best_score
Example #5
0
    def score_matches(self, m_new, current_day):
        """Pick the cluster id a new risk message should be assigned to.

        The message's own hash cluster is used when a non-empty cluster with
        that id is stored for ``current_day``. Otherwise the candidate ids
        from ``hash_to_cluster_day`` are tried in iteration order, each
        against the clusters stored ``day`` days before ``current_day``, and
        the first non-empty hit is returned. With no hit, the message's own
        hash cluster is returned.
        """
        candidates_by_offset = hash_to_cluster_day(m_new)
        own_cluster = hash_to_cluster(m_new)

        # Fast path: the message's own cluster already exists today.
        if self.clusters_by_day[current_day].get(own_cluster, None):
            return own_cluster

        for day_offset, candidate_ids in candidates_by_offset.items():
            for candidate in candidate_ids:
                stored = self.clusters_by_day[current_day - day_offset]
                if stored.get(candidate, None):
                    return candidate

        # Nothing stored matched; fall back to the message's own cluster.
        return own_cluster
Example #6
0
File: clusters.py Project: mnqu/ctt
    def update_records(self, update_messages, human):
        """Apply update messages by matching per-day message counts to a cluster.

        For each batch returned by ``self.group_by_received_at`` the method:

        1. builds a (day x cluster) matrix of stored message counts and a
           per-day count of update messages;
        2. selects a cluster: via ``self.score_clusters`` when some cluster's
           per-day counts equal the update counts exactly, otherwise the
           cluster with the smallest ``dist`` to the update counts, after
           which ``self.update_record`` is called repeatedly until that
           cluster's per-day counts equal the update counts;
        3. for every update message, calls ``self.update_record`` with a
           message from the selected cluster and a copy carrying ``new_risk``.

        Args:
            update_messages: update messages exposing ``day`` and
                ``new_risk``. Nothing is done when this is empty or ``None``.
            human: not used in this method.

        Returns:
            ``self``.
        """
        # if we're using naive tracing, we actually don't care which records we update

        #if not config.CLUSTER_MESSAGES and config.CLUSTER_TYPE == "heuristic":
        #    for update_message in update_messages:
        #        self.clusters_by_day
        if not update_messages:
            return self
        grouped_update_messages = self.group_by_received_at(update_messages)
        # NOTE: the loop variable rebinds the ``update_messages`` parameter to
        # the current batch; ``received_at`` is not used in the body.
        for received_at, update_messages in grouped_update_messages.items():

            # num days x num clusters
            # Days and cluster ids are used directly as array indices below,
            # so this assumes both are non-negative integers - TODO confirm.
            cluster_cards = np.zeros((max(self.clusters_by_day.keys()) + 1,
                                      max(self.clusters.keys()) + 1))
            update_cards = np.zeros((max(self.clusters_by_day.keys()) + 1, 1))

            # figure out the cardinality of each day's message set
            for day, clusters in self.clusters_by_day.items():
                for cluster_id, messages in clusters.items():
                    cluster_cards[day][cluster_id] = len(messages)

            # count how many update messages refer to each day
            for update_message in update_messages:
                update_cards[update_message.day] += 1

            # find the nearest cardinality cluster
            # Columns (clusters) whose count equals the update count on every
            # day; update_cards (days x 1) broadcasts across the columns.
            perfect_signatures = np.where(
                (cluster_cards == update_cards).all(axis=0))[0]
            # NOTE(review): any() tests the truthiness of the index values, so
            # a sole perfect match at cluster index 0 is treated as "no
            # match" - confirm whether an emptiness check was intended.
            if not any(perfect_signatures):
                # calculate the wasserstein distance between every signature
                # (``dist`` is defined outside this block; the metric named
                # above is taken from the existing comment)
                scores = []
                for cluster_idx in range(cluster_cards.shape[1]):
                    scores.append(
                        dist(cluster_cards[:, cluster_idx],
                             update_cards.reshape(-1)))
                best_cluster = int(np.argmin(scores))

                # for each day
                for day in range(len(update_cards)):
                    cur_cardinality = int(cluster_cards[day, best_cluster])
                    target_cardinality = int(update_cards[day])

                    # if (and while) the cardinality is not what it should be, as determined by the update_messages
                    # cur_cardinality is a local counter adjusted below; it is
                    # not re-read from self.clusters_by_day.
                    while cur_cardinality - target_cardinality != 0:
                        # print(f"day: {day}, cur_cardinality: {cur_cardinality}, target_cardinality: {target_cardinality}")
                        # if we need to remove messages from this cluster on this day,
                        if cur_cardinality > target_cardinality:
                            best_score = -1
                            best_message = None
                            new_cluster_id = None

                            # then for each message in that day/cluster,
                            for message in self.clusters_by_day[day][
                                    best_cluster]:
                                for cluster_id, messages in self.clusters_by_day[
                                        day].items():
                                    if cluster_id == best_cluster:
                                        continue

                                    # and for each alternative cluster on that day
                                    for candidate_cluster_message in messages:
                                        # check if it's a good cluster to move this message to
                                        score = self.score_two_messages(
                                            decode_message(
                                                candidate_cluster_message),
                                            message)
                                        # NOTE(review): best_score is never
                                        # reassigned in this loop, so every
                                        # score is compared against -1.
                                        if (score > best_score
                                                or not best_message):
                                            best_message = message
                                            new_cluster_id = cluster_id

                            # if there are no other clusters on that day make a new cluster
                            # ``message`` here is the last value left by the
                            # loop above.
                            if not best_message:
                                best_message = message
                                message = decode_message(message)
                                new_cluster_id = hash_to_cluster(message)
                            best_message = decode_message(best_message)

                            # for the message which best fits another cluster, move it there
                            # (the same decoded message is passed as both the
                            # old and the updated message)
                            self.update_record(best_cluster, new_cluster_id,
                                               best_message, best_message)
                            cur_cardinality -= 1
                            # print(f"removing from cluster {best_cluster} to cluster {new_cluster_id} on day {day}")

                        #otherwise we need to add messages to this cluster/day
                        else:
                            # so look for messages which closely match our update messages, and add them
                            # This loop leaves ``update_message`` bound to the
                            # first update for this day, or to the last
                            # element when none matches.
                            for update_message in update_messages:
                                if update_message.day == day:
                                    break
                            best_score = -2
                            best_message = None
                            old_cluster_id = None
                            for cluster_id, messages in self.clusters_by_day[
                                    day].items():
                                for message in messages:
                                    score = self.score_two_messages(
                                        update_message, message)
                                    # NOTE(review): best_score is never
                                    # reassigned in this loop, so every score
                                    # is compared against -2.
                                    if (score > best_score
                                            and cluster_id != best_cluster):
                                        best_message = message
                                        old_cluster_id = cluster_id

                            # NOTE(review): best_message is still None when no
                            # candidate was selected above - confirm that
                            # decode_message tolerates that.
                            best_message = decode_message(best_message)
                            updated_message = Message(best_message.uid,
                                                      update_message.new_risk,
                                                      best_message.day,
                                                      best_message.unobs_id)
                            # print(f"adding from cluster {old_cluster_id} to cluster {best_cluster} on day {day}")
                            self.update_record(old_cluster_id, best_cluster,
                                               best_message, updated_message)
                            cur_cardinality += 1
            else:
                # At least one cluster matched the update counts exactly;
                # score_clusters (defined elsewhere) selects among them.
                best_cluster = self.score_clusters(update_messages,
                                                   perfect_signatures)
            # Pair each update message with a stored message from the chosen
            # cluster on the same day and apply the new risk to it.
            for update_message in update_messages:
                best_score = -1
                # start from the first stored message as the default
                best_message = self.clusters_by_day[
                    update_message.day][best_cluster][0]
                for risk_message in self.clusters_by_day[
                        update_message.day][best_cluster]:
                    score = self.score_two_messages(update_message,
                                                    risk_message)
                    # NOTE(review): best_score is never reassigned in this
                    # loop, so every score is compared against -1.
                    if score > best_score:
                        best_message = risk_message
                best_message = decode_message(best_message)
                updated_message = Message(best_message.uid,
                                          update_message.new_risk,
                                          best_message.day,
                                          best_message.unobs_id)
                self.update_record(best_cluster, best_cluster, best_message,
                                   updated_message)
        return self