Example #1
    def __connection_is_valid(self) -> bool:
        """Checks if the current database connection is valid."""

        if not self.__is_open():
            logger.info("Database connection is closed!")
            return False

        return True
Example #2
    def query_timed(self, url: str) -> Dict[str, Any]:
        start_time: float = time.time()
        response: Dict[str, Any] = self.__get(url)
        duration_sec: float = time.time() - start_time
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("OSM query took ~%ss: %s", int(duration_sec), url)
        else:
            logger.info("OSM query took ~%ss: %s chars", int(duration_sec), len(url))
        return response
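
The isEnabledFor check above switches to the verbose form only when DEBUG records would actually be emitted; the same check is the standard way to avoid building expensive log arguments at all. A self-contained sketch of that pattern (the logger name and helper function are illustrative):

import logging

logger = logging.getLogger("osm")

def expensive_repr():
    # Stand-in for argument construction that is costly at scale
    return ", ".join(str(i) for i in range(1000))

if logger.isEnabledFor(logging.DEBUG):
    # The string is only built when DEBUG logging is active
    logger.debug("state dump: %s", expensive_repr())
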
Example #3
    def query_pd(self, query: str, *argv, **kwargs) -> pd.DataFrame:
        """ Queries the database and returns the result as a DataFrame."""

        logger.info(f'Querying SQL with { query }')

        if not self.__connection_is_valid():
            logger.info("Trying to reconnect...")
            self.connect()

        return pd.read_sql(query, self.__db, *argv, **kwargs)
Example #4
@contextmanager  # requires: from contextlib import contextmanager
def timer(message):
    """Context manager for reporting time measurements"""
    tic = time.perf_counter()
    extra = ""
    try:
        yield
    except Exception:
        extra = " (failed)"
        raise
    finally:
        toc = time.perf_counter()
        ms = int(1000 * (toc - tic))
        logger.info(f"{message}{extra}: {ms}ms")
Example #5
    def _compute_graph_edges(self):
        """
        Computes contacts and adds as edges to graph
        """

        glue_below_duration = self.params['bt_glue_below_duration']
        min_duration = self.params['bt_min_duration']
        bt_data = self._bt_data

        logger.info("Building Bluetooth contact graph edges")

        for uuid1 in self.query_uuids:
            trajectory_uuid1 = self._load_trajectory(uuid1)

            for uuid2 in tqdm(self.uuids):
                if uuid1 == uuid2:
                    continue

                # Extract relevant part of pandas frame
                bt_data_local = bt_data.loc[(
                    (bt_data['uuid'] == uuid1) &
                    (bt_data['paireddeviceid'] == uuid2)) | (
                        (bt_data['paireddeviceid'] == uuid1) &
                        (bt_data['uuid'] == uuid2))]
                if len(bt_data_local) == 0:
                    # No contacts
                    continue

                trajectories = {
                    uuid1: trajectory_uuid1,
                    uuid2: self._load_trajectory(uuid2)
                }

                # Construct contact list
                t1 = trajectories[uuid1]
                t2 = trajectories[uuid2]
                bt_iterator = BluetoothContactDetailsIterator(
                    bt_data_local, glue_below_duration)
                contacts = ContactList([
                    BluetoothContact(t1, t2, contact_details)
                    for contact_details in bt_iterator
                ])
                contacts = contacts.filter(min_duration=min_duration)

                self._add_contacts(uuid1, uuid2, contacts)

        logger.info("Building Bluetooth contact graph edges")
Example #6
def log_contacts(uuid, contacts, device_info, add_random_salt=False):
    """ Writes a list [contact1_dict, contact2_dict,...] into the given txt file
    where each row corresponds to one case.
    :params uuid: Patient uuid (string)
    :params contacts: contacts argument of a ContactGraphResult object 
    :params device_info: dict of uuid -> List of device info tuples
    :params logfile: File to log the contacts 
    :params mode: if 'a' contacts are appended to logfile, if 'w' previous content is overwritten overwritten
    :params add_random_salt: If true adds a random combination of letters infront of uuids before 
                             hashing, where combination changes on each call of function.
    
    """
    contacts_list = contacts_to_dicts(uuid, contacts, add_random_salt=add_random_salt, device_info=device_info)
    for contact in contacts_list:
        logger.info(contact)
Example #7
    def contacts_with(self, uuid):
        """ Returns a list with all contacts with a given uuid.

        :param uuid: The uuid of interest
        :return: A list of ContactList objects

        """
        if uuid in self.uuids:
            contacts = [
                (uuid1, uuid2, edge["contact_list"])
                for uuid1, uuid2, edge in self._G.edges(uuid, data=True)
            ]
            return ContactGraphResult(uuid, contacts)
        else:
            logger.info(
                f"{self.__class__.__name__}: No contacts for {uuid} - returning empty list"
            )
            return ContactGraphResult(uuid, [])
Example #8
def nested_get(dictionary, key, debug=False):
    '''Get a value from a nested dictionary using a tuple of keys'''
    assert isinstance(key, tuple)

    debug and logger.info(f'Get {key} {list(dictionary.keys())}')
    key0, *keys = key
    if not keys:
        return dictionary[key0]
    return nested_get(dictionary[key0], tuple(keys), debug)
Example #9
    def _load_bt_data(self):
        # Load data from database
        assert (len(self.query_uuids) == 1
                )  # FIXME: support multiple query uuids
        query_uuid = self.query_uuids[0]
        timeFrom = self.params['timeFrom']
        timeTo = self.params['timeTo']
        dt_threshold = self.params['bt_dt_threshold']

        logger.info(
            f"BTContactGraph: Loading BT contacts from SQL server for uuid {query_uuid}."
        )
        self._bt_data = load_azure_data_bluetooth(query_uuid,
                                                  timeFrom,
                                                  timeTo,
                                                  dt_threshold=dt_threshold)
        logger.info(
            "BTContactGraph: Finished loading BT contacts from SQL server")
Example #10
    def query(self, query: str, *argv, **kwargs) -> pyodbc.Cursor:
        """ Queries the database and returns the result as a Cursor."""

        logger.info(f'Querying SQL with { query }')

        if not self.__connection_is_valid():
            logger.info("Trying to reconnect...")
            self.connect()

        cursor = self.__db.cursor()

        try:
            cursor.execute(query, *argv, **kwargs)
        except Exception as e:
            logger.error(f'Error querying SQL with { query } | { e }')
            raise

        return cursor
Example #11
def nested_set(dictionary, key, value, debug=False):
    '''Set a value in a nested dictionary using a tuple of keys'''
    assert isinstance(key, tuple)

    debug and logger.info(f'Set {key} {list(dictionary.keys())}')
    key0, *keys = key
    if not keys:
        dictionary[key0] = value
        return dictionary
    return nested_set(dictionary[key0], tuple(keys), value, debug)
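
A minimal usage sketch combining nested_get (Example #8) and nested_set above; the dictionary contents are illustrative:

config = {'db': {'azure': {'timeout': 30}}}
nested_set(config, ('db', 'azure', 'timeout'), 60)
assert nested_get(config, ('db', 'azure', 'timeout')) == 60
# Keys must be passed as a tuple, and intermediate keys must already exist:
# both helpers recurse via dictionary[key0] and do not create missing levels.
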
Example #12
    def __init__(self, query_uuids, params):
        """
        Constructs a contact graph for a list of query uuids.

        :param query_uuids: A list of uuids to query
        :param params: A parameter dictionary used for the graph computations
        """

        logger.info("Building contact graph")
        self.query_uuids = query_uuids
        self.params = params

        self._G = nx.Graph()
        self._compute_graph_nodes()
        self._compute_graph_edges()
        # Collect device info for all participant uuids
        self.node_device_info = load_device_info(list(self._G.nodes))

        logger.info("Finished building contact graph")
Example #13
def set_analysis_period(params, timeFrom, timeTo):
    """ Sets the flags params["timeFrom"] and params["timeTo"] """
    if timeTo is not None:
        params["timeTo"] = timeTo
    else:
        params["timeTo"] = datetime.utcnow().replace(microsecond=0)

    if timeFrom is not None:
        params["timeFrom"] = timeFrom
        params["analysis_duration_in_days"] = (params['timeTo'] - params['timeFrom']).days
    else:
        params["timeFrom"] = (params["timeTo"] - timedelta(days=params["analysis_duration_in_days"])).replace(microsecond=0)

    # Ensure that we work in UTC timezone - this should not really be necessary if we work with
    # time-aware DateTime objects, but let's be on the safe side
    params["timeFrom"] = params["timeFrom"].astimezone(timezone.utc)
    params["timeTo"] = params["timeTo"].astimezone(timezone.utc)

    logger.info(f"Analysis period: {params['timeFrom'].isoformat()} - {params['timeTo'].isoformat()}")
Example #14
    def query_multiple_mt(self, queries, wrapper) -> List[Dict[str, Any]]:
        ordered_output: List[Dict[str, Any]] = [{}] * len(queries)
        success: int = 0
        logger.info("Starting %s OSM requests...", len(queries))
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=self.max_workers) as executor:
            futures = {}
            for index, url in enumerate(queries):
                futures[executor.submit(partial(self.__get,
                                                wrapper(url)))] = index
            for future in concurrent.futures.as_completed(futures):
                try:
                    response = future.result()
                    ordered_output[futures[future]] = response
                    success += 1
                except Exception as ex:
                    if self.verbose > 0:
                        logger.warning(ex)
            logger.info("Successfully processed %s / %s OSM requests.",
                        success, len(queries))
        return ordered_output
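
The futures-to-index dict above is what keeps responses aligned with their queries even though as_completed() yields in completion order. A self-contained sketch of the same pattern (the fetch function and URLs are illustrative):

import concurrent.futures

def fetch(url):  # stand-in for self.__get
    return {"url": url}

queries = ["https://example.com/a", "https://example.com/b"]
ordered = [{}] * len(queries)  # each slot is reassigned by index below
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(fetch, url): i for i, url in enumerate(queries)}
    for future in concurrent.futures.as_completed(futures):
        ordered[futures[future]] = future.result()
# Looking the finished future up in `futures` recovers its original index,
# so `ordered` matches the order of `queries`.
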
Example #15
def load_device_info(uuids):
    '''Return a dictionary of uuid -> List of device info tuples'''
    if not __CONFIG__.features.device_info:
        logger.info("Loading device info is disabled in config.")
        return defaultdict(list)

    logger.info("Loading device info")

    if isinstance(uuids, str):
        uuids = (uuids, )

    uuids = set(uuids)

    device_info = defaultdict(list)

    query_template = "SELECT * FROM getDeviceInformationSingle('%s')"

    with timer("db connect"):
        db = connect_to_azure_database()

    for uuid in uuids:
        query = query_template % uuid
        with timer("db query getDeviceInformationSingle"):
            frame = pd.read_sql_query(query, con=db)

        # NOTE: it seems there are some different conventions for naming
        # e.g. ios10.1 and ios101 are (probably) the same thing and we might
        # want to merge these
        if frame is not None:
            device_info[uuid].extend(
                zip(frame['platform'], frame['model'], frame['appversion']))

    db.close()
    logger.info("Finished loading device info")

    return device_info
Example #16
    def _compute_graph_edges(self, dist_function=convolution):
        """ Constructs the edges of the graph by computing the contacts
        of each trajectory pair. The distance function is user-specific.
        """

        dist_func_options = self.params['filter_options']
        allowed_jump = self.params['allowed_jump']
        hard_time_gap = self.params['max_interpol_in_h']
        glue_below_duration = self.params['glue_below_duration']
        min_duration = self.params['min_duration']

        logger.info("Building GPS contact graph edges")
        # Loop over trajectory pairs
        for uuid1 in self.query_uuids:
            if uuid1 in self._trajectories:
                t1 = self._trajectories[uuid1]
                for uuid2 in tqdm(self.uuids):
                    if uuid2 == uuid1:
                        # Contact with themselves is not relevant
                        continue
                    # Find contact and add to graph
                    t2 = self._trajectories[uuid2]
                    contacts = get_gps_contacts_from_trajectories(
                        t1, t2, allowed_jump, hard_time_gap,
                        glue_below_duration, dist_function, dist_func_options)
                    contacts = contacts.filter(min_duration=min_duration)

                    if len(contacts) > 0:
                        self._add_contacts(uuid1, uuid2, contacts)
            else:
                logger.info(f"No trajectory corresponds to uuid {uuid1}")

        logger.info("Finished building GPS contact graph edges")
Example #17
    def query_hits_partial_trajectory(
            self, query_type_names: Iterable[str],
            bounding_boxes: List[BoundingBox], containing_box: BoundingBox,
            query_types: Iterable[QueryType],
            type_queries: Iterable[str]) -> List[Dict[str, Any]]:
        """
        Query for the POIs in the containing box, then use the bounding_boxes (originally submitted) to filter away
        the POIs that fell outside of the original boxes. Possible CPU bottleneck, so it logs preocessing time.

        :param query_type_names: Query types by name
        :param bounding_boxes: Bounding boxes originally submitted
        :param containing_box: Bounding box containing all the bounding boxes
        :param query_types: QueryType objects
        :param type_queries:
        :return:
        """
        url = self.__global_query_wrapper(containing_box, type_queries)
        loggable_types = ", ".join(query_type_names)
        logger.info(
            f"OSM: trajectory[{len(bounding_boxes)}]: {loggable_types}: {containing_box.area_str()}"
        )
        logger.debug(f"OSM: {url}")
        results = self.query_timed(url)
        start_time: float = time.time()
        elements = results['elements']
        bounding_box_hits = [
            box.contained_elements(elements) for box in bounding_boxes
        ]
        query_typed_hits_per_box = []
        for box_hits in bounding_box_hits:
            for query_type in query_types:
                query_typed_hits_per_box.append(
                    dict(elements=query_type.matching_elements(box_hits)))
        duration_sec: float = time.time() - start_time
        logger.info("%s elements from %s boxes -> %s processed in %sms",
                    len(elements), len(bounding_boxes), str(containing_box),
                    int(1000 * duration_sec))
        return query_typed_hits_per_box
Example #18
    def connect(self) -> None:
        """Connects to the database. Will reuse connection if
           a connection is open and the connection string has not
           changed.
        """

        logger.info("Connecting to database...")

        if self.__db is not None and self.__connection_is_valid():
            logger.info(
                "A connection is already open! Reusing old connection.")
            return

        try:
            self.__db = pyodbc.connect(self.__connection_string)
            logger.info("Database connection successful!")
        except Exception as e:
            logger.error(f"Database connection failed! | { e }")
            raise
Example #19
    def _load_trajectory(self, uuid):
        """ Loads GPS trajectory for a uuid for the analysis period """

        # Get the trajectory for all uuids
        params = self.params
        dt_threshold = params['gps_dt_threshold']
        dx_threshold = params['gps_dx_threshold']

        query = f"SELECT * FROM getTrajectorySpeed('{uuid}','{params['timeFrom']}','{params['timeTo']}')"
        logger.info(
            "BTContactGraph: Calling getTrajectorySpeed() for BT contact.")
        df = load_azure_data(query,
                             params['outlier_threshold'],
                             dt_threshold=dt_threshold,
                             dx_threshold=dx_threshold).get(uuid, None)
        logger.info(f"BTContactGraph: Parsing trajectory for BT contact")
        trajectory = TrajectoryParser(pd_frame=df, uuid=uuid, verbose=0)
        logger.info(
            "Finished getTrajectorySpeed() and parsing trajectory for BT contact."
        )
        return trajectory
Example #20
    def query_points_batched(self,
                             points: List[List[float]],
                             query_type_names: List[str],
                             distances,
                             element_types=None,
                             mt_split=True,
                             mt_threshold=0) -> List[Dict[str, Any]]:
        """
        This method needs some cleaning up.

        :param points: The points, as a list of pairs of coordinates
        :param query_type_names: Query types, as in the _QUERY_TYPES_LIST dict.  Types of POIs to retrieve
        :param distances: Accuracy/distances for each point.  Can be an integer, which sets a distance for each point
        :param element_types: The types of OSM structures to retreive
        :param mt_split:
        :param mt_threshold:
        :return:
        """
        query_types: Iterable[QueryType] = [
            _QUERY_TYPES_INDEX[query_type] for query_type in query_type_names
        ]

        trajectory_length = len(points)
        if isinstance(distances, int):
            distances = [distances] * trajectory_length

        # The OSM query denoting poi types and their node/way/relation setup
        type_queries: Iterable[str] = self.__type_queries(
            element_types, query_types)

        # Decide on multithreading parameters ...
        resolved_mt_threshold = max(self.batched_mt_threshold, mt_threshold)
        resolved_mt = (resolved_mt_threshold > 0) and self.batched or mt_split

        # Compute the bounding boxes and the containing bounding box
        bounding_boxes, containing_box = self.__bounding_boxes(
            points, distances)
        sqkm = containing_box.sqkm()

        if trajectory_length > 100 or sqkm > 10.0:
            # We need to split the box.  (All hard-coded parameters here are candidates for configuration.)
            split_count = max(
                2, int(min(trajectory_length / 2, trajectory_length / 50)))
            logger.info(
                f"Trajectory[{trajectory_length}] spans {containing_box.area_str()}, split in {split_count}"
            )
            trajectory_splits = np.array_split(points, split_count)
            distances_splits = np.array_split(distances, split_count)
            query_hits_splits = []

            if resolved_mt and split_count > resolved_mt_threshold:
                # We want to multi-thread to exploit the OSM capacity
                ordered_output: List[Dict[str, Any]] = [{}] * split_count
                success: int = 0
                logger.info(f"Starting {split_count} OSM sub-threads...")
                with concurrent.futures.ThreadPoolExecutor(
                        max_workers=self.max_workers) as executor:
                    futures = {}
                    for index in range(0, split_count):
                        futures[executor.submit(
                            partial(self.subquery, trajectory_splits[index],
                                    distances_splits[index], query_type_names,
                                    query_types, type_queries))] = index
                    for future in concurrent.futures.as_completed(futures):
                        try:
                            response = future.result()
                            ordered_output[futures[future]] = response
                            success += 1
                        except Exception as ex:
                            logger.warning("Failed to query OSM: %s", ex)
                    logger.info(
                        f"Successfully processed {success:d} / {split_count:d} OSM requests."
                    )
                for output in ordered_output:
                    query_hits_splits.extend(output)
                return query_hits_splits
            else:
                # Run the boxes in sequence, avoid the overhead of multithreading
                for i in range(0, split_count):
                    hits = self.subquery(trajectory_splits[i],
                                         distances_splits[i], query_type_names,
                                         query_types, type_queries)
                    query_hits_splits.extend(hits)
            return query_hits_splits
        else:
            # Run the whole box in one go
            return self.query_hits_partial_trajectory(query_type_names,
                                                      bounding_boxes,
                                                      containing_box,
                                                      query_types,
                                                      type_queries)
Example #21
    def to_dict_daily(self):
        """ Returns a dictionary representation of the report where contacts are aggregated on a daily basis.

        :return: A dictionary of the form:

        {<<uuid of contact>>: {
            "bluetooth_cumulative_risk_score": 10.0,
            "gps_cumulative_risk_score": 10.0,
            "categorical_risk": "medium",
            "bluetooth_cumulative_duration": 210.0,
            "gps_cumulative_duration": 210.0,
            "number_of_contacts": 3,
            "points_of_interest": "residential, school",

            "2020-04-10": {
                'gps_contacts' : <<output of to_dict() call on gps contact list>> (if existing),
                'bluetooth_contacts' : <<output of to_dict() call on BT contact list>> (if existing),
                'bar_plot' : <<bar plot containing summary of gps and bluetooth contact details>>
                },
            "2020-04-11": { ...
            }
        }
        <<uuid of contact>>: {
            "bluetooth_cumulative_risk_score": 10.0,
            ...
        ...
        }
        """

        # Create a nested defaultdict so we can assign without first creating keys
        dic = nested_dict()

        n = 0
        N = len(self.contacts.keys())

        for (_, uuid2), contact_list in self.contacts.items():

            logger.info(f"Generating report {n+1}/{N}")
            n += 1

            if not contact_list.include_in_report() and not self.testing:
                logger.info(
                    "Contact does not match the FHI requirements... skipping")
                continue
            logger.info(
                "Contact matches the FHI requirements... adding to report")
            gps_contacts = contact_list.filter(contact_type="gps")
            bt_contacts = contact_list.filter(contact_type="bluetooth")
            dic[uuid2]["cumulative"]["all_contacts"] = contact_list.to_dict(
                include_individual_contacts=False, include_bar_plot=True)
            dic[uuid2]["cumulative"]['gps_contacts'] = gps_contacts.to_dict(
                include_individual_contacts=False, include_hist=True)
            dic[uuid2]["cumulative"]['bt_contacts'] = bt_contacts.to_dict(
                include_individual_contacts=False)

            daily_contacts = contact_list.split_by_days()
            for day, contact_list_day in daily_contacts.items():
                # After splitting we need to check again that all contacts have the required min_duration
                gps_contacts_day = contact_list_day.filter(
                    contact_type="gps", min_duration=params["min_duration"])
                bt_contacts_day = contact_list_day.filter(
                    contact_type="bluetooth",
                    min_duration=params["bt_min_duration"])
                all_contacts_day = ContactList(gps_contacts_day +
                                               bt_contacts_day)
                dic[uuid2]['daily'][day.isoformat(
                )]['all_contacts'] = all_contacts_day.to_dict(
                    include_individual_contacts=False,
                    include_bar_plot=False,
                    include_summary_plot=self.include_maps)
                dic[uuid2]['daily'][day.isoformat(
                )]['gps_contacts'] = gps_contacts_day.to_dict(
                    include_individual_contacts=False, include_hist=True)
                dic[uuid2]['daily'][
                    day.isoformat()]['bt_contacts'] = bt_contacts_day.to_dict(
                        include_individual_contacts=False)

            # Enrich the cumulative info for uuid2 with how many days were spent in contact
            daily = dic[uuid2]['daily']
            gps_days = set(day for day in daily
                           if daily[day]['gps_contacts']['number_of_contacts'])
            bt_days = set(day for day in daily
                          if daily[day]['bt_contacts']['number_of_contacts'])
            contact_days = gps_days | bt_days

            dic[uuid2]['cumulative']['all_contacts']['days_in_contact'] = len(
                contact_days)
            dic[uuid2]['cumulative']['gps_contacts']['days_in_contact'] = len(
                gps_days)
            dic[uuid2]['cumulative']['bt_contacts']['days_in_contact'] = len(
                bt_days)

        # defaultdict to defaultdict
        dic = fhi_filter_dict(dic)
        # Sign it off
        # NOTE: version info should be top level, but perhaps too many
        # things on our side as well as FHI's depend on the assumption that
        # dic.keys() contains only uuids
        for uuid in dic.keys():
            dic[uuid]['version_info']['pipeline'] = corona.__VERSION__
            dic[uuid]['version_info']['device'] = self.device_info[uuid]

        # NOTE: at this point the ordering of entries in dic is not guaranteed
        # to be the same as in the original dic (where events were ordered by time)
        dic = default_to_regular(dic)

        # Sort daily
        # {uuid: {'cumulative': ...,
        #         'daily': {'date0': x,
        #                   'date1': y}}}
        as_date = lambda string: datetime.datetime.strptime(string, '%Y-%m-%d')
        # Ensure ordering of dates
        uuids = list(dic.keys())
        for uuid in uuids:
            dates = dic[uuid]['daily'].keys()  # As strings
            dic[uuid]['daily'] = {
                date: dic[uuid]['daily'][date]
                for date in sorted(dates, key=as_date)
            }

        return dic
Example #22
def isolate_events(events, debug=False):
    '''Isolate a list of pandas rows (events) to produce new, non-overlapping events'''
    # NOTE: after isolation the events should be 'isolated' (non-overlapping)
    if len(events) == 1:
        return [events[0]]

    # This is the work horse
    A, B, rest = events[0], events[1], events[2:]

    debug and logger.info('<%g---(A)--%g>' % (e_start(A), e_end(A)))
    debug and logger.info('<%g---(B)--%g>' % (e_start(B), e_end(B)))

    if e_is_identical(A, B):
        debug and logger.info('Identity\n')
        new_event = [
            e_uuid(A),  # These are determined by the enclosing
            e_pd(A),  # event
            e_start(A),
            e_dur(A),
            # Focus on worst case scenario so we pick longest *_duration
            max(e_vcd(A), e_vcd(B)),
            max(e_cd(A), e_cd(B)),
            max(e_rcd(A), e_rcd(B))
        ]
        return isolate_events([new_event] + rest)

    if e_no_overlap(A, B):
        debug and logger.info('!Overlap\n')
        return [A] + isolate_events([B] + rest)

    if e_contains(A, B):
        debug and logger.info('Contains\n')
        new_event = [
            e_uuid(A),  # These are determined by the enclosing
            e_pd(A),  # event
            e_start(A),
            e_dur(A),
            # Focus on worst case scenario so we pick longest *_duration
            max(e_vcd(A), e_vcd(B)),
            max(e_cd(A), e_cd(B)),
            max(e_rcd(A), e_rcd(B))
        ]
        return isolate_events([new_event] + rest)

    # Join the events
    if right_overlaps(A, B):
        debug and logger.info('Overlap\n')
        # The new event has start of A and end of B
        start_A, start_B = map(e_start, (A, B))
        end_A, end_B = map(e_end, (A, B))
        dur_A, dur_B = map(e_dur, (A, B))
        # SA<------->EA
        #      SB<--------->EB
        overlap = end_A - start_B

        # Combine a quantity q that accrues over each event: the non-overlapping
        # parts keep their own per-second rate (q / dur), while the overlap is
        # charged at the worse (max) of the two rates - the same worst-case
        # policy as in the branches above.
        foo = lambda qA, qB: (qA / dur_A * (dur_A - overlap) + max(
            qA / dur_A, qB / dur_B) * overlap + qB / dur_B * (dur_B - overlap))

        event = [
            e_uuid(A),
            e_pd(A), start_A, end_B - start_A,
            foo(e_vcd(A), e_vcd(B)),
            foo(e_cd(A), e_cd(B)),
            foo(e_rcd(A), e_rcd(B))
        ]

        return isolate_events([event] + rest)
    raise ValueError('This should not happen', A, B)
Example #23
    def _get_trajectories(self):
        assert (len(self.query_uuids) == 1
                )  # FIXME: support multiple query uuids
        query_uuid = self.query_uuids[0]
        params = self.params
        # Load data from database

        dt_threshold = params['gps_dt_threshold']
        dx_threshold = params['gps_dx_threshold']

        # Now get the trajectory of the patient
        query = f"SELECT * FROM getTrajectorySpeed('{query_uuid}','{params['timeFrom']}','{params['timeTo']}')"
        logger.info(
            "GPSContactGraph: Calling getTrajectorySpeed() for GPS contact")
        t_patient = load_azure_data(query,
                                    params['outlier_threshold'],
                                    dt_threshold=dt_threshold,
                                    dx_threshold=dx_threshold).get(
                                        query_uuid, [])
        logger.info(
            "GPSContactGraph: getTrajectorySpeed() for GPS contact finished")
        minimum_duration = 60
        maximum_bb_diameter1 = 800
        maximum_bb_duration1 = 3 * 60
        maximum_bb_diameter2 = 200
        t_split = GPSContactGraph._bounding_boxes_greedy_(
            t_patient, minimum_duration, maximum_bb_diameter1,
            maximum_bb_duration1, maximum_bb_diameter2)
        logger.info(
            f"GPSContactGraph: Split trajectory into {len(t_split)} segments")

        # Get other trajectories using bounding box method
        logger.info(
            "GPSContactGraph: Fetching other trajectories (using bounding boxes) for GPS contacts"
        )
        t = {}
        temp_trajectories = []
        for t_piece in t_split:
            timeFrom = datetime.datetime.utcfromtimestamp(
                t_piece['time'].min()).strftime('%Y-%m-%d %H:%M:%S')
            timeTo = datetime.datetime.utcfromtimestamp(
                t_piece['time'].max()).strftime('%Y-%m-%d %H:%M:%S')
            logger.info(
                "GPSContactGraph: Dealing with time window: {0} - {1}".format(
                    timeFrom, timeTo))
            lat_min = t_piece['latitude'].min()
            lat_max = t_piece['latitude'].max()
            long_min = t_piece['longitude'].min()
            long_max = t_piece['longitude'].max()
            query = f"SELECT * FROM getWithinBB ({long_min}, {lat_min},{long_max},{lat_max},'{timeFrom}','{timeTo}') ORDER BY 1,2 ASC"
            # Appends dictionary of format {uuid : pd_frame} to the list
            temp_trajectories.append(
                load_azure_data(query,
                                params['outlier_threshold'],
                                dt_threshold=dt_threshold,
                                dx_threshold=dx_threshold))

        # Combine data frames of temporary trajectories
        for temp_trajectory in temp_trajectories:
            for key in temp_trajectory:
                if key in t:
                    # Concatenate pd frames
                    t[key] = pd.concat([t[key], temp_trajectory[key]])
                else:
                    t[key] = temp_trajectory[key]

        # Finally reset the indexes of the pandas frames (these are not consistent now)
        for key in t:
            t[key] = t[key].reset_index()
        logger.info(
            "GPSContactGraph: Fetching other trajectories (using bounding boxes) for GPS contacts finished"
        )
        logger.info(
            f"GPSContactGraph: Found GPS contacts with {len(t)} people.")

        logger.info("GPSContactGraph: Parsing trajectories of GPS contacts")
        trajectories = {}
        for uuid, df in t.items():
            trajectories[uuid] = TrajectoryParser(pd_frame=df,
                                                  uuid=uuid,
                                                  verbose=0)
        return trajectories
Example #24
def run_analysis_pipeline(patient_uuid, output_formats=["dict"], daily_summary=True,
                          timeFrom=None,
                          timeTo=None,
                          request_id=None,
                          html_filename_prefix="", include_maps="static", testing=False):
    """ Runs the analysis pipeline and returns the risk report.

        :param patient_uuid: UUID of the patient to be analysed
        :param output_format: The output format of the report. Valid options are "dict, html, stdout".
        :param daily_summary: If True, the contacts are aggregated on  a daily basis, otherwise each contact is reported independently._dist_thresh_
        :param timeFrom: The start time of the analysis. If None, the pipeline runs the analysis for the last days as specified in default_parameters.py.
        :param timeTo: The end time of the analysis. If None, UTC now will be used.
        :param html_filename_prefix: Only valid if output_format includes "html". A prefix string that for the filename._dist_thresh_
        :param include_maps: Specifies which types of maps to include in the report. Valid options are  None, "static" or "interactive".
        :param testing: Boolean flag - if true reports also contacts that do not satisfy the criteria defined by FHI
    """

    if request_id:
        context_name = str(request_id)
    else:
        context_name = str(patient_uuid)

    calling_thread = current_thread()
    calling_thread_name = calling_thread.name
    calling_thread.name = context_name
    try:
        # Set parameters
        assert set(output_formats).issubset(("dict", "html", "stdout"))
        patient_uuid = patient_uuid.lower()  # UUIDs are lower case by convention
        set_analysis_period(params, timeFrom, timeTo)

        logger.info("Running analysis pipeline with following parameters and config (extracts): "
                    f"Params={json.dumps(params, default=str)} "
                    f"Config={json.dumps(config.loggable_params(), default=str)}")

        # We only render matplotlib images, so we can use the agg backend.
        matplotlib.use('Agg')

        # Build contact graphs
        gps_contact_graph = GPSContactGraph([patient_uuid], params)
        bt_contact_graph = BTContactGraph([patient_uuid], params)

        # Extract contacts results
        gps_results = gps_contact_graph.contacts_with(patient_uuid)
        bt_results = bt_contact_graph.contacts_with(patient_uuid)

        all_results = bt_results + gps_results

        # Gather device infos of uuids in graph
        device_info = gps_contact_graph.node_device_info.copy()
        device_info.update(bt_contact_graph.node_device_info)

        # Log all contacts anonymously
        log_contacts(patient_uuid, all_results.contacts, device_info, add_random_salt=True)

        # Create report
        report = RiskReport(patient_uuid, all_results.contacts, device_info, include_maps, testing)

        # Return report in the requested format
        if "html" in output_formats:
            filename = f"{html_filename_prefix}report_{patient_uuid}_dist_thresh_{params['filter_options']['dist_thresh']}.html"
            report.to_html(filename, daily_summary)
            logger.info("Analysis pipeline finished")
        if "stdout" in output_formats:
            logger.info(report)
            logger.info("Analysis pipeline finished")
        if "dict" in output_formats:
            if daily_summary:
                d = report.to_dict_daily()
            else:
                d = report.to_dict()
            logger.info("Analysis pipeline finished")
            return d
    finally:
        calling_thread.name = calling_thread_name