def in_past_week(result, now, ordered_before=None):
    """Report whether the row's most recent timestamp is at most 7 days old.

    Two string fields are read from `result`: the sent/collection time (at
    _SENT_COLLECTION_TIME_INDEX) and the received time (at
    _RECEIVED_TIME_INDEX). Each non-empty field is converted with
    parse_datetime, and the later of the two is compared against `now`.

    Args:
      result: indexable row holding the two timestamp strings; either may be
        empty/None.
      now: the reference time the age is measured from.
      ordered_before: optional cutoff; when given and the sent/collection time
        is later than it, the row is rejected outright.

    Returns:
      True when `(now - latest).days <= 7`, False when no timestamp is
      available or the `ordered_before` cutoff rejects the row.

    Note: the comparison uses the whole-day component of the difference, so
    anything less than 8 full days old passes, as does a timestamp later than
    `now` (negative day count).
    """
    sent_str = result[_SENT_COLLECTION_TIME_INDEX]
    received_str = result[_RECEIVED_TIME_INDEX]

    latest = None
    if sent_str:
        latest = parse_datetime(sent_str)
        # The cutoff applies only to the sent/collection time, never to the
        # received time.
        if ordered_before and latest > ordered_before:
            return False

    if received_str:
        received = parse_datetime(received_str)
        # Keep the later of the two when both exist; otherwise the received
        # time (whatever parse_datetime returned) becomes the candidate.
        latest = max(received, latest) if (received and latest) else received

    if not latest:
        return False
    return (now - latest).days <= 7
def reduce_participant_data_to_hpo_metric_date_deltas(reducer_key, reducer_values, now=None):
    """Turn one participant's dated metric values into per-HPO metric deltas.

    Input:
      reducer_key - participant ID (not read by this function).
      reducer_values - strings of the form date|metric, or DOB|date_of_birth.
      now - passed through to _add_age_range_metrics when a date of birth
        was supplied; not otherwise used here.

    Sorts everything by date, and emits hpoId|participant_type|metric|date|delta
    strings representing increments or decrements of metrics based on this
    participant. Each emitted value is built with
    reduce_result_value(map_result_key(...), <ISO date>, '1' or '-1').

    This is a generator. If reducer_values contains no dated metric entries
    (for example only a DOB entry), nothing is yielded.
    """
    #pylint: disable=unused-argument
    metrics_conf = get_config()
    metric_fields = get_fieldnames()
    summary_fields = metrics_conf['summary_fields']
    # These two initial values are placeholders; both names are reassigned
    # below before they are read.
    last_state = {}
    last_hpo_id = None
    dates_and_metrics = []
    date_of_birth = None
    # Split the input into the (at most one retained) date of birth and a list
    # of (datetime, metric string) pairs.
    for reducer_value in reducer_values:
        t = parse_tuple(reducer_value)
        if t[0] == DATE_OF_BIRTH_PREFIX:
            date_of_birth = datetime.strptime(t[1], DATE_FORMAT).date()
        else:
            dates_and_metrics.append((parse_datetime(t[0]), t[1]))

    if not dates_and_metrics:
        # Nothing dated to report; ends the generator without yielding.
        return

    # Sort the dates and metrics, date first then metric.
    dates_and_metrics = sorted(dates_and_metrics)

    # Every configured field starts as UNSET; TOTAL_SENTINEL is set to 1 in the
    # initial state.
    initial_state = {f.name: UNSET for f in metrics_conf['fields']}
    initial_state[TOTAL_SENTINEL] = 1
    last_hpo_id = UNSET
    # Look for the starting HPO (the first HPO_ID_METRIC entry in sorted order)
    # and update the initial state with it. The entry itself is left in
    # dates_and_metrics; nothing is removed here.
    for i in range(0, len(dates_and_metrics)):
        metric = dates_and_metrics[i][1]
        metric_name, value = parse_metric(metric)
        if metric_name == HPO_ID_METRIC:
            last_hpo_id = value
            initial_state[HPO_ID_METRIC] = last_hpo_id
            break

    # If we know the participant's date of birth, add a starting age range
    # and entries for when it changes over time.
    # NOTE(review): presumably _add_age_range_metrics appends those entries to
    # dates_and_metrics in place (hence the re-sort) -- confirm against the helper.
    if date_of_birth:
        initial_state[AGE_RANGE_METRIC] = _add_age_range_metrics(dates_and_metrics, date_of_birth,
                                                                 now)
        # Re-sort with the new entries for age range changes.
        dates_and_metrics = sorted(dates_and_metrics)

    # Run summary functions on the initial state.
    _update_summary_fields(summary_fields, initial_state)

    # Emit 1 values for the initial state before any metrics change, dated at
    # the earliest entry and attributed to the starting HPO.
    initial_date = dates_and_metrics[0][0]
    for k, v in initial_state.iteritems():
        yield reduce_result_value(map_result_key(last_hpo_id, _REGISTERED_PARTICIPANT, k, v),
                                  initial_date.date().isoformat(), '1')

    last_state = initial_state
    # Becomes True the first time the enrollment status field takes the
    # FULL_PARTICIPANT value; never reset afterwards.
    full_participant = False
    # Loop through all the metric changes for the participant.
    for dt, metric in dates_and_metrics:
        date = dt.date()
        # Work on a copy so the previous state stays available for comparison.
        new_state = copy.deepcopy(last_state)

        # NOTE(review): _process_metric is expected to apply `metric` to
        # new_state and return a falsy value when nothing changed -- confirm.
        if not _process_metric(metric_fields, summary_fields, metric, new_state):
            continue  # No changes so there's nothing to do.
        hpo_id = new_state.get(HPO_ID_METRIC)
        hpo_change = last_hpo_id != hpo_id
        # Snapshot taken before the field loop: whether the participant was
        # already "full" prior to this metric. full_participant itself may flip
        # inside the loop below.
        last_full_participant = full_participant
        for k, v in new_state.iteritems():
            # Output a delta for this field if it is either the first value we have,
            # or if it has changed. In the case that one of the facets has changed,
            # we need deltas for all fields.
            # If last_state were empty, old_val would be that empty dict rather
            # than a field value; last_state starts out as the non-empty
            # initial_state.
            old_val = last_state and last_state.get(k, None)
            if hpo_change or v != old_val:
                formatted_date = date.isoformat()
                if (k == ENROLLMENT_STATUS_METRIC and v == EnrollmentStatus.FULL_PARTICIPANT and
                        not full_participant):
                    full_participant = True
                    # Emit 1 values for the current state for all fields for the full
                    # participant type.
                    for k2, v2 in new_state.iteritems():
                        yield reduce_result_value(
                            map_result_key(hpo_id, _FULL_PARTICIPANT, k2, v2),
                            formatted_date, '1')
                # +1 for the new value under the (possibly new) HPO.
                yield reduce_result_value(map_result_key(hpo_id, _REGISTERED_PARTICIPANT, k, v),
                                          formatted_date, '1')
                # Mirror the +1 under the full-participant type only if the
                # participant was already full before this metric; on the
                # transition itself the bulk emit above already covered it.
                if last_full_participant:
                    yield reduce_result_value(map_result_key(hpo_id, _FULL_PARTICIPANT, k, v),
                                              formatted_date, '1')
                if last_state:
                    # If the value changed, output -1 delta for the old value,
                    # keyed by the previous HPO.
                    yield reduce_result_value(
                        map_result_key(last_hpo_id, _REGISTERED_PARTICIPANT, k, old_val),
                        formatted_date, '-1')
                    if last_full_participant:
                        yield reduce_result_value(
                            map_result_key(last_hpo_id, _FULL_PARTICIPANT, k, old_val),
                            formatted_date, '-1')

        # Only reached when the metric changed something (see `continue` above).
        last_state = new_state
        last_hpo_id = hpo_id