Example #1
    def get(self, variable_id):
        parser = reqparse.RequestParser()
        parser.add_argument('filter', action='append')
        parser.add_argument('location', required=True, help="Please, provide a location parameter.")
        args = parser.parse_args()
        filters = args['filter']
        parent_id = int(args['location'])
        all_locations_dict = get_locations(db.session)
        children_location_ids = get_children(parent_id, all_locations_dict)
        results_by_location = []
        for location_id in children_location_ids:
            result = []
            location = Location.get_location_by_id(location_id)
            sql_alchemy_filters = self.get_sql_alchemy_filters(filters)

            if location.deviceid:
                device_ids = location.deviceid.split(',')
                for device_id in device_ids:
                    result.append(self._get_variable_count_for_device_id(
                        device_id, variable_id, sql_alchemy_filters))
            results_by_location.append({
                "clinicId": location_id,
                "deviceSubmissions": result
            })
        return jsonify({
            "parentLocationId": parent_id,
            "clinicCount": len(children_location_ids),
            "clinicSubmissions": results_by_location
        })
Example #2
    def get(self,
            variable_id,
            location=1,
            start_date=None,
            end_date=None,
            include_all_clinics=False):

        start_date, end_date = fix_dates(start_date, end_date)
        location = int(location)

        allowed_location = 1
        if g:
            allowed_location = g.allowed_location
        if not is_allowed_location(location, allowed_location):
            return {}
        vi = str(variable_id)
        results = db.session.query(
            func.sum(Data.variables[vi].astext.cast(Float)).label('value'),
            Data.geolocation, Data.clinic
        ).filter(
            Data.variables.has_key(vi),
            Data.date >= start_date,
            Data.date < end_date,
            or_(loc == location
                for loc in (Data.country, Data.region, Data.district,
                            Data.clinic))
        ).group_by("clinic", "geolocation")

        locations = get_locations(db.session)
        ret = {}
        for r in results.all():
            if r[1] is not None:
                geo = to_shape(r[1])
                if r[2]:
                    # Leaflet uses LatLng
                    ret[str(r[2])] = {
                        "value": r[0],
                        "geolocation": [geo.y, geo.x],
                        "clinic": locations[r[2]].name
                    }
                else:
                    if not include_all_clinics:
                        coords = [geo.y, geo.x]  # Leaflet uses LatLng
                        ret[str(coords)] = {
                            "value": r[0],
                            "geolocation": coords,
                            "clinic": "Outbreak Investigation"
                        }

        if include_all_clinics:
            results = db.session.query(model.Locations)
            for row in results.all():
                if is_allowed_location(row.id, location):
                    if (row.case_report and row.point_location is not None
                            and str(row.id) not in ret):
                        geo = to_shape(row.point_location)
                        ret[str(row.id)] = {
                            "value": 0,
                            "geolocation": [geo.y, geo.x],
                            "clinic": row.name
                        }
        return ret
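For orientation, a hedged sketch of the shape of the mapping this returns (ids, names and values invented):

    # {
    #     "7": {"value": 12.0,
    #           "geolocation": [15.59, 32.53],  # [lat, lng] for Leaflet
    #           "clinic": "Clinic 1"},
    #     ...
    # }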
Example #3
    def get(self, location_id, clinic_type=None, require_case_report="yes"):
        locations = get_locations(db.session)
        other_conditions = {}
        for arg in request.args:
            other_conditions[arg] = request.args.get(arg)
        points = []
        if not is_allowed_location(location_id, g.allowed_location):
            return FeatureCollection(points)

        for l in locations:
            if ((locations[l].case_report or require_case_report == "no")
                    and is_child(location_id, l, locations)
                    and locations[l].point_location is not None
                    and (not clinic_type
                         or locations[l].clinic_type == clinic_type)):
                other_cond = True
                for cond in other_conditions:
                    if locations[l].other.get(cond) != other_conditions[cond]:
                        other_cond = False
                        break
                if not other_cond:
                    continue
                geo = to_shape(locations[l].point_location)
                # GeoJSON specifies (longitude, latitude) order
                p = Point((float(geo.x), float(geo.y)))
                points.append(
                    Feature(geometry=p,
                            properties={
                                "name": locations[l].name,
                                "other": locations[l].other
                            }))
        return FeatureCollection(points)
Example #4
    def get(self,
            category,
            location=1,
            start_date=None,
            end_date=None,
            include_all_clinics=False):

        start_date, end_date = fix_dates(start_date, end_date)
        location = int(location)

        allowed_location = 1
        if g:
            allowed_location = g.allowed_location
        if not is_allowed_location(location, allowed_location):
            return {}
        results = db.session.query(
            Data.categories[category], Data.geolocation, Data.clinic,
            Data.date
        ).distinct(Data.clinic).filter(
            Data.categories.has_key(category),
            Data.date >= start_date,
            Data.date < end_date,
            or_(loc == location
                for loc in (Data.country, Data.region, Data.district,
                            Data.clinic))
        ).order_by(Data.clinic).order_by(Data.date.desc())

        locations = get_locations(db.session)
        ret = {}
        for r in results.all():
            if r[1] is not None:
                geo = to_shape(r[1])
                if r[2]:
                    # Leaflet uses LatLng
                    ret[str(r[2])] = {
                        "value": r[0],
                        "geolocation": [geo.y, geo.x],
                        "clinic": locations[r[2]].name
                    }
                else:
                    if not include_all_clinics:
                        coords = [geo.y, geo.x]  # Leaflet uses LatLng
                        ret[str(coords)] = {
                            "value": r[0],
                            "geolocation": coords,
                            "clinic": "Outbreak Investigation"
                        }

        if include_all_clinics:
            results = db.session.query(model.Locations)
            for row in results.all():
                if is_allowed_location(row.id, location):
                    if (row.case_report and row.point_location is not None
                            and str(row.id) not in ret):
                        geo = to_shape(row.point_location)
                        ret[str(row.id)] = {
                            "value": 0,
                            "geolocation": [geo.y, geo.x],
                            "clinic": row.name
                        }
        return ret
Example #5
    def get(self, location_id, clinic_type=None):
        locs = get_locations(db.session)
        children = get_children(location_id, locs)
        if clinic_type:
            res = db.session.query(func.count(model.Locations.id)).filter(
                model.Locations.id.in_(children),
                model.Locations.case_report == 1,
                model.Locations.clinic_type == clinic_type).first()
        else:
            res = db.session.query(func.count(model.Locations.id)).filter(
                model.Locations.id.in_(children),
                model.Locations.case_report == 1).first()

        return {"total": res[0]}
Example #6
def is_allowed_location(location, allowed_location):
    """"
    Returns true if the location is allowed_location

    Args:
        location: location id
        allowed_location: allowed_location
    Returns:
        is_allowed(bool): Is location allowed.

    """
    if allowed_location == 1:
        return True
    global allowed_locations_locs
    if allowed_locations_locs is None:
        allowed_locations_locs = get_locations(db.session)
    return is_child(allowed_location, int(location), allowed_locations_locs)
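A minimal usage sketch, assuming a Flask request context in which the auth layer has set g.allowed_location (as in the resource examples above):

    # Hedged sketch: gate an endpoint on the caller's allowed location.
    def get(self, location_id):
        if not is_allowed_location(location_id, g.allowed_location):
            return {}  # location lies outside the caller's allowed subtree
        ...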
Example #7
    def get(self, variable_id, identifier_id, level, weekly=True, location_id=1):
        variable_id = str(variable_id)
        identifier_id = str(identifier_id)
        if weekly == "0":
            weekly = False
        year = datetime.today().year
        start_date = datetime(year, 1, 1)
        end_date = datetime(year + 1, 1, 1)
        result = latest_query(
            db, variable_id, identifier_id, start_date, end_date, location_id, weeks=True
        )
        ret = {}
        locs = get_locations(db.session)
        if result:
            for r in result[level]:
                ret[locs[r].name] = {"total": result[level][r]["total"],
                                     "weeks": result[level][r]["weeks"],
                                     "id": r}

        return ret
Example #8
def get_locations_by_level(level, only_loc):
    """
    Returns all the locations with the given level. If only_loc is
    given we only include children of only_loc.If we ask for the clinic
    level we also require that the clinic sends case reports

    Args:
        level: clinic, district or region
        only_loc: location to restrict wich locations are included

    Returns:
        names: {id: name}
    """
    locations = abacus_util.get_locations(db.session)
    names = {}
    for l in locations.values():
        if (l.level == level and
            (not only_loc or abacus_util.is_child(only_loc, l.id, locations))
                and (level != "clinic" or l.case_report)):
            names[l.id] = l.name
    return names
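As an illustration, a hedged sketch of how this helper could back a simple lookup endpoint (the resource class and route are assumptions, not part of the source):

    # Hypothetical Flask-RESTful resource built on get_locations_by_level.
    class LocationsByLevel(Resource):
        def get(self, level, only_loc=None):
            # e.g. level="district", only_loc=2 restricts to that subtree
            return jsonify(get_locations_by_level(level, only_loc))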
Example #9
    def get(self):
        # First get clinics and total population
        locs = get_locations(db.session)
        refugee_clinics = get_children(1, locs, clinic_type="Refugee")
        tot_pop = 0
        clinic_map = []
        for clinic in refugee_clinics:
            result = get_latest_category("population", clinic,
                                         datetime(2015, 1, 1), datetime.now())
            clinic_pop = 0
            if result:
                clinic_pop = sum(
                    sum(result[x].values()) for x in result)

                tot_pop += clinic_pop
            geo = to_shape(locs[clinic].point_location)
            clinic_map.append({"value": clinic_pop,
                               "geolocation": [geo.y, geo.x],
                               "clinic": locs[clinic].name,
                               "location_id": clinic})
        return clinic_map
Example #10
    def get(self, variable_id):

        ir = IncidenceRate()

        incidence_rates = ir.get(variable_id, "clinic")

        locations = get_locations(db.session)
        ret = {}
        for clinic in incidence_rates.keys():
            if incidence_rates[clinic]:
                if locations[clinic].point_location is not None:
                    geo = to_shape(locations[clinic].point_location)
                    ret[clinic] = {
                        "value": incidence_rates[clinic],
                        "geolocation": [geo.y, geo.x],
                        # Leaflet uses LatLng
                        "clinic": locations[clinic].name
                    }

        return ret
Example #11
def export_data(uuid, allowed_location, use_loc_ids=False, param_config_yaml=yaml.dump(config)):
    """
    Exports the data table from db

    Inserts finished file in to databse

    Args:
       uuid: uuid for download
       use_loc_ids: If we use names are location ids
    """

    db, session = get_db_engine()
    status = DownloadDataFiles(
        uuid=uuid,
        generation_time=datetime.now(),
        type="data",
        success=0,
        status=0
    )
    session.add(status)
    session.commit()

    results = session.query(
        func.distinct(
            func.jsonb_object_keys(Data.variables)))
    variables = []
    for row in results:
        variables.append(row[0])
    locs = get_locations(session)
    fieldnames = ["id", "zone", "country", "region",
                  "district", "clinic", "zone_id", "country_id", "region_id",
                  "district_id", "clinic_id", "clinic_type",
                  "geolocation", "date", "uuid"] + list(variables)
    dict_rows = []

    filename = base_folder + "/exported_data/" + uuid + "/data"
    os.mkdir(base_folder + "/exported_data/" + uuid)
    output = open(filename + ".csv", "w")
    writer = csv.DictWriter(output, fieldnames, extrasaction="ignore")
    writer.writeheader()
    results = session.query(Data).yield_per(500)
    i = 0
    for row in results:
        dict_row = dict(
            (col, getattr(row, col)) for col in row.__table__.columns.keys()
        )

        for l in ["country", "zone", "region", "district", "clinic"]:
            if dict_row[l]:
                dict_row[l + "_id"] = dict_row[l]
                dict_row[l] = locs[dict_row[l]].name

        dict_row.update(dict_row.pop("variables"))
        dict_rows.append(dict_row)
        if i % 1000 == 0:
            writer.writerows(dict_rows)
            dict_rows = []
        i += 1
    writer.writerows(dict_rows)
    output.close()
    status.status = 1
    status.success = 1
    session.commit()
    return True
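A hedged invocation sketch (the uuid value is illustrative only):

    import uuid as uuid_lib

    download_uuid = str(uuid_lib.uuid4())
    # allowed_location=1 is the root location id throughout these examples;
    # the export is written to exported_data/<uuid>/data.csv
    export_data(download_uuid, allowed_location=1, use_loc_ids=False)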
Example #12
    def get(self, location, start_date=None, end_date=None):

        start_date, end_date = fix_dates(start_date, end_date)
        self.locs = get_locations(db.session)
        clinics = get_children(parent=location, locations=self.locs, require_case_report=True)
        kit_contents = db.session.query(CalculationParameters.parameters) \
            .filter(CalculationParameters.name == 'medicine_kits') \
            .one()[0]
        barcode_category = 'barcode_prescription'
        conditions = [Data.categories.has_key(barcode_category), Data.clinic.in_(clinics)]

        # Get first and last prescription for a clinic and medicine without time constraints
        first_last_prescr_query = db.session.query(Data.clinic,
                                                   Data.categories[barcode_category].astext,
                                                   func.count(Data.id),
                                                   func.min(Data.date),
                                                   func.max(Data.date))
        first_last_prescr_query = first_last_prescr_query.filter(*conditions)
        first_last_prescr_query = first_last_prescr_query.group_by(Data.clinic,
                                                                   Data.categories[barcode_category].astext)

        # Get first and last prescription for a clinic without time constraints
        clinic_info = db.session.query(Data.clinic,
                                       func.count(Data.id),
                                       func.min(Data.date),
                                       func.max(Data.date))
        clinic_info = clinic_info.filter(*conditions).group_by(Data.clinic)


        # Get number of prescriptions within time constraints
        date_conditions = [Data.date >= start_date, Data.date < end_date]
        prescription_in_date_range_query = db.session.query(Data.clinic,
                                                            Data.categories[barcode_category].astext,
                                                            func.count(Data.id))
        prescription_in_date_range_query = prescription_in_date_range_query.filter(*conditions)
        prescription_in_date_range_query = prescription_in_date_range_query.filter(*date_conditions)
        prescription_in_date_range_query = prescription_in_date_range_query.group_by(Data.clinic, Data.categories[barcode_category].astext)

        prescriptions = {
            'clinic_table': [],
            'medicine_table': [],
            'clinic_table_title': 'Prescribing clinics',
            'clinic_data': {}
        }

        # Restructure the DB return sets into a JSON
        for prescription in first_last_prescr_query.all():

            location_id = prescription[0]
            location_id_str = str(location_id)
            medicine_key = prescription[1]
            prescription_count = prescription[2]
            prescription_min_date = prescription[3]
            prescription_max_date = prescription[4]

            # if the medicine type is not configured to be reported, skip
            medicine_kit_details = kit_contents.get(medicine_key)
            if not medicine_kit_details:
                continue

            # get number of kits in the clinic
            kits_in_clinic = self._get_number_of_kits_in_clinic(location_id)

            # If clinic is not in JSON yet
            prescription_for_clinic = prescriptions['clinic_data'].setdefault(location_id_str, {})
            prescription_for_clinic[medicine_key] = {
                "min_date": prescription_min_date.strftime("%Y-%m-%d"),
                "max_date": prescription_max_date.strftime("%Y-%m-%d"),
                "total_prescriptions": prescription_count,
                "inventory":
                    (medicine_kit_details["total"] * kits_in_clinic
                     if medicine_kit_details["tablets_in_kit"] == ""
                     else int(medicine_kit_details["tablets_in_kit"]) * kits_in_clinic - prescription_count
                     ),
                "depletion":
                    (prescription_count / (float(medicine_kit_details["total"]) * kits_in_clinic)
                     if medicine_kit_details["tablets_in_kit"] == ""
                     else prescription_count / (float(medicine_kit_details["tablets_in_kit"]) * kits_in_clinic)
                     ),
                "stock":
                    (1 - prescription_count / (float(medicine_kit_details["total"]) * kits_in_clinic)
                     if medicine_kit_details["tablets_in_kit"] == ""
                     else 1 - prescription_count / (float(medicine_kit_details["tablets_in_kit"]) * kits_in_clinic)
                     ),
            }

        # Assign the number of prescriptions to data object
        for prescription in prescription_in_date_range_query.all():
            str_prescription_location = str(prescription[0])
            medicine_key = str(prescription[1])
            prescription_count = prescription[2]
            prescription_for_location = prescriptions['clinic_data'].setdefault(str_prescription_location, {})
            medicine = prescription_for_location.setdefault(medicine_key, {})
            medicine['prescriptions'] = prescription_count

        barcode_variables = get_variables(barcode_category)
        # create clinic table info
        for prescription in clinic_info.all():
            location_id = prescription[0]
            location_id_str = str(location_id)
            prescription_min_date = prescription[2]
            prescription_max_date = prescription[3]

            prescriptions_for_location = prescriptions['clinic_data'].setdefault(location_id_str, {})
            highest_depletion = find_highest_depletion(prescriptions_for_location)
            if highest_depletion:
                depletion_round_percent = round(highest_depletion['depletion'] * 100, 1)
                prescriptions['clinic_table'].append({
                    "clinic_id": location_id_str,
                    "clinic_name": self.locs[location_id].name,
                    "min_date": prescription_min_date.strftime("%Y-%m-%d"),
                    "max_date": prescription_max_date.strftime("%Y-%m-%d"),
                    "most_depleted_medicine": barcode_variables[highest_depletion['medicine']],
                    "depletion": highest_depletion['depletion'],
                    "str_depletion": str(depletion_round_percent) + '%'
                })

        # create medicine table info
        for clinic in prescriptions['clinic_data']:
            for medicine_key, medicine in prescriptions['clinic_data'][clinic].items():
                kit_details_for_medicine = kit_contents.get(medicine_key, {})
                if kit_details_for_medicine.get('tablets_in_kit', '') != '':
                    medicine_round_stock_percentage = round(medicine['stock'] * 100, 1)
                    prescriptions['medicine_table'].append({
                        "clinic_id": clinic,
                        "clinic_name": self.locs[int(clinic)].name,
                        "medicine_name": barcode_variables[medicine_key],
                        "min_date": medicine['min_date'],
                        "max_date": medicine['max_date'],
                        "stock": medicine['stock'],
                        "str_stock": str(medicine_round_stock_percentage) + '%',
                        "old_str_stock": (
                            "-"
                            if kit_contents[medicine_key]["tablets_in_kit"] == ""
                            else str(medicine_round_stock_percentage) + '%'
                        ),
                        "total_prescriptions": medicine['total_prescriptions']
                    })

        return prescriptions
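For orientation, a hedged sketch of the structure this returns (ids, names and figures invented):

    # {
    #     "clinic_table_title": "Prescribing clinics",
    #     "clinic_table": [{"clinic_id": "7", "clinic_name": "Clinic 1",
    #                       "min_date": "2018-01-03", "max_date": "2018-06-20",
    #                       "most_depleted_medicine": "Paracetamol",
    #                       "depletion": 0.42, "str_depletion": "42.0%"}],
    #     "medicine_table": [...],  # per-medicine stock rows
    #     "clinic_data": {"7": {"<barcode key>": {"min_date": "...",
    #                                             "total_prescriptions": 120,
    #                                             "depletion": 0.3,
    #                                             "stock": 0.7, ...}}},
    # }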
Example #13
    def get(self, only_case_reports=True):
        # Load filters supplied in GET args
        inc_case_types = json.loads(request.args.get('inc_case_types', '[]'))
        exc_case_types = json.loads(request.args.get('exc_case_types', '[]'))

        key = f"{inc_case_types!r}_{exc_case_types!r}"
        if key in loc_trees:
            return loc_trees[key]

        # Get location data from db and any access restrictions set by auth
        locs = get_locations(db.session)
        loc = g.allowed_location

        # Start drawing the tree
        ret = {loc: {"id": loc, "text": locs[loc].name, "nodes": []}}
        for l in sorted(locs.keys()):
            if l >= loc and is_child(loc, l, locs):
                if not only_case_reports or (locs[l].case_report == 1
                                             or not locs[l].deviceid):
                    if is_child(l, loc, locs):
                        ret.setdefault(locs[l].parent_location, {"nodes": []})

                    # Factor out the process of adding a location to the tree
                    def add_loc():
                        ret.setdefault(l, {"nodes": []})
                        ret[l].update({"id": l, "text": locs[l].name})
                        ret[locs[l].parent_location]["nodes"].append(ret[l])

                    # Determine if the location matches incl and excl criteria
                    loc_case_types = set()
                    if locs[l].case_type:
                        loc_case_types = set(locs[l].case_type)
                    inc = bool(set(inc_case_types) & loc_case_types)
                    exc = set(exc_case_types) >= loc_case_types

                    # Add the location if it is not a clinic
                    if not locs[l].level == 'clinic':
                        add_loc()
                    # Otherwise add the location if no filters provided at all
                    elif not inc_case_types and not exc_case_types:
                        add_loc()
                    # Otherwise if both filters are provided, only add loc if
                    # ...inclusion criteria are met but exclusion criteria are not
                    elif inc_case_types and exc_case_types:
                        if inc and not exc:
                            add_loc()
                    # Otherwise add loc if incl criteria specified and met
                    elif inc_case_types and inc:
                        add_loc()
                    # Otherwise add loc if excl criteria specified and not met
                    elif exc_case_types and not exc:
                        add_loc()

        # Recursively clean any branches without clinics in them.
        def clean(tree):
            for child in reversed(tree['nodes']):
                clean(child)
                if not (child['nodes'] or locs[child['id']].level == 'clinic'):
                    tree['nodes'].remove(child)

        clean(ret[loc])
        loc_trees[key] = jsonify(ret[loc])
        return loc_trees[key]
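For orientation, a hedged sketch of the tree this returns (names invented):

    # {"id": 1, "text": "Demo Country", "nodes": [
    #     {"id": 2, "text": "Region A", "nodes": [
    #         {"id": 11, "text": "Clinic 1", "nodes": []}]}]}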
Example #14
def _export_week_level_completeness(uuid, download_name, level,
                                    completeness_config, translator, param_config,
                                    start_date=None, end_date=None,
                                    wide_data_format=False):
    """
    Exports completeness data by location and week ( and year),

    Args:\n
      uuid: uuid for the download process
      download_name: Name of download file
      level: level of location
      competeness_config: Specified the completeness call we want to make
      translator: Translator
      param_config: param config
      start_date: The date to start the data set
      end_date: End date for the aggregation
      wide_data_format: If true the data is returned in the wide format, else in long format
    """
    db, session = get_db_engine()
    locs = get_locations(session)
    operation_status = OperationStatus(download_name, uuid)

    if start_date:
        start_date = parse(start_date).replace(tzinfo=None)
    if end_date:
        end_date = parse(end_date).replace(tzinfo=None)
    completeness_calls = construct_completeness_call(completeness_config[0],
                                                     level,
                                                     start_date,
                                                     end_date)
    jwt_auth_token = meerkat_libs.authenticate(
        username=param_config.server_auth_username,
        password=param_config.server_auth_password,
        auth_root=param_config.auth_root)
    if not jwt_auth_token:
        raise AttributeError("Not successfully logged in for API access")
    headers = {'content-type': 'application/json',
               'authorization': 'Bearer {}'.format(jwt_auth_token)}
    data = []

    year_label = translator.gettext("Year")
    location_label = translator.gettext(level.title())
    week_label = translator.gettext("Week")
    district_label = translator.gettext("District")
    completeness_config_label = translator.gettext(completeness_config[1])

    for call, year, start_week in completeness_calls:
        api_result = requests.get(param_config.api_root + call, headers=headers)
        timeline = api_result.json()["timeline"]
        max_per_week = int(call.split("/")[4])  # Extract the maximum number from api call
        for location in timeline:
            loc_id = int(location)
            for week in range(len(timeline[location]["weeks"])):
                data.append({year_label: year,
                             location_label: locs[loc_id].name,
                             week_label: week + start_week,
                             completeness_config_label: timeline[location]["values"][week] / max_per_week * 100
                             })
                if level == "clinic" and loc_id != 1:
                    data[-1][district_label] = locs[locs[loc_id].parent_location].name

    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    df = pandas.DataFrame(data)
    if wide_data_format:
        if level == "clinic":
            index_labels = [year_label, district_label, location_label, week_label]
        else:
            index_labels = [year_label, location_label, week_label]
        df = df.set_index(index_labels).unstack()
    df.to_csv(filename + ".csv")
    df.to_excel(filename + ".xlsx")
    operation_status.submit_operation_success()
Example #15
    def get(self,
            variable,
            location,
            exclude_case_type=None,
            num_weeks=0,
            include_case_type=None,
            include_clinic_type=None,
            require_case_report=True):

        inc_case_types = set(
            json.loads(request.args.get('inc_case_types', '[]')))
        exc_case_types = set(
            json.loads(request.args.get('exc_case_types', '[]')))

        if not is_allowed_location(location, g.allowed_location):
            return {}

        if require_case_report in [0, "0"]:
            require_case_report = False
        if num_weeks == "0":
            num_weeks = 0

        if exclude_case_type in [0, "0", "None"]:
            exclude_case_type = None
        if include_case_type in [0, "0", "None"]:
            include_case_type = None
        if include_clinic_type in [0, "0", "None"]:
            include_clinic_type = None

        locations = abacus_util.get_locations(db.session)
        location = int(location)
        clinics = get_children(location,
                               locations,
                               require_case_report=require_case_report)
        conditions = [Data.variables.has_key(variable)]
        if num_weeks:
            epi_year, epi_week = abacus_util.epi_week.epi_week_for_date(
                datetime.today())
            start_date = meerkat_abacus.util.epi_week.epi_week_start_date(
                epi_year,
                int(epi_week) - int(num_weeks))
            end_date = meerkat_abacus.util.epi_week.epi_week_start_date(
                epi_year, epi_week)
            conditions.append(Data.date >= start_date)
            conditions.append(Data.date < end_date)
        exclude_list = []
        if exclude_case_type and "code:" in exclude_case_type:
            query = db.session.query(Data.clinic).filter(
                Data.variables.has_key(exclude_case_type.split(":")[1]))
            exclude_list = [r[0] for r in query.all()]

        query = db.session.query(Data.clinic).filter(*conditions)
        clinics_with_variable = [r[0] for r in query.all()]
        non_reporting_clinics = []

        if include_clinic_type:
            # split(",") also handles the single-type case
            include_clinic_type = set(include_clinic_type.split(","))

        if include_case_type:
            include_case_type = set(include_case_type.split(","))
            if inc_case_types:
                include_case_type = inc_case_types.union(include_case_type)
        elif inc_case_types:
            include_case_type = inc_case_types

        if exclude_case_type and "code:" not in exclude_case_type:
            if "," in exclude_case_type:
                exclude_case_type = set(exclude_case_type.split(","))
            else:
                exclude_case_type = set([exclude_case_type])
            if exc_case_types:
                exclude_case_type = exc_case_types.union(exclude_case_type)
        elif exc_case_types:
            exclude_case_type = exc_case_types

        for clinic in clinics:
            if (include_clinic_type and
                    locations[clinic].clinic_type not in include_clinic_type):
                continue
            if clinic not in clinics_with_variable:
                if exclude_list and clinic in exclude_list:
                    continue
                if include_case_type:
                    if set(locations[clinic].case_type) & include_case_type:
                        non_reporting_clinics.append(clinic)
                elif exclude_case_type and "code:" not in exclude_case_type:
                    if not set(
                            locations[clinic].case_type) & exclude_case_type:
                        non_reporting_clinics.append(clinic)

                else:
                    non_reporting_clinics.append(clinic)
        return {"clinics": non_reporting_clinics}
Example #16
def export_category(uuid, form_name, category, download_name,
                    variables, data_type, allowed_location,
                    start_date=None, end_date=None, language="en",
                    param_config_yaml=yaml.dump(config)):
    """
    We take a variable dictionary of form field name: display_name.
    There are some special commands that can be given in the form field name:

    * icd_name$category will translate an icd code in icd_code to names given
       by the variables in category
    * clinic,region and district will give this location information

    * the $translate keyword can be used to translate row values to other ones.
       I.e to change gender from male, female to M, F

    * field$month, field$year, field$epi_week: will extract the month, year
       or epi_week from the field

    * alert_links$alert_investigation$field: will get the field in the c
       orrepsonding alert_investigation

    Inserts the resulting csv file in the database

    Args:\n
       category: category to match\n
       variables: variable dictionary\n

    """
    # Runner loads the config object through a function parameter.
    param_config = yaml.load(param_config_yaml)
    country_config = param_config.country_config
    config_directory = param_config.config_directory

    # Some strings in download data need to be translated
    translation_dir = country_config.get("translation_dir", None)
    t = get_translator(param_config, language)

    db, session = get_db_engine()
    db2, session2 = get_db_engine()
    status = DownloadDataFiles(
        uuid=uuid,
        generation_time=datetime.now(),
        type=download_name,
        success=0,
        status=0
    )
    session.add(status)
    session.commit()
    res = session.query(AggregationVariables).filter(
        AggregationVariables.category.has_key(category)
    )


    locs = get_locations(session)
    data_keys = []
    cat_variables = {}
    for r in res:
        data_keys.append(r.id)
        cat_variables[r.id] = r
    if len(data_keys) == 0:
        # No matching variables for this category; mark as done and bail out.
        status.status = 1
        session.commit()
        return False
    return_keys = []
    translation_dict = {}
    icd_code_to_name = {}
    link_ids = []
    min_translation = {}

    def add_translations_from_file(details):
        # Load the csv file and reader
        file_path = '{}api/{}'.format(config_directory, details['dict_file'])
        with open(file_path, 'rt') as csv_file:
            reader = csv.reader(csv_file)
            # Establish which column in each row we're translating from and to.
            headers = next(reader)
            from_index = headers.index(details['from'])
            to_index = headers.index(details['to'])
            # Add translations to the translation dictionary.
            trans_dict = {}
            for row in reader:
                trans_dict[row[from_index]] = row[to_index]
        return trans_dict

    # DB conditions
    conditions = [
        or_(Data.variables.has_key(key) for key in data_keys)
    ]
    if data_type:
        conditions.append(Data.type == data_type)
    if start_date:
        conditions.append(Data.date >= parse(start_date))
    if end_date:
        conditions.append(Data.date <= parse(end_date))

    # Set up icd_code_to_name if needed and determine if
    # alert_links are included
    query_links = False

    to_columns_translations = {}
    for v in variables:

        if "every$" in v[0]:
            # Want to include all the fields in the dictionary
            # in v[1] for all the links in the name

            # First determine the maximum number of links
            link_name = v[0].split("$")[1]
            length_q = session.query(
                func.max(func.jsonb_array_length(Data.links[link_name]))).filter(
                *conditions)
            length = length_q.first()[0]
            for i in range(length):
                for variable in v[1]:
                    name = link_name + "_" + str(i) + " " + variable[1]
                    return_keys.append(name)
                    translation_dict[name] = "many_links&" + link_name + "&" + str(i) + "&" + variable[0]
            query_links = link_name
        else:
            return_keys.append(v[1])
            translation_dict[v[1]] = v[0]
        if "icd_name$" in v[0]:
            category = v[0].split("$")[-1]
            cat_variables = {}
            res = session.query(AggregationVariables).filter(
                AggregationVariables.category.has_key(category)
            )
            for r in res:
                cat_variables.setdefault(r.id, [])
                cat_variables[r.id].append(r)
            icd_code_to_name[v[0]] = {}
            for i in cat_variables.keys():
                for var in cat_variables[i]:
                    condition = var.condition
                    if ";" in condition:
                        condition = condition.split(";")[0]
                    if "," in condition:
                        # If a variable have many icd codes
                        # we take all of them into account
                        codes = condition.split(",")
                    else:
                        codes = [condition]
                    for c in codes:
                        if c:
                            icd_code_to_name[v[0]][c.strip()] = var.name
        if "$translate" in v[0]:
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))
            # If the json specifies file details, load translation from file.
            if tr_dict.get('dict_file', False):
                min_translation[v[1]] = add_translations_from_file(tr_dict)
            else:
                min_translation[v[1]] = tr_dict
            v[0] = field
            translation_dict[v[1]] = v[0]

        if "$to_columns" in v[0]:
            # Create columns of every possible value
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = {}
            if ";" in trans:
                tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))

            # If the json specifies file details, load translation from file.

            # Get all possible options from the DB

            results = session2.query(
                func.distinct(
                    func.regexp_split_to_table(
                        form_tables(param_config)[form_name].data[field].astext, ' '))).join(
                Data,
                Data.uuid == form_tables(param_config)[form_name].uuid).filter(
                *conditions).all()
            if tr_dict.get('dict_file', False):
                translations = add_translations_from_file(tr_dict)
            else:
                translations = {}
            return_keys.pop()
            for r in results:
                if r[0]:
                    name = v[1] + " " + translations.get(r[0], r[0])
                    if name not in return_keys:
                        return_keys.append(name)
                        if name in translation_dict:
                            translation_dict[name] = translation_dict[name] + "," + r[0]
                        else:
                            translation_dict[name] = field + "$to_columns$" + r[0]

        if "gen_link$" in v[0]:
            link_ids.append(v[0].split("$")[1])
    if "uuid" not in return_keys:
        return_keys.append("uuid")
        translation_dict["uuid"] = "meta/instanceID"
    link_ids = set(link_ids)
    links_by_type, links_by_name = get_links(config_directory +
                                             country_config["links_file"])
    # DB query, with yield_per(200) for memory reasons
    columns = [Data, form_tables(param_config)[form_name]]

    link_id_index = {}
    joins = []

    if query_links:
        link_data = shelve.open(base_folder + "/exported_data/" + uuid)
        link_data_query = session.query(Links).filter(Links.type == link_name).yield_per(300)
        for row in link_data_query:
            link_data[row.uuid_to] = row.data_to

    for i, l in enumerate(link_ids):
        form = aliased(form_tables(param_config)[links_by_name[l]["to_form"]])
        joins.append((form, Data.links[(l, -1)].astext == form.uuid))
        link_id_index[l] = i + 2
        columns.append(form.data)

    number_query = session2.query(func.count(Data.id)).join(
        form_tables(param_config)[form_name], Data.uuid == form_tables(param_config)[form_name].uuid)

    results = session2.query(*columns).join(
        form_tables(param_config)[form_name], Data.uuid == form_tables(param_config)[form_name].uuid)
    for join in joins:
        results = results.outerjoin(join[0], join[1])

    total_number = number_query.filter(*conditions).first()[0]
    results = results.filter(*conditions).yield_per(200)

    locs = get_locations(session)
    list_rows = []

    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    csv_content = open(filename + ".csv", "w")
    csv_writer = csv.writer(csv_content)
    csv_writer.writerows([return_keys])

    # XlsxWriter with "constant_memory" set to true, flushes mem after each row
    xls_content = open(filename + ".xlsx", "wb")
    xls_book = xlsxwriter.Workbook(xls_content, {'constant_memory': True})
    xls_sheet = xls_book.add_worksheet()

    # Little utility function to write a row to the file.
    def write_xls_row(data, row, sheet):
        for cell in range(len(data)):
            sheet.write(row, cell, data[cell])

    write_xls_row(return_keys, 0, xls_sheet)

    i = 0

    def _list_category_variables(category, data_row):
        """
        Lists the variables from the specified category that are assigned to
        the specified row. This can be used to create data columns such as
        'Age Group' using 'category$ncd_age'.
        """
        # Get the category's variables' data, indexed by ID.
        cat_variables = {}
        variable_list = ""
        db_results = session.query(AggregationVariables).filter(
            AggregationVariables.category.has_key(category)
        )
        for variable in db_results:
            cat_variables[variable.id] = variable
        # Build a string listing the row's variables from specified category.
        for var_id, var in cat_variables.items():
            if var_id in data_row[0].variables:
                variable_list += var.name + ", "
        # Remove the last comma and space.
        return variable_list[:-2]

    # Prepare each row
    for r in results:
        list_row = [''] * len(return_keys)
        if not is_child(allowed_location, r[0].clinic, locs):
            continue

        dates = {}
        for k in return_keys:
            form_var = translation_dict[k]
            index = return_keys.index(k)

            raw_data = r[1].data
            if "many_links&" in form_var:
                link_name, number, form_var = form_var.split("&")[1:]
                number = int(number)
                if link_name in r[0].links:
                    links = r[0].links[link_name]
                    if len(links) >= number + 1:
                        link_uuid = links[number]
                        raw_data = link_data[link_uuid]
                    else:
                        list_row[index] = None
                        continue

                else:
                    list_row[index] = None
                    continue

            if "icd_name$" in form_var:
                fields = form_var.split("$")
                if len(fields) > 2:
                    field = fields[1]
                else:
                    field = "icd_code"
                if raw_data[field] in icd_code_to_name[form_var]:
                    list_row[index] = icd_code_to_name[form_var][raw_data[
                        field]]
                else:
                    list_row[index] = None
            elif form_var == "clinic":
                list_row[index] = locs[r[0].clinic].name
            elif form_var == "region":
                list_row[index] = locs[r[0].region].name
            elif form_var == "zone":
                list_row[index] = locs[r[0].zone].name
            elif form_var == "district":
                if r[0].district:
                    list_row[index] = locs[r[0].district].name
                else:
                    list_row[index] = None
            elif "$year" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].year
                else:
                    list_row[index] = None
            elif "$month" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].month
                else:
                    list_row[index] = None
            elif "$day" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].day
                else:
                    list_row[index] = None
            elif "$quarter" in form_var:
                field = form_var.split("$")[0]
                if raw_data.get(field):
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    quarter = 1 + (dates[field].month - 1)//3
                    list_row[index] = quarter
                else:
                    list_row[index] = None
            elif "$epi_week" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = epi_week_for_date(dates[field])[1]
                else:
                    list_row[index] = None

            # A general framework for referencing links in the
            # download data.
            # link$<link id>$<linked form field>
            elif "gen_link$" in form_var:
                link = form_var.split("$")[1]
                link_index = link_id_index[link]
                if r[link_index]:
                    list_row[index] = r[link_index].get(
                        form_var.split("$")[2],
                        None
                    )
                else:
                    list_row[index] = None

            elif "code" == form_var.split("$")[0]:
                # code$cod_1,cod_2,Text_1,Text_2$default_value
                split = form_var.split("$")
                codes = split[1].split(",")
                text = split[2].split(",")
                if len(split) > 3:
                    default_value = split[3]
                else:
                    default_value = None
                final_text = []
                for j in range(len(codes)):
                    if codes[j] in r[0].variables:
                        final_text.append(text[j])
                if len(final_text) > 0:
                    list_row[index] = " ".join(final_text)
                else:
                    list_row[index] = default_value

            elif "category" == form_var.split("$")[0]:
                list_row[index] = _list_category_variables(
                    form_var.split("$")[1],
                    r
                )

            elif "code_value" == form_var.split("$")[0]:
                code = form_var.split("$")[1]
                if code in r[0].variables:
                    list_row[index] = float(r[0].variables[code])
                else:
                    list_row[index] = None
            elif "value" == form_var.split(":")[0]:
                list_row[index] = form_var.split(":")[1]
            elif "$to_columns$" in form_var:
                int_has_code = 0
                field = form_var.split("$")[0]
                codes = form_var.split("$")[-1].split(",")
                str_elements = raw_data.get(field)
                if isinstance(str_elements, str):
                    elements = str_elements.split(" ")
                    has_code = any(code in elements for code in codes)
                    int_has_code = int(has_code)
                list_row[index] = int_has_code
            else:
                if form_var.split("$")[0] in raw_data:
                    list_row[index] = raw_data[form_var.split("$")[0]]
                else:
                    list_row[index] = None

            # Standardise date formatting
            if "$date" in form_var:
                field = form_var.split("$")[0]
                if list_row[index]:
                    if field not in dates:
                        dates[field] = parse(list_row[index])
                    list_row[index] = dates[field].strftime(
                        "%d/%m/%Y"
                    )
                else:
                    list_row[index] = None

            # If the final value is a float, round to 2 dp.
            # This procedure ensures integers are shown as integers.
            # Also accepts string values.
            try:
                a = float(list_row[index])
                b = int(float(list_row[index]))
                if a == b:
                    list_row[index] = b
                else:
                    list_row[index] = round(a, 2)
            except (ValueError, TypeError):
                pass

            # If a translation dictionary is defined in which the key exists...
            if min_translation and k in min_translation and list_row[index]:
                tr_dict = min_translation[k]
                if list_row[index] in tr_dict:
                    list_row[index] = tr_dict[list_row[index]]
                else:
                    parts = [x.strip() for x in str(list_row[index]).split(' ')]
                    for x in range(len(parts)):
                        # Get the translation using the appropriate key.
                        # If that doesn't exist get the wild card key: *
                        # If that doesn't exist just return the value
                        parts[x] = str(
                            tr_dict.get(parts[x], tr_dict.get('*', parts[x]))
                        )
                    list_row[index] = ' '.join(list(filter(bool, parts)))

            if translation_dir and language != "en" and list_row[index]:
                list_row[index] = t.gettext(list_row[index])

        list_rows.append(list_row)
        # Can write row immediately to xls file as memory is flushed after.
        write_xls_row(list_row, i + 1, xls_sheet)
        # Append the row to list of rows to be written to csv.
        if i % 1000 == 0:
            logging.warning("{} rows completed...".format(i))
            csv_writer.writerows(list_rows)
            list_rows = []
            status.status = i / total_number
            session.commit()
        i += 1
    csv_writer.writerows(list_rows)

    csv_content.close()
    xls_book.close()

    xls_content.close()
    status.status = 1
    status.success = 1
    session.commit()

    if query_links:
        link_data.close()
        # The shelve file was created under base_folder above.
        filename = base_folder + "/exported_data/" + uuid
        logging.warning("Filename: " + filename)
        if os.path.exists(filename + ".dir"):
            os.remove(filename + ".dir")
        if os.path.exists(filename + ".dat"):
            os.remove(filename + ".dat")
    return True
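To make the special field commands concrete, a hedged sketch of a variables argument (form field names, category names and labels are invented for illustration):

    # Each entry is [form_field_or_command, display_name].
    variables = [
        ["clinic", "Clinic"],                     # resolved to the location name
        ["pt./visit_date$epi_week", "Epi Week"],  # epi week extracted from a date field
        ["icd_name$cd_tab", "Condition"],         # icd_code mapped to names in category cd_tab
        ["pt./gender$translate;{'male': 'M', 'female': 'F'}", "Gender"],
    ]
    export_category(download_uuid, "demo_case", "cd_tab", "cases_export",
                    variables, "case", allowed_location=1)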
Example #17
def export_data_table(uuid, download_name,
                      restrict_by, variables, group_by,
                      location_conditions=None,
                      start_date=None, end_date=None,
                      wide_data_format=False,
                      param_config_yaml=yaml.dump(config)):
    """
    Export an aggregated data table restricted by restrict by,

    Args:\n
      uuid: uuid for the download process
      variables: the variables we want to aggregate
      group_by: The data to group by (clinic, epi_week)
      data_orientation: long or wide data set
      start_date: The date to start the data set
      end_date: End date for the aggregation
      wide_data_format: If true the data is returned in the wide format, else in long format
      param_config: The configuration values
    """
    return_keys = []
    db, session = get_db_engine()
    locs = get_locations(session)
    list_rows = []
    operation_status = OperationStatus(download_name, uuid)
    level = "region"
    columns = []
    groups = []
    location_subs = []
    only_latest_from_clinic_in_week = False
    if "only_latest_from_clinic_in_week:" in restrict_by:
        restrict_by_variable = restrict_by.split(":")[1]
        only_latest_from_clinic_in_week = True
    else:
        restrict_by_variable = restrict_by

    for i, v in enumerate(group_by):
        field = v[0]
        if ":location" in field:
            field_column = field.split(":")[0]
            level = field_column
            location_subs.append(i)
        else:
            field_column = field

        columns.append(getattr(Data, field_column))
        groups.append(getattr(Data, field_column))
        return_keys.append(v[1])
    conditions = [Data.variables.has_key(restrict_by_variable)]
    if start_date:
        start_date = parse(start_date).replace(tzinfo=None)
        conditions.append(Data.date >= start_date)
    if end_date:
        end_date = parse(end_date).replace(tzinfo=None)
        conditions.append(Data.date <= end_date)
    for v in variables:
        if only_latest_from_clinic_in_week:
            columns.append(Data.variables[v[0]].astext.cast(Float))
        else:
            columns.append(func.sum(Data.variables[v[0]].astext.cast(Float)))
        return_keys.append(v[1])

    if only_latest_from_clinic_in_week:
        result = session.query(*columns).distinct(Data.clinic).filter(
            *conditions).order_by(Data.clinic).order_by(Data.date.desc())
    else:
        result = session.query(*columns).filter(*conditions).group_by(*groups)

    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    i = 0
    for row in result:
        row_list = list(row)
        location_condition = True
        for l in location_subs:
            if row_list[l]:
                if location_conditions:
                    tmp = getattr(locs[row_list[l]], location_conditions[0][0])
                    if location_conditions[0][1] in tmp:
                        location_condition = False
                row_list[l] = locs[row_list[l]].name
        if location_condition:
            row_list = [x if x is not None else 0 for x in row_list]
            list_rows.append(row_list)
            i += 1

    df = pandas.DataFrame(list_rows, columns=return_keys)
    if wide_data_format:
        df = df.set_index(return_keys[:-len(variables)]).unstack().fillna(0)

    df.to_csv(filename + ".csv")
    df.to_excel(filename + ".xlsx")
    operation_status.submit_operation_success()

    return True
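A hedged call sketch (variable ids and display labels are invented):

    # Total cases per district and epi week, as a wide table.
    export_data_table(
        download_uuid, "weekly_cases",
        restrict_by="tot_1",  # only rows flagged with this variable are counted
        variables=[["tot_1", "Total cases"]],
        group_by=[["district:location", "District"],
                  ["epi_week", "Epi Week"]],
        wide_data_format=True)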
Example #18
    def get(self,
            variable,
            location,
            number_per_week,
            weekend=None,
            start_week=1,
            end_date=None,
            non_reporting_variable=None,
            sublevel=None):
        inc_case_types = set(
            json.loads(request.args.get('inc_case_types', '[]')))
        exc_case_types = set(
            json.loads(request.args.get('exc_case_types', '[]')))
        if not is_allowed_location(location, g.allowed_location):
            return {}

        if not non_reporting_variable:
            non_reporting_variable = variable

        number_per_week = int(number_per_week)
        locs = abacus_util.get_locations(db.session)
        location = int(location)
        location_type = locs[location].level

        parsed_sublevel = self._get_sublevel(location_type, sublevel)

        conditions = [
            Data.variables.has_key(variable),
            or_(loc == location
                for loc in (Data.country, Data.zone, Data.region,
                            Data.district, Data.clinic)),
        ]
        # A non-empty set is already truthy, so no "!= []" check is needed
        if exc_case_types:
            conditions.append(~Data.case_type.contains(exc_case_types))
        if inc_case_types:
            conditions.append(Data.case_type.overlap(inc_case_types))
        if "tag" in request.args:
            conditions.append(Data.tags.has_key(request.args["tag"]))
        # get the data
        data = pd.read_sql(
            db.session.query(Data.region, Data.zone, Data.district,
                             Data.clinic, Data.date,
                             Data.variables[variable].label(variable)).filter(
                                 *conditions).statement, db.session.bind)

        if data.empty:
            return jsonify(self.__empty_response)
        # We drop duplicates so each clinic can only have one record per day
        data = data.drop_duplicates(
            subset=["region", "district", "clinic", "date", variable])
        shifted_end_date, timeseries_freq = self._get_shifted_end_date_and_timeseries_frequency(
            end_date)

        beginning_of_epi_start_week = self._get_epi_week_start(
            shifted_end_date, start_week)

        if parsed_sublevel:
            # We first create an index with sublevel, clinic, dates
            # Where dates are the dates after the clinic started reporting
            sublocations = []
            for l in locs.values():
                if abacus_util.is_child(location, l.id,
                                        locs) and l.level == parsed_sublevel:
                    sublocations.append(l.id)
            tuples = []
            for name in sublocations:
                for clinic in get_children(name, locs):
                    if locs[clinic].case_report:
                        # Skip clinics whose case types don't match the
                        # include/exclude filters
                        if inc_case_types and not set(
                                locs[clinic].case_type) & inc_case_types:
                            continue
                        if exc_case_types and set(
                                locs[clinic].case_type) >= exc_case_types:
                            continue
                        start_date = locs[clinic].start_date
                        if start_date < beginning_of_epi_start_week:
                            start_date = beginning_of_epi_start_week
                        # Guarantee at least one full week of expected dates
                        if shifted_end_date - start_date < timedelta(days=7):
                            start_date = (shifted_end_date -
                                          timedelta(days=6)).date()

                        for date in pd.date_range(start_date,
                                                  shifted_end_date,
                                                  freq=timeseries_freq):
                            tuples.append((name, clinic, date))
            if len(tuples) == 0:
                return jsonify(self.__empty_response)

            new_index = pd.MultiIndex.from_tuples(
                tuples, names=[parsed_sublevel, "clinic", "date"])
            # pd.Grouper replaces the removed pd.TimeGrouper
            completeness = data.groupby([
                parsed_sublevel, "clinic",
                pd.Grouper(key="date", freq=timeseries_freq, label="left")
            ]).sum().reindex(new_index)[variable].fillna(0).sort_index()

            # Drop clinics that have never reported. These come from the
            # NonReporting resource, so no separate zero-sum check is needed
            nr = NonReporting()
            non_reporting_clinics = nr.get(non_reporting_variable,
                                           location)["clinics"]
            completeness = completeness.drop(non_reporting_clinics, level=1)

            # Cap each week's count at the expected number_per_week
            completeness[completeness > number_per_week] = number_per_week

            location_completeness_per_week = completeness.groupby(
                level=2).mean()
            sublocations_completeness_per_week = completeness.groupby(
                level=[0, 2]).mean()

            # Index slices for the most recent bucket and for the full year
            idx = pd.IndexSlice
            last_two_weeks = location_completeness_per_week.index[-1:]
            last_year = location_completeness_per_week.index[:]

            # Get sublocation completeness for last two weeks as a percentage
            completeness_last_two_weeks = sublocations_completeness_per_week.loc[
                idx[:, last_two_weeks]]
            score = completeness_last_two_weeks.groupby(
                level=0).mean() / number_per_week * 100
            completeness_last_year = sublocations_completeness_per_week.loc[
                idx[:, last_year]]
            yearly_score = completeness_last_year.groupby(
                level=0).mean() / number_per_week * 100

            # Add the current location itself
            score[location] = (location_completeness_per_week[last_two_weeks]
                               .mean() / number_per_week * 100)
            yearly_score[location] = (location_completeness_per_week.mean() /
                                      number_per_week * 100)
            # Sort the timeline data
            timeline = {}
            for sl in sublocations_completeness_per_week.index.get_level_values(
                    parsed_sublevel):
                sl_time = sublocations_completeness_per_week.iloc[
                    sublocations_completeness_per_week.index.get_level_values(
                        parsed_sublevel) == sl]
                timeline[str(sl)] = {
                    "weeks": sl_time.index.get_level_values("date"),
                    "values": sl_time
                }
            # Add current location
            timeline[str(location)] = {
                "weeks": location_completeness_per_week.index,
                "values": location_completeness_per_week
            }
            # Calculate completeness score for each clinic
            clinic_completeness_last_two_weeks = completeness.loc[
                idx[:, :, last_two_weeks]]
            clinic_scores = clinic_completeness_last_two_weeks.groupby(
                level=1).mean() / number_per_week * 100
            clinic_completeness_last_year = completeness.loc[idx[:, :, :]]
            clinic_yearly_scores = clinic_completeness_last_year.groupby(
                level=1).mean() / number_per_week * 100
            dates_not_reported = []  # Not needed for this level
        else:
            # Take into account clinic start_date
            if locs[location].start_date > beginning_of_epi_start_week:
                beginning_of_epi_start_week = locs[location].start_date
            not_reported_dates_beginning = beginning_of_epi_start_week
            # Guarantee at least one full week of data
            if shifted_end_date - beginning_of_epi_start_week < timedelta(
                    days=7):
                beginning_of_epi_start_week = (shifted_end_date -
                                               timedelta(days=6)).date()

            dates = pd.date_range(beginning_of_epi_start_week,
                                  shifted_end_date,
                                  freq=timeseries_freq)
            # pd.Grouper replaces the removed pd.TimeGrouper
            completeness = data.groupby(
                pd.Grouper(key="date", freq=timeseries_freq,
                           label="left")).sum().fillna(0)[variable].reindex(
                               dates).sort_index().fillna(0)

            # Cap each week's count at the expected number_per_week
            completeness[completeness > number_per_week] = number_per_week

            timeline = {
                str(location): {
                    "weeks": [
                        d.isoformat()
                        for d in completeness.index.to_pydatetime()
                    ],
                    "values": [float(v) for v in completeness.values]
                }
            }
            last_two_weeks = completeness.index[-1:]
            score = pd.Series(dtype=float)
            score.loc[location] = (completeness[last_two_weeks].mean() /
                                   number_per_week * 100)
            yearly_score = pd.Series(dtype=float)
            yearly_score.loc[location] = (completeness.mean() /
                                          number_per_week * 100)

            # Work out the dates on which nothing was reported;
            # the weekend argument controls which weekdays we expect a record

            bdays = self._get_business_days(weekend_days=weekend)

            expected_days = pd.date_range(not_reported_dates_beginning,
                                          shifted_end_date,
                                          freq=bdays)

            found_dates = data["date"]
            dates_not_reported = expected_days.drop(
                found_dates.values, errors="ignore").to_pydatetime()
            dates_not_reported = [d.isoformat() for d in dates_not_reported]
            clinic_scores = None  # Not needed for this level
            clinic_yearly_scores = None  # Not needed for this level

        return jsonify({
            "score": series_to_json_dict(score),
            "timeline": timeline,
            "clinic_score": series_to_json_dict(clinic_scores),
            "clinic_yearly_score": series_to_json_dict(clinic_yearly_scores),
            "dates_not_reported": dates_not_reported,
            "yearly_score": series_to_json_dict(yearly_score)
        })
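
The scoring arithmetic above reduces to: cap each week's submission count at number_per_week, then average and express as a percentage. A hedged, self-contained sketch with invented numbers (the counts and the expectation of 5 reports per week are assumptions for illustration only):

    import pandas as pd

    number_per_week = 5
    # Weekly submission counts for a single clinic
    weekly_counts = pd.Series(
        [7, 4, 0, 5],
        index=pd.date_range("2024-01-01", periods=4, freq="W-MON"))
    # A clinic can never score above the expected number of reports
    weekly_counts[weekly_counts > number_per_week] = number_per_week
    score = weekly_counts.mean() / number_per_week * 100
    # capped counts are [5, 4, 0, 5] -> mean 3.5 -> score 70.0
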
Ejemplo n.º 19
0
    def get(self,
            variable,
            group_by,
            start_date=None,
            end_date=None,
            only_loc=None,
            use_ids=None,
            date_variable=None,
            additional_variables=None,
            group_by_variables=None):

        variable = str(variable)
        if not only_loc:
            if "only_loc" in request.args:
                only_loc = request.args["only_loc"]
            else:
                only_loc = g.allowed_location
        if not is_allowed_location(only_loc, g.allowed_location):
            return {}

        start_date, end_date = fix_dates(start_date, end_date)
        if "use_ids" in request.args.keys() or use_ids:
            use_ids = True
        else:
            use_ids = False

        if date_variable:
            date_conditions = [
                func.to_date(Data.variables[date_variable].astext,
                             "YYYY-MM-DDTHH-MI-SS") >= start_date,
                func.to_date(Data.variables[date_variable].astext,
                             "YYYY-MM-DDTHH-MI-SS") < end_date
            ]
        else:
            date_conditions = [Data.date >= start_date, Data.date < end_date]

        if "location" in variable:
            location_id = variable.split(":")[1]
            conditions = date_conditions + [
                or_(loc == location_id
                    for loc in (Data.country, Data.zone, Data.region,
                                Data.district, Data.clinic))
            ]
        else:
            conditions = [Data.variables.has_key(variable)] + date_conditions
            if additional_variables:
                # Add filters for any additional required variables
                for i in additional_variables:
                    conditions.append(Data.variables.has_key(i))

            if only_loc:
                conditions += [
                    or_(loc == only_loc
                        for loc in (Data.country, Data.zone, Data.region,
                                    Data.district, Data.clinic))
                ]
        epi_year_start = meerkat_abacus.util.epi_week.epi_year_start_date(
            start_date)
        # Determine which columns we want to extract from the Data table
        columns_to_extract = [func.count(Data.id).label('value')]
        if date_variable:
            columns_to_extract.append(
                func.floor(
                    extract(
                        'days',
                        func.to_date(Data.variables[date_variable].astext,
                                     "YYYY-MM-DDTHH-MI-SS") - epi_year_start) /
                    7 + 1).label("week"))
        else:
            columns_to_extract.append(
                func.floor(
                    extract('days', Data.date - epi_year_start) / 7 +
                    1).label("week"))
        # Add the columns to extract based on the group_by value;
        # we also build a names dict that translates ids to names

        if "locations" in group_by:
            # If we have locations in group_by we also specify the level at
            #  which we want to group the locations, clinic, district or region
            if ":" in group_by:
                level = group_by.split(":")[1]
            else:
                level = "clinic"

            locations = abacus_util.get_locations(db.session)
            ids = locations.keys()
            names = get_locations_by_level(level, only_loc)

            columns_to_extract += [getattr(Data, level, None)]
            group_by_query = level
        else:
            if not group_by_variables:
                names = get_variables(group_by)
            else:
                names = group_by_variables
            if len(names) == 0:
                return {}
            ids = names.keys()
            for i in ids:
                columns_to_extract.append(
                    Data.variables.has_key(str(i)).label("id" + str(i)))
            group_by_query = ",".join(["id" + str(i) for i in ids])
        if use_ids:
            names = {vid: vid for vid in names.keys()}
        start_epi_week = abacus_util.epi_week.epi_week_for_date(start_date)[1]
        end_epi_week = abacus_util.epi_week.epi_week_for_date(end_date)[1]

        # If the range spans multiple years, continue week numbering past 53
        if start_date.year != end_date.year:
            end_epi_week += 53 * (end_date.year - start_date.year)

        # DB Query
        results = db.session.query(*tuple(columns_to_extract)).filter(
            *conditions).group_by("week," + group_by_query)
        # Assemble return dict
        ret = {}
        for n in names.values():
            ret[n] = {
                "total": 0,
                "weeks": {i: 0
                          for i in range(start_epi_week, end_epi_week + 1)}
            }

        for r in results:
            # r = (value, week, *other_columns_to_extract)
            if "locations" in group_by:
                # r[2] = location
                if r[2]:
                    ret[names[r[2]]]["total"] += r[0]
                    ret[names[r[2]]]["weeks"][int(r[1])] = int(r[0])
            else:
                # r[2:] are the ids that the record has
                for i, i_d in enumerate(ids):
                    if r[i + 2]:
                        ret[names[i_d]]["total"] += r[0]
                        ret[names[i_d]]["weeks"][int(r[1])] = int(r[0])
        return ret
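
The "week" column above is computed in SQL, but the formula is plain floor-division on the day offset from the epi-year start. A pure-Python equivalent as a sanity check (the epi_year_start values here are arbitrary assumptions, not taken from the real configuration):

    from datetime import date

    def week_number(day, epi_year_start):
        # Week 1 starts on epi_year_start, matching
        # floor(extract('days', date - epi_year_start) / 7 + 1)
        return (day - epi_year_start).days // 7 + 1

    assert week_number(date(2024, 1, 1), date(2024, 1, 1)) == 1
    assert week_number(date(2024, 1, 7), date(2024, 1, 1)) == 1
    assert week_number(date(2024, 1, 8), date(2024, 1, 1)) == 2
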