コード例 #1
0
def export_form(uuid, form, allowed_location, fields=None, param_config_yaml=yaml.dump(config)):
    """
    Export a form. If fields is in the request variable we only include
    those fields.

    Starts background export

    Args:\n
       uuid: uuid of download\n
       form: the form to export\n
       allowed_location: will extract result only for this location
       fields: Fields from form to export\n

    Returns:\n
        bool: The return value. True for success, False otherwise.\n

    """

    # Runner loads the config object through a function parameter.
    param_config = yaml.load(param_config_yaml)

    db, session = get_db_engine()
    operation_status = OperationStatus(form, uuid)
    if form not in form_tables(param_config):
        operation_status.submit_operation_failure()
        return False

    location_data = all_location_data(session)
    locs_by_deviceid = location_data[1]
    if locs_by_deviceid is None:
        operation_status.submit_operation_failure()
        return False

    if fields:
        keys = fields
    else:
        keys = __get_keys_from_db(db, form, param_config)

    xls_csv_writer = XlsCsvFileWriter(base_folder, form, uuid)
    xls_csv_writer.write_xls_row(keys)
    xls_csv_writer.write_csv_row(keys)

    query_form_data = session.query(form_tables(param_config)[form].data)
    __save_form_data(xls_csv_writer, query_form_data, operation_status, keys, allowed_location, location_data)
    operation_status.submit_operation_success()
    xls_csv_writer.flush_csv_buffer()
    xls_csv_writer.close_cvs_xls_buffers()
    return True
コード例 #2
0
ファイル: dhis2_export.py プロジェクト: Shamwayne/meerkat_api
def process_form_records(form_config, program_id):
    """
    Gets meerkat form from db and sends dhis2 events in batches.
    Batch size can be configured in dhis2_config, defaults to 100.
    :param form_config: Meerkat form config for dhis2
    :param program_id: DHIS2 program id corresponding for the form
    :return: void
    """
    status = form_config['status']
    form_name = form_config['name']
    results = session.query(form_tables()[form_name].data).all()
    event_payload_list = []
    for counter, result in enumerate(results):

        data_values, event_date, completed_date, organisation_code = __prepare_data_values(result.data, form_config)
        event_payload = {
            'program': program_id,
            'orgUnit': get_dhis2_organisations_codes_to_ids().get(organisation_code),
            'eventDate': event_date,
            'completedDate': completed_date,
            'dataValues': data_values,
            'status': status
        }
        event_payload_list.append(event_payload)
        if counter % event_batch_size == 0:
            __send_events_batch(event_payload_list)
            event_payload_list = []
    __send_events_batch(event_payload_list)
コード例 #3
0
 def get(self):
     return_data = {}
     form_tables_ = form_tables()
     for form_name, form_table in form_tables_.items():
         results = db.session.query(form_table).filter(form_table.uuid != None).first()
         if results and results.data:
             return_data[form_name] = list(results.data.keys(
             )) + ["clinic", "district", "region"]
         else:
             return_data[form_name] = []
     return return_data
コード例 #4
0
    def setUp(self):
        """Setup for testing"""
        meerkat_api.app.config['TESTING'] = True
        meerkat_api.app.config['API_KEY'] = ""
        celery_app.conf.CELERY_ALWAYS_EAGER = True
        self.app = meerkat_api.app.test_client()
        self.session = db_util.session
        for table in model.form_tables():
            self.session.query(model.form_tables()[table]).delete()
        self.session.query(model.Locations).delete()

        self.session.commit()

        db_util.insert_codes(self.session)
        db_util.insert_locations(self.session)
        db_util.insert_cases(self.session, "public_health_report")
        current_directory = os.path.dirname(__file__)
        form_data = []
        for d in util.read_csv(current_directory + "/test_data/" +
                               "demo_case.csv"):
            form_data.append(d)

        data_import.add_rows_to_db(
            "demo_case",
            form_data,
            db_util.session,
            db_util.engine,
            deviceids=["1", "2", "3", "4", "5", "6"],
        )

        dr_name = config.country_config["tables"][1]
        form_data = []
        for d in util.read_csv(current_directory + "/test_data/" +
                               "demo_alert.csv"):
            form_data.append(d)
        data_import.add_rows_to_db("demo_alert",
                                   form_data,
                                   db_util.session,
                                   db_util.engine,
                                   deviceids=["1", "2", "3", "4", "5", "6"])
コード例 #5
0
def export_category(uuid, form_name, category, download_name,
                    variables, data_type, allowed_location,
                    start_date=None, end_date=None, language="en",
                    param_config_yaml=yaml.dump(config)):
    """
    We take a variable dictionary of form field name: display_name.
    There are some special commands that can be given in the form field name:

    * icd_name$category will translate an icd code in icd_code to names given
       by the variables in category
    * clinic,region and district will give this location information

    * the $translate keyword can be used to translate row values to other ones.
       I.e to change gender from male, female to M, F

    * field$month, field$year, field$epi_week: will extract the month, year
       or epi_week from the field

    * alert_links$alert_investigation$field: will get the field in the c
       orrepsonding alert_investigation

    Inserts the resulting csv file in the database

    Args:\n
       category: category to match\n
       variables: variable dictionary\n

    """
    # Runner loads the config object through a function parameter.
    param_config = yaml.load(param_config_yaml)
    country_config = param_config.country_config
    config_directory = param_config.config_directory

    # Some strings in download data need to be translated
    translation_dir = country_config.get("translation_dir", None)
    t = get_translator(param_config, language)

    db, session = get_db_engine()
    db2, session2 = get_db_engine()
    status = DownloadDataFiles(
        uuid=uuid,
        generation_time=datetime.now(),
        type=download_name,
        success=0,
        status=0
    )
    session.add(status)
    session.commit()
    res = session.query(AggregationVariables).filter(
        AggregationVariables.category.has_key(category)
    )


    locs = get_locations(session)
    data_keys = []
    cat_variables = {}
    for r in res:
        data_keys.append(r.id)
        cat_variables[r.id] = r
    if len(data_keys) == 0:
        status.status = 1
        session.commit()
    return_keys = []
    translation_dict = {}
    icd_code_to_name = {}
    link_ids = []
    min_translation = {}

    def add_translations_from_file(details):
        # Load the csv file and reader
        file_path = '{}api/{}'.format(config_directory, details['dict_file'])
        csv_file = open(file_path, 'rt')
        reader = csv.reader(csv_file)
        # Establish which column in each row we're translating from and to.
        headers = next(reader)
        from_index = headers.index(details['from'])
        to_index = headers.index(details['to'])
        # Add translations to the translation dictionary.
        trans_dict = {}
        for row in reader:
            trans_dict[row[from_index]] = row[to_index]
        return trans_dict

    # DB conditions
    conditions = [
        or_(Data.variables.has_key(key) for key in data_keys)
    ]
    if data_type:
        conditions.append(Data.type == data_type)
    if start_date:
        conditions.append(Data.date >= parse(start_date))
    if end_date:
        conditions.append(Data.date <= parse(end_date))

    # Set up icd_code_to_name if needed and determine if
    # alert_links are included
    query_links = False

    to_columns_translations = {}
    for v in variables:

        if "every$" in v[0]:
            # Want to include all the fields in the dictionary
            # in v[1] for all the links in the name

            # First determine the maximum number of links
            link_name = v[0].split("$")[1]
            length_q = session.query(
                func.max(func.jsonb_array_length(Data.links[link_name]))).filter(
                *conditions)
            length = length_q.first()[0]
            for i in range(length):
                for variable in v[1]:
                    name = link_name + "_" + str(i) + " " + variable[1]
                    return_keys.append(name)
                    translation_dict[name] = "many_links&" + link_name + "&" + str(i) + "&" + variable[0]
            query_links = link_name
        else:
            return_keys.append(v[1])
            translation_dict[v[1]] = v[0]
        if "icd_name$" in v[0]:
            category = v[0].split("$")[-1]
            cat_variables = {}
            res = session.query(AggregationVariables).filter(
                AggregationVariables.category.has_key(category)
            )
            for r in res:
                cat_variables.setdefault(r.id, [])
                cat_variables[r.id].append(r)
            icd_code_to_name[v[0]] = {}
            for i in cat_variables.keys():
                for var in cat_variables[i]:
                    condition = var.condition
                    if ";" in condition:
                        condition = condition.split(";")[0]
                    if "," in condition:
                        # If a variable have many icd codes
                        # we take all of them into account
                        codes = condition.split(",")
                    else:
                        codes = [condition]
                    for c in codes:
                        if c:
                            icd_code_to_name[v[0]][c.strip()] = var.name
        if "$translate" in v[0]:
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))
            # If the json specifies file details, load translation from file.
            if tr_dict.get('dict_file', False):
                min_translation[v[1]] = add_translations_from_file(tr_dict)
            else:
                min_translation[v[1]] = tr_dict
            v[0] = field
            translation_dict[v[1]] = v[0]

        if "$to_columns" in v[0]:
            # Create columns of every possible value
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = {}
            if ";" in trans:
                tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))

            # If the json specifies file details, load translation from file.

            # Get all possible options from the DB

            results = session2.query(
                func.distinct(
                    func.regexp_split_to_table(
                        form_tables(param_config)[form_name].data[field].astext, ' '))).join(
                Data,
                Data.uuid == form_tables(param_config)[form_name].uuid).filter(
                *conditions).all()
            if tr_dict.get('dict_file', False):
                translations = add_translations_from_file(tr_dict)
            else:
                translations = {}
            return_keys.pop()
            for r in results:
                if r[0]:
                    name = v[1] + " " + translations.get(r[0], r[0])
                    if name not in return_keys:
                        return_keys.append(name)
                        if name in translation_dict:
                            translation_dict[name] = translation_dict[name] + "," + r[0]
                        else:
                            translation_dict[name] = field + "$to_columns$" + r[0]

        if "gen_link$" in v[0]:
            link_ids.append(v[0].split("$")[1])
    if "uuid" not in return_keys:
        return_keys.append("uuid")
        translation_dict["uuid"] = "meta/instanceID"
    link_ids = set(link_ids)
    links_by_type, links_by_name = get_links(config_directory +
                                             country_config["links_file"])
    # DB query, with yield_per(200) for memory reasons
    columns = [Data, form_tables(param_config)[form_name]]

    link_id_index = {}
    joins = []

    if query_links:
        link_data = shelve.open(base_folder + "/exported_data/" + uuid)
        link_data_query = session.query(Links).filter(Links.type == link_name).yield_per(300)
        for row in link_data_query:
            link_data[row.uuid_to] = row.data_to

    for i, l in enumerate(link_ids):
        form = aliased(form_tables(param_config)[links_by_name[l]["to_form"]])
        joins.append((form, Data.links[(l, -1)].astext == form.uuid))
        link_id_index[l] = i + 2
        columns.append(form.data)

    number_query = session2.query(func.count(Data.id)).join(
        form_tables(param_config)[form_name], Data.uuid == form_tables(param_config)[form_name].uuid)

    results = session2.query(*columns).join(
        form_tables(param_config)[form_name], Data.uuid == form_tables(param_config)[form_name].uuid)
    for join in joins:
        results = results.outerjoin(join[0], join[1])

    total_number = number_query.filter(*conditions).first()[0]
    results = results.filter(*conditions).yield_per(200)

    locs = get_locations(session)
    list_rows = []

    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    csv_content = open(filename + ".csv", "w")
    csv_writer = csv.writer(csv_content)
    csv_writer.writerows([return_keys])

    # XlsxWriter with "constant_memory" set to true, flushes mem after each row
    xls_content = open(filename + ".xlsx", "wb")
    xls_book = xlsxwriter.Workbook(xls_content, {'constant_memory': True})
    xls_sheet = xls_book.add_worksheet()

    # xls_sheet = pyexcel.Sheet([keys])

    # Little utility function write a row to file.
    def write_xls_row(data, row, sheet):
        for cell in range(len(data)):
            xls_sheet.write(row, cell, data[cell])

    write_xls_row(return_keys, 0, xls_sheet)

    i = 0

    def _list_category_variables(category, data_row):
        """
        Lists the variables from the specified category that are assigned to
        the specified row. This can be used to create data columns such as
        'Age Group' using 'category$ncd_age'.
        """
        # Get the category's variables' data, indexed by ID.
        cat_variables = {}
        variable_list = ""
        db_results = session.query(AggregationVariables).filter(
            AggregationVariables.category.has_key(category)
        )
        for variable in db_results:
            cat_variables[variable.id] = variable
        # Build a string listing the row's variables from specified category.
        for var_id, var in cat_variables.items():
            if var_id in r[0].variables:
                variable_list += var.name + ", "
        # Remove the last comma and space.
        return variable_list[:-2]

    # Prepare each row
    for r in results:
        list_row = [''] * len(return_keys)
        if not is_child(allowed_location, r[0].clinic, locs):
            continue

        dates = {}
        for k in return_keys:
            form_var = translation_dict[k]
            index = return_keys.index(k)

            raw_data = r[1].data
            if "many_links&" in form_var:
                link_name, number, form_var = form_var.split("&")[1:]
                number = int(number)
                if link_name in r[0].links:
                    links = r[0].links[link_name]
                    if len(links) >= number + 1:
                        link_uuid = links[number]
                        raw_data = link_data[link_uuid]
                    else:
                        list_row[index] = None
                        continue

                else:
                    list_row[index] = None
                    continue

            if "icd_name$" in form_var:
                fields = form_var.split("$")
                if len(fields) > 2:
                    field = fields[1]
                else:
                    field = "icd_code"
                if raw_data[field] in icd_code_to_name[form_var]:
                    list_row[index] = icd_code_to_name[form_var][raw_data[
                        field]]
                else:
                    list_row[index] = None
            elif form_var == "clinic":
                list_row[index] = locs[r[0].clinic].name
            elif form_var == "region":
                list_row[index] = locs[r[0].region].name
            elif form_var == "zone":
                list_row[index] = locs[r[0].zone].name
            elif form_var == "district":
                if r[0].district:
                    list_row[index] = locs[r[0].district].name
                else:
                    list_row[index] = None
            elif "$year" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].year
                else:
                    list_row[index] = None
            elif "$month" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].month
                else:
                    list_row[index] = None
            elif "$day" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].day
                else:
                    list_row[index] = None
            elif "$quarter" in form_var:
                field = form_var.split("$")[0]
                if raw_data.get(field):
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    quarter = 1 + (dates[field].month - 1)//3
                    list_row[index] = quarter
                else:
                    list_row[index] = None
            elif "$epi_week" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = epi_week_for_date(dates[field])[1]
                else:
                    list_row[index] = None

            # A general framework for referencing links in the
            # download data.
            # link$<link id>$<linked form field>
            elif "gen_link$" in form_var:
                link = form_var.split("$")[1]
                link_index = link_id_index[link]
                if r[link_index]:
                    list_row[index] = r[link_index].get(
                        form_var.split("$")[2],
                        None
                    )
                else:
                    list_row[index] = None

            elif "code" == form_var.split("$")[0]:
                # code$cod_1,cod_2,Text_1,Text_2$default_value
                split = form_var.split("$")
                codes = split[1].split(",")
                text = split[2].split(",")
                if len(split) > 3:
                    default_value = split[3]
                else:
                    default_value = None
                final_text = []
                for j in range(len(codes)):
                    if codes[j] in r[0].variables:
                        final_text.append(text[j])
                if len(final_text) > 0:
                    list_row[index] = " ".join(final_text)
                else:
                    list_row[index] = default_value

            elif "category" == form_var.split("$")[0]:
                list_row[index] = _list_category_variables(
                    form_var.split("$")[1],
                    r
                )

            elif "code_value" == form_var.split("$")[0]:
                code = form_var.split("$")[1]
                if code in r[0].variables:
                    list_row[index] = float(r[0].variables[code])
                else:
                    list_row[index] = None
            elif "value" == form_var.split(":")[0]:
                list_row[index] = form_var.split(":")[1]
            elif "$to_columns$" in form_var:
                int_has_code = 0
                field = form_var.split("$")[0]
                codes = form_var.split("$")[-1].split(",")
                str_elements = raw_data.get(field)
                if type(str_elements) == str:
                    elements = str_elements.split(" ")
                    has_code = any(code in elements for code in codes)
                    int_has_code = int(has_code)
                list_row[index] = int_has_code
            else:
                if form_var.split("$")[0] in raw_data:
                    list_row[index] = raw_data[form_var.split("$")[0]]
                else:
                    list_row[index] = None

            # Standardise date formating
            if "$date" in form_var:
                field = form_var.split("$")[0]
                if list_row[index]:
                    if field not in dates:
                        dates[field] = parse(list_row[index])
                    list_row[index] = dates[field].strftime(
                        "%d/%m/%Y"
                    )
                else:
                    list_row[index] = None

            # If the final value is a float, round to 2 dp.
            # This proceedure ensures integers are shown as integers.
            # Also accepts string values.
            try:
                a = float(list_row[index])
                b = int(float(list_row[index]))
                if a == b:
                    list_row[index] = b
                else:
                    list_row[index] = round(a, 2)
            except (ValueError, TypeError):
                pass

            # If a translation dictionary is defined in which the key exists...
            if min_translation and k in min_translation and list_row[index]:
                tr_dict = min_translation[k]
                if list_row[index] in tr_dict:
                    list_row[index] = tr_dict[list_row[index]]
                else:
                    parts = [x.strip() for x in str(list_row[index]).split(' ')]
                    for x in range(len(parts)):
                        # Get the translation using the appropriate key.
                        # If that doesn't exist get the wild card key: *
                        # If that doesn't exist just return the value
                        parts[x] = str(
                            tr_dict.get(parts[x], tr_dict.get('*', parts[x]))
                        )
                    list_row[index] = ' '.join(list(filter(bool, parts)))

            if translation_dir and language != "en" and list_row[index]:
                list_row[index] = t.gettext(list_row[index])

        list_rows.append(list_row)
        # Can write row immediately to xls file as memory is flushed after.
        write_xls_row(list_row, i + 1, xls_sheet)
        # Append the row to list of rows to be written to csv.
        if i % 1000 == 0:
            logging.warning("{} rows completed...".format(i))
            csv_writer.writerows(list_rows)
            list_rows = []
            status.status = i / total_number
            session.commit()
        i += 1
    csv_writer.writerows(list_rows)

    csv_content.close()
    xls_book.close()

    xls_content.close()
    status.status = 1
    status.success = 1
    session.commit()

    if query_links:
        link_data.close()
        dir_path = os.path.dirname(os.path.realpath(__file__))
        filename = dir_path + "/exported_data/" + uuid
        logging.warning("Filename: " + filename)
        if os.path.exists(filename + ".dir"):
            os.remove(filename + ".dir")
        if os.path.exists(filename + ".dat"):
            os.remove(filename + ".dat")
    return True