    args = parser.parse_args()
    user = args.user[0]
    input_path = args.input[0]
    coding_mode = args.coding_mode[0]
    coded_input_directory = args.coding_input[0]
    json_output_path = args.json_output[0]

    # Load data from JSON file
    with open(input_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    if coding_mode == "coda":
        # Merge manually coded Coda files into the cleaned dataset.
        with open(path.join(coded_input_directory, "gender.csv"), "r") as f:
            data = list(
                TracedDataCodaIO.import_coda_to_traced_data_iterable(
                    user, data, "GENDER_R", "GENDER_R_clean", f, True))

        with open(path.join(coded_input_directory, "age.csv"), "r") as f:
            data = list(
                TracedDataCodaIO.import_coda_to_traced_data_iterable(
                    user, data, "AGE_R", "AGE_R_clean", f, True))

        if len(data) > 0 and "LOCATION_R" in data[0]:
            with open(path.join(coded_input_directory, "location.csv"),
                      "r") as f:
                data = list(
                    TracedDataCodaIO.import_coda_to_traced_data_iterable(
                        user, data, "LOCATION_R", "LOCATION_R_clean", f, True))

            with open(path.join(coded_input_directory, "nationality.csv"),
                      "r") as f:
                data = list(
                    TracedDataCodaIO.import_coda_to_traced_data_iterable(
                        user, data, "NATIONALITY_R", "NATIONALITY_R_clean", f,
                        True))

Example #2
    # Output messages to Coda
    IOUtils.ensure_dirs_exist_for_file(coda_output_path)
    if os.path.exists(prev_coda_path):
        # TODO: Modifying this line once the coding frame has been developed to include lots of Nones feels a bit
        # TODO: cumbersome. We could instead modify export_traced_data_iterable_to_coda to support a prev_f argument.
        # TODO: Modify by adding code scheme keys once they are ready
        scheme_keys = {
            "Relevance": None,
            "Code 1": None,
            "Code 2": None,
            "Code 3": None,
            "Code 4": None
        }
        with open(coda_output_path, "w") as f, open(prev_coda_path,
                                                    "r") as prev_f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                show_messages, show_message_key, scheme_keys, f, prev_f=prev_f)
    else:
        with open(coda_output_path, "w") as f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda(
                show_messages, show_message_key, f)

    # Randomly select some messages to export for ICR
    random.seed(0)
    random.shuffle(show_messages)
    icr_messages = show_messages[:ICR_MESSAGES_COUNT]

    # Output ICR data to a CSV file
    run_id_key = "{} (Run ID) - {}".format(variable_name, flow_name)
    raw_text_key = "{} (Text) - {}".format(variable_name, flow_name)
    IOUtils.ensure_dirs_exist_for_file(icr_output_path)
    with open(icr_output_path, "w") as f:
        TracedDataCSVIO.export_traced_data_iterable_to_csv(
            icr_messages, f,
            headers=["avf_phone_id", run_id_key, raw_text_key])
Example #3
    json_output_path = args.json_output_path[0]
    csv_output_path = args.csv_output_path[0]

    key_of_clean = "{}_clean".format(key_of_raw)

    # Load data from JSON file
    with open(json_input_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Merge coded data into the loaded data file
    if coding_mode == "coda":
        # Merge manually coded Coda files into the cleaned dataset
        with open(path.join(coded_input_path, "{}.csv".format(key_of_raw)),
                  "r") as f:
            data = list(
                TracedDataCodaIO.import_coda_to_traced_data_iterable(
                    user, data, key_of_raw, key_of_clean, f, True))
    else:
        assert coding_mode == "coding-csv", "coding_mode was not one of 'coda' or 'coding-csv'"

        # Merge manually coded CSV files into the cleaned dataset
        with open(path.join(coded_input_path, "{}.csv".format(key_of_raw)),
                  "r") as f:
            data = list(
                TracedDataCodingCSVIO.
                import_coding_csv_to_traced_data_iterable(
                    user, data, key_of_raw, key_of_clean, key_of_raw,
                    key_of_clean, f, True))

    # Write coded data back out to disk
    if os.path.dirname(json_output_path) != "" and not os.path.exists(
            os.path.dirname(json_output_path)):
        os.makedirs(os.path.dirname(json_output_path))
Example #4
    json_input_path = args.json_input_path
    coding_mode = args.coding_mode
    coded_input_path = args.coded_input_path
    json_output_path = args.json_output_path

    # Load data from JSON file
    with open(json_input_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Merge coded data into the loaded data file
    if coding_mode == "coda":
        # Merge manually coded Coda files into the cleaned dataset
        # FIXME: Set the <example-arguments> to import a particular column e.g. "age", "age_clean", "Age"
        with open(path.join(coded_input_path, "<input-file>.csv"), "r") as f:
            data = list(
                TracedDataCodaIO.import_coda_to_traced_data_iterable(
                    user, data, "<key-of-raw>", "<key-of-coded>", f, True))

        # FIXME: Re-use the above code sample for any other columns which need importing.
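
        # As an illustration only: re-using the sample above for a hypothetical
        # "age" column (the file name and keys below are placeholders, following
        # the example arguments suggested in the FIXME comments above).
        with open(path.join(coded_input_path, "age.csv"), "r") as f:
            data = list(
                TracedDataCodaIO.import_coda_to_traced_data_iterable(
                    user, data, "age", "age_clean", f, True))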
    else:
        assert coding_mode == "coding-csv", "coding_mode was not one of 'coda' or 'coding-csv'"

        # Merge manually coded CSV files into the cleaned dataset
        # FIXME: Set the <example-arguments> to import a particular column e.g. "age", "age_clean", "Age"
        with open(path.join(coded_input_path, "<input-file>.csv"), "r") as f:
            data = list(
                TracedDataCodingCSVIO.
                import_coding_csv_to_traced_data_iterable(
                    user, data, "<key_of_raw_in_data>",
                    "<key_of_coded_in_data>", "<key_of_raw_in_f>",
                    "<key_of_coded_in_f>", f, True))
Example #5
    if os.path.dirname(json_output_path) != "" and not os.path.exists(
            os.path.dirname(json_output_path)):
        os.makedirs(os.path.dirname(json_output_path))
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)

    # Output for manual verification + coding
    if coding_mode == "coda":
        # Write Coda output
        if not os.path.exists(coded_output_path):
            os.makedirs(coded_output_path)

        # FIXME: Set the <example-arguments> to export a particular column e.g. "age", "age_clean", "Age"
        with open(path.join(coded_output_path, "<output-file>.csv"), "w") as f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                data, "<key-of-raw>", "<key-of-coded>", "<name-in-Coda>", f)

        # FIXME: Re-use the above code sample to export other columns which need verifying/coding.
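
        # As an illustration only: re-using the sample above to export a
        # hypothetical "age" column (the file name, keys and Coda scheme name
        # below are placeholders, following the example arguments in the FIXME
        # comments above).
        with open(path.join(coded_output_path, "age.csv"), "w") as f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                data, "age", "age_clean", "Age", f)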
    else:
        assert coding_mode == "coding-csv", "coding_mode was not one of 'coda' or 'coding-csv'"

        # Write Coding CSV output
        if not os.path.exists(coded_output_path):
            os.makedirs(coded_output_path)

        # FIXME: Set the <example-arguments> to export a particular column e.g. "age", "age_clean", "Age"
        with open(path.join(coded_output_path, "<output-file>.csv"), "w") as f:
            TracedDataCodingCSVIO.export_traced_data_iterable_to_coding_csv_with_scheme(
                data, "<key-of-raw>", "<key-of-coded>", f)

        # FIXME: Re-use the above code sample to export other columns which need verifying/coding.
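
        # As an illustration only: the same hypothetical "age" column exported to
        # a coding CSV instead (the file name and keys below are placeholders).
        with open(path.join(coded_output_path, "age.csv"), "w") as f:
            TracedDataCodingCSVIO.export_traced_data_iterable_to_coding_csv_with_scheme(
                data, "age", "age_clean", f)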
Example #6
                        help="Path to write merged dataset to, as JSON",
                        nargs=1)

    args = parser.parse_args()
    user = args.user[0]
    json_input_path = args.input[0]
    coda_input_path = args.coda_input[0]
    output_path = args.output[0]

    col_raw = args.raw_column[0]
    col_coded = args.coded_column[0]

    # Load data from a JSON file
    with open(json_input_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Merge Coda input into the data imported from the JSON
    with open(coda_input_path, "r") as f:
        data = list(
            TracedDataCodaIO.import_coda_to_traced_data_iterable(
                user, data, col_raw, col_coded, f))

    # Write merged output
    if os.path.dirname(output_path) != "" and not os.path.exists(
            os.path.dirname(output_path)):
        os.makedirs(os.path.dirname(output_path))
    with open(output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)
Example #7
    for plan_item in merge_plans:
        coda_file_path = path.join(coded_input_path, plan_item.coda_filename)

        if not path.exists(coda_file_path):
            print("Warning: Coda file '{}' not found".format(
                plan_item.coda_filename))
            for td in surveys:
                for td_col in plan_item.scheme_keys.values():
                    td.append_data({td_col: None},
                                   Metadata(user, Metadata.get_call_location(),
                                            time.time()))
            continue

        with open(coda_file_path, "r") as f:
            TracedDataCodaIO.import_coda_to_traced_data_iterable(
                user, surveys, plan_item.key_of_raw, plan_item.scheme_keys, f,
                True)

    # Items coded under household sickness are not explicitly coded under People. For these cases, set the
    # people column to 'NC'
    sickness_people_key = "Household_Sickness (Text) - wt_practice_coded_people"
    household_sickness_key = "Household_Sickness (Text) - wt_practice_coded"
    for td in surveys:
        if td.get(household_sickness_key
                  ) is not None and td.get(sickness_people_key) is None:
            td.append_data({sickness_people_key: "NC"},
                           Metadata(user, Metadata.get_call_location(),
                                    time.time()))

    # Import Trustworthy Advisors using matrix imports
    coda_file_path = path.join(coded_input_path,
                               "Trustworthy_Advisors_coded.csv")
Example #8
    # Write json output
    if os.path.dirname(json_output_path) != "" and not os.path.exists(
            os.path.dirname(json_output_path)):
        os.makedirs(os.path.dirname(json_output_path))
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)

    if coding_mode == "coda":
        # Write Coda output
        if not os.path.exists(coded_output_directory):
            os.makedirs(coded_output_directory)

        with open(path.join(coded_output_directory, "gender.csv"), "w") as f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                data, "GENDER_R", "GENDER_R_clean", "Gender", f)

        with open(path.join(coded_output_directory, "age.csv"), "w") as f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                data, "AGE_R", "AGE_R_clean", "Age", f)

        # ATA uses LOCATION/NATIONALITY whereas BIBLIA uses LOCATION 1/LOCATION 2
        if len(data) > 0 and "LOCATION_R" in data[0]:
            with open(path.join(coded_output_directory, "location.csv"),
                      "w") as f:
                TracedDataCodaIO.export_traced_data_iterable_to_coda(
                    data, "LOCATION_R", f)
            with open(path.join(coded_output_directory, "nationality.csv"),
                      "w") as f:
                TracedDataCodaIO.export_traced_data_iterable_to_coda(
                    data, "NATIONALITY_R", f)
Example #9
    elif show_number == 5:
        key_of_raw = "S06E05_Water_Quality (Text) - wt_s06e05_activation"
        key_of_coded_prefix = "{}_coded_".format(key_of_raw)
        coda_yes_no_scheme = "Relevance"
        coda_reason_schemes = {"reason 2", "reason 3"}
    else:
        assert False, "Unrecognised show_number '{}'. Must be a number from 1-5 inclusive.".format(
            show_number)

    # Merge yes/no responses from the manually coded Coda files into the cleaned dataset
    if coda_yes_no_scheme is not None:
        with open(coded_input_path, "r") as f:
            TracedDataCodaIO.import_coda_to_traced_data_iterable(
                user,
                show_messages,
                key_of_raw,
                {coda_yes_no_scheme: "{}yes_no".format(key_of_coded_prefix)},
                f,
                overwrite_existing_codes=True)

    # Merge matrix data from the manually coded Coda files into the cleaned dataset
    with open(coded_input_path, "r") as f:
        TracedDataCodaIO.import_coda_to_traced_data_iterable_as_matrix(
            user,
            show_messages,
            key_of_raw,
            coda_reason_schemes,
            f,
            key_of_coded_prefix=key_of_coded_prefix)

    # Write coded data back out to disk
Example #10
    if os.path.dirname(csv_output_path) != "" and not os.path.exists(
            os.path.dirname(csv_output_path)):
        os.makedirs(os.path.dirname(csv_output_path))
    with open(csv_output_path, "w") as f:
        TracedDataCSVIO.export_traced_data_iterable_to_csv(
            data,
            f,
            headers=[
                "avf_phone_id",
                "{} (Run ID) - {}".format(variable_name, flow_name),
                "{} (Time) - {}".format(variable_name, flow_name),
                "{} (Text) - {}".format(variable_name, flow_name)
            ])

    # Output messages to Coda
    IOUtils.ensure_dirs_exist_for_file(coda_output_path)
    with open(coda_output_path, "w") as f:
        TracedDataCodaIO.export_traced_data_iterable_to_coda(
            data, "{} (Text) - {}".format(variable_name, flow_name), f)

    # Get 200 non-noise messages and output to CSVs for ICR.
    print("Noise items:")
    show_message_key = "{} (Text) - {}".format(variable_name, flow_name)
    not_noise = []
    for td in data:
        if somali.DemographicCleaner.is_noise(td[show_message_key]):
            print(td[show_message_key])
        else:
            not_noise.append(td)

    # Take 200 items pseudo-randomly for ICR
    random.seed(0)
    random.shuffle(not_noise)
    icr_messages = not_noise[:200]
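
    # A minimal sketch of how this ICR sample would then typically be written out,
    # following the CSV export pattern earlier in this example; icr_output_path is
    # assumed to be defined earlier in this script.
    with open(icr_output_path, "w") as f:
        TracedDataCSVIO.export_traced_data_iterable_to_csv(
            icr_messages,
            f,
            headers=[
                "avf_phone_id",
                "{} (Run ID) - {}".format(variable_name, flow_name),
                "{} (Text) - {}".format(variable_name, flow_name)
            ])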
Example #11
    # Write json output
    if os.path.dirname(json_output_path) != "" and not os.path.exists(
            os.path.dirname(json_output_path)):
        os.makedirs(os.path.dirname(json_output_path))
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)

    # Output for manual verification + coding
    if coding_mode == "coda":
        # Write Coda output
        if not os.path.exists(coded_output_path):
            os.makedirs(coded_output_path)

        with open(path.join(coded_output_path, "{}.csv".format(key_of_raw)),
                  "w") as f:
            TracedDataCodaIO.export_traced_data_iterable_to_coda(
                data, key_of_raw, f)
    else:
        assert coding_mode == "coding-csv", "coding_mode was not one of 'coda' or 'coding-csv'"

        # Write Coding CSV output
        if not os.path.exists(coded_output_path):
            os.makedirs(coded_output_path)

        with open(path.join(coded_output_path, "{}.csv".format(key_of_raw)),
                  "w") as f:
            TracedDataCodingCSVIO.export_traced_data_iterable_to_coding_csv(
                data, key_of_raw, f)
Example #12
                                        time.time()))

    # Write json output
    IOUtils.ensure_dirs_exist_for_file(json_output_path)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(all_survey_data,
                                                             f,
                                                             pretty_print=True)

    # Output for manual verification + coding
    IOUtils.ensure_dirs_exist(coded_output_path)
    # TODO: Tidy up the usage of keys here once the format of the keys has been updated.
    for key in cleaning_plan.keys():
        coded_output_file_path = path.join(coded_output_path,
                                           "{}.csv".format(key.split(" ")[0]))
        prev_coded_output_file_path = path.join(
            prev_coded_path, "{}_coded.csv".format(key.split(" ")[0]))

        if os.path.exists(prev_coded_output_file_path):
            with open(coded_output_file_path,
                      "w") as f, open(prev_coded_output_file_path,
                                      "r") as prev_f:
                TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                    all_survey_data, key,
                    {key.split(" ")[0]: "{}_clean".format(key)}, f, prev_f)
        else:
            with open(coded_output_file_path, "w") as f:
                TracedDataCodaIO.export_traced_data_iterable_to_coda_with_scheme(
                    all_survey_data, key,
                    {key.split(" ")[0]: "{}_clean".format(key)}, f)
Example #13
    gender_col_clean = "{}_clean".format(
        gender_col)  # Appending _clean follows AVF practice in Dreams

    # Load data from JSON file
    with open(input_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)

    # Clean data
    for td in data:
        cleaned = DemographicCleaner.clean_gender(td[gender_col])
        td.append_data({gender_col_clean: cleaned},
                       Metadata(user, Metadata.get_call_location(),
                                time.time()))

    # Write json output
    if os.path.dirname(json_output_path) != "" and not os.path.exists(
            os.path.dirname(json_output_path)):
        os.makedirs(os.path.dirname(json_output_path))
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)

    # Write Coda output
    if os.path.dirname(coda_output_path) != "" and not os.path.exists(
            os.path.dirname(coda_output_path)):
        os.makedirs(os.path.dirname(coda_output_path))
    with open(coda_output_path, "w") as f:
        TracedDataCodaIO.export_traced_data_iterable_to_coda(
            data, gender_col, f, exclude_coded_with_key=gender_col_clean)