Example #1
def main(user, google_cloud_credentials_file_path, pipeline_configuration_file_path, raw_data_dir):
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        pipeline_configuration.phone_number_uuid_table.firebase_credentials_file_url
    ))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials,
        "avf-phone-uuid-"
    )
    log.info("Initialised the Firestore UUID table")

    log.info(f"Fetching data from {len(pipeline_configuration.raw_data_sources)} sources...")
    for i, raw_data_source in enumerate(pipeline_configuration.raw_data_sources):
        log.info(f"Fetching from source {i + 1}/{len(pipeline_configuration.raw_data_sources)}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path, raw_data_dir, phone_number_uuid_table,
                                 raw_data_source)
        elif isinstance(raw_data_source, GCloudBucketSource):
            fetch_from_gcloud_bucket(google_cloud_credentials_file_path, raw_data_dir, raw_data_source)
        elif isinstance(raw_data_source, RecoveryCSVSource):
            fetch_from_recovery_csv(user, google_cloud_credentials_file_path, raw_data_dir, phone_number_uuid_table,
                                    raw_data_source)

        else:
            assert False, f"Unknown raw_data_source type {type(raw_data_source)}"
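
A minimal sketch (not from the original script) of how this fetch `main` might be wired to a command line, assuming argument names that mirror the function's parameters:

import argparse

# Hypothetical CLI wrapper for the fetch script above; the description and metavars are assumptions.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fetches the raw data for this project")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("raw_data_dir", metavar="raw-data-dir",
                        help="Directory to write the fetched raw data to")

    args = parser.parse_args()
    main(args.user, args.google_cloud_credentials_file_path, args.pipeline_configuration_file_path, args.raw_data_dir)
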
import argparse

import geopandas
import matplotlib.pyplot as plt
import plotly.express as px
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import AnalysisUtils
from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs
CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs automated analysis over the outputs produced by "
        "`generate_outputs.py`, and optionally uploads the outputs to Drive.")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")

    parser.add_argument(
Example #3
import argparse

import geopandas
import matplotlib.pyplot as plt
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import AnalysisUtils
from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs
CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs automated analysis over the outputs produced by "
        "`generate_outputs.py`, and optionally uploads the outputs to Drive.")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")

    parser.add_argument(
Example #4
import argparse
import csv
import json

from core_data_modules.cleaners import Codes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=
        "Generates lists of phone numbers to advertise to using project "
        "traced data and KK exclusion lists")

    parser.add_argument(
        "--exclusion-list-file-path",
        nargs="?",
        help="List of phone numbers to exclude from the ad group")
    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path",
import argparse
import json
import os

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src.lib import PipelineConfiguration

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Uploads output files")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file-path",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("run_id",
                        metavar="run-id",
                        help="Identifier of this pipeline run")
    parser.add_argument(
        "production_csv_input_path",
        metavar="production-csv-input-path",
import argparse
import json
import os

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src import AutoCodeShowMessages, AutoCodeSurveys, CombineRawDatasets, \
    ProductionFile, TranslateRapidProKeys, AnalysisFile, ApplyManualCodes
from src.lib import PipelineConfiguration

Logger.set_project_name("LQ")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs the post-fetch phase of the ReDSS pipeline",
        # Support \n and long lines
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("user", help="User launching this program")
    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
Example #7
import json
import argparse
from dateutil.parser import isoparse

from temba_client.v2 import Message
from core_data_modules.cleaners import PhoneCleaner
from core_data_modules.logging import Logger

log = Logger(__name__)
log.set_project_name("ComputeWindowOfDowntime")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Compute maximum window of time with 0 messages")
    parser.add_argument(
        "raw_messages_file_path",
        metavar="input-file",
        help="File to read the raw messages data downloaded as JSON")
    parser.add_argument(
        "window_of_downtimes_output_file_path",
        metavar="output-file",
        help="File to write the raw messages data downloaded as jSON.")
    parser.add_argument("target_operator",
                        metavar="operator",
                        help="Operator to analyze for downtime")
    parser.add_argument(
        "target_message_direction",
        metavar="direction-of-message",
        choices=('in', 'out'),
        help="Direction of messages to limit the search for downtime to")
    parser.add_argument(
Example #8
import argparse

import geopandas
import matplotlib.pyplot as plt
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import AnalysisUtils
from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs
CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Runs automated analysis over the outputs produced by "
                                                 "`generate_outputs.py`, and optionally uploads the outputs to Drive.")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")

    parser.add_argument("messages_json_input_path", metavar="messages-json-input-path",
                        help="Path to a JSONL file to read the TracedData of the messages data from")
    parser.add_argument("individuals_json_input_path", metavar="individuals-json-input-path",
import argparse

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

Logger.set_project_name("OCHA")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Uploads output files")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file-path",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("run_id", metavar="run-id",
                        help="Identifier of this pipeline run")
    parser.add_argument("memory_profile_file_path", metavar="memory-profile-file-path",
                        help="Path to the memory profile log file to upload")
    parser.add_argument("data_archive_file_path", metavar="data-archive-file-path",
                        help="Path to the data archive file to upload")

    args = parser.parse_args()

    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
Example #10
# Validation tool for firebase collections
# Relies on an undocumented .collections() API call and a hard coded list of top level collections

import time
import json
import sys
import os.path

import firebase_util
import validate_nook_model as model
import validate_nook_model_custom as custom
from core_data_modules.logging import Logger

log = Logger(__name__)
firebase_client = None


def validate_documents(collection_root, validationMethod):
    log.info(f"validate_documents {collection_root}")

    time_start = time.perf_counter_ns()

    doc_count = 0
    for doc in firebase_client.collection(collection_root).stream():
        log.info(f"validating '{doc.id}'")
        data = doc.to_dict()
        try:
            validationMethod("doc", doc.id, data)
        except model.ValidationError as e:
            print(f"")
            print(f"Validation failed:")
import argparse
import csv
import json

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration
from src.lib.code_schemes import CodeSchemes

Logger.set_project_name("UNDP-RCO")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=
        "Generates lists of phone numbers of UNDP-RCO respondents who "
        "reported living in baidoa or bossaso")

    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument(
Example #12
import argparse
import csv

from core_data_modules.logging import Logger
from core_data_modules.util import TimeUtils
from rapid_pro_tools.rapid_pro_client import RapidProClient
from storage.google_cloud import google_cloud_utils
from temba_client.v2 import Message

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Downloads all inbound messages from Rapid Pro and exports "
                                                 "the phone numbers we heard from")

    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("rapid_pro_domain", help="URL of the Rapid Pro server to download data from")
    parser.add_argument("rapid_pro_token_file_url", metavar="rapid-pro-token-file-url",
                        help="GS URLs of a text file containing the authorisation token for the Rapid Pro server")
    parser.add_argument("output_file_path", metavar="output-file-path",
                        help="Output CSV file to write the phone numbers to")

    args = parser.parse_args()

    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    rapid_pro_domain = args.rapid_pro_domain
    rapid_pro_token_file_url = args.rapid_pro_token_file_url
    output_file_path = args.output_file_path
import argparse
import json

from core_data_modules.logging import Logger
from core_data_modules.util import TimeUtils
from src import FirestoreWrapper
from storage.google_cloud import google_cloud_utils

from rapid_pro_tools.rapid_pro_client import RapidProClient

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=
        "Downloads the definitions for all the flows being used by this "
        "project, and uploads them to a bucket.")

    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument(
        "firestore_credentials_url",
        metavar="firestore-credentials-url",
        help=
        "GS URL to the credentials file to use to access the Firestore instance containing "
        "the operations statistics")
Example #14
import argparse

import plotly.express as px
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from configuration.code_schemes import CodeSchemes
from src import AnalysisUtils
from src.lib import PipelineConfiguration
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs
CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Runs automated analysis over the outputs produced by "
                                                 "`generate_outputs.py`, and optionally uploads the outputs to Drive.")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
import argparse
import csv
import sys

from core_data_modules.cleaners import Codes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import PhoneNumberUuidTable

Logger.set_project_name("OCHA")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Exports a list of phone numbers for the consenting participants "
                                                 "to REACH")

    parser.add_argument("traced_data_path", metavar="traced-data-path",
                        help="Path to the REACH traced data file to extract phone numbers from")
    parser.add_argument("phone_number_uuid_table_path", metavar="phone-number-uuid-table-path",
                        help="JSON file containing the phone number <-> UUID lookup table for the messages/surveys "
                             "datasets")
    parser.add_argument("output_path", metavar="output-path",
                        help="CSV file to write the REACH contacts to")

    args = parser.parse_args()

    traced_data_path = args.traced_data_path
    phone_number_uuid_table_path = args.phone_number_uuid_table_path
    output_path = args.output_path

    sys.setrecursionlimit(15000)
Example #16
import argparse
import json
import os
from glob import glob

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src.lib import PipelineConfiguration

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Uploads analysis output files to google drive")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_run_mode",
                        help="whether to generate analysis files or not",
                        choices=["all-stages", "auto-code-only"])
    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file-path",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("run_id",
Example #17
import argparse
import glob
import json
from collections import OrderedDict

import altair
from core_data_modules.cleaners import Codes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src.lib import PipelineConfiguration
from src.lib.pipeline_configuration import CodingModes

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generates graphs for analysis")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")

    parser.add_argument("messages_json_input_path", metavar="messages-json-input-path",
                        help="Path to a JSONL file to read the TracedData of the messages data from")
Example #18
import argparse
import hashlib
import json
import sys
import time

from core_data_modules.logging import Logger
from datetime import datetime, timezone, timedelta

import demogs_helper as demogs

log = Logger(__name__)

CONVERSATIONS_COLLECTION_KEY = 'nook_conversations'
CONVERSATION_SHARDS_COLLECTION_KEY = 'nook_conversation_shards'
CONVERSATION_TAGS_COLLECTION_KEY = 'conversationTags'
DAILY_TAG_METRICS_COLLECTION_KEY = 'daily_tag_metrics'
TOTAL_COUNTS_METRICS_COLLECTION_KEY = 'total_counts_metrics'
NEEDS_REPLY_METRICS_COLLECTION_KEY = 'needs_reply_metrics'

NEEDS_REPLY_TAG = "Needs Reply"
ESCALATE_TAG = "escalate"

KK_PROJECT = None

coda_tags = {}

tag_id_to_name = {}


def tag_ids(tags):
Example #19
from core_data_modules.analysis import AnalysisConfiguration, engagement_counts, theme_distributions, \
    repeat_participations, sample_messages, traffic_analysis, analysis_utils
from core_data_modules.analysis.mapping import participation_maps, somalia_mapper
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from dateutil.parser import isoparse

from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs
CONSENT_WITHDRAWN_KEY = "consent_withdrawn"
SENT_ON_KEY = "sent_on"


def coding_plans_to_analysis_configurations(coding_plans):
    analysis_configurations = []
    for plan in coding_plans:
        for cc in plan.coding_configurations:
            if not cc.include_in_theme_distribution:
                continue

            analysis_configurations.append(
                AnalysisConfiguration(cc.analysis_file_key, plan.raw_field, cc.coded_field, cc.code_scheme)
Example #20
import argparse
import json
import os

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src import LoadData, TranslateRapidProKeys, AutoCode, ProductionFile, \
    ApplyManualCodes, AnalysisFile, WSCorrection
from src.lib import PipelineConfiguration, MessageFilters
from configurations.code_schemes import CodeSchemes

Logger.set_project_name("WUSC-KEEP-II")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs the post-fetch phase of the pipeline")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file",
Example #21
# Migration tool for firebase collections
# Relies on an undocumented .collections() API call and a hard coded list of top level collections

import time
import json
import sys
import os.path

from firebase_admin import firestore
import migrate_nook_model
import firebase_util
from core_data_modules.logging import Logger

log = Logger(__name__)
firebase_client = None


def read_document_ids(collection_root):
    log.info(f"read_document_ids {collection_root}")
    cache_file_path = f"{cache_dir}/{collection_root}_doc_ids.json"
    doc_ids = []

    if reset_flag and os.path.exists(cache_file_path):
        os.remove(cache_file_path)

    if os.path.isfile(cache_file_path):
        log.info(f"reloading cached ids from {cache_file_path}")
        with open(cache_file_path, "r") as f:
            doc_ids = json.load(f)
    else:
        time_start = time.perf_counter_ns()
import pytz
from core_data_modules.cleaners import Codes, PhoneCleaner
from core_data_modules.cleaners.cleaning_utils import CleaningUtils
from core_data_modules.logging import Logger
from core_data_modules.traced_data import Metadata, TracedData
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils, TimeUtils, SHAUtils
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from rapid_pro_tools.rapid_pro_client import RapidProClient
from storage.google_cloud import google_cloud_utils
from temba_client.v2 import Contact, Run

from src.lib import PipelineConfiguration, CodeSchemes
from src.lib.pipeline_configuration import RapidProSource, GCloudBucketSource, ShaqadoonCSVSource

Logger.set_project_name("OCHA")
log = Logger(__name__)


def label_somalia_operator(user, traced_runs, phone_number_uuid_table):
    # Set the operator codes for each message.
    uuids = {td["avf_phone_id"] for td in traced_runs}
    uuid_to_phone_lut = phone_number_uuid_table.uuid_to_data_batch(uuids)
    for td in traced_runs:
        operator_raw = uuid_to_phone_lut[td["avf_phone_id"]][:5]  # Returns the country code 252 and the next two digits

        operator_code = PhoneCleaner.clean_operator(operator_raw)
        if operator_code == Codes.NOT_CODED:
            operator_label = CleaningUtils.make_label_from_cleaner_code(
                CodeSchemes.SOMALIA_OPERATOR,
                CodeSchemes.SOMALIA_OPERATOR.get_code_with_control_code(Codes.NOT_CODED),
import argparse
import json

from core_data_modules.logging import Logger
from dateutil.parser import isoparse
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from rapid_pro_tools.rapid_pro_client import RapidProClient
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)

# TODO: Read these from pipeline configuration rather than hard-coding
rapid_pro_domain = "textit.in"
rapid_pro_token_url = "gs://avf-credentials/covid19-2-text-it-token.txt"
demog_flow_name = "undp_kenya_s01_demog"
demogs_attempted_variable = "undp_kenya_s01_demogs_attempted"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Triggers demogs to people who haven't yet received them")

    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
        "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file",
Example #24
import argparse
import json

from core_data_modules.data_models import Message, Label, Origin
from core_data_modules.logging import Logger
from core_data_modules.traced_data import Metadata
from core_data_modules.util import TimeUtils

Logger.set_project_name("CodeMerge")
log = Logger(__name__)

parser = argparse.ArgumentParser(
    description="Performs a code merge on a local dataset. "
    "To use with Coda, use get.py, code_merge.py, then set.py")
parser.add_argument(
    "messages_input_file_path",
    metavar="messages-input-file-path",
    help="Path to the file to read the Coda messages to be code-merged from")
parser.add_argument("code_ids_to_merge",
                    metavar="code-ids-to-merge",
                    nargs="+",
                    help="Ids of the codes to merge")
parser.add_argument("merged_code_id",
                    metavar="merged-code-id",
                    help="Id of the code to merge the source codes to")
parser.add_argument(
    "messages_output_file_path",
    metavar="messages-output-file-path",
    help=
    "Path to the Coda messages file to write the messages to after performing the code merge"
)
import argparse

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import LoadData, TranslateSourceKeys, AutoCode, ProductionFile, \
    ApplyManualCodes, AnalysisFile, WSCorrection
from src.lib import PipelineConfiguration, MessageFilters

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs the post-fetch phase of the pipeline")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_run_mode",
                        help="whether to generate analysis files or not",
                        choices=["all-stages", "auto-code-only"])
    parser.add_argument("pipeline_configuration_file_path",
                        metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")

    parser.add_argument(
        "raw_data_dir",
        metavar="raw-data-dir",
        help=
        "Path to a directory containing the raw data files exported by fetch_raw_data.py"
    )
    parser.add_argument(
Example #26
import argparse

from core_data_modules.cleaners.codes import KenyaCodes
from core_data_modules.cleaners.location_tools import KenyaLocations
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from configuration.code_schemes import CodeSchemes
from src import AnalysisUtils
from src.lib import PipelineConfiguration
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs
CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs automated analysis over the outputs produced by "
        "`generate_outputs.py`, and optionally uploads the outputs to Drive.")

    parser.add_argument("user", help="User launching this program")
    parser.add_argument(
        "google_cloud_credentials_file_path",
        metavar="google-cloud-credentials-file-path",
        help=
        "Path to a Google Cloud service account credentials file to use to access the "
Example #27
import argparse

from dateutil.parser import isoparse
from core_data_modules.logging import Logger
from core_data_modules.cleaners import PhoneCleaner
from temba_client.v2 import Message


def date_time_range(start, end, delta):
    current = start
    intervals = []
    while current < end:
        intervals.append(current)
        current += delta
    return intervals
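
# Illustrative usage sketch of date_time_range (hypothetical values, not part of the original script):
# hourly interval starts across one day, end exclusive, giving 24 datetimes.
from datetime import datetime, timedelta

_example_intervals = date_time_range(datetime(2020, 1, 1), datetime(2020, 1, 2), timedelta(hours=1))
assert len(_example_intervals) == 24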


log = Logger(__name__)
log.set_project_name("ComputeMessagesBetweenTwoFirebaseTimePeriods")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Compute message difference between two firebase time periods `the time period for firebase is a constant number`")
    parser.add_argument("raw_messages_input_file_path", metavar="raw-messages-input-file-path",
                        help="File to read the serialized Rapid Pro message data from")
    parser.add_argument("messages_difference_per_two_firebase_time_period_output_file_path", metavar="message-difference-output-file-path",
                        help=" File to write the messages difference between two firebase time periods data downloaded as JSON")
    parser.add_argument("target_operator", metavar="target-operator",
                        help="Operator to compute message difference between two firebase time periods")
    parser.add_argument("target_message_direction", metavar="target-message-direction", choices=('in', 'out'),
                        help="Direction of messages to limit the search for downtime to")
    parser.add_argument("start_date", metavar="start-date", type=lambda s: isoparse(s),
                        help="The start date as ISO 8601 string from which the number of messages will be computed")
import argparse
import csv
import json

from core_data_modules.cleaners import PhoneCleaner
from core_data_modules.logging import Logger
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="De-identifies a CSV by converting the phone numbers in "
                                                 "the specified column to avf phone ids")

    parser.add_argument("csv_input_path", metavar="recovered-csv-input-url",
                        help="Path to a CSV file to de-identify a column of")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("column_to_de_identify", metavar="column-to-de-identify",
                        help="Name of the column containing phone numbers to be de-identified")
    parser.add_argument("de_identified_csv_output_path", metavar="de-identified-csv-output-path",
                        help="Path to write the de-identified CSV to")

    args = parser.parse_args()
import time

from core_data_modules.cleaners import Codes
from core_data_modules.cleaners.cleaning_utils import CleaningUtils
from core_data_modules.logging import Logger
from core_data_modules.traced_data import Metadata
from core_data_modules.traced_data.io import TracedDataCodaV2IO

from src.lib import PipelineConfiguration
from src.lib.configuration_objects import CodingModes

log = Logger(__name__)


class _WSUpdate(object):
    def __init__(self, message, timestamp, source_field, source_td):
        self.message = message
        self.timestamp = timestamp
        self.source_field = source_field
        self.source_td = source_td


class WSCorrection(object):
    @staticmethod
    def move_wrong_scheme_messages(user, data, coda_input_dir):
        log.info("Importing manually coded Coda files to '_WS' fields...")
        for plan in PipelineConfiguration.RQA_CODING_PLANS + PipelineConfiguration.SURVEY_CODING_PLANS:
            if plan.coda_filename is None:
                continue

            TracedDataCodaV2IO.compute_message_ids(user, data, plan.raw_field,
import argparse
import os
import re

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)


def get_file_paths(dir_path):
    # search for .gzip (data archive) and .profile (memory profile) files only because os.listdir(dir_path)
    # returns all files in the directory
    files_list = [
        file for file in os.listdir(dir_path)
        if file.endswith((".gzip", ".profile"))
    ]
    file_paths = [os.path.join(dir_path, basename) for basename in files_list]

    return file_paths
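
# Brief usage sketch of get_file_paths (hypothetical directory; illustrative, not part of the original script):
#   file_paths = get_file_paths("pipeline_outputs/")
#   log.info(f"Found {len(file_paths)} .gzip/.profile files to upload")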


def get_uploaded_file_dates(uploaded_files_list, date_pattern):
    dates_match = [
        re.search(date_pattern, file) for file in uploaded_files_list
    ]
    uploaded_file_dates = []
    for date_match in dates_match:
        if date_match is None: