def load_geo_pub(base_path, conf, schema_name="schema_name"):
    """Unpack, convert, merge and load the geographic PUB data

    Works on the directory base_path/geo_pub: every archive in it is handed
    to dbu.unpack_all_in_dir(), the directory is passed to dbu.dbfs_to_csv(),
    the resulting csvs are merged into "geo_pub.csv" by
    dbu.merge_csvs_in_dir(), and that merged file is given to
    dbu.csv_to_db_table() under the table name "geo_pub".

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/geo_pub" should contain *.rar files containing the data.
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Writes geo_pub.csv into base_path/geo_pub and loads it as
     the table geo_pub to the database specified in conf.
    """
    logger.info("Loading geographic pub data...")
    geo_pub_dir = os.path.join(base_path, "geo_pub")
    merged_csv = "geo_pub.csv"

    dbu.unpack_all_in_dir(geo_pub_dir)
    dbu.dbfs_to_csv(geo_pub_dir)
    dbu.merge_csvs_in_dir(geo_pub_dir, merged_csv)

    merged_path = os.path.join(geo_pub_dir, merged_csv)
    dbu.csv_to_db_table(merged_path, conf, "geo_pub", schema_name)
def load_home_verification(base_path, conf, schema_name="schema_name"):
    """Load the 2013 and 2014 home verification data

    Unpacks "VERIFICACION.rar" inside base_path/home_verification and then,
    for each of the years 2013 and 2014, converts the dbf files found in
    VERIFICACION/<year>, merges the csvs into home_verification_<year>.csv
    and hands that file to dbu.csv_to_db_table() under the table name
    home_verification_<year>.

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/home_verification" should contain the archive
     "VERIFICACION.rar".
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the tables to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Loads the tables home_verification_2013 and
     home_verification_2014 to the database specified in conf.
    """
    logger.info("Loading home verification data...")
    verification_dir = os.path.join(base_path, "home_verification")
    dbu.unpack_in_place(verification_dir, "VERIFICACION.rar")

    for year in ("2013", "2014"):
        year_dir = os.path.join(verification_dir, "VERIFICACION", year)
        table_name = "home_verification_" + year
        merged_csv = table_name + ".csv"

        dbu.dbfs_to_csv(year_dir)
        dbu.merge_csvs_in_dir(year_dir, merged_csv)
        merged_path = os.path.join(year_dir, merged_csv)
        dbu.csv_to_db_table(merged_path, conf, table_name, schema_name)
def load_equivalencias(base_path, conf, schema_name="schema_name"):
    """Load equivalencias data

    This function assumes the directory structure described at the start of
    this module. Further, it uses a dictionary called conf to connect to the
    database.

    All archives in base_path/equivalencias are unpacked, the file
    TABLA_DE_EQUIVALENCIA_ABR16.dbf is passed through
    dbu.iconv_conversion(), the directory's dbf files are converted to csv,
    and TABLA_DE_EQUIVALENCIA_ABR16.csv is handed to dbu.csv_to_db_table()
    under the table name "equivalencias".

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. This specifically
     needs a directory called equivalencias containing the file
     CATALOGO_LOCALIDADES_EQUIVALENCIAS.rar (presumably the archive that
     yields TABLA_DE_EQUIVALENCIA_ABR16.dbf -- confirm).
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Loads the table equivalencias to the database specified
     in conf.
    """
    # Every other loader in this module announces itself; do the same here
    # so the progress log has no gap.
    logger.info("Loading equivalencias data...")

    archive_dir = os.path.join(base_path, "equivalencias")
    dbu.unpack_all_in_dir(archive_dir)

    # Extension-less path shared by the .dbf input and the .csv output.
    equiv_file = os.path.join(archive_dir, "TABLA_DE_EQUIVALENCIA_ABR16")

    # The conversion runs on the dbf *before* it is turned into a csv.
    dbu.iconv_conversion(equiv_file + ".dbf")
    dbu.dbfs_to_csv(archive_dir)

    dbu.csv_to_db_table(equiv_file + ".csv", conf, "equivalencias",
                        schema_name)
def load_sepomex(base_path, conf, schema_name="schema_name"):
    """Load the SEPOMEX data

    This function assumes the directory structure described at the start of
    this module. Further, it uses a dictionary called conf to connect to the
    database.

    "MX.zip" is unpacked inside base_path/sepomex, the tabs in the resulting
    "MX.txt" are replaced in place by commas using sed, and the file is
    handed to dbu.csv_to_db_table() under the table name "sepomex".

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/sepomex" should contain the file "MX.zip".
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Rewrites base_path/sepomex/MX.txt in place and loads the
     table sepomex to the database specified in conf. A failing sed call is
     logged as a warning and the load is still attempted.
    """
    # Imported locally so this function does not depend on the module's
    # import block already providing subprocess.
    import subprocess

    logger.info("Loading SEPOMEX data...")
    sepomex_dir = os.path.join(base_path, "sepomex")
    dbu.unpack_in_place(sepomex_dir, "MX.zip", "zip")

    # additional conversion from tsv to csv
    # NOTE(review): a plain tab -> comma substitution splits any field that
    # itself contains a comma -- confirm the data has none.
    file_path = os.path.join(sepomex_dir, "MX.txt")

    # An argument list bypasses the shell, so paths with spaces or shell
    # metacharacters are passed to sed untouched. "\t" is expanded by Python
    # to a literal tab, exactly as in the former shell command string.
    sed_cmd = ["sed", "-i", "s/\t/,/g", file_path]
    try:
        subprocess.check_call(sed_cmd)
    except (OSError, subprocess.CalledProcessError) as err:
        # The previous os.system() call dropped failures silently; keep going
        # as before, but leave a trace of what went wrong.
        logger.warning("Could not convert %s from tsv to csv: %s",
                       file_path, err)

    # NOTE(review): the trailing False is passed positionally to
    # dbu.csv_to_db_table(); presumably it flags a missing header row --
    # confirm against dbu.
    dbu.csv_to_db_table(file_path, conf, "sepomex", schema_name, False)
def load_geo_sifode(base_path, conf, schema_name="schema_name"):
    """Unpack, convert, merge and load the geographic SIFODE data

    Works on the directory base_path/sifode: its archives are unpacked, the
    directory is passed to dbu.dbfs_to_csv(), the csvs are merged into
    "sifode.csv", and that file is handed to dbu.csv_to_db_table() under the
    table name "geo_cuis".

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/sifode" should contain .rar files with names like
     "SIFODE_DOMICILIOS_NOMVIAL_1.rar"...
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Writes sifode.csv into base_path/sifode and loads it as
     the table geo_cuis to the database specified in conf.
    """
    logger.info("Loading geographic sifode data...")
    data_dir = os.path.join(base_path, "sifode")
    merged_csv = "sifode.csv"

    # Each step operates on the same directory, in this fixed order.
    dbu.unpack_all_in_dir(data_dir)
    dbu.dbfs_to_csv(data_dir)
    dbu.merge_csvs_in_dir(data_dir, merged_csv)

    merged_path = os.path.join(data_dir, merged_csv)
    dbu.csv_to_db_table(merged_path, conf, "geo_cuis", schema_name)
def load_coneval(base_path, conf, schema_name="schema_name"):
    """Load the two CONEVAL csv files

    No unpacking or conversion happens here: the csv files are read directly
    from base_path/coneval and handed to dbu.csv_to_db_table(), one table per
    file.

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/coneval" should contain the files "pobreza_14.csv" and
     "indice_de_rezago_social.csv".
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the tables to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Loads the tables coneval_pobreza and
     indice_de_rezago_social to the database specified in conf.
    """
    logger.info("Loading CONEVAL...")
    coneval_dir = os.path.join(base_path, "coneval")

    # (csv file name, destination table name), loaded in this order.
    sources = (
        ("pobreza_14.csv", "coneval_pobreza"),
        ("indice_de_rezago_social.csv", "indice_de_rezago_social"),
    )
    for csv_name, table_name in sources:
        csv_path = os.path.join(coneval_dir, csv_name)
        dbu.csv_to_db_table(csv_path, conf, table_name, schema_name)
def load_census(base_path, conf, schema_name="schema_name"):
    """Load the person and dwelling tables of the 2015 partial census

    The zip archives in base_path/census/2015 are unpacked and
    merge_census() is run for "TR_PERSONA" and "TR_VIVIENDA". Afterwards,
    for each of those two subdirectories, the csvs are merged into a single
    file, that file is passed through dbu.iconv_conversion(), and it is
    handed to dbu.csv_to_db_table() under a table name equal to the file
    name without its extension.

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/census/2015" should contain a file
     "2015_Partial_Census-2016-05-20.zip".
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the tables to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Loads the tables partial_census_2015_personas and
     partial_census_2015_viviendas to the database specified in conf.
    """
    logger.info("Loading 2015 partial census data...")
    census_dir = os.path.join(base_path, "census", "2015")
    dbu.unpack_all_in_dir(census_dir, "zip")

    # Both record types are merged before any csv is touched.
    for record_type in ("TR_PERSONA", "TR_VIVIENDA"):
        merge_census(base_path, record_type)

    # (subdirectory holding the csvs, name of the merged csv)
    targets = (
        ("TR_PERSONA", "partial_census_2015_personas.csv"),
        ("TR_VIVIENDA", "partial_census_2015_viviendas.csv"),
    )
    for subdir, merged_csv in targets:
        csv_dir = os.path.join(census_dir, subdir)
        dbu.merge_csvs_in_dir(csv_dir, merged_csv)

        merged_path = os.path.join(csv_dir, merged_csv)
        dbu.iconv_conversion(merged_path)

        # The table is named after the merged file, minus ".csv".
        table_name = os.path.splitext(merged_csv)[0]
        dbu.csv_to_db_table(merged_path, conf, table_name, schema_name)
def load_manzana_link(base_path, conf, schema_name="schema_name"):
    """Load the table linking id_unico to the geocoded manzana ID

    The csv is read as-is from base_path/spatial_objects and handed to
    dbu.csv_to_db_table() under the table name "manzana_link".

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/spatial_objects/" should contain a file "manzana_link.csv"
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Loads the table manzana_link to the database.
    """
    logger.info("Loading manzana links...")
    spatial_dir = os.path.join(base_path, "spatial_objects")
    link_csv = os.path.join(spatial_dir, "manzana_link.csv")
    dbu.csv_to_db_table(link_csv, conf, "manzana_link", schema_name)
def load_pub_sub(base_path, conf, schema_name="schema_name"):
    """Load a subset of the PUB data

    This function assumes the directory structure described at the start of
    this module. Further, it uses a dictionary called conf to connect to the
    database.

    The pipe characters in base_path/pub/PUB_subset.csv are replaced in place
    by commas using sed, and the file is then handed to
    dbu.csv_to_db_table() under the table name "pub_sub".

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/pub/" should contain PUB_subset.csv.
    :param conf [dict] A dict specifying information needed to connect to
     and load to the database. Expects the fields "USER", "PASSWORD", and
     "HOST". See dbu.csv_to_db_cmds() describing how the connection is
     established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".
    :return None
    :rtype None
    :side-effects Rewrites base_path/pub/PUB_subset.csv in place and loads
     the table pub_sub to the database specified in conf. A failing sed call
     is logged as a warning and the load is still attempted.
    """
    # Imported locally so this function does not depend on the module's
    # import block already providing subprocess.
    import subprocess

    logger.info("Loading PUB subset...")
    file_path = os.path.join(base_path, "pub", "PUB_subset.csv")

    # NOTE(review): a plain pipe -> comma substitution splits any field that
    # itself contains a comma -- confirm the data has none.
    # An argument list bypasses the shell, so paths with spaces or shell
    # metacharacters are passed to sed untouched.
    sed_cmd = ["sed", "-i", "s/|/,/g", file_path]
    try:
        subprocess.check_call(sed_cmd)
    except (OSError, subprocess.CalledProcessError) as err:
        # The previous os.system() call dropped failures silently; keep going
        # as before, but leave a trace of what went wrong.
        logger.warning("Could not convert %s from pipe- to comma-separated: "
                       "%s", file_path, err)

    dbu.csv_to_db_table(file_path, conf, "pub_sub", schema_name)