def load_home_verification(base_path, conf, schema_name="schema_name"):
    """Load the yearly home verification data into the database

    This function assumes the directory structure described at the start of
    this module. It unpacks "VERIFICACION.rar" inside
    base_path + "/home_verification" and then, for each of the years 2013 and
    2014, runs dbu.dbfs_to_csv() and dbu.merge_csvs_in_dir() on the
    "VERIFICACION/<year>" subdirectory and hands the resulting
    "home_verification_<year>.csv" to dbu.csv_to_db_table().

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/home_verification" should contain the archive
     "VERIFICACION.rar".
    :param conf [dict] A dict specifying information needed to connect to and
     load to the database. Expects the fields "USER", "PASSWORD", and "HOST".
     See dbu.csv_to_db_cmds() describing how the connection is established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the tables to. Defaults to the literal string "schema_name".

    :return None
    :rtype None
    :side-effects Loads the tables home_verification_2013 and
     home_verification_2014 to the database specified in conf.
    """
    logger.info("Loading home verification data...")

    verification_dir = os.path.join(base_path, "home_verification")
    dbu.unpack_in_place(verification_dir, "VERIFICACION.rar")

    for year in ("2013", "2014"):
        year_dir = os.path.join(verification_dir, "VERIFICACION", year)
        # The table and its merged csv share the same per-year stem.
        table_name = "home_verification_{}".format(year)
        csv_name = "{}.csv".format(table_name)

        dbu.dbfs_to_csv(year_dir)
        dbu.merge_csvs_in_dir(year_dir, csv_name)
        dbu.csv_to_db_table(
            os.path.join(year_dir, csv_name), conf, table_name, schema_name
        )
def load_equivalencias(base_path, conf, schema_name="schema_name"):
    """Load equivalencias data

    This function assumes the directory structure described at the start of
    this module. Further, it uses a dictionary called conf to connect to the
    database.

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. This specifically
     needs a directory called equivalencias containing the file
     CATALOGO_LOCALIDADES_EQUIVALENCIAS.rar. After unpacking, the directory
     must contain TABLA_DE_EQUIVALENCIA_ABR16.dbf, since that exact file name
     is what gets converted and loaded.
    :param conf [dict] A dict specifying information needed to connect to and
     load to the database. Expects the fields "USER", "PASSWORD", and "HOST".
     See dbu.csv_to_db_cmds() describing how the connection is established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".

    :return None
    :rtype None
    :side-effects Loads the table equivalencias to the database specified in
     conf.
    """
    # Announce the start of the load, as every other loader in this module does.
    logger.info("Loading equivalencias data...")

    archive_dir = os.path.join(base_path, "equivalencias")
    dbu.unpack_all_in_dir(archive_dir)

    # Extension-less stem shared by the unpacked .dbf and the derived .csv.
    equiv_file = os.path.join(archive_dir, "TABLA_DE_EQUIVALENCIA_ABR16")

    # The .dbf goes through dbu.iconv_conversion() before the csv conversion
    # (presumably to fix its character encoding -- confirm in dbu).
    dbu.iconv_conversion(equiv_file + ".dbf")
    dbu.dbfs_to_csv(archive_dir)
    dbu.csv_to_db_table(equiv_file + ".csv", conf, "equivalencias", schema_name)
def load_geo_pub(base_path, conf, schema_name="schema_name"):
    """Load the geographic information in PUB

    This function assumes the directory structure described at the start of
    this module. Everything happens inside base_path + "/geo_pub": the
    directory is passed to dbu.unpack_all_in_dir(), dbu.dbfs_to_csv() and
    dbu.merge_csvs_in_dir() in turn, and the resulting "geo_pub.csv" is handed
    to dbu.csv_to_db_table().

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/geo_pub" should contain *.rar files containing the data.
    :param conf [dict] A dict specifying information needed to connect to and
     load to the database. Expects the fields "USER", "PASSWORD", and "HOST".
     See dbu.csv_to_db_cmds() describing how the connection is established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".

    :return None
    :rtype None
    :side-effects Loads the table geo_pub to the database specified in conf.
    """
    logger.info("Loading geographic pub data...")

    geo_pub_dir = os.path.join(base_path, "geo_pub")
    merged_csv = "geo_pub.csv"

    dbu.unpack_all_in_dir(geo_pub_dir)
    dbu.dbfs_to_csv(geo_pub_dir)
    dbu.merge_csvs_in_dir(geo_pub_dir, merged_csv)
    dbu.csv_to_db_table(
        os.path.join(geo_pub_dir, merged_csv), conf, "geo_pub", schema_name
    )
def load_cve_mapping(base_path, conf, schema_name="schema_name"):
    """Load the CVE mapping catalogs

    This function assumes the directory structure described at the start of
    this module. These data are useful for normalizing the names of localidads
    and municipios across data sets.

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. This function
     specifically requires a subdirectory cve_mapping/ containing the files
     localidads.dbf and municipalities.dbf
    :param conf [dict] A dict specifying information needed to connect to and
     load to the database. Expects the fields "USER", "PASSWORD", and "HOST".
     See dbu.csv_to_db_cmds() describing how the connection is established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the tables to. Defaults to the literal string "schema_name".

    :return None
    :rtype None
    :side-effects Loads the tables cve_mapping_localities and
     cve_mapping_municipalities to the database specified in conf.
    """
    logger.info("Loading the CVE mapping data...")

    mapping_dir = os.path.join(base_path, "cve_mapping")
    dbu.dbfs_to_csv(mapping_dir)

    localities_csv = os.path.join(mapping_dir, "localidads.csv")
    municipalities_csv = os.path.join(mapping_dir, "municipalities.csv")

    # Maps each destination table name to the csv file that feeds it.
    csv_by_table = {
        "cve_mapping_localities": localities_csv,
        "cve_mapping_municipalities": municipalities_csv,
    }
    dbu.csv_to_db_table_wrapper(csv_by_table, conf, schema_name)
def load_geo_sifode(base_path, conf, schema_name="schema_name"):
    """Load the geographical sifode data

    This function assumes the directory structure described at the start of
    this module. Everything happens inside base_path + "/sifode": the
    directory is passed to dbu.unpack_all_in_dir(), dbu.dbfs_to_csv() and
    dbu.merge_csvs_in_dir() in turn, and the resulting "sifode.csv" is handed
    to dbu.csv_to_db_table().

    :param base_path [string] The path to the directory containing the
     subdirectories described at the start of this module. Specifically,
     base_path + "/sifode" should contain .rar files with names like
     "SIFODE_DOMICILIOS_NOMVIAL_1.rar"...
    :param conf [dict] A dict specifying information needed to connect to and
     load to the database. Expects the fields "USER", "PASSWORD", and "HOST".
     See dbu.csv_to_db_cmds() describing how the connection is established.
    :param schema_name [string] The name of the (assumed existing) schema to
     add the table to. Defaults to the literal string "schema_name".

    :return None
    :rtype None
    :side-effects Loads the table geo_cuis to the database specified in conf.
    """
    logger.info("Loading geographic sifode data...")

    data_dir = os.path.join(base_path, "sifode")
    merged_csv = "sifode.csv"
    # NOTE(review): the destination table is "geo_cuis" even though the
    # directory and csv are named after sifode -- confirm this is intended.
    table_name = "geo_cuis"

    dbu.unpack_all_in_dir(data_dir)
    dbu.dbfs_to_csv(data_dir)
    dbu.merge_csvs_in_dir(data_dir, merged_csv)
    dbu.csv_to_db_table(
        os.path.join(data_dir, merged_csv), conf, table_name, schema_name
    )