def update_catalog():
    """Regenerate the XLSX export of the catalog from the cached data.json.

    If the data.json cache file (CACHE_FILENAME) is missing it is generated
    first. The catalog is then loaded, given a ``themeTaxonomy`` key when it
    lacks one, written as ``catalog.xlsx`` inside a fresh temporary directory
    created under CACHE_DIRECTORY, and finally renamed onto XLSX_FILENAME.

    The temporary directory is always removed, including when writing or
    renaming the workbook fails; any exception from those steps propagates
    to the caller.
    """
    import shutil

    from pydatajson import writers, DataJson

    # Make sure the datajson cache exists before passing its path along.
    if not os.path.isfile(CACHE_FILENAME):
        # It does not exist, so generate it.
        update_datajson_cache()

    catalog = DataJson(CACHE_FILENAME)
    # Guarantee the key is present (empty list when the catalog has none).
    catalog['themeTaxonomy'] = catalog.get('themeTaxonomy', [])

    # Keep the directory path in its own variable instead of recovering it
    # later by stripping the file name from the full path.
    tmp_dir = tempfile.mkdtemp(dir=CACHE_DIRECTORY)
    new_catalog_filename = '%s/catalog.xlsx' % tmp_dir
    try:
        writers.write_xlsx_catalog(catalog, new_catalog_filename)
        os.rename(new_catalog_filename, XLSX_FILENAME)
    finally:
        # Best-effort cleanup so a failed export does not leave stray
        # temporary directories behind in the cache directory.
        shutil.rmtree(tmp_dir, ignore_errors=True)
def nodes_to_df(input_path):
    """Read the catalogs of the node network into a DataFrame.

    Args:
        input_path: Path to a JSON file holding a top-level
            ``"jurisdictions"`` list. Each jurisdiction provides ``"id"``,
            ``"title"`` and a ``"catalogs"`` list; each catalog provides
            ``"id"`` and ``"url_json"`` and may provide ``"url_xlsx"`` and
            ``"url_datosgobar"``.

    Returns:
        A DataFrame with one row per catalog and the columns
        ``jurisdiction_id``, ``jurisdiction_title``, ``catalog_id``,
        ``catalog_title``, ``catalog_homepage``, ``catalog_url_json``,
        ``catalog_url_xlsx`` and ``catalog_url_datosgobar``, in that order.
        When no catalogs are listed, the frame is empty but still has
        those columns.

    Progress is printed to stdout. A catalog that cannot be loaded is
    reported there and kept in the result with empty title and homepage.
    """
    fields = [
        "jurisdiction_id",
        "jurisdiction_title",
        "catalog_id",
        "catalog_title",
        "catalog_homepage",
        "catalog_url_json",
        "catalog_url_xlsx",
        "catalog_url_datosgobar",
    ]

    with open(input_path) as f:
        nodes = json.load(f)

    rows = []
    for jurisdiction in nodes["jurisdictions"]:
        for catalog in jurisdiction["catalogs"]:
            print("Leyendo catálogo '{}' de la jurisdiccion '{}' ({})".format(
                catalog["id"], jurisdiction["id"], jurisdiction["title"]),
                end=" ")
            try:
                dj = DataJson(catalog["url_json"])
                print("...OK")
            except Exception as e:
                # Deliberately best effort: one unreadable catalog must not
                # abort the scan, so fall back to an empty mapping.
                dj = {}
                print("...ERROR")
                print(e)

            rows.append({
                "jurisdiction_id": jurisdiction["id"],
                "jurisdiction_title": jurisdiction["title"],
                "catalog_id": catalog["id"],
                "catalog_title": dj.get("title"),
                "catalog_homepage": dj.get("homepage"),
                "catalog_url_json": catalog["url_json"],
                "catalog_url_xlsx": catalog.get("url_xlsx"),
                "catalog_url_datosgobar": catalog.get("url_datosgobar"),
            })

    # Passing ``columns`` fixes the column order and also works when ``rows``
    # is empty, where selecting the columns afterwards would raise KeyError.
    return pd.DataFrame(rows, columns=fields)
def __init__(self, node: Node, task: IndexMetadataTask, index: str):
    """Prepare the state needed to index a node's metadata.

    Stores the node, task and index name, obtains the Elasticsearch
    connection, creates the index when it does not exist yet, initializes
    the fields metadata cache and loads the catalog themes.

    Args:
        node: Node whose ``catalog_url`` is read to obtain the catalog.
        task: Task instance this run is associated with.
        index: Name of the Elasticsearch index to use.

    Raises:
        ValueError: If the catalog cannot be read or its themes cannot be
            processed; the original error is attached as ``__cause__``.
    """
    self.node = node
    self.task = task
    self.index_name = index
    self.elastic: Elasticsearch = connections.get_connection()

    # Create the index on first use so later operations can assume it exists.
    if not self.elastic.indices.exists(self.index_name):
        init_index(self.index_name)

    self.fields_meta = {}
    self.init_fields_meta_cache()

    try:
        data_json = DataJson(node.catalog_url)
        themes = data_json.get('themeTaxonomy', [])
        self.themes = self.get_themes(themes)
    except Exception as exc:
        # Callers still get a ValueError, but chaining keeps the root cause
        # (network error, malformed catalog, ...) visible in the traceback.
        raise ValueError(
            "Error de lectura de los themes del catálogo") from exc