def main():
    """Copy the port documents produced from Postgres into MongoDB.

    The documents are fully materialized first so that their count can be
    written to the debug log before the upsert starts.
    """
    HenriqueLogger.attach_stderr2loggers()
    log = HenriqueLogger.func_level2logger(main, logging.DEBUG)

    port_doc_iter = Port2MongoDB.postgres2j_iter()
    port_docs = list(port_doc_iter)
    log.debug({"# j_list": len(port_docs)})

    Port2MongoDB.j_iter2mongodb(port_docs)
def h_qterm2j_doc(cls):
    """Build a dict mapping each unambiguous query term to its tradegood document.

    Every name found under ``TradegoodDocument.jpath_names()`` (one list of
    names per language) is converted with ``cls._query2qterm`` and paired with
    the document it came from. Query terms that ``iter2duplicate_list``
    reports as occurring more than once are dropped before merging, so the
    merge with ``vwrite_no_duplicate_key`` only ever sees distinct keys.

    Returns:
        The merged ``{qterm: j_doc}`` dict produced by ``merge_dicts``.
    """
    logger = HenriqueLogger.func_level2logger(cls.h_qterm2j_doc, logging.DEBUG)

    j_doc_list = list(TradegoodDocument.j_doc_iter_all())
    jpath = TradegoodDocument.jpath_names()

    # One single-entry dict per (document, language, name) combination.
    h_list = [
        {cls._query2qterm(name): j_doc}
        for j_doc in j_doc_list
        for name_list_lang in jdown(j_doc, jpath).values()
        for name in name_list_lang
    ]

    # Computed once and reused for both the debug record and the filter.
    qterm_list_duplicate = iter2duplicate_list(
        [iter2singleton(h.keys()) for h in h_list])

    # Guard the sample lookups: an empty collection must not make the
    # debug logging itself raise IndexError.
    j_doc_sample = j_doc_list[0] if j_doc_list else None
    query_sample = jdown(j_doc_sample, jpath) if j_doc_list else None
    logger.debug({
        "h_list": qterm_list_duplicate,
        "jpath": jpath,
        "j_doc_list[0]": j_doc_sample,
        "query[0]": query_sample,
    })

    # Query terms are dict keys, hence hashable; a set gives O(1) membership.
    qterm_set_duplicate = set(qterm_list_duplicate)
    h_list_clean = [
        h for h in h_list
        if iter2singleton(h.keys()) not in qterm_set_duplicate
    ]

    return merge_dicts(h_list_clean, vwrite=vwrite_no_duplicate_key)
def postgres2j_iter(cls):
    """Yield one port document per row of the Postgres port table.

    For every row the JSON column (``PortTable.index_json()``) is taken and
    reshaped in place:

    * ``"name"`` (one name per language) and the optional ``"nicknames"``
      (a list per language) are combined into ``PortDocument.F.NAMES``, a
      dict of language -> name list passed through ``luniq``, with the
      official name listed before the nicknames.
    * The original ``"name"`` and ``"nicknames"`` entries are removed.
    * ``PortDocument.F.KEY`` is set to the first English name.

    Yields:
        The reshaped JSON dict of each row.

    Raises:
        KeyError: if a row has no ``"name"`` entry, or no English (``"en"``)
            name or nickname entry to derive the key from.
    """
    with PostgresHub.cursor() as cursor:
        sql = SQL("SELECT * from {}").format(Identifier(PortTable.NAME))
        cursor.execute(sql)

        for t in PostgresTool.fetch_iter(cursor):
            j = t[PortTable.index_json()]

            # The official name comes first so it ends up at index 0.
            h_lang2names = {lang: [name] for lang, name in j["name"].items()}
            for lang, nickname_list in j.get("nicknames", {}).items():
                h_lang2names.setdefault(lang, []).extend(nickname_list)

            j[PortDocument.F.NAMES] = {
                lang: luniq(name_list)
                for lang, name_list in h_lang2names.items()
            }
            for k in ["name", "nicknames"]:
                j.pop(k, None)

            # Use the same field constants as the MongoDB upsert filter
            # (F.KEY) and as the assignment just above (F.NAMES).
            j[PortDocument.F.KEY] = j[PortDocument.F.NAMES]["en"][0]
            yield j
def test_01(self):
    """Smoke test: read the whole port table and write the rows to the debug log.

    There are no assertions; the test passes when the query runs without
    raising.
    """
    log = HenriqueLogger.func_level2logger(self.test_01, logging.DEBUG)
    query = """SELECT * from unchartedwatersonline_port"""

    with PostgresHub.cursor() as cursor:
        cursor.execute(query)
        rows = cursor.fetchall()
        log.debug({"l": rows})
def j_port_lang2name(cls, j_port, lang):
    """Return the first name of a port document for the given language.

    Args:
        j_port: Port document; its names are looked up with ``jdown`` under
            ``[cls.F.NAMES, lang]``.
        lang: Language key selecting the name list.

    Returns:
        The first element of the name list found for ``lang``.

    Note:
        No fallback is applied: if the lookup yields nothing indexable or an
        empty list, the ``[0]`` access raises.
    """
    # The logger is tied to this method (it previously referenced
    # ``j_port2culture_name``, which mislabelled the log records).
    logger = HenriqueLogger.func_level2logger(cls.j_port_lang2name, logging.DEBUG)

    name_list = jdown(j_port, [cls.F.NAMES, lang])
    logger.debug({"j_port": j_port, "lang": lang, "name_list": name_list})

    return name_list[0]
def j_iter2mongodb(cls, j_iter, chunk_size=100000):
    """Upsert port documents into the port collection, chunk by chunk.

    Args:
        j_iter: Iterable of port documents; it is materialized up front so
            the total can be logged.
        chunk_size: Number of documents handed to each upsert call.

    Each document is paired with a filter consisting of its
    ``PortDocument.F.KEY`` field, and the pairs are passed to
    ``MongoDBToolkit.j_pair_iter2upsert``.
    """
    log = HenriqueLogger.func_level2logger(cls.j_iter2mongodb, logging.DEBUG)

    docs = list(j_iter)
    total = len(docs)
    log.debug({"n": total})

    # NOTE(review): wtimeout is a duration in milliseconds, yet it is fed
    # the chunk size (a document count) -- confirm this coupling is intended.
    collection = PortCollection.collection(
        write_concern=WriteConcern(w=3, wtimeout=chunk_size))

    chunks = ChunkToolkit.chunk_size2chunks(docs, chunk_size)
    for chunk_index, chunk in enumerate(chunks):
        offset = chunk_index * chunk_size
        log.debug({"i/n": "{}/{}".format(offset, total)})

        # (filter, document) pairs: the filter keeps only the key field.
        pairs = []
        for doc in chunk:
            key_filter = JToolkit.j_jpaths2filtered(doc, [[PortDocument.F.KEY]])
            pairs.append((key_filter, doc))

        MongoDBToolkit.j_pair_iter2upsert(collection, pairs)
def conn(cls):
    """Open a new psycopg2 connection using settings read via ``EnvToolkit``.

    Host, port, user, password and database name are looked up through
    ``EnvToolkit.k2v`` with the keys declared on ``cls.Env``.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    logger = HenriqueLogger.func_level2logger(cls.conn, logging.DEBUG)

    j_connect = {
        "host": EnvToolkit.k2v(cls.Env.HOST),
        "port": EnvToolkit.k2v(cls.Env.PORT),
        "user": EnvToolkit.k2v(cls.Env.USER),
        "password": EnvToolkit.k2v(cls.Env.PASSWORD),
        "dbname": EnvToolkit.k2v(cls.Env.DBNAME),
    }

    # Never write the credential to the log. Only the logged copy is masked;
    # a missing password stays visible as None to keep the log useful.
    password_masked = None if j_connect["password"] is None else "********"
    j_connect_loggable = dict(j_connect, password=password_masked)
    logger.debug({"j_connect": j_connect_loggable})

    return psycopg2.connect(**j_connect)
def setUpClass(cls):
    """Class-level test fixture: call ``HenriqueLogger.attach_stderr2loggers``.

    Presumably this makes the project's log output visible on stderr while
    the tests run -- confirm against ``HenriqueLogger``.
    """
    HenriqueLogger.attach_stderr2loggers()