def test_transfer_coll(self):
    """Transfer a single collection and compare counts on both sides.

    Drops the collection-map, company and employee tables, calls
    create_and_populate_company_mdb() (presumably seeds the MongoDB
    company collection -- confirm against the helper), recreates the
    collection-map table and runs Extractor.transfer_coll on the first
    mock collection. The test passes when the row count of the first
    mock relation equals the document count reported by MongoDB.
    """
    # TODO: check
    cur = pg.conn.cursor()

    # reset CM and the target tables in the database
    for drop_stmt in ("table_drop_purr_cm", "table_drop_company",
                      "table_drop_employee"):
        cur.execute(query[drop_stmt])

    create_and_populate_company_mdb()
    cm.create_table(pg, mock.coll_config)

    # collection which will be transferred and the relation that is counted
    source_coll = mock.coll_names[0]
    target_rel = mock.rel_names[0]

    config = copy.deepcopy(mock.coll_config_company_employee)
    ex = extractor.Extractor(pg, mongo, mock.setup_pg, mock.settings, config)
    ex.transfer_coll(source_coll)

    cur.execute("SELECT count(*) FROM %s" % (target_rel))
    pg_row = cur.fetchone()
    mongo_total = mongo[source_coll].count()
    print("Postgres:", pg_row[0])
    print("MongoDB:", mongo_total)

    # release the cursor and the extractor before the final verdict
    cur.close()
    del ex
    assert mongo_total == pg_row[0]
def test_update_coll_map_changed(self):
    """Check that update_coll_map replaces the extractor's current map.

    The collection-map table is recreated from mock.coll_config while
    the extractor itself is built from the company+employee config, so
    the map held by the extractor differs from the stored one. After
    update_coll_map() the test expects ex.coll_map_cur to equal
    mock.coll_config_db.
    """
    # collection map is changed and extractor.coll_map_cur needs to be
    # updated; purr_collection_map needs old values
    cur = pg.conn.cursor()
    cur.execute(query["table_drop_purr_cm"])

    # create table for CM in the database
    cm.create_table(pg, mock.coll_config)

    config = copy.deepcopy(mock.coll_config_company_employee)
    ex = extractor.Extractor(pg, mongo, mock.setup_pg, mock.settings, config)

    # this pulls the map from the db (mock.coll_config)
    ex.update_coll_map()
    print("NEW", ex.coll_map_cur)
    print("MOCK", mock.coll_config_db_company_employee)

    # case 2: collection map is changed and
    # extractor.coll_map_cur needs to be updated
    cur.close()
    maps_match = ex.coll_map_cur == mock.coll_config_db
    del ex
    assert maps_match
def test_transfer(self):
    """Transfer every mock collection and compare counts per relation.

    Steps:
    - drop the collection-map, company and employee tables
    - call create_and_populate_company_mdb() and recreate the
      collection-map table
    - run Extractor.transfer on all names in mock.coll_names
    - for each relation in mock.rel_names, assert that its row count
      equals the document count of the collection at the same index

    The cursor is closed in a ``finally`` clause so it is released even
    when a count mismatch (or any other error) aborts the test.
    """
    cursor = pg.conn.cursor()
    ex = None
    try:
        # reset CM in the database
        cursor.execute(query["table_drop_purr_cm"])
        cursor.execute(query["table_drop_company"])
        cursor.execute(query["table_drop_employee"])

        create_and_populate_company_mdb()
        cm.create_table(pg, mock.coll_config)

        # collections which will be transferred
        collection_names = mock.coll_names
        coll_config = copy.deepcopy(mock.coll_config_company_employee)
        ex = extractor.Extractor(
            pg, mongo, mock.setup_pg, mock.settings, coll_config)
        ex.transfer(collection_names)

        # Relations and collections are paired by position; indexing
        # (rather than zip) keeps a length mismatch loud via IndexError.
        for i, relation in enumerate(mock.rel_names):
            collection = collection_names[i]
            cursor.execute("SELECT count(*) FROM %s" % (relation))
            cnt_pg = cursor.fetchone()[0]
            cnt_mongo = mongo[collection].count()
            assert cnt_mongo == cnt_pg, (
                "count mismatch for collection %r / relation %r: "
                "MongoDB=%s, Postgres=%s"
                % (collection, relation, cnt_mongo, cnt_pg)
            )
    finally:
        cursor.close()
        del ex
def test_update_multiple(self):
    """Apply a bulk update through the extractor and verify the rows.

    Every document found in the first mock collection is turned into an
    update payload carrying the values of mock.data_mdb_company_updated.
    The payloads are pushed through Extractor.update_multiple into a
    freshly created relation, and the stored rows (all attributes except
    the first, ordered by id) must equal
    mock.data_pg_company_updated_no_id.
    """
    # no unset
    # TODO: test when there is value in unset
    cur = pg.conn.cursor()

    # reset CM in the database
    cur.execute(query["table_drop_purr_cm"])
    cur.execute(query["table_drop_company"])
    create_and_populate_company_mdb()
    cm.create_table(pg, mock.coll_config)

    coll_name = mock.coll_names[0]
    rel_name = mock.rel_names[0]

    # one update payload per existing document, all with the same values
    updated = mock.data_mdb_company_updated
    docs = [
        {
            "_id": found["_id"],
            "active": updated["active"],
            "signupCode": updated["signupCode"],
            "domains": updated["domains"]
        }
        for found in mongo[coll_name].find()
    ]

    config = copy.deepcopy(mock.coll_config_company_employee)
    ex = extractor.Extractor(pg, mongo, mock.setup_pg, mock.settings, config)

    schema_name = mock.setup_pg["schema_name"]
    rel_obj = relation.Relation(pg, schema_name, rel_name, True)
    attrs = mock.attrs_company
    types = mock.types_company
    rel_obj.create(attrs, types)

    ex.update_multiple(docs, rel_obj, coll_name)

    # the first attribute is skipped in the comparison
    column_list = ", ".join(attrs[1:])
    cur.execute("SELECT %s FROM %s order by id" % (column_list, rel_name))
    expected = mock.data_pg_company_updated_no_id
    rows = cur.fetchall()

    print("MOCKED")
    print(expected)
    print("RESULT")
    print(rows)

    cur.close()
    del rel_obj
    del ex
    assert expected == rows
def create_and_populate_company_pg():
    """Recreate the collection-map and company tables and insert one row.

    Drops and recreates the collection-map table from mock.coll_config,
    drops and recreates the company table, then inserts a single
    hard-coded company row. The cursor is closed afterwards.
    """
    # Adjacent literals concatenate into exactly the statement text that
    # was previously passed as one string.
    insert_company = (
        "insert into company( id, active, domains, signup_code ) "
        "values( '12345', 'true', "
        """'{"domain": ["pelotonland.com"]}', 'xfLfdsFD3S')"""
    )

    cur = pg.conn.cursor()
    cur.execute(query["table_drop_purr_cm"])

    # create table for CM in the database
    cm.create_table(pg, mock.coll_config)

    for stmt_key in ("table_drop_company", "table_create_company"):
        cur.execute(query[stmt_key])
    cur.execute(insert_company)
    cur.close()
def test_table_untrack(self):
    """Check that table_untrack shrinks the extractor's collection map.

    The extractor starts with the company+employee config; after
    table_untrack(old_map, new_map) its coll_def is expected to have the
    same number of entries as mock.coll_config and, for every entry that
    is not fully equal to the expected one, an identical ":columns"
    value.

    The cursor is closed in a ``finally`` clause so a failing assertion
    no longer leaves it open.

    TODO:
    - check if the table is left in the PG database
    - try to insert new data to mongodb and check
      if its left out from the data transfer
    """
    # this one should remove employee from the collection map
    create_and_populate_company_mdb()
    create_and_populate_employee_mdb()

    cursor = pg.conn.cursor()
    ex = None
    try:
        cursor.execute(query["table_drop_company"])
        cursor.execute(query["table_drop_employee"])
        cursor.execute(query["table_drop_purr_cm"])

        # create table for CM in the database
        cm.create_table(pg, mock.coll_config)

        coll_config = copy.deepcopy(mock.coll_config_company_employee)
        ex = extractor.Extractor(
            pg, mongo, mock.setup_pg, mock.settings, coll_config)

        # changes extractor's collection definition for every collection
        # and transfers
        coll_map_old = copy.deepcopy(mock.coll_config_db_company_employee)
        coll_map_new = copy.deepcopy(mock.coll_config_db)
        ex.table_untrack(coll_map_old, coll_map_new)

        mocked = copy.deepcopy(mock.coll_config)
        assert len(ex.coll_def) == len(mocked), (
            "collection definition size mismatch: NEW %r, OLD %r"
            % (ex.coll_def, mocked)
        )

        for name, definition in ex.coll_def.items():
            expected = mocked[name]
            if definition == expected:
                continue
            # Only the ":columns" part decides pass/fail; other
            # differences are tolerated, as before.
            assert definition[":columns"] == expected[":columns"], (
                "column mismatch for %r: %r != %r"
                % (name, definition[":columns"], expected[":columns"])
            )
    finally:
        cursor.close()
        del ex
def test_table_track(self):
    """Check that table_track extends the extractor's collection map.

    The extractor starts with mock.coll_config; after
    table_track(old_map, new_map) its coll_def is expected to have the
    same number of entries as mock.coll_config_company_employee and, for
    every entry that is not fully equal to the expected one, an
    identical ":columns" value. On success the collection-map table is
    dropped again.

    The cursor is closed in a ``finally`` clause so a failing assertion
    no longer leaves it open, and the shared mock maps are deep-copied
    before being handed to table_track (as test_table_untrack does) so
    the call cannot modify fixtures used by other tests.

    TODO: see if the table is transfered to the PG database
    """
    create_and_populate_company_mdb()
    create_and_populate_employee_mdb()

    cursor = pg.conn.cursor()
    ex = None
    try:
        cursor.execute(query["table_drop_company"])
        cursor.execute(query["table_drop_employee"])
        cursor.execute(query["table_drop_purr_cm"])

        coll_config = copy.deepcopy(mock.coll_config)

        # create table for CM in the database
        cm.create_table(pg, mock.coll_config)

        ex = extractor.Extractor(
            pg, mongo, mock.setup_pg, mock.settings, coll_config)

        # changes extractor's collection definition for every collection
        # and transfers
        coll_map_old = copy.deepcopy(mock.coll_config_db)
        coll_map_new = copy.deepcopy(mock.coll_config_db_company_employee)
        ex.table_track(coll_map_old, coll_map_new)

        mocked = mock.coll_config_company_employee
        assert len(ex.coll_def) == len(mocked), (
            "collection definition size mismatch: got %r, expected %r"
            % (ex.coll_def, mocked)
        )

        for name, definition in ex.coll_def.items():
            expected = mocked[name]
            if definition == expected:
                continue
            # Only the ":columns" part decides pass/fail; other
            # differences are tolerated, as before.
            assert definition[":columns"] == expected[":columns"], (
                "column mismatch for %r: %r != %r"
                % (name, definition[":columns"], expected[":columns"])
            )

        # Runs only after all checks passed, matching the previous
        # success-path cleanup.
        cursor.execute(query["table_drop_purr_cm"])
    finally:
        cursor.close()
        del ex
def test_update_coll_map_unchanged(self):
    """Check that update_coll_map keeps an unchanged map intact.

    Both the collection-map table and the extractor are built from
    mock.coll_config, so after update_coll_map() the extractor's
    coll_map_cur is expected to equal mock.coll_config_db.
    """
    # collection map is not changed
    # extractor.coll_map_cur stays the same
    cur = pg.conn.cursor()
    cur.execute(query["table_drop_purr_cm"])

    # create table for CM in the database
    cm.create_table(pg, mock.coll_config)

    config = copy.deepcopy(mock.coll_config)
    ex = extractor.Extractor(pg, mongo, mock.setup_pg, mock.settings, config)
    ex.update_coll_map()

    print("OLD", ex.coll_map_cur)
    print("NEW", mock.coll_config_db)

    cur.close()
    maps_match = ex.coll_map_cur == mock.coll_config_db
    del ex
    assert maps_match
def start(settings, coll_map):
    """
    Starts Purr.

    Connects to Postgres and MongoDB, creates the collection-map table,
    saves transfer logs to the database and builds the Extractor. In
    tailing mode the initial transfer runs in a TransferThread which is
    joined before handle_coll_map_changes is called; otherwise
    transfer.start is called directly.

    Returns
    -------
    -

    Parameters
    ----------
    settings : dict
             : basic settings for both PG and MongoDB
               (connection strings, schema name)
    coll_map : dict
             : config file for collections

    Raises
    ------
    SystemExit
             : in tailing mode, on KeyboardInterrupt/SystemExit (after
               stopping every thread in THREADS) or on any other
               exception raised while transferring

    TODO
    ----
    - create table with attributes and types
    """
    logger.info("Starting Purr v%s ..." % get_version(), CURR_FILE)
    logger.info("PID=%s" % os.getpid())

    # NOTE(review): the previous condition tested settings["tailing_from"]
    # twice; the duplicate operand is dropped here (no behaviour change).
    # If a third, different setting was intended, add it explicitly.
    mode_tailing = bool(settings["tailing"] or settings["tailing_from"])
    logger.info("TAILING=%s" % ("ON" if mode_tailing else "OFF"))

    setup_pg = settings["postgres"]
    setup_mdb = settings["mongo"]

    pg = postgres.PgConnection(setup_pg["connection"])
    mongo = mongodb.MongoConnection(setup_mdb)

    cm.create_table(pg, coll_map, setup_pg["schema_name"])
    transfer_info.save_logs_to_db(pg, setup_pg["schema_name"])

    ex = extractor.Extractor(pg, mongo.conn, setup_pg, settings, coll_map)
    start_date_time = datetime.utcnow()

    if mode_tailing:
        try:
            # first just transfer the data
            tailing_start = False
            thr_transfer = transfer.TransferThread(
                settings, coll_map, pg, mongo, ex, tailing_start)
            thr_transfer.start()

            # wait until thread is finished
            thr_transfer.join()

            handle_coll_map_changes(
                settings, coll_map, pg, mongo, ex, start_date_time)
        except (KeyboardInterrupt, SystemExit):
            logger.error('Stopping transfer.', CURR_FILE)
            for t in THREADS:
                t.stop()
            raise SystemExit()
        except Exception as err:
            # Bound to `err`, not `ex`: `except ... as ex` would rebind and
            # then unbind the Extractor held in `ex`.
            logger.error(
                "Unable to start transfer thread. Details: %s" % err,
                CURR_FILE)
            raise SystemExit()
    else:
        transfer.start(ex, coll_map)