def __init__(self):
    """Open a connection to the event_processor PostgreSQL database.

    On failure the error is printed, ``self.conn`` is reset to ``None``
    so the object is in a known state, and the exception is re-raised.
    """
    try:
        db_params = config(section='event_processor')
        self.conn = psycopg2.connect(**db_params)
    except Exception as error:
        print(error)
        self.conn = None
        raise
def __init__(self):
    """Connect to PostgreSQL (section ``event_processor``) and MongoDB
    (section ``eao_data``).

    Sets ``self.conn`` (psycopg2 connection), ``self.mdb_client``
    (MongoClient) and ``self.mdb_db`` (Mongo database handle).  On any
    failure the partial state is reset to ``None`` and the exception is
    re-raised so the caller sees the original error.
    """
    try:
        pg_params = config(section='event_processor')
        self.conn = psycopg2.connect(**pg_params)

        mdb_config = config(section='eao_data')
        self.mdb_client = MongoClient(
            'mongodb://%s:%s@%s:%s/%s' %
            (mdb_config['user'], mdb_config['password'],
             mdb_config['host'], mdb_config['port'],
             mdb_config['database']))
        self.mdb_db = self.mdb_client[mdb_config['database']]
    except Exception as error:
        print(error)
        # Fix: traceback.print_exc() writes the traceback to stderr and
        # returns None — wrapping it in print() only emitted "None".
        traceback.print_exc()
        self.conn = None
        self.mdb_client = None
        self.mdb_db = None
        raise
def mongo_sample_data(n_inspections, n_observations):
    """Populate the ``eao_data`` MongoDB with generated sample records.

    Creates one user and one team, then ``n_inspections`` inspections,
    each with ``n_observations`` observations; every observation gets one
    audio, one photo and one video attachment.

    :param n_inspections: number of Inspection documents to create
    :param n_observations: number of Observation documents per inspection
    """
    mdb_config = config(section='eao_data')
    client = MongoClient('mongodb://%s:%s@%s:%s' % (
        mdb_config['user'], mdb_config['password'],
        mdb_config['host'], mdb_config['port']))
    db = client[mdb_config['database']]
    users = db['User']
    teams = db['Team']
    inspections = db['Inspection']
    observations = db['Observation']
    audios = db['Audio']
    photos = db['Photo']
    videos = db['Video']

    # Insert then re-read each parent document so downstream generators
    # see the stored form, including the server-assigned _id.
    user_id = users.insert_one(gen_user()).inserted_id
    user = users.find_one({"_id": user_id})
    team_id = teams.insert_one(gen_team(user)).inserted_id
    team = teams.find_one({"_id": team_id})
    for _ in range(n_inspections):
        inspection_id = inspections.insert_one(
            gen_inspection(user, team)).inserted_id
        inspection = inspections.find_one({"_id": inspection_id})
        for _ in range(n_observations):
            observation_id = observations.insert_one(
                gen_observation(inspection)).inserted_id
            observation = observations.find_one({"_id": observation_id})
            # Attachment ids are not used afterwards, so the
            # inserted_id results are deliberately discarded.
            audios.insert_one(gen_audio(observation))
            photos.insert_one(gen_photo(observation))
            videos.insert_one(gen_video(observation))
async def process_credential_queue(self, single_thread=False):
    """Post all unprocessed rows from CREDENTIAL_LOG to the credential
    registry via ``post_credentials``.

    Rows with a null PROCESS_DATE are read in batches of
    ``CREDS_BATCH_SIZE``, grouped into requests of ``CREDS_REQUEST_SIZE``
    credentials, and posted concurrently (at most ``MAX_CREDS_REQUESTS``
    in flight) unless ``single_thread`` is True, in which case each post
    is awaited immediately.  Processing stops when no unprocessed rows
    remain or after roughly 10 minutes of wall-clock time.

    :param single_thread: await each post task before queuing the next.
    """
    # Fetch the next batch of unprocessed credentials, oldest first.
    sql1 = """SELECT RECORD_ID, SYSTEM_TYPE_CD, CREDENTIAL_TYPE_CD, CREDENTIAL_ID, CREDENTIAL_JSON, SCHEMA_NAME, SCHEMA_VERSION, ENTRY_DATE FROM CREDENTIAL_LOG WHERE RECORD_ID IN ( SELECT RECORD_ID FROM CREDENTIAL_LOG WHERE PROCESS_DATE is null ORDER BY RECORD_ID LIMIT """ + str(CREDS_BATCH_SIZE) + """ ) ORDER BY RECORD_ID;"""
    # Count of rows still awaiting processing.
    sql1a = """SELECT count(*) cnt FROM CREDENTIAL_LOG WHERE PROCESS_DATE is null"""
    """ Connect to the PostgreSQL database server """
    #conn = None
    cur = None
    try:
        # NOTE(review): `params` and `pool` are assigned but never used
        # below — candidates for removal once confirmed dead.
        params = config(section='event_processor')
        pool = mpool.ThreadPool(MAX_CREDS_REQUESTS)
        loop = asyncio.get_event_loop()
        tasks = []
        http_client = aiohttp.ClientSession()
        # Get the total number of unprocessed credentials up front, for
        # progress reporting and the outer-loop termination check.
        cred_count = 0
        cur = self.conn.cursor()
        cur.execute(sql1a)
        row = cur.fetchone()
        if row is not None:
            cred_count = row[0]
        cur.close()
        cur = None
        i = 0
        cred_count_remaining = cred_count
        start_time = time.perf_counter()
        processing_time = 0
        processed_count = 0
        # Hard time budget: stop picking up new batches after ~10 minutes.
        max_processing_time = 10 * 60
        while 0 < cred_count_remaining and processing_time < max_processing_time:
            # create a cursor for the next batch
            cur = self.conn.cursor()
            cur.execute(sql1)
            row = cur.fetchone()
            credentials = []
            cred_owner_id = ''
            while row is not None:
                i = i + 1
                processed_count = processed_count + 1
                # Emit a progress line every 100 credentials.
                if processed_count >= 100:
                    print('>>> Processing {} of {} credentials.'.format(
                        i, cred_count))
                    processing_time = time.perf_counter() - start_time
                    print('Processing: ' + str(processing_time))
                    processed_count = 0
                # NOTE(review): the key is 'SYSTEM_TYP_CD' while the SQL
                # column is SYSTEM_TYPE_CD — downstream consumers appear
                # to expect the short key; confirm before renaming.
                credential = {
                    'RECORD_ID': row[0],
                    'SYSTEM_TYP_CD': row[1],
                    'CREDENTIAL_TYPE_CD': row[2],
                    'CREDENTIAL_ID': row[3],
                    'CREDENTIAL_JSON': row[4],
                    'SCHEMA_NAME': row[5],
                    'SCHEMA_VERSION': row[6],
                    'ENTRY_DATE': row[7]
                }
                # TODO make sure to include all credentials for the same client id within the same batch
                # Flush the accumulated credentials once a full request's
                # worth has been collected.
                if CREDS_REQUEST_SIZE <= len(
                        credentials):  # and credential['CORP_NUM'] != cred_owner_id:
                    post_creds = credentials.copy()
                    creds_task = loop.create_task(
                        post_credentials(http_client, self.conn,
                                         post_creds))
                    tasks.append(creds_task)
                    #await asyncio.sleep(1)
                    if single_thread:
                        # running single threaded - wait for each task to complete
                        await creds_task
                    else:
                        # multi-threaded, check if we are within MAX_CREDS_REQUESTS active requests
                        active_tasks = len(
                            [task for task in tasks if not task.done()])
                        #print("Added task - active = ", active_tasks, ", posted creds = ", len(post_creds))
                        # Throttle: block until at least one in-flight
                        # request finishes before adding more.
                        while active_tasks >= MAX_CREDS_REQUESTS:
                            #await asyncio.gather(*tasks)
                            done, pending = await asyncio.wait(
                                tasks, return_when=asyncio.FIRST_COMPLETED)
                            active_tasks = len(pending)
                            # print("Waited task - active = ", active_tasks)
                    credentials = []
                    cred_owner_id = ''
                credentials.append(credential)
                #TODO cred_owner_id = credential['CORP_NUM']
                row = cur.fetchone()
            cur.close()
            cur = None
            # Post any partial request left over at the end of the batch.
            if 0 < len(credentials):
                post_creds = credentials.copy()
                tasks.append(
                    loop.create_task(
                        post_credentials(http_client, self.conn,
                                         post_creds)))
                credentials = []
                cred_owner_id = ''
            # wait for the current batch of credential posts to complete
            for response in await asyncio.gather(*tasks):
                pass
                # print('response:' + response)
            tasks = []
            print('>>> Processing {} of {} credentials.'.format(
                i, cred_count))
            processing_time = time.perf_counter() - start_time
            print('Processing: ' + str(processing_time))
            # Re-count the remaining unprocessed rows; post_credentials
            # presumably stamps PROCESS_DATE — TODO confirm.
            cur = self.conn.cursor()
            cur.execute(sql1a)
            row = cur.fetchone()
            if row is not None:
                cred_count_remaining = row[0]
            cur.close()
            cur = None
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        print(traceback.print_exc())
    finally:
        # Always release the HTTP session and any open cursor.
        await http_client.close()
        if cur is not None:
            cur.close()
#!/usr/bin/python
"""Smoke-test script: load generated sample data into the eao_data
MongoDB and print the resulting collection names."""
import psycopg2
from pymongo import MongoClient
import datetime
from von_pipeline.config import config
from tests.gen_test_data import *

print("TODO load some test data")

# Insert 3 inspections with 2 observations each (plus user/team and
# per-observation media attachments).
mongo_sample_data(3, 2)

mdb_config = config(section='eao_data')
print(mdb_config['host'], mdb_config['port'], mdb_config['database'])
client = MongoClient('mongodb://%s:%s@%s:%s' % (
    mdb_config['user'], mdb_config['password'],
    mdb_config['host'], mdb_config['port']))
db = client[mdb_config['database']]

# Fix: Database.collection_names() was deprecated in PyMongo 3.7 and
# removed in 4.0; list_collection_names() is the supported replacement
# and excludes system collections by default.
collections = db.list_collection_names()
print(collections)