def _run(self):
    """Run the UploadFile worker loop.

    Spawns the upload/retry worker greenlets and, every ``self.delay``
    seconds, restarts any that have died.  Workers are now killed on
    every exit path (previously ``gevent.killall`` ran only when an
    exception escaped the loop, so a clean ``self.exit`` shutdown
    leaked the greenlets).
    """
    logger.info('Start UploadFile worker',
                extra=journal_context({"MESSAGE_ID": DATABRIDGE_START_UPLOAD}, {}))
    self.immortal_jobs = {
        'upload_to_doc_service': spawn(self.upload_to_doc_service),
        'upload_to_tender': spawn(self.upload_to_tender),
        'retry_upload_to_doc_service': spawn(self.retry_upload_to_doc_service),
        'retry_upload_to_tender': spawn(self.retry_upload_to_tender)
    }
    try:
        while not self.exit:
            gevent.sleep(self.delay)
            for name, job in self.immortal_jobs.items():
                if job.dead:
                    logger.warning(
                        "{} worker dead try restart".format(name),
                        extra=journal_context(
                            {"MESSAGE_ID": 'DATABRIDGE_RESTART_{}'.format(name.lower())},
                            {}))
                    # re-spawn the bound method looked up by its registry name
                    self.immortal_jobs[name] = gevent.spawn(getattr(self, name))
                    logger.info("{} worker is up".format(name))
    except Exception as e:
        logger.error(e)
    finally:
        # reap all workers on both clean and exceptional shutdown
        gevent.killall(self.immortal_jobs.values(), timeout=5)
def get_tenders(self, params=None, direction=""):
    """Yield tenders from the sync feed that pass ``should_process_tender``.

    :param params: query parameters for the tenders sync API.  Previously a
        mutable ``{}`` default that the loop mutated in place
        (``params['offset'] = ...``), leaking state between calls; the
        argument now defaults to ``None`` and is copied before use, so the
        caller's mapping is never modified.
    :param direction: 'forward' or 'backward'; used for logging only.

    On HTTP 429 from the sync client the inter-request sleep is increased
    and the same page is retried; other ``ResourceError``s propagate.
    """
    params = dict(params) if params else {}
    response = self.initialize_sync(params=params, direction=direction)
    # descending (backward) sync stops when a page repeats with no data
    while not (params.get('descending')
               and not len(response.data)
               and params.get('offset') == response.next_page.offset):
        tenders = response.data if response else []
        params['offset'] = response.next_page.offset
        for tender in tenders:
            if self.should_process_tender(tender):
                yield tender
            else:
                logger.info(
                    'Skipping tender {} with status {} with procurementMethodType {}'.format(
                        tender['id'], tender['status'], tender['procurementMethodType']),
                    extra=journal_context({"MESSAGE_ID": DATABRIDGE_INFO},
                                          params={"TENDER_ID": tender['id']}))
        logger.info('Sleep {} sync...'.format(direction),
                    extra=journal_context({"MESSAGE_ID": DATABRIDGE_SYNC_SLEEP}))
        gevent.sleep(self.delay + self.sleep_change_value.time_between_requests)
        try:
            response = self.tenders_sync_client.sync_tenders(
                params,
                extra_headers={'X-Client-Request-ID': generate_req_id()})
            self.sleep_change_value.decrement()
        except ResourceError as re:
            if re.status_int == 429:
                # rate-limited: back off and loop again with the same offset
                self.sleep_change_value.increment()
                logger.info("Received 429, will sleep for {}".format(
                    self.sleep_change_value.time_between_requests))
            else:
                raise re
def _run(self):
    """Run the EDR handler loop: keep the get/retry workers alive.

    Workers are now killed on every exit path (previously only when an
    exception escaped the loop, so a clean ``self.exit`` leaked them).
    """
    logger.info('Start EDR Handler',
                extra=journal_context({"MESSAGE_ID": DATABRIDGE_START_EDR_HANDLER}, {}))
    self.immortal_jobs = {
        'get_edr_data': spawn(self.get_edr_data),
        'retry_get_edr_data': spawn(self.retry_get_edr_data)
    }
    try:
        while not self.exit:
            gevent.sleep(self.delay)
            for name, job in self.immortal_jobs.items():
                if job.dead:
                    logger.warning(
                        "EDR handler worker {} dead try restart".format(name),
                        extra=journal_context(
                            {"MESSAGE_ID": "DATABRIDGE_RESTART_{}".format(name.lower())},
                            {}))
                    self.immortal_jobs[name] = gevent.spawn(getattr(self, name))
                    logger.info("EDR handler worker {} is up".format(name))
    except Exception as e:
        logger.error(e)
    finally:
        # reap worker greenlets on both clean and exceptional shutdown
        gevent.killall(self.immortal_jobs.values(), timeout=5)
def get_tenders_backward(self):
    """Perform one full backward sync pass.

    Returns True when the stream is exhausted cleanly, False if the
    worker died on an exception.
    """
    logger.info('Start backward data sync worker...')
    params = {
        'opt_fields': 'status,procurementMethodType',
        'descending': 1,
        'mode': '_all_',
    }
    try:
        for tender in self.get_tenders(params=params, direction="backward"):
            tender_id = tender['id']
            logger.info(
                'Backward sync: Put tender {} to process...'.format(tender_id),
                extra=journal_context({"MESSAGE_ID": DATABRIDGE_TENDER_PROCESS},
                                      {"TENDER_ID": tender_id}))
            self.filtered_tender_ids_queue.put(tender_id)
    except Exception as err:
        logger.warning('Backward worker died!',
                       extra=journal_context({"MESSAGE_ID": DATABRIDGE_WORKER_DIED}, {}))
        logger.exception("Message: {}".format(err.message))
        return False
    logger.info('Backward data sync finished.')
    return True
def process_item(self, response, tender, tender_id, item, item_name):
    """Dispatch a single award/qualification item for further processing,
    skipping items that fail ``should_process_item``."""
    logger.info(
        'Processing tender {} bid {} {} {}'.format(
            tender['id'], item_id(item), item_name, item['id']),
        extra=journal_context({"MESSAGE_ID": DATABRIDGE_TENDER_PROCESS},
                              journal_item_params(tender, item)))
    # guard clause: log and bail out on items that should not be processed
    if not self.should_process_item(item):
        logger.info(
            'Tender {} bid {} {} {} is not in status pending or award has already document '
            'with documentType registerExtract.'.format(
                tender_id, item_id(item), item_name, item['id']),
            extra=journal_context(params={
                "TENDER_ID": tender['id'],
                "BID_ID": item_id(item),
                journal_item_name(item): item['id'],
            }))
        return
    if item_name == 'award':
        for supplier in item['suppliers']:
            self.process_award_supplier(response, tender, item, supplier)
    elif item_name == 'qualification':
        self.process_qualification(response, tender, item)
def retry_upload_to_doc_service(self):
    """Get data from retry_upload_to_doc_service_queue; If upload were successful put Data obj to
    upload_to_tender_queue, otherwise put Data obj back to retry_upload_file_queue"""
    while not self.exit:
        self.services_not_available.wait()
        try:
            # peek (not get) so the item stays queued until it is routed
            tender_data = self.retry_upload_to_doc_service_queue.peek()
            document_id = tender_data.file_content.get('meta', {}).get('id')
        except LoopExit:
            # queue is empty: yield to other greenlets and poll again
            gevent.sleep(0)
            continue
        # journal param key, e.g. item_name with trailing char stripped + "_ID"
        item_name_id = tender_data.item_name[:-1].upper() + "_ID"
        try:
            # retry the doc-service upload, tagging the request with the
            # document id for traceability
            self.client.headers.update(
                {'X-Client-Request-ID': document_id})
            response = self.client_upload_to_doc_service(tender_data)
        except Exception as e:
            logger.warning(
                'Exception while uploading file to doc service {} doc_id: {}. Message: {}. '
                'Lost tender_data'.format(data_string(tender_data),
                                          document_id, e.message),
                extra=journal_context(
                    {
                        "MESSAGE_ID":
                        DATABRIDGE_UNSUCCESS_UPLOAD_TO_DOC_SERVICE
                    },
                    params={
                        "TENDER_ID": tender_data.tender_id,
                        item_name_id: tender_data.item_id,
                        "DOCUMENT_ID": document_id
                    }))
            logger.exception("Message: {}".format(e.message))
            # drop the item: pop it from the queue, mark it processed, then
            # re-raise so the supervisor loop restarts this worker
            self.retry_upload_to_doc_service_queue.get()
            self.process_tracker.update_items_and_tender(
                tender_data.tender_id, tender_data.item_id)
            raise e
        else:
            if response.status_code == 200:
                self.move_to_tender_queue('retry', tender_data, response,
                                          document_id, item_name_id)
            else:
                logger.info(
                    'Not successful response in retry from document service while uploading {} {} {} {}. Response {}'
                    .format(tender_data.tender_id, tender_data.item_name,
                            tender_data.item_id, document_id,
                            response.status_code),
                    extra=journal_context(
                        {
                            # NOTE(review): .format('_retry') called on the
                            # MESSAGE_ID constant — verify the constant
                            # actually contains a '{}' placeholder
                            "MESSAGE_ID":
                            DATABRIDGE_UNSUCCESS_UPLOAD_TO_DOC_SERVICE.
                            format('_retry')
                        },
                        params={
                            "TENDER_ID": tender_data.tender_id,
                            item_name_id: tender_data.item_id,
                            "DOCUMENT_ID": document_id
                        }))
        gevent.sleep(0)
def upload_to_doc_service(self):
    """Get data from upload_to_doc_service_queue; Create file of the Data.file_content data;
    If upload successful put Data object to upload_file_to_tender, otherwise put Data to
    retry_upload_file_queue."""
    while not self.exit:
        self.services_not_available.wait()
        try:
            # peek (not get) so the item stays queued until it is routed
            tender_data = self.upload_to_doc_service_queue.peek()
            document_id = tender_data.file_content.get('meta', {}).get('id')
        except LoopExit:
            # queue is empty: yield to other greenlets and poll again
            gevent.sleep(0)
            continue
        # journal param key, e.g. item_name with trailing char stripped + "_ID"
        item_name_id = tender_data.item_name[:-1].upper() + "_ID"
        try:
            # NOTE(review): `file_name` is not defined in this block —
            # presumably a module-level constant; confirm.
            response = self.doc_service_client.upload(
                file_name, create_file(tender_data.file_content),
                'application/yaml',
                headers={'X-Client-Request-ID': document_id})
        except Exception as e:
            logger.warning(
                'Exception while uploading file to doc service {}. Message: {}. '
                'Put tender_data to retry queue '.format(
                    data_string(tender_data), e.message),
                extra=journal_context(
                    {
                        "MESSAGE_ID":
                        DATABRIDGE_UNSUCCESS_UPLOAD_TO_DOC_SERVICE
                    },
                    params={
                        "TENDER_ID": tender_data.tender_id,
                        item_name_id: tender_data.item_id,
                        "DOCUMENT_ID": document_id
                    }))
            logger.exception("Message: {}".format(e.message))
            # move to the retry queue first, then pop from the main queue
            self.retry_upload_to_doc_service_queue.put(tender_data)
            self.upload_to_doc_service_queue.get()
        else:
            if response.status_code == 200:
                self.move_to_tender_queue('', tender_data, response,
                                          document_id, item_name_id)
            else:
                logger.info(
                    'Not successful response from document service while uploading {} doc_id: {}. Response {}'
                    .format(data_string(tender_data), document_id,
                            response.status_code),
                    extra=journal_context(
                        {
                            "MESSAGE_ID":
                            DATABRIDGE_UNSUCCESS_UPLOAD_TO_DOC_SERVICE
                        },
                        params={
                            "TENDER_ID": tender_data.tender_id,
                            item_name_id: tender_data.item_id,
                            "DOCUMENT_ID": document_id
                        }))
                # non-200: hand off to the retry worker as well
                self.retry_upload_to_doc_service_queue.put(tender_data)
                self.upload_to_doc_service_queue.get()
        gevent.sleep(0)
def retry_process_tender_data(self, tender_data):
    """Log retrieval of *tender_data* from the retry queue, attempt the EDR
    request for it, then block until the rate-limit event clears."""
    ctx = journal_context({"MESSAGE_ID": DATABRIDGE_GET_TENDER_FROM_QUEUE},
                          tender_data.log_params())
    logger.info('Get {} from retry_edrpou_codes_queue'.format(tender_data),
                extra=ctx)
    self.retry_try_get_edr_data(tender_data)
    self.until_too_many_requests_event.wait()
def _restart_synchronization_workers(self):
    """Kill every running sync job, then spawn a fresh set."""
    logger.warning('Restarting synchronization',
                   extra=journal_context({"MESSAGE_ID": DATABRIDGE_RESTART}, {}))
    for job in self.jobs:
        job.kill(timeout=5)
    self._start_synchronization_workers()
def successfully_uploaded_to_tender(self, tender_data, is_retry):
    """Log the successful tender upload and drop the data from its queue."""
    extra = journal_context({"MESSAGE_ID": DATABRIDGE_SUCCESS_UPLOAD_TO_TENDER},
                            tender_data.log_params())
    logger.info('Successfully uploaded file to {} doc_id: {}'.format(
        tender_data, tender_data.doc_id()), extra=extra)
    self.remove_data(tender_data, is_retry)
def revive_job(self, name):
    """Respawn the worker greenlet registered under *name* and log the restart."""
    restart_message_id = 'DATABRIDGE_RESTART_{}'.format(name.lower())
    logger.warning("{} dead try restart".format(name),
                   extra=journal_context({"MESSAGE_ID": restart_message_id}, {}))
    self.immortal_jobs[name] = gevent.spawn(getattr(self, name))
    logger.info("{} is up".format(name))
def remove_invalid_item(self, tender, item, item_name, code):
    """Drop a tender whose item carries an invalid identifier code.

    Pops the tender id from the filtered queue and logs why it was skipped.

    Bug fix: the log format arguments were out of order — ``item_name``
    filled the 'bid' slot and the bid id the item-name slot.  Arguments now
    follow the template (tender id, bid id, item name, item id, code),
    matching the other item log lines in this module.
    """
    self.filtered_tender_ids_queue.get()
    logger.info(
        u'Tender {} bid {} {} {} identifier id {} is not valid.'.format(
            tender['id'], item_id(item), item_name, item['id'], code),
        extra=journal_context({"MESSAGE_ID": DATABRIDGE_TENDER_NOT_PROCESS},
                              params=journal_item_params(tender, item)))
def move_data_to_retry_or_leave(self, response, tender_data, is_retry):
    """On a failed doc-service upload, move first-try data onto the retry
    queue; data already on retry simply stays put for the next attempt."""
    logger.info(
        'Not successful response from document service while uploading {} doc_id: {}. Response {}'.format(
            tender_data, tender_data.doc_id(), response.status_code),
        extra=journal_context({"MESSAGE_ID": DATABRIDGE_UNSUCCESS_UPLOAD_TO_DOC_SERVICE},
                              tender_data.log_params()))
    if is_retry:
        return
    self.retry_upload_to_doc_service_queue.put(tender_data)
    self.upload_to_doc_service_queue.get()
def put_tenders_to_process(self, params, direction):
    """Feed tender ids from the sync stream into the processing queue.

    :param params: query params forwarded to ``get_tenders``.
    :param direction: 'forward' or 'backward' sync direction.

    Bug fix: the log line previously hard-coded 'Backward sync' even when
    ``direction`` was 'forward'; it now reflects the actual direction.
    """
    for tender in self.get_tenders(params=params, direction=direction):
        logger.info(
            '{} sync: Put tender {} to process...'.format(
                direction.capitalize(), tender['id']),
            extra=journal_context({"MESSAGE_ID": DATABRIDGE_TENDER_PROCESS},
                                  {"TENDER_ID": tender['id']}))
        self.filtered_tender_ids_queue.put(tender['id'])
def run(self):
    """Main bridge loop: start jobs, periodically log queue sizes, and
    revive dead workers.

    Fixes: ``gevent.killall`` was called with the jobs *dict* — iterating
    a dict yields its key strings, not greenlets — it now receives
    ``self.jobs.values()``.  Cleanup also runs on every exit path via
    ``finally`` (previously only on KeyboardInterrupt).
    """
    logger.info('Start EDR API Data Bridge',
                extra=journal_context({"MESSAGE_ID": DATABRIDGE_START}, {}))
    self._start_jobs()
    counter = 0
    try:
        while True:
            gevent.sleep(self.delay)
            self.check_services()
            # every 20 iterations dump a snapshot of all queue sizes
            if counter == 20:
                counter = 0
                logger.info(
                    'Current state: Filtered tenders {}; Edrpou codes queue {}; Retry edrpou codes queue {};'
                    'Upload to doc service {}; Retry upload to doc service {}; '
                    'Upload to tender {}; Retry upload to tender {}'.format(
                        self.filtered_tender_ids_queue.qsize(),
                        self.edrpou_codes_queue.qsize(),
                        self.jobs['edr_handler'].retry_edrpou_codes_queue.qsize()
                        if self.jobs['edr_handler'] else 0,
                        self.upload_to_doc_service_queue.qsize(),
                        self.jobs['upload_file'].retry_upload_to_doc_service_queue.qsize()
                        if self.jobs['upload_file'] else 0,
                        self.upload_to_tender_queue.qsize(),
                        self.jobs['upload_file'].retry_upload_to_tender_queue.qsize()
                        if self.jobs['upload_file'] else 0))
            counter += 1
            for name, job in self.jobs.items():
                logger.debug("{}.dead: {}".format(name, job.dead))
                if job.dead:
                    logger.warning('Restarting {} worker'.format(name),
                                   extra=journal_context(
                                       {"MESSAGE_ID": DATABRIDGE_RESTART_WORKER}))
                    self.jobs[name] = gevent.spawn(getattr(self, name))
    except KeyboardInterrupt:
        logger.info('Exiting...')
    except Exception as e:
        logger.error(e)
    finally:
        # kill the greenlets themselves, not the dict's key strings
        gevent.killall(self.jobs.values(), timeout=5)
def decrease_request_frequency(self, re, tender_data):
    """Back off after a 429 by growing the inter-request sleep interval."""
    message = "Accept 429 while uploading to {} doc_id: {}. Message {}".format(
        tender_data, tender_data.doc_id(), re.msg)
    extra = journal_context(
        {"MESSAGE_ID": DATABRIDGE_ITEM_STATUS_CHANGED_WHILE_PROCESSING},
        tender_data.log_params())
    logger.info(message, extra=extra)
    self.sleep_change_value.increment()
def handle_status_response(self, response, tender_id):
    """Route an unsuccessful EDR response: back off on 429, warn on
    payment-required, warn on anything else."""
    ctx = journal_context(params={"TENDER_ID": tender_id})
    if response.status_code == 429:
        seconds_to_wait = response.headers.get('Retry-After', self.delay)
        logger.info(
            'Too many requests to EDR API. Msg: {}, wait {} seconds.'.format(
                response.text, seconds_to_wait),
            extra=ctx)
        self.wait_until_too_many_requests(seconds_to_wait)
    elif is_payment_required(response):
        logger.warning(
            'Payment required for requesting info to EDR. Message: {err}'.format(
                err=response.text),
            extra=ctx)
    else:
        logger.warning(
            'Error appeared while requesting to EDR. Description: {err}'.format(
                err=response.text),
            extra=ctx)
def removing_data(self, re, tender_data, is_retry):
    """Log the HTTP error received during upload and discard the data."""
    extra = journal_context(
        {"MESSAGE_ID": DATABRIDGE_ITEM_STATUS_CHANGED_WHILE_PROCESSING},
        tender_data.log_params())
    logger.warning(
        "Accept {} while uploading to {} doc_id: {}. Message {}".format(
            re.status_int, tender_data, tender_data.doc_id(), re.msg),
        extra=extra)
    self.remove_data(tender_data, is_retry)
def _run(self):
    """Keep the ``prepare_data`` filter job alive; kill it on exit.

    The job is now killed on every exit path (previously only when an
    exception escaped the loop, so a clean ``self.exit`` left it running).
    """
    logger.info('Start Filter Tenders',
                extra=journal_context({"MESSAGE_ID": DATABRIDGE_START_FILTER_TENDER}, {}))
    self.job = spawn(self.prepare_data)
    try:
        while not self.exit:
            gevent.sleep(self.delay)
            if self.job.dead:
                logger.warning(
                    "Filter tender job die. Try to restart.",
                    extra=journal_context(
                        {"MESSAGE_ID": DATABRIDGE_RESTART_FILTER_TENDER}, {}))
                self.job = spawn(self.prepare_data)
                logger.info("filter tenders job restarted.")
    except Exception as e:
        logger.error(e)
    finally:
        self.job.kill(timeout=5)
def move_to_tender_queue(self, tender_data, response, is_retry):
    """Attach the doc-service response to the data, queue it for the tender
    upload stage, and pop it from whichever source queue it came from."""
    data = tender_data
    data.file_content = dict(response.json(),
                             **{'meta': {'id': tender_data.doc_id()}})
    self.upload_to_tender_queue.put(data)
    if is_retry:
        self.retry_upload_to_doc_service_queue.get()
    else:
        self.upload_to_doc_service_queue.get()
    logger.info(
        'Successfully uploaded file to doc service {} doc_id: {}'.format(
            tender_data, tender_data.doc_id()),
        extra=journal_context({"MESSAGE_ID": DATABRIDGE_SUCCESS_UPLOAD_TO_DOC_SERVICE},
                              tender_data.log_params()))
def handle_error(self, re, tender_data, is_retry):
    """Log an upload failure, relax the request rate, and requeue
    first-try data onto the retry queue."""
    logger.info(
        'Error while uploading file to {} doc_id: {}. Status: {}. Message: {}'.format(
            tender_data, tender_data.doc_id(),
            getattr(re, "status_int", None), re.message),
        extra=journal_context({"MESSAGE_ID": DATABRIDGE_UNSUCCESS_UPLOAD_TO_TENDER},
                              tender_data.log_params()))
    self.sleep_change_value.decrement()
    if is_retry:
        return
    self.retry_upload_to_tender_queue.put(tender_data)
    self.upload_to_tender_queue.get()
def check_doc_service(self):
    """Probe the document service endpoint; return True when reachable,
    re-raise the RequestError otherwise."""
    url = "{host}:{port}/".format(host=self.doc_service_host,
                                  port=self.doc_service_port)
    try:
        request(url)
    except RequestError as err:
        logger.info('DocService connection error, message {}'.format(err),
                    extra=journal_context(
                        {"MESSAGE_ID": DATABRIDGE_DOC_SERVICE_CONN_ERROR}, {}))
        raise err
    return True
def check_openprocurement_api(self):
    """Makes request to the TendersClient, returns True if it's up,
    raises RequestError otherwise"""
    try:
        self.client.head('/api/{}/spore'.format(self.api_version))
    except (RequestError, ResourceError) as err:
        # NOTE(review): reuses the doc-service MESSAGE_ID constant for the
        # tenders server — confirm this is intentional
        logger.info('TendersServer connection error, message {}'.format(err),
                    extra=journal_context(
                        {"MESSAGE_ID": DATABRIDGE_DOC_SERVICE_CONN_ERROR}, {}))
        raise err
    return True
def remove_bad_data(self, tender_data, e, is_retry):
    """Handle an upload exception: first-try data moves to the retry queue;
    data that already failed a retry is dropped and the exception re-raised."""
    logger.exception(
        'Exception while uploading file to doc service {} doc_id: {}. Message: {}. {}'.format(
            tender_data, tender_data.doc_id(), e,
            "Removed tender data" if is_retry else ""),
        extra=journal_context({"MESSAGE_ID": DATABRIDGE_UNSUCCESS_UPLOAD_TO_DOC_SERVICE},
                              tender_data.log_params()))
    if not is_retry:
        self.retry_upload_to_doc_service_queue.put(tender_data)
        self.upload_to_doc_service_queue.get()
    else:
        self.retry_upload_to_doc_service_queue.get()
        self.process_tracker.update_items_and_tender(
            tender_data.tender_id, tender_data.item_id, tender_data.doc_id())
        raise e
def _run(self):
    """Generic worker loop: wait for services, start the jobs, and revive
    dead ones each tick.

    Jobs are now killed on every exit path (previously only when an
    exception escaped the loop, leaking greenlets on a clean shutdown).
    """
    self.services_not_available.wait()
    logger.info('Start {} worker'.format(type(self).__name__),
                extra=journal_context({"MESSAGE_ID": DATABRIDGE_START_UPLOAD}, {}))
    self.immortal_jobs = self._start_jobs()
    try:
        while not self.exit:
            gevent.sleep(self.delay)
            self.check_and_revive_jobs()
    except Exception as e:
        logger.error(e)
    finally:
        # reap worker greenlets on both clean and exceptional shutdown
        gevent.killall(self.immortal_jobs.values(), timeout=5)
def check_proxy(self):
    """Check whether proxy is up and has the same sandbox mode (to prevent
    launching wrong pair of bot-proxy); return True when healthy."""
    try:
        self.proxy_client.health(self.sandbox_mode)
    except RequestException as err:
        logger.info('Proxy server connection error, message {} {}'.format(
            err, self.sandbox_mode),
            extra=journal_context(
                {"MESSAGE_ID": DATABRIDGE_PROXY_SERVER_CONN_ERROR}, {}))
        raise err
    return True
def retry_try_get_edr_data(self, tender_data):
    """Single retry attempt to fetch EDR data for *tender_data*.

    On failure the head item is rotated to the back of the retry queue so
    other entries get a chance; on HTTP 200 the data moves down the
    pipeline via ``move_data_existing_edr``.
    """
    try:
        response = self.get_edr_data_request(tender_data.param(),
                                             tender_data.code,
                                             tender_data.doc_id())
        tender_data.add_unique_req_id(response)
    except RetryException as re:
        # re.args[1] carries the failed HTTP response object
        self.handle_status_response(re.args[1], tender_data.tender_id)
        res_json = get_res_json(re.args[1])
        if is_no_document_in_edr(re.args[1], res_json):
            # EDR has no record for this code: treat as a definitive answer
            self.move_data_nonexistent_edr(res_json, tender_data, True)
        else:
            logger.info(
                'Put {} in back of retry_edrpou_codes_queue. Response {}'.
                format(tender_data, res_json),
                extra=journal_context(params=tender_data.log_params()))
            # rotate: pop the head and re-append it so the queue keeps moving
            self.retry_edrpou_codes_queue.put(
                self.retry_edrpou_codes_queue.get())
            gevent.sleep()
    except Exception as e:
        logger.info(
            'Put {} in back of retry_edrpou_codes_queue. Error: {}'.format(
                tender_data, e.message),
            extra=journal_context(params=tender_data.log_params()))
        # same rotation on any unexpected error
        self.retry_edrpou_codes_queue.put(
            self.retry_edrpou_codes_queue.get())
        gevent.sleep()
    else:
        if response.status_code == 429:
            # honour the server's Retry-After back-off before the next call
            seconds_to_wait = response.headers.get('Retry-After',
                                                   self.delay)
            logger.info(
                'retry_get_edr_id: Too many requests to EDR API. Msg: {}, wait {} seconds.'
                .format(response.text, seconds_to_wait),
                extra=journal_context(params=tender_data.log_params()))
            self.wait_until_too_many_requests(seconds_to_wait)
        elif response.status_code == 200:
            self.move_data_existing_edr(response, tender_data, True)
def get_data_and_move_to_upload_or_retry(self, tender_data):
    """Verify one edrpou code against the proxy and route the result.

    Routing: code absent in EDR -> nonexistent path; HTTP 200 -> existing
    path; anything else -> status handling plus the retry queue.  The item
    is popped from edrpou_codes_queue only after it has been routed.
    """
    logger.info('Get {} from edrpou_codes_queue'.format(tender_data),
                extra=journal_context(
                    {"MESSAGE_ID": DATABRIDGE_GET_TENDER_FROM_QUEUE},
                    tender_data.log_params()))
    # block while the EDR API is rate-limiting us
    self.until_too_many_requests_event.wait()
    response = self.proxy_client.verify(
        tender_data.param(), tender_data.code,
        headers={'X-Client-Request-ID': tender_data.doc_id()})
    tender_data.add_unique_req_id(response)
    res_json = get_res_json(response)
    if is_no_document_in_edr(response, res_json):
        self.move_data_nonexistent_edr(response.json(), tender_data, False)
    elif response.status_code == 200:
        self.move_data_existing_edr(response, tender_data, False)
    else:
        self.handle_status_response(response, tender_data.tender_id)
        self.retry_edrpou_codes_queue.put(tender_data)
        logger.info(
            'Put {} to retry_edrpou_codes_queue'.format(tender_data),
            extra=journal_context(params=tender_data.log_params()))
    # pop only now so a crash above leaves the item in the source queue
    self.edrpou_codes_queue.get()
def process_items_and_move(self, response, tender_id):
    # Parse a successfully fetched tender and hand its awards or
    # qualifications off for processing, then pop the id from the
    # filtered queue.
    self.sleep_change_value.decrement()
    if response.status_int == 200:
        tender = munchify(loads(response.body_string()))['data']
        logger.info(
            'Get tender {} from filtered_tender_ids_queue'.format(tender_id),
            extra=journal_context(
                {"MESSAGE_ID": DATABRIDGE_GET_TENDER_FROM_QUEUE},
                params={"TENDER_ID": tender['id']}))
        # a tender is processed through either awards or qualifications
        if 'awards' in tender:
            self.process_items(response, tender, "award")
        elif 'qualifications' in tender:
            self.process_items(response, tender, "qualification")
        # NOTE(review): the pop appears to run only on the 200 path, leaving
        # non-200 ids queued for another attempt — confirm intended.
        self.filtered_tender_ids_queue.get()
def handle_status_response(self, response, tender_id):
    """Route an unsuccessful EDR response: back off on 429, warn on the
    payment-required 403, warn on anything else."""
    ctx = journal_context(params={"TENDER_ID": tender_id})
    if response.status_code == 429:
        seconds_to_wait = response.headers.get('Retry-After', self.delay)
        logger.info(
            'Too many requests to EDR API. Msg: {}, wait {} seconds.'.format(
                response.text, seconds_to_wait),
            extra=ctx)
        self.wait_until_too_many_requests(seconds_to_wait)
        return
    # the body is inspected only for a JSON 403 (short-circuit preserved)
    payment_required = (
        response.status_code == 403
        and response.headers.get('content-type', '') == 'application/json'
        and response.json().get('errors')[0].get('description') ==
        [{'message': 'Payment required.', 'code': 5}])
    if payment_required:
        logger.warning(
            'Payment required for requesting info to EDR. Error description: {err}'.format(
                err=response.text),
            extra=ctx)
    else:
        logger.warning(
            'Error appeared while requesting to EDR. Description: {err}'.format(
                err=response.text),
            extra=ctx)