def producer_call(self, product_info):
    '''
    This method is to send the product info to the kafka queue
    '''
    if product_info:
        logger.info(
            'product info {0} is being sent to the kafka queue from the producer'
            .format(product_info['asin']))
        ack = self.producer.send('product_to_scrape',
                                 {'product_info': product_info})
        logger.debug(
            'The kafka acknowledgment after sending the message to the queue {0}'
            .format(ack))
        try:
            record_metadata = ack.get(timeout=10)
            logger.info('Record metadata for the kafka queue {0}'.format(
                record_metadata))
        except KafkaError as error:
            # Decide what to do if the produce request failed...
            logger.error('kafka connection error {0}'.format(error))
    else:
        logger.debug(
            'The product info is None so it could not be sent to the kafka queue')

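# producer_call above assumes self.producer is a kafka-python KafkaProducer
# configured with a JSON value serializer, since it sends a plain dict.
# A minimal sketch of that assumed setup; the bootstrap server address is a
# placeholder, not taken from this codebase:
import json

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda value: json.dumps(value).encode('utf-8'))
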
def status_count(self, tenant_id):
    '''
    This method is to retrieve the health status count of the products for each tenant
    '''
    session = Session()
    logger.debug(
        'Product health status count is being retrieved for the tenant {0}'
        .format(tenant_id))
    total_count = session.query(func.count('*')).filter(
        Product.tenant_id == tenant_id).filter(
            Product.archive != True).all()
    healthy_count = session.query(func.count('*')).filter(
        Product.tenant_id == tenant_id).filter(
            Product.health_status == 'Healthy').filter(
                Product.archive != True).all()
    unhealthy_count = session.query(func.count('*')).filter(
        Product.tenant_id == tenant_id).filter(
            Product.health_status == 'Unhealthy').filter(
                Product.archive != True).all()
    session.commit()
    product_count = {
        'total': total_count[0][0],
        'healthy': healthy_count[0][0],
        'unhealthy': unhealthy_count[0][0]
    }
    logger.debug(
        'product health status count for the tenant {0} is {1}'.format(
            tenant_id, json.dumps(product_count)))
    return product_count

def kafkaproducer_call(self, product_info):
    '''
    This method is to pass the product info to the kafka queue
    '''
    logger.debug('The product {0} will be sent to the kafka producer'.format(
        product_info['asin']))
    self.kafka_producer.producer_call(product_info)

def row2dict(self, row):
    '''
    This method is to convert a product instance to a dict
    '''
    product_dict = {}
    logger.debug(
        'The product {0} instance is being converted to a dict'.format(
            row.asin))
    for column in row.__table__.columns:
        product_dict[column.name] = str(getattr(row, column.name))
    return product_dict

def post(self):
    '''
    This controller method is to update the alert for the given criteria and tenant
    '''
    alert_payload = request.get_json(force=True)
    user_id = get_jwt_identity()
    tenant_id = get_jwt_claims()['tenant_id']
    logger.debug(
        'Post method to update the alert with payload {0} for the user {1} and tenant {2} is invoked'
        .format(json.dumps(alert_payload), user_id, tenant_id))
    self.alert_service.update(alert_payload, user_id, tenant_id)
    return results(status="success",
                   message="Alert Updated Successfully!",
                   data='',
                   format_json=True)

def post(self):
    '''
    This controller method is to retrieve the products for any given search criteria
    As of now, it only retrieves the products for each tenant
    '''
    tenant_id = get_jwt_claims()['tenant_id']
    logger.debug(
        'Post method is invoked to retrieve the products for the tenant {0}'
        .format(tenant_id))
    product = self.product_service.search({}, tenant_id)
    return results(status="success",
                   message="Fetched Products",
                   data=product,
                   format_json=True)

def delete(self):
    '''
    This controller method is to delete an existing product
    It needs jwt authentication and retrieves the tenant id from it
    It invokes the delete method in the product service module
    '''
    product_id = request.args.get("product_id")
    tenant_id = get_jwt_claims()['tenant_id']
    logger.debug(
        'Delete method is invoked to delete a product with product_id {0} and tenant_id {1}'
        .format(product_id, tenant_id))
    self.product_service.delete(int(product_id), int(tenant_id))
    return results(status="success",
                   message="Deleted Product",
                   data='',
                   format_json=True)

def get(self):
    '''
    This controller method is to retrieve an existing product
    It needs jwt authentication and retrieves the tenant id from it
    It invokes the get method in the product service module
    '''
    product_id = request.args.get("product_id")
    tenant_id = get_jwt_claims()['tenant_id']
    logger.debug(
        'Get method is invoked to retrieve a product with product_id {0} and tenant_id {1}'
        .format(product_id, tenant_id))
    product = self.product_service.get(int(product_id), tenant_id)
    return results(status="success",
                   message="Fetched Product",
                   data=product,
                   format_json=True)

def get(self):
    '''
    This controller method is to retrieve the product health status count
    This requires jwt authentication
    '''
    tenant_id = get_jwt_claims()['tenant_id']
    logger.debug(
        'Get method is invoked to get the products health status for the tenant {0}'
        .format(tenant_id))
    feed = self.product_service.status_count(tenant_id)
    return results(status="success",
                   message="Fetched the Status Count",
                   data=feed,
                   format_json=True)

def update(self, alert_payload, user_id, tenant_id):
    '''
    This method is to update the alert with the given info
    '''
    session = Session()
    logger.debug('Updating the alerts')
    alert_payload['update_dt'] = datetime.utcnow()
    alert_payload['updated_by'] = int(user_id)
    alert = Alert(**alert_payload)
    session.query(Alert).filter(Alert.id == alert.id).filter(
        Alert.tenant_id == alert.tenant_id).update(alert_payload)
    session.commit()
    return 'Alert Status Updated'

def post(self):
    '''
    This controller method is to update the product info for a given user
    '''
    product_payload = request.get_json(force=True)
    user_id = get_jwt_identity()
    tenant_id = get_jwt_claims()['tenant_id']
    logger.debug(
        'Post method is invoked to update the product {0} for the tenant {1}'
        .format(json.dumps(product_payload), json.dumps(tenant_id)))
    product = self.product_service.update(product_payload, user_id, tenant_id)
    return results(status="success",
                   message="Updated Product",
                   data=product,
                   format_json=True)

def post(self):
    '''
    This controller method is to invoke scraping for a product
    '''
    product_criteria = request.get_json(force=True)
    user_info = {'tenant_id': get_jwt_claims()['tenant_id']}
    logger.debug(
        'Post method is invoked to scrape the product {0} with the user info {1}'
        .format(json.dumps(product_criteria), json.dumps(user_info)))
    criteria = {**product_criteria, **user_info}
    self.product_service.scrape_limited(criteria)
    return results(status="success",
                   message="Scraping Initiated",
                   data='',
                   format_json=True)

def post(self):
    '''
    This controller method is to add a new product
    It needs jwt authentication and retrieves the user id and tenant id from it
    It invokes the add method in the product service module
    '''
    products = request.get_json(force=True)
    user_info = {
        'user_id': get_jwt_identity(),
        'tenant_id': get_jwt_claims()['tenant_id']
    }
    logger.debug(
        'Post method is invoked to add a new product {0} by user {1}'.format(
            json.dumps(products), json.dumps(user_info)))
    products = {**products, **user_info}
    products_info = self.product_service.add(products)
    return results(status="success",
                   message="Added Products",
                   data=products_info,
                   format_json=True)

def daily_digest_alerts(self, product_id, tenant_id):
    '''
    This method is to retrieve the alerts from the past 1 day for a given product and tenant
    '''
    session = Session()
    alert_messages = []
    logger.info(
        'The alerts are being retrieved for the product_id {0} and tenant_id {1}'
        .format(product_id, tenant_id))
    alerts = session.query(Alert).filter(
        Alert.product_id == product_id).filter(
            Alert.tenant_id == tenant_id).filter(
                Alert.create_dt >= datetime.utcnow() -
                timedelta(days=1)).all()
    if alerts:
        for alert in alerts:
            logger.debug('The alert message is {0}'.format(alert.message))
            diff_attributes = []
            if alert.message:
                if alert.meta_data:
                    attributes = json.loads(alert.meta_data)
                    for attribute in attributes:
                        updated_attribute = self.aletrnate_names_create(
                            attribute)
                        diff_attributes.append(updated_attribute)
                alert_info = alert.message + ' : ' + ', '.join(diff_attributes)
                alert_messages.append(alert_info)
    logger.info('alert_messages: {0}'.format(alert_messages))
    if not alert_messages:
        return None
    return alert_messages

def start_scheduler():
    '''
    This method adds the jobs to the scheduler
    It only adds the jobs when the apscheduler_jobs table does not exist yet,
    so the table needs to be dropped in order to register a new job
    Currently schedules two jobs
        - scraping every 23 hours
        - daily digest every day at 9am
    '''
    logger.debug('In scheduler start method')
    if 'apscheduler_jobs' not in engine.table_names():
        scheduler.add_job(start_scraping, 'interval', hours=23)
        scheduler.add_job(start_daily_digest, trigger='cron', hour='09')
    try:
        scheduler.start()
    except:
        # shut the scheduler down if it fails to start
        scheduler.shutdown()

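# start_scheduler assumes a module-level scheduler backed by a SQLAlchemy job
# store, which is what creates the apscheduler_jobs table checked above. A
# minimal sketch of that assumed wiring; the database URL is a placeholder:
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.schedulers.background import BackgroundScheduler
from sqlalchemy import create_engine

engine = create_engine('sqlite:///jobs.db')
scheduler = BackgroundScheduler(
    jobstores={'default': SQLAlchemyJobStore(engine=engine)})
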
def scrape_all(self):
    '''
    This method is to send all the products for scraping
    All the products will be retrieved and sent to the kafka queue
    '''
    session = Session()
    products = session.query(Product).filter(Product.archive != True).all()
    logger.debug(
        'The products for scraping are retrieved and being sent to the kafka queue'
    )
    for product in products:
        product.product_info_status = 'Verification In Progress'
    session.commit()
    for product in products:
        logger.debug(
            'The product {0} is being sent to the kafka queue for scraping'.
            format(product.asin))
        self.kafkaproducer_call(self.row2dict(product))

def add(self, products):
    '''
    This method is to add new product(s) in the db
    It checks if the product exists or was archived earlier. If archived, it sets the archive flag to false
    A few default attributes will be added
    Once the products are added, the product info will be sent to the kafka queue for further scraping
    '''
    session = Session()
    product_list = []
    ideal_state = {}
    added_products = []
    existing_products = []
    invalid_products = []
    default_input = {
        'health_status': 'Healthy',
        'ideal_state': json.dumps(ideal_state),
        'product_info_status': 'Pending Update',
        'create_dt': datetime.utcnow(),
        'created_by': products['user_id'],
        'title': 'Hold Tight! We are Retrieving Product Info',
        'archive': False,
        'tenant_id': int(products['tenant_id'])
    }
    for product_info in products['products']:
        if len(product_info['asin'].strip()) == 10:
            default_input['uid'] = str(uuid.uuid1())
            product_info['asin'] = product_info['asin'].strip()
            product_input = {**product_info, **default_input}
            product = Product(**product_input)
            instance = session.query(Product).filter(
                Product.asin == product.asin).filter(
                    Product.tenant_id == int(
                        products['tenant_id'])).one_or_none()
            archived_product = session.query(Product).filter(
                Product.asin == product.asin).filter(
                    Product.tenant_id == int(products['tenant_id'])).filter(
                        Product.archive == True).one_or_none()
            if instance is None and archived_product is None:
                logger.debug(
                    'The product with asin {0} is being added'.format(
                        product_info['asin']))
                product_list.append(product)
                added_products.append({'asin': product_info['asin']})
            elif archived_product is not None:
                logger.debug(
                    'The product {0} is being retrieved from the archived state'
                    .format(product_info['asin']))
                archived_product.archive = False
                session.add(archived_product)
            else:
                logger.debug('The product {0} already exists'.format(
                    product_info['asin']))
                existing_products.append({
                    'asin': product_info['asin'],
                    'message': 'This asin is already present so ignored'
                })
            session.add_all(product_list)
            session.commit()
        else:
            logger.info('The given asin {0} is invalid'.format(
                product_info['asin']))
            invalid_products.append({
                'asin': product_info['asin'],
                'message': 'Invalid asin length'
            })
    products_new = session.query(Product).filter(
        Product.product_info_status == 'Pending Update').filter(
            Product.archive != True).filter(
                Product.tenant_id == int(products['tenant_id'])).all()
    for instance in products_new:
        logger.info(
            'The product {0} is added and being sent to the kafka queue for scraping'
            .format(instance.asin))
        self.kafkaproducer_call(self.row2dict(instance))
        # wait for 1-3 seconds between products
        sleep(randint(1, 3))
    status = {
        'success': added_products,
        'warning': existing_products,
        'error': invalid_products
    }
    return status
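
# For illustration, the payload shape that add() expects and the status dict
# it returns, inferred from the method above; the asin values are made up:
example_products = {
    'products': [{'asin': 'B000000000'}, {'asin': 'BAD'}],
    'user_id': 1,       # added by the controller from the jwt identity
    'tenant_id': '1'    # added by the controller from the jwt claims
}
# product_service.add(example_products) would return something like:
# {
#     'success': [{'asin': 'B000000000'}],
#     'warning': [],   # asins that already exist for the tenant
#     'error': [{'asin': 'BAD', 'message': 'Invalid asin length'}]
# }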