def queue_start(request_id):
    try:
        container = UnpackHelpers.start_docker_container(
            container_name=UnpackHelpers.DOCKER_CONTAINER_NAMES['QUEUE_MANAGER'],
            request_id=request_id,
        )

        connection = pika.BlockingConnection(
            pika.ConnectionParameters(os.environ['UNPACK_HOST']))
        channel = connection.channel()

        broadcaster_queue_name = UnpackHelpers.get_queue_name(
            queue_type='broadcast', request_id=request_id)
        channel.queue_declare(queue=broadcaster_queue_name)
        channel.basic_publish(
            exchange='',
            routing_key=broadcaster_queue_name,
            body=json.dumps({
                'event_name': UnpackHelpers.EVENT_NAME['REQUEST:QUEUED'],
                'data': {},
            }, default=str),
            properties=pika.BasicProperties(
                delivery_mode=2,  # make message persistent
            ))
        connection.close()

        return jsonify({'container': container.id})
    except Exception:
        logger.exception(f'Error starting queue: {request_id}')
        abort(500)
def main():
    UnpackHelpers.get_sql_pool()
    is_debug = os.environ['UNPACK_DEV_ENV'] == 'TRUE'
    socketio.run(
        app,
        host='0.0.0.0',
        port=5000,
        debug=is_debug,
        use_reloader=False,
    )
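# The API handlers in this module (queue_create, queue_start, queue_stop) and
# main() rely on module-level objects that are not shown in this excerpt. A
# minimal sketch of the assumed bootstrap follows; the import path for
# UnpackHelpers and the SocketIO configuration are assumptions, not the
# project's actual setup.
import os
import json
import logging

import pika
from flask import Flask, request, jsonify, abort
from flask_socketio import SocketIO

from helpers import UnpackHelpers  # assumed import path for the shared helpers

logger = logging.getLogger(__name__)
app = Flask(__name__)
socketio = SocketIO(app)  # async mode / message queue options omitted; assumption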
def get_node_and_links_from_db(cls, node_uuid, min_update_date=None):
    raw_node_details = UnpackHelpers.fetch_node(
        node_uuid, min_update_date=min_update_date)

    raw_links = []
    if raw_node_details is not None:
        raw_links = UnpackHelpers.fetch_links_by_source(node_uuid)

    return raw_node_details, raw_links
def __init__(self, ch, method, properties, body):
    request_id = UnpackHelpers.get_request_id_from_name(method.routing_key)
    queue_event_keys = UnpackHelpers.get_queue_event_keys(request_id)

    body = json.loads(body)
    event_name = body['event_name']

    socketio.emit(
        queue_event_keys[event_name],
        body['data'],
        namespace=f'/{request_id}',
    )
def queue_stop(request_id):
    try:
        connection = pika.BlockingConnection(
            pika.ConnectionParameters(os.environ['UNPACK_HOST']))
        channel = connection.channel()

        fetcher_queue_name = UnpackHelpers.get_queue_name(
            queue_type='fetch', request_id=request_id)
        broadcaster_queue_name = UnpackHelpers.get_queue_name(
            queue_type='broadcast', request_id=request_id)

        channel.queue_delete(queue=fetcher_queue_name)
        channel.queue_delete(queue=broadcaster_queue_name)
        connection.close()

        return jsonify({'success': True})
    except Exception:
        logger.exception(f'Error stopping queue: {request_id}')
        abort(500)
def process_links(self, raw_links, type_cls):
    has_reached_max_depth = self.state['level'] + 1 > self.rules['max_link_depth']
    if has_reached_max_depth:
        if type_cls.TYPE != 'twitter':
            return
        elif self.rules['twitter_use_max_link_depth']:
            return

    for raw_link in raw_links:
        link_type = raw_link.get('link_type')
        if has_reached_max_depth and link_type in ['media', 'link']:
            continue

        source_node_url = self.node_url
        source_node_uuid = self.node_uuid
        target_node_url = raw_link.get('target_node_url')
        target_node_uuid = raw_link.get('target_node_uuid')
        nofollow_link = raw_link.get('nofollow', False)

        if not target_node_uuid and target_node_url:
            target_node_uuid = UnpackHelpers.fetch_node_uuid(target_node_url)

        if not target_node_url and target_node_uuid:
            target_node_url = UnpackHelpers.fetch_node_url(target_node_uuid)

        if not target_node_uuid or not target_node_url:
            continue

        if not nofollow_link:
            self.store_link(
                source_node_url=source_node_url,
                source_node_uuid=source_node_uuid,
                target_node_url=target_node_url,
                target_node_uuid=target_node_uuid,
                raw_link=raw_link,
            )

        self.queue_next_node(
            source_node_url=source_node_url,
            source_node_uuid=source_node_uuid,
            target_node_url=target_node_url,
            target_node_uuid=target_node_uuid,
            raw_link=raw_link,
        )
def publish_child(self, **kwargs):
    queue_name = UnpackHelpers.get_queue_name('fetch', self.request_id)
    self.channel.basic_publish(
        exchange='',
        routing_key=queue_name,
        body=json.dumps(kwargs),
        properties=pika.BasicProperties(
            delivery_mode=2,  # make message persistent
        ))
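# queue_next_node is called from process_links but is not shown in this
# excerpt. It presumably packages the target node into a new fetch message and
# hands it to publish_child above; the exact payload below is an assumption
# based on the fields the worker's __init__ reads from the message body.
def queue_next_node(self, source_node_url, source_node_uuid,
                    target_node_url, target_node_uuid, raw_link):
    self.publish_child(
        node_url=target_node_url,
        node_uuid=target_node_uuid,
        source_node_uuid=source_node_uuid,
        origin_source_node_url=self.origin_source_node_url,
        nofollow_link=raw_link.get('nofollow', False),  # assumed field mapping
        state={**self.state, 'level': self.state['level'] + 1},
        rules=self.rules,
    )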
def store_link(self, source_node_url, source_node_uuid,
               target_node_url, target_node_uuid, raw_link):
    UnpackHelpers.store_link(
        source_node_uuid,
        target_node_uuid=target_node_uuid,
        link_type=raw_link.get('link_type'),
        weight=raw_link.get('weight'),
    )

    self.publish_broadcast(
        event_name=UnpackHelpers.EVENT_NAME['LINK:COMPLETED'],
        source_node_url=source_node_url,
        source_node_uuid=source_node_uuid,
        target_node_url=target_node_url,
        target_node_uuid=target_node_uuid,
        level=self.state['level'] + 1,
        link_type=raw_link.get('link_type'),
        weight=raw_link.get('weight'),
    )
def walk_node_tree(self):
    if self.node_url is None:
        return

    type_cls, node_url_match = Fetcher.get_node_type_class_by_url(self.node_url)
    node_details, raw_links = type_cls.fetch(
        self.node_uuid,
        self.node_url,
        url_matches=node_url_match,
        rules=self.rules,
    )
    node_details['was_no_follow'] = self.nofollow_link
    has_links = len(raw_links) > 0

    if not node_details.get('is_from_db', True):
        UnpackHelpers.store_node(
            self.node_uuid,
            node_type=node_details.get('node_type'),
            data=node_details.get('data'),
            is_error=node_details.get('is_error'),
        )

    self.publish_broadcast(
        event_name=UnpackHelpers.EVENT_NAME['NODE:COMPLETED'],
        node_uuid=self.node_uuid,
        node_url=self.node_url,
        node_details=node_details,
    )

    if self.nofollow_link:
        return

    if not has_links:
        UnpackHelpers.store_link(self.node_uuid)
        return

    self.process_links(
        raw_links=raw_links,
        type_cls=type_cls,
    )
def publish_broadcast(self, event_name, **kwargs):
    queue_name = UnpackHelpers.get_queue_name('broadcast', self.request_id)
    self.channel.basic_publish(
        exchange='',
        routing_key=queue_name,
        body=json.dumps({
            'event_name': event_name,
            'data': kwargs,
        }, default=str),
        properties=pika.BasicProperties(
            delivery_mode=2,  # make message persistent
        ))
def __init__(self, ch, method, properties, body):
    self.channel = ch
    self.request_id = UnpackHelpers.get_request_id_from_name(method.routing_key)

    body = json.loads(body)
    if body.get('node_uuid') is None:
        body['node_uuid'] = UnpackHelpers.fetch_node_uuid(body.get('node_url'))
    if body.get('node_url') is None:
        body['node_url'] = UnpackHelpers.fetch_node_url(body.get('node_uuid'))

    state = body.get('state', {})
    self.state = {**self.DEFAULT_STATE, **state}

    rules = body.get('rules', {})
    rules = {} if rules is None else rules
    self.rules = {**self.DEFAULT_RULES, **rules}

    self.node_url = body['node_url']
    self.node_uuid = body['node_uuid']
    self.node_url_hash = UnpackHelpers.get_url_hash(body['node_url'])
    self.source_node_uuid = body.get('source_node_uuid')
    self.origin_source_node_url = body.get('origin_source_node_url', self.node_url)
    self.origin_source_uuid = UnpackHelpers.fetch_node_uuid(self.origin_source_node_url)
    self.is_origin_node = self.source_node_uuid is None
    self.nofollow_link = body.get('nofollow_link', False)

    self.publish_broadcast(
        event_name=UnpackHelpers.EVENT_NAME['NODE:IN_PROGRESS'],
        node_uuid=self.node_uuid,
        node_url=self.node_url,
    )

    self.walk_node_tree()
    ch.basic_ack(delivery_tag=method.delivery_tag)
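# DEFAULT_STATE and DEFAULT_RULES are merged into the message body above but are
# not included in this excerpt. Based on the keys the worker reads elsewhere
# (state['level'], rules['max_link_depth'], rules['twitter_use_max_link_depth']),
# they presumably look roughly like this; the concrete default values are
# assumptions, not the project's real defaults.
DEFAULT_STATE = {
    'level': 0,  # depth of the current node in the walk; assumed starting value
}
DEFAULT_RULES = {
    'max_link_depth': 2,                 # assumed default depth limit
    'twitter_use_max_link_depth': True,  # assumed default
}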
def queue_create():
    try:
        node_url = request.json.get('url')
        rules = request.json.get('rules')
        node_uuid = UnpackHelpers.fetch_node_uuid(node_url)
        request_id = UnpackHelpers.get_request_id(node_uuid=node_uuid)
        event_keys = UnpackHelpers.get_queue_event_keys(request_id)

        connection = pika.BlockingConnection(
            pika.ConnectionParameters(os.environ['UNPACK_HOST']))
        channel = connection.channel()

        fetcher_queue_name = UnpackHelpers.get_queue_name(
            queue_type='fetch', request_id=request_id)
        channel.queue_declare(queue=fetcher_queue_name)
        channel.basic_publish(
            exchange='',
            routing_key=fetcher_queue_name,
            body=json.dumps({
                'node_url': node_url,
                'rules': rules,
            }),
            properties=pika.BasicProperties(
                delivery_mode=2,  # make message persistent
            ))
        connection.close()

        return jsonify({
            'node_url': node_url,
            'node_uuid': node_uuid,
            'request_id': request_id,
            'event_keys': event_keys,
        })
    except Exception:
        logger.exception('Error GET queue_create')
        abort(500)
def main(request_id):
    queue_name = UnpackHelpers.get_queue_name(queue_type='broadcast', request_id=request_id)

    connection_params = pika.ConnectionParameters(
        os.environ['UNPACK_HOST'],
        heartbeat=4 * 60,
        blocked_connection_timeout=2 * 60,
    )
    connection = pika.BlockingConnection(connection_params)
    channel = connection.channel()
    channel.basic_consume(
        queue=queue_name,
        on_message_callback=handle_message_callback,
        auto_ack=True,
    )

    try:
        logger.info(' [*] Waiting for Broadcaster messages. To exit press CTRL+C or delete the queue')
        channel.start_consuming()
    except KeyboardInterrupt:
        channel.stop_consuming()
        connection.close()
    except pika.exceptions.ConnectionClosedByBroker:
        pass
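# handle_message_callback is passed to basic_consume above but is not shown in
# this excerpt. Given that the broadcast worker's handler (the __init__ taking
# ch, method, properties, body) is a class, the callback is presumably a thin
# wrapper along these lines; the class name Broadcaster is an assumption.
def handle_message_callback(ch, method, properties, body):
    # Instantiating the handler does the work: it decodes the message and
    # emits the mapped event over socketio in __init__.
    Broadcaster(ch, method, properties, body)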
def main(request_id):
    empty_since = None
    queue_ttl = 1 * 30
    check_queue_rate = 5
    containers = []

    connection = pika.BlockingConnection(
        pika.ConnectionParameters(os.environ['UNPACK_HOST'])
    )
    channel = connection.channel()
    fetcher_queue_name = UnpackHelpers.get_queue_name(
        queue_type='fetch', request_id=request_id
    )
    broadcaster_queue_name = UnpackHelpers.get_queue_name(
        queue_type='broadcast', request_id=request_id
    )
    fetcher_q = channel.queue_declare(queue=fetcher_queue_name)
    broadcaster_q = channel.queue_declare(queue=broadcaster_queue_name)

    publish_broadcast(
        channel=channel,
        queue_name=broadcaster_queue_name,
        event_name=UnpackHelpers.EVENT_NAME['REQUEST:IN_PROGRESS'],
    )

    logger.info(f'Creating queues for id: {request_id}')

    # Workers to create
    broadcast_container = UnpackHelpers.start_docker_container(
        container_name=UnpackHelpers.DOCKER_CONTAINER_NAMES['QUEUE_BROADCAST_WORKER'],
        request_id=request_id,
    )
    containers.append(broadcast_container)

    for i in range(10):
        fetcher_container = UnpackHelpers.start_docker_container(
            container_name=UnpackHelpers.DOCKER_CONTAINER_NAMES['QUEUE_FETCHER_WORKER'],
            request_id=request_id,
        )
        containers.append(fetcher_container)

    logger.info(f'Created containers {containers} for id {request_id}')

    # Keep polling as long as the queues have data in them and the TTL has not been reached
    while True:
        if empty_since is not None and (time.time() - empty_since) >= queue_ttl:
            handleEmptyQueue(
                channel=channel,
                request_id=request_id,
                containers=containers,
                fetcher_queue_name=fetcher_queue_name,
                broadcaster_queue_name=broadcaster_queue_name,
            )
            break

        empty_since = tick(
            channel=channel,
            fetcher_queue_name=fetcher_queue_name,
            broadcaster_queue_name=broadcaster_queue_name,
            empty_since=empty_since,
            check_queue_rate=check_queue_rate,
        )
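# tick() and handleEmptyQueue() are called above but not defined in this
# excerpt. A rough sketch of what they presumably do, based on how they are
# called, follows. The passive queue_declare checks, the REQUEST:COMPLETED
# event name, and the container.stop() teardown are assumptions.
def tick(channel, fetcher_queue_name, broadcaster_queue_name, empty_since, check_queue_rate):
    # Check how many messages are waiting on both queues without modifying them
    fetcher_q = channel.queue_declare(queue=fetcher_queue_name, passive=True)
    broadcaster_q = channel.queue_declare(queue=broadcaster_queue_name, passive=True)
    total = fetcher_q.method.message_count + broadcaster_q.method.message_count

    if total == 0:
        # Start the empty timer, or leave it running if it already started
        empty_since = empty_since if empty_since is not None else time.time()
    else:
        empty_since = None

    time.sleep(check_queue_rate)
    return empty_since


def handleEmptyQueue(channel, request_id, containers, fetcher_queue_name, broadcaster_queue_name):
    # Tell listeners the request has finished, then tear everything down
    publish_broadcast(
        channel=channel,
        queue_name=broadcaster_queue_name,
        event_name=UnpackHelpers.EVENT_NAME['REQUEST:COMPLETED'],  # event name is an assumption
    )
    channel.queue_delete(queue=fetcher_queue_name)
    channel.queue_delete(queue=broadcaster_queue_name)
    for container in containers:
        container.stop()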