def task_index_document(self, document_id):
    try:
        rebuild_lock = Lock.acquire_lock(
            'document_indexing_task_do_rebuild_all_indexes')
    except LockError as exception:
        # A rebuild is happening, retry later
        raise self.retry(exc=exception, countdown=RETRY_DELAY)
    else:
        try:
            lock = Lock.acquire_lock(
                'document_indexing_task_update_index_document_%d' % document_id)
        except LockError as exception:
            # This document is being reindexed by another task, retry later
            raise self.retry(exc=exception, countdown=RETRY_DELAY)
        else:
            try:
                document = Document.objects.get(pk=document_id)
            except Document.DoesNotExist:
                # Document was deleted before we could execute, abort updating
                pass
            else:
                index_document(document)
            finally:
                lock.release()
        finally:
            rebuild_lock.release()
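# All of the snippets in this section share one primitive: a named,
# database-backed lock. Below is a minimal sketch of the assumed API; the
# field names and the IntegrityError-based implementation are assumptions
# inferred from the call sites, not the actual upstream code.
from django.db import IntegrityError, models


class LockError(Exception):
    # Raised when the named lock is already held by another process
    pass


class Lock(models.Model):
    name = models.CharField(max_length=255, unique=True)
    timeout = models.IntegerField(blank=True, null=True)

    @classmethod
    def acquire_lock(cls, name, timeout=None):
        # A duplicate name violates the unique constraint, which signals
        # that another process already holds the lock
        try:
            return cls.objects.create(name=name, timeout=timeout)
        except IntegrityError:
            raise LockError('Unable to acquire lock: %s' % name)

    def release(self):
        # Releasing simply deletes the row so the name can be re-acquired
        self.delete()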
def task_do_rebuild_all_indexes(self):
    if Lock.filter(name__startswith='document_indexing_task_update_index_document'):
        # A document index update is happening, wait
        raise self.retry(countdown=RETRY_DELAY)

    try:
        lock = Lock.acquire_lock('document_indexing_task_do_rebuild_all_indexes')
    except LockError as exception:
        # Another rebuild is happening, retry later
        raise self.retry(exc=exception, countdown=RETRY_DELAY)
    else:
        try:
            do_rebuild_all_indexes()
        finally:
            lock.release()
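# The indexing tasks in this section call self.retry(), which implies they
# are registered as bound Celery tasks. A hedged sketch of the assumed
# wiring follows; the decorator form and the RETRY_DELAY value are
# assumptions, not the original project's configuration.
from celery import shared_task

RETRY_DELAY = 10  # seconds to wait before retrying a contended lock


@shared_task(bind=True, ignore_result=True)
def task_example(self):
    # With bind=True, Celery passes the task instance as "self", so
    # self.retry(exc=..., countdown=RETRY_DELAY) reschedules the task
    # instead of failing it outright when a lock cannot be acquired.
    pass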
def task_process_queue_document(queue_document_id):
    lock_id = u'task_proc_queue_doc-%d' % queue_document_id
    try:
        logger.debug('trying to acquire lock: %s' % lock_id)
        lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
        logger.debug('acquired lock: %s' % lock_id)
        queue_document = QueueDocument.objects.get(pk=queue_document_id)
        queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING
        queue_document.node_name = platform.node()
        queue_document.save()
        try:
            do_document_ocr(queue_document)
            queue_document.delete()
        except Exception as exception:
            queue_document.state = QUEUEDOCUMENT_STATE_ERROR
            if settings.DEBUG:
                result = []
                type, value, tb = sys.exc_info()
                result.append('%s: %s' % (type.__name__, value))
                result.extend(traceback.format_tb(tb))
                queue_document.result = '\n'.join(result)
            else:
                queue_document.result = exception
            queue_document.save()
        lock.release()
    except LockError:
        logger.debug('unable to obtain lock')
def submit(self):
    try:
        lock = Lock.acquire_lock('upload_stats')
    except LockError:
        pass
    else:
        self.set_properties()
        try:
            requests.post(
                FORM_SUBMIT_URL, data={
                    'formkey': FORM_KEY,
                    FORM_RECEIVER_FIELD: Property.get_reportable(as_json=True)
                }, timeout=TIMEOUT
            )
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            pass
        else:
            self.is_first_run = False
            self.save()
        finally:
            lock.release()
def heartbeat_check():
    '''
    Find the node with the oldest heartbeat timestamp and query it
    '''
    logging.debug('DEBUG: heartbeat_check()')
    siblings = Sibling.objects.filter().order_by('last_heartbeat')
    if siblings:
        oldest = siblings[0]
        try:
            lock = Lock.acquire_lock(u''.join(['heartbeat_check', oldest.uuid]), 20)
            node = RemoteCall(uuid=oldest.uuid)
            oldest.last_heartbeat = datetime.datetime.now()
            response = node.heartbeat()
            oldest.cpuload = int(float(response['cpuload']))
            oldest.status = NODE_STATUS_UP
            oldest.failure_count = 0
            oldest.save()
            lock.release()
        except LockError:
            pass
        except HeartbeatError:
            oldest.status = NODE_STATUS_DOWN
            oldest.failure_count += 1
            oldest.save()
            if oldest.failure_count > HEARTBEAT_FAILURE_THRESHOLD:
                oldest.delete()
            lock.release()
def task_do_rebuild_all_indexes(self):
    if Lock.check_existing(name__startswith='document_indexing_task_update_index_document'):
        # A document index update is happening, wait
        raise self.retry()

    try:
        lock = Lock.acquire_lock('document_indexing_task_do_rebuild_all_indexes')
    except LockError as exception:
        # Another rebuild is happening, retry later
        raise self.retry(exc=exception)
    else:
        try:
            IndexInstanceNode.objects.rebuild_all_indexes()
        finally:
            lock.release()
def task_check_expired_check_outs(): logger.debug("executing...") lock_id = "task_expired_check_outs" try: logger.debug("trying to acquire lock: %s", lock_id) lock = Lock.acquire_lock(name=lock_id, timeout=CHECKOUT_EXPIRATION_LOCK_EXPIRE) logger.debug("acquired lock: %s", lock_id) DocumentCheckout.objects.check_in_expired_check_outs() lock.release() except LockError: logger.debug("unable to obtain lock")
def task_delete_empty_index_nodes(self):
    try:
        rebuild_lock = Lock.acquire_lock('document_indexing_task_do_rebuild_all_indexes')
    except LockError as exception:
        # A rebuild is happening, retry later
        raise self.retry(exc=exception, countdown=RETRY_DELAY)
    else:
        try:
            delete_empty_index_nodes()
        finally:
            rebuild_lock.release()
def submit(self):
    try:
        lock = Lock.acquire_lock('upload_stats')
    except LockError:
        pass
    else:
        try:
            dictionary = {}

            if self.is_lsb:
                dictionary.update({
                    'is_lsb': unicode(self.is_lsb),
                    'distributor_id': unicode(self.distributor_id),
                    'description': unicode(self.description),
                    'release': unicode(self.release),
                    'codename': unicode(self.codename),
                    'sysinfo': unicode(self.sysinfo),
                })

            dictionary.update({
                'uuid': self.uuid,
                'architecture': unicode(self.architecture),
                'python_version': unicode(self.python_version),
                'platform': unicode(self.platform),
                'machine': unicode(self.machine),
                'processor': unicode(self.processor),
                'cpus': unicode(self.cpus),
                'total_phymem': unicode(self.total_phymem),
                'mayan_version': unicode(self.mayan_version),
                'fabfile': unicode(self.fabfile),
            })

            if self.is_git_repo:
                dictionary.update({
                    'repo_remotes': unicode(self.repo_remotes),
                    'repo_remotes_urls': unicode(self.repo_remotes_urls),
                    'repo_head_reference': unicode(self.repo_head_reference),
                    'headcommit_hexsha': unicode(self.headcommit_hexsha),
                    'headcommit_authored_date': unicode(self.headcommit_authored_date),
                    'headcommit_committed_date': unicode(self.headcommit_committed_date),
                    'headcommit_message': unicode(self.headcommit_message),
                })

            requests.post(
                FORM_SUBMIT_URL,
                data={'formkey': FORM_KEY, FORM_RECEIVER_FIELD: dumps(dictionary)},
                timeout=TIMEOUT
            )
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            pass
        else:
            self.is_first_run = False
            self.save()
        finally:
            lock.release()
def task_delete_empty_index_nodes(self):
    try:
        rebuild_lock = Lock.acquire_lock('document_indexing_task_do_rebuild_all_indexes')
    except LockError as exception:
        # A rebuild is happening, retry later
        raise self.retry(exc=exception)
    else:
        try:
            IndexInstanceNode.objects.delete_empty_index_nodes()
        finally:
            rebuild_lock.release()
def task_check_expired_check_outs():
    logger.debug('executing...')
    lock_id = u'task_expired_check_outs'
    try:
        logger.debug('trying to acquire lock: %s' % lock_id)
        lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
        logger.debug('acquired lock: %s' % lock_id)
        DocumentCheckout.objects.check_in_expired_check_outs()
        lock.release()
    except LockError:
        logger.debug('unable to obtain lock')
def task_index_document(self, document_id):
    try:
        rebuild_lock = Lock.acquire_lock(
            'document_indexing_task_do_rebuild_all_indexes'
        )
    except LockError as exception:
        # A rebuild is happening, retry later
        raise self.retry(exc=exception)
    else:
        try:
            lock = Lock.acquire_lock(
                'document_indexing_task_update_index_document_%d' % document_id
            )
        except LockError as exception:
            # This document is being reindexed by another task, retry later
            raise self.retry(exc=exception)
        else:
            try:
                document = Document.objects.get(pk=document_id)
            except Document.DoesNotExist:
                # Document was deleted before we could execute, abort updating
                pass
            else:
                try:
                    IndexInstanceNode.objects.index_document(document)
                except OperationalError as exception:
                    logger.warning(
                        'Operational error while trying to index document: '
                        '%s; %s', document, exception
                    )
                    lock.release()
                    raise self.retry(exc=exception)
                else:
                    lock.release()
            finally:
                lock.release()
        finally:
            rebuild_lock.release()
def submit(self):
    try:
        lock = Lock.acquire_lock('upload_registration')
    except LockError:
        pass
    else:
        try:
            requests.post(
                FORM_SUBMIT_URL,
                data={'formkey': FORM_KEY, FORM_RECEIVER_FIELD: self.registration_data},
                timeout=TIMEOUT
            )
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            pass
        else:
            self.registered = True
            self.save()
        finally:
            lock.release()
def check_file(self):
    try:
        lock_id = u'check_file-%d' % self.pk
        logger.debug('trying to acquire lock: %s' % lock_id)
        lock = Lock.acquire_lock(lock_id, 60)
        logger.debug('acquired lock: %s' % lock_id)
        try:
            self._check_file()
        except Exception as exception:
            logger.debug('unhandled exception: %s' % exception)
            raise
        finally:
            lock.release()
    except LockError:
        logger.debug('unable to obtain lock')
def task_process_queue_document(queue_document_id):
    lock_id = u'task_proc_queue_doc-%d' % queue_document_id
    try:
        lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
        queue_document = QueueDocument.objects.get(pk=queue_document_id)
        queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING
        queue_document.node_name = platform.node()
        queue_document.save()
        try:
            do_document_ocr(queue_document)
            queue_document.delete()
        except Exception as e:
            queue_document.state = QUEUEDOCUMENT_STATE_ERROR
            queue_document.result = e
            queue_document.save()
        lock.release()
    except LockError:
        pass
def task_do_ocr(self, document_version_pk):
    lock_id = "task_do_ocr_doc_version-%d" % document_version_pk
    try:
        logger.debug("trying to acquire lock: %s", lock_id)
        # Acquire lock to avoid doing OCR on the same document version more
        # than once concurrently
        lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
        logger.debug("acquired lock: %s", lock_id)
        document_version = None
        try:
            document_version = DocumentVersion.objects.get(pk=document_version_pk)
            logger.info("Starting document OCR for document version: %s", document_version)
            TextExtractor.process_document_version(document_version)
        except OperationalError as exception:
            logger.warning(
                "OCR error for document version: %d; %s. Retrying.",
                document_version_pk, exception
            )
            raise self.retry(exc=exception)
        except Exception as exception:
            logger.error("OCR error for document version: %d; %s", document_version_pk, exception)
            if document_version:
                entry, created = DocumentVersionOCRError.objects.get_or_create(
                    document_version=document_version
                )
                if settings.DEBUG:
                    result = []
                    type, value, tb = sys.exc_info()
                    result.append("%s: %s" % (type.__name__, value))
                    result.extend(traceback.format_tb(tb))
                    entry.result = "\n".join(result)
                else:
                    entry.result = exception
                entry.save()
        else:
            logger.info("OCR complete for document version: %s", document_version)
            try:
                entry = DocumentVersionOCRError.objects.get(document_version=document_version)
            except DocumentVersionOCRError.DoesNotExist:
                pass
            else:
                entry.delete()
            post_document_version_ocr.send(sender=self, instance=document_version)
        finally:
            lock.release()
    except LockError:
        logger.debug("unable to obtain lock: %s", lock_id)
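# The DEBUG-mode traceback capture above is repeated almost verbatim in
# several tasks in this section. A small helper like this (hypothetical,
# not part of the original code) would factor it out:
import sys
import traceback


def format_current_exception():
    # Render the active exception as "Type: value" followed by the
    # formatted traceback, one frame per line
    exc_type, exc_value, exc_traceback = sys.exc_info()
    lines = ['%s: %s' % (exc_type.__name__, exc_value)]
    lines.extend(traceback.format_tb(exc_traceback))
    return '\n'.join(lines)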
def task_process_queue_document(queue_document_id):
    lock_id = u'task_proc_queue_doc-%d' % queue_document_id
    try:
        logger.debug('trying to acquire lock: %s' % lock_id)
        lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
        logger.debug('acquired lock: %s' % lock_id)
        queue_document = QueueDocument.objects.get(pk=queue_document_id)
        queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING
        queue_document.node_name = platform.node()
        queue_document.save()
        try:
            do_document_ocr(queue_document)
            queue_document.delete()
        except Exception as e:
            queue_document.state = QUEUEDOCUMENT_STATE_ERROR
            queue_document.result = e
            queue_document.save()
        lock.release()
    except LockError:
        logger.debug('unable to obtain lock')
def check_source_data(self):
    logger.info('Checking for new data for source: %s' % self.slug)
    try:
        lock_id = u'check_source_data-%d' % self.pk
        logger.debug('trying to acquire lock: %s' % lock_id)
        lock = Lock.acquire_lock(lock_id, 60)
        logger.debug('acquired lock: %s' % lock_id)
        try:
            self.check_origin_data()
        except Exception as exception:
            logger.debug('unhandled exception: %s' % exception)
            logger.error('Error when checking data for source: %s; %s' % (self.slug, exception))
            raise
        finally:
            lock.release()
    except LockError:
        logger.debug('unable to obtain lock')
        logger.info('Unable to obtain lock to check for new data for source: %s' % self.slug)
def submit(self):
    try:
        lock = Lock.acquire_lock('upload_registration')
    except LockError:
        pass
    else:
        try:
            requests.post(
                FORM_SUBMIT_URL, data={
                    'formkey': FORM_KEY,
                    FORM_RECEIVER_FIELD: self.registration_data
                }, timeout=TIMEOUT
            )
        except Exception:
            raise
        else:
            self.registered = True
            self.save()
        finally:
            lock.release()
def submit(self):
    try:
        lock = Lock.acquire_lock('upload_stats')
    except LockError:
        pass
    else:
        try:
            dictionary = {}

            if self.is_lsb:
                dictionary.update({
                    'is_lsb': unicode(self.is_lsb),
                    'distributor_id': unicode(self.distributor_id),
                    'description': unicode(self.description),
                    'release': unicode(self.release),
                    'codename': unicode(self.codename),
                    'sysinfo': unicode(self.sysinfo),
                })

            dictionary.update({
                'uuid': self.uuid,
                'architecture': unicode(self.architecture),
                'python_version': unicode(self.python_version),
                'platform': unicode(self.platform),
                'machine': unicode(self.machine),
                'processor': unicode(self.processor),
                'cpus': unicode(self.cpus),
                'total_phymem': unicode(self.total_phymem),
            })

            requests.post(
                FORM_SUBMIT_URL,
                data={'formkey': FORM_KEY, FORM_RECEIVER_FIELD: dumps(dictionary)},
                timeout=TIMEOUT
            )
        except Exception:
            pass
        else:
            self.is_first_run = False
            self.save()
        finally:
            lock.release()
def task_do_ocr(document_version_pk):
    lock_id = 'task_do_ocr_doc_version-%d' % document_version_pk
    try:
        logger.debug('trying to acquire lock: %s', lock_id)
        # Acquire lock to avoid doing OCR on the same document version more
        # than once concurrently
        lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
        logger.debug('acquired lock: %s', lock_id)
        document_version = None
        try:
            logger.info('Starting document OCR for document version: %d', document_version_pk)
            document_version = DocumentVersion.objects.get(pk=document_version_pk)
            do_document_ocr(document_version)
        except Exception as exception:
            logger.error('OCR error for document version: %d; %s', document_version_pk, exception)
            if document_version:
                entry, created = DocumentVersionOCRError.objects.get_or_create(
                    document_version=document_version
                )
                if settings.DEBUG:
                    result = []
                    type, value, tb = sys.exc_info()
                    result.append('%s: %s' % (type.__name__, value))
                    result.extend(traceback.format_tb(tb))
                    entry.result = '\n'.join(result)
                else:
                    entry.result = exception
                entry.save()
        else:
            logger.info('OCR for document: %d ended', document_version_pk)
            try:
                entry = DocumentVersionOCRError.objects.get(document_version=document_version)
            except DocumentVersionOCRError.DoesNotExist:
                pass
            else:
                entry.delete()
        finally:
            lock.release()
    except LockError:
        logger.debug('unable to obtain lock: %s', lock_id)
def inventory_hash_check():
    '''
    Find the node with the oldest inventory timestamp and query it
    '''
    logging.debug('DEBUG: inventory_hash_check()')
    siblings = Sibling.objects.filter(status=NODE_STATUS_UP).order_by('last_inventory_hash')
    if siblings:
        oldest = siblings[0]
        try:
            lock = Lock.acquire_lock(u''.join(['inventory_hash', oldest.uuid]), 20)
            oldest.last_inventory_hash = datetime.datetime.now()
            remote_api = RemoteCall(uuid=oldest.uuid)
            response = remote_api.inventory_hash()
            if oldest.inventory_hash != response['inventory_hash']:
                # Delete this holder from all its resources to catch later
                # the ones it doesn't have anymore
                ResourceHolder.objects.filter(node__uuid=oldest.uuid).delete()
                for resource_item in remote_api.resource_list():
                    uuid, timestamp = resource_item['uuid'].split(TIMESTAMP_SEPARATOR)
                    resource, created = NetworkResourceVersion.objects.get_or_create(
                        uuid=uuid, timestamp=timestamp,
                        defaults={
                            'metadata': dumps(resource_item.get('metadata')),
                            'signature_properties': dumps(resource_item.get('signature_properties')),
                        }
                    )
                    resource.resourceholder_set.get_or_create(node=oldest)
                oldest.inventory_hash = response['inventory_hash']
            oldest.save()
            # Delete network resources that have no holder
            NetworkResourceVersion.objects.filter(resourceholder=None).delete()
            lock.release()
        except LockError:
            pass
        except InventoryHashError:
            lock.release()
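# inventory_hash_check() splits each remote resource identifier on
# TIMESTAMP_SEPARATOR. A hedged sketch of the assumed wire format follows;
# the separator value and the helper names are assumptions for illustration.
TIMESTAMP_SEPARATOR = '|'


def compose_resource_id(uuid, timestamp):
    # The remote node is assumed to publish '<uuid><separator><timestamp>'
    return TIMESTAMP_SEPARATOR.join([uuid, timestamp])


def split_resource_id(value):
    # Inverse of compose_resource_id(); mirrors the split in the task above
    uuid, timestamp = value.split(TIMESTAMP_SEPARATOR)
    return uuid, timestamp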