def task_run_core():
    """
    Main daemon task.

    Loads the records returned by get_recids_to_load() and, for each of
    them, runs every ticket plugin found in the 'tickets' task option.
    Each plugin is indexed by 'check_record' and 'generate_ticket'; when
    'check_record' accepts the record, the ticket built by
    'generate_ticket' is submitted.

    Returns True when run successfully. Failures are not reported through
    the return value: they are logged and the exception is propagated.

    Raises BibCatalogPluginException when a configured plugin is empty, and
    re-raises any exception hit while checking, generating or submitting a
    ticket.
    """
    # Dictionary of "plugin_name" -> func
    tickets_to_apply = task_get_option('tickets')
    write_message("Ticket plugins found: %s" % (str(tickets_to_apply), ),
                  verbose=9)

    task_update_progress("Loading records")
    records_concerned = get_recids_to_load()
    # The total is loop-invariant, so compute it once.
    total_records = len(records_concerned)
    write_message("%i record(s) found" % (total_records, ))

    records_processed = 0
    for record, last_date in load_records_from_id(records_concerned):
        records_processed += 1
        recid = record_id_from_record(record)
        task_update_progress(
            "Processing records %s/%s (%i%%)"
            % (records_processed, total_records,
               int(float(records_processed) / total_records * 100)))
        task_sleep_now_if_required(can_stop_too=True)

        for ticket_name, plugin in tickets_to_apply.items():
            if not plugin:
                raise BibCatalogPluginException(
                    "Plugin not valid in %s" % (ticket_name, ))

            write_message("Running template %s for %s" % (ticket_name, recid),
                          verbose=5)
            try:
                ticket = BibCatalogTicket(recid=int(recid))
                if plugin['check_record'](ticket, record):
                    ticket = plugin['generate_ticket'](ticket, record)
                    write_message("Ticket to be generated: %s" % (ticket, ),
                                  verbose=5)
                    if ticket.submit():
                        write_message("Ticket #%s created for %s"
                                      % (ticket.ticketid, recid))
                    else:
                        write_message("Ticket already exists for %s"
                                      % (recid, ))
                else:
                    # The record id must be interpolated with '%'; a comma
                    # here would hand the tuple to write_message as a second
                    # positional argument and leave '%s' in the message.
                    write_message("Skipping record %s" % (recid, ))
            except Exception:
                write_message("Error submitting ticket for record %s:"
                              % (recid, ))
                write_message(traceback.format_exc())
                # Bare raise keeps the original traceback intact.
                raise

        # Record the date only when the loader supplied one for this record.
        if last_date:
            store_last_updated(recid, last_date, name="bibcatalog")

    return True
def task_run_core():
    """
    Main daemon task.

    For every record yielded by load_records_from_id(), applies each ticket
    plugin listed in the 'tickets' task option: the plugin's 'check_record'
    entry decides whether a ticket is wanted, its 'generate_ticket' entry
    builds the ticket, and the ticket is then submitted.

    Returns True when run successfully. On failure nothing is returned:
    the error is logged and the exception propagates to the caller.

    Raises BibCatalogPluginException for an empty plugin entry, and
    re-raises whatever a plugin or the ticket submission raised.
    """
    # Dictionary of "plugin_name" -> func
    tickets_to_apply = task_get_option('tickets')
    write_message("Ticket plugins found: %s" % (str(tickets_to_apply),),
                  verbose=9)

    task_update_progress("Loading records")
    records_concerned = get_recids_to_load()
    # Number of records is constant for the whole run; compute it once.
    records_total = len(records_concerned)
    write_message("%i record(s) found" % (records_total,))

    records_processed = 0
    for record, last_date in load_records_from_id(records_concerned):
        records_processed += 1
        recid = record_id_from_record(record)
        percent_done = int(float(records_processed) / records_total * 100)
        task_update_progress("Processing records %s/%s (%i%%)"
                             % (records_processed, records_total,
                                percent_done))
        task_sleep_now_if_required(can_stop_too=True)

        for ticket_name, plugin in tickets_to_apply.items():
            if not plugin:
                raise BibCatalogPluginException("Plugin not valid in %s"
                                                % (ticket_name,))

            write_message("Running template %s for %s" % (ticket_name, recid),
                          verbose=5)
            try:
                ticket = BibCatalogTicket(recid=int(recid))
                if not plugin['check_record'](ticket, record):
                    # Interpolate with '%': passing the tuple as a separate
                    # argument would not format the message at all.
                    write_message("Skipping record %s" % (recid,))
                else:
                    ticket = plugin['generate_ticket'](ticket, record)
                    write_message("Ticket to be generated: %s" % (ticket,),
                                  verbose=5)
                    res = ticket.submit()
                    if res:
                        write_message("Ticket #%s created for %s"
                                      % (ticket.ticketid, recid))
                    else:
                        write_message("Ticket already exists for %s"
                                      % (recid,))
            except Exception:
                write_message("Error submitting ticket for record %s:"
                              % (recid,))
                write_message(traceback.format_exc())
                # Re-raise without rebinding so the traceback is preserved.
                raise

        # Only store a date when one came with the record.
        if last_date:
            store_last_updated(recid, last_date, name="bibcatalog")

    return True
def task_run_core(name=NAME):
    """Entry point for the arxiv-pdf-checker task.

    Processes either the record ids given in the "recids" task option or,
    when none are given, the records returned by
    fetch_updated_arxiv_records() for the last stored run date. Records
    for which process_one() returns a true value are collected and handed
    to the fixmarc and refextract tasks, even if the loop is interrupted.

    name -- key used to read and store the last run date.

    Returns True.
    """
    # First gather recids to process
    requested_recids = task_get_option("recids")
    if requested_recids:
        # Explicit recids: do not record a run date afterwards.
        start_date = None
        recids = [(recid, None) for recid in requested_recids]
    else:
        start_date = datetime.now()
        _unused, last_date = fetch_last_updated(name)
        recids = fetch_updated_arxiv_records(last_date)

    updated_recids = set()

    try:
        for position, (recid, _unused) in enumerate(recids):
            if position % 50 == 0:
                progress = "Done %s of %s" % (position, len(recids))
                write_message(progress)
                task_update_progress(progress)

            # BibTask sleep
            task_sleep_now_if_required(can_stop_too=True)

            write_message("processing %s" % recid, verbose=9)

            # Seconds to wait before the next record; longer after a
            # missing or failed download.
            # NOTE(review): presumably throttles remote requests - confirm.
            try:
                was_updated = process_one(recid)
            except AlreadyHarvested:
                write_message("already harvested successfully")
                pause = 6
            except FoundExistingPdf:
                write_message("pdf already attached (matching md5)")
                pause = 6
            except PdfNotAvailable:
                write_message("no pdf available")
                pause = 20
            except InvenioFileDownloadError as e:
                write_message("failed to download: %s" % e)
                pause = 20
            else:
                if was_updated:
                    updated_recids.add(recid)
                pause = 6
            time.sleep(pause)
    finally:
        # We want to process updated records even in case we are interrupted
        summary = "Updated %s records" % len(updated_recids)
        write_message(summary)
        task_update_progress(summary)
        write_message(repr(updated_recids))

        # For all updated records, we want to sync the 8564 tags
        # and reextract references
        if updated_recids:
            submit_fixmarc_task(updated_recids)
            submit_refextract_task(updated_recids)

    # Store last run date of the daemon
    # not if it ran on specific recids from the command line with --id
    # but only if it ran on the modified records
    if start_date:
        store_last_updated(0, start_date, name)

    return True
def task_run_core(name=NAME):
    """Entry point for the arxiv-pdf-checker task.

    Works on the record ids from the 'recids' task option when present;
    otherwise on the records fetch_updated_arxiv_records() returns for the
    date stored under `name`. Every record for which process_one() reports
    an update is passed on to the fixmarc and refextract tasks, and the
    run date is stored unless explicit record ids were given.

    name -- key under which the last run date is fetched and stored.

    Returns True.
    """
    short_pause = 6
    long_pause = 20

    # First gather recids to process
    option_recids = task_get_option('recids')
    if not option_recids:
        start_date = datetime.now()
        _ignored, last_date = fetch_last_updated(name)
        recids = fetch_updated_arxiv_records(last_date)
    else:
        # Run restricted to given recids: no run date will be stored.
        start_date = None
        recids = [(recid, None) for recid in option_recids]

    updated_recids = set()

    try:
        for index, (recid, _ignored) in enumerate(recids):
            if not index % 50:
                status = 'Done %s of %s' % (index, len(recids))
                write_message(status)
                task_update_progress(status)

            # BibTask sleep
            task_sleep_now_if_required(can_stop_too=True)

            write_message('processing %s' % recid, verbose=9)
            try:
                if process_one(recid):
                    updated_recids.add(recid)
                time.sleep(short_pause)
            except AlreadyHarvested:
                write_message('already harvested successfully')
                time.sleep(short_pause)
            except FoundExistingPdf:
                write_message('pdf already attached (matching md5)')
                time.sleep(short_pause)
            except PdfNotAvailable:
                write_message("no pdf available")
                time.sleep(long_pause)
            except InvenioFileDownloadError as e:
                write_message("failed to download: %s" % e)
                time.sleep(long_pause)
    finally:
        # We want to process updated records even in case we are interrupted
        status = 'Updated %s records' % len(updated_recids)
        write_message(status)
        task_update_progress(status)
        write_message(repr(updated_recids))

        # For all updated records, we want to sync the 8564 tags
        # and reextract references
        if updated_recids:
            submit_fixmarc_task(updated_recids)
            submit_refextract_task(updated_recids)

    # Store last run date of the daemon
    # not if it ran on specific recids from the command line with --id
    # but only if it ran on the modified records
    if start_date:
        store_last_updated(0, start_date, name)

    return True