def parse_option(key, value, dummy, args):
    """Handle one command line option of a refextract job.

    Boolean switches are stored as True under their option name.  The
    id-list options (-c/--collections and -r/--recids) are accumulated
    into a set stored under the option name, so repeating the option
    adds to the ids already collected.

    key   -- option name as passed on the command line
    value -- raw option value; only read for the id-list options
    dummy -- unused
    args  -- standalone arguments; must be empty

    Always returns True.  Raises StandardError when a standalone
    argument is present.
    """
    # There should be no standalone arguments for any refextract job.
    # Rejecting them here catches them before the job is shipped to
    # Bibsched.
    if args:
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    switches = {
        '-a': 'new',
        '--new': 'new',
        '-m': 'modified',
        '--modified': 'modified',
        '--rebuild': 'rebuild',
    }
    id_lists = {
        '-c': 'collections',
        '--collections': 'collections',
        '-r': 'recids',
        '--recids': 'recids',
    }

    if key in switches:
        task_set_option(switches[key], True)
    elif key in id_lists:
        option_name = id_lists[key]
        collected = task_get_option(option_name)
        if not collected:
            # First occurrence: register the set, then fill it in place.
            collected = set()
            task_set_option(option_name, collected)
        collected.update(split_cli_ids_arg(value))

    return True
def task_parse_options(key, val, *_):
    """Store one command line option as task options.

    Must be defined for bibtask to create a task.

    key -- option name as passed on the command line
    val -- raw option value (ignored by the boolean switches)

    Returns True for every recognised option and raises StandardError
    for an unrecognised one.
    """
    # Options whose value is a comma separated list stored as a set.
    comma_lists = {
        "--all": "reset_rules",
        "-a": "reset_rules",
        "--enable-rules": "enabled_rules",
        "-e": "enabled_rules",
    }
    # Options whose value is stored unchanged.
    verbatim = {
        "--queue": "queue",
        "-q": "queue",
        "--ticket-creation-policy": "ticket_creation_policy",
        "-p": "ticket_creation_policy",
        "--config": "config",
        "-c": "config",
    }
    # Switches, each mapped to the task options it turns on (in order).
    switches = {
        "--no-tickets": ("no_tickets",),
        "-t": ("no_tickets",),
        "--no-upload": ("no_upload",),
        "-b": ("no_upload",),
        "--dry-run": ("no_upload", "no_tickets"),
        "-n": ("no_upload", "no_tickets"),
        "--notimechange": ("notimechange",),
    }

    if key in comma_lists:
        task_set_option(comma_lists[key], set(val.split(",")))
    elif key in ("--id", "-i"):
        task_set_option("record_ids", intbitset(split_cli_ids_arg(val)))
    elif key in verbatim:
        task_set_option(verbatim[key], val)
    elif key in switches:
        for option_name in switches[key]:
            task_set_option(option_name, True)
    else:
        raise StandardError("Error: Unrecognised argument '%s'." % key)

    return True
def cb_parse_option(key, value, opts, args):
    """Handle one command line option.

    Only -i/--id is acted upon: the ids parsed from `value` are added to
    the set stored under the 'recids' task option.  Any other key is
    accepted without effect.

    Always returns True.  Raises StandardError when a standalone
    argument is present.
    """
    # There should be no standalone arguments.
    if args:
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key not in ('-i', '--id'):
        return True

    id_set = task_get_option('recids')
    if not id_set:
        # First occurrence: register the set, then fill it in place.
        id_set = set()
        task_set_option('recids', id_set)
    id_set.update(split_cli_ids_arg(value))
    return True
def parse_option(key, value, opts, args):
    """Elaborate one task submission parameter.

    -a/--all sets the 'all' task parameter to True.  -i/--id adds the
    ids parsed from `value` to the set stored under the 'recids' task
    parameter.  Any other key is accepted without effect.

    Always returns True.  Raises StandardError when a standalone
    argument is present.
    """
    # There should be no standalone arguments.
    if args:
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--all'):
        task_set_task_param('all', True)
    elif key in ('-i', '--id'):
        wanted = task_get_task_param('recids')
        if not wanted:
            # First occurrence: register the set, then fill it in place.
            wanted = set()
            task_set_task_param('recids', wanted)
        wanted.update(split_cli_ids_arg(value))

    return True
def task_run_core():
    """Run the task by fetching arguments from the BibSched task queue.

    This is what BibSched will be invoking via daemon call.

    For each format named in the comma separated 'format' option
    (default 'HB,RECJSON') the record ids to process are gathered from
    the selected sources and handed to bibreformat_task().

    Always returns True.
    """
    search_option_names = ('collection', 'field', 'pattern', 'matching')

    for fmt in task_get_option('format', 'HB,RECJSON').split(','):
        last_run = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_run)

        selected = intbitset()

        if task_has_option("all"):
            selected += all_records()

        if task_has_option("last"):
            selected += outdated_caches(fmt, last_run)

        if task_has_option('ignore_without'):
            without_fmt = intbitset()
        else:
            without_fmt = missing_caches(fmt)
            selected += without_fmt

        # Record ids given explicitly on the command line.
        selected += split_cli_ids_arg(task_get_option('recids', ''))

        # Search parameters; each defaults to the empty string.
        query_params = dict((name, task_get_option(name, ''))
                            for name in search_option_names)
        selected += query_records(query_params)

        process = not task_has_option('noprocess')
        bibreformat_task(fmt, selected, without_fmt, process)

    return True
def task_run_core():
    """Run the task by fetching arguments from the BibSched task queue.

    This is what BibSched will be invoking via daemon call.

    The 'format' option (default 'HB,RECJSON') is a comma separated list
    of formats.  For every format, the ids to process are accumulated
    from the selected sources and passed to bibreformat_task().

    Always returns True.
    """
    format_names = task_get_option('format', 'HB,RECJSON').split(',')

    for fmt in format_names:
        previous_run = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % previous_run)

        todo = intbitset()

        if task_has_option("all"):
            todo += all_records()

        if task_has_option("last"):
            todo += outdated_caches(fmt, previous_run)

        if task_has_option('ignore_without'):
            without_fmt = intbitset()
        else:
            without_fmt = missing_caches(fmt)
            todo += without_fmt

        # Explicit record ids from the command line.
        explicit_ids = split_cli_ids_arg(task_get_option('recids', ''))
        todo += explicit_ids

        # Search parameters; each defaults to the empty string.
        query_params = {}
        for name in ('collection', 'field', 'pattern', 'matching'):
            query_params[name] = task_get_option(name, '')
        todo += query_records(query_params)

        bibreformat_task(fmt, todo, without_fmt,
                         not task_has_option('noprocess'))

    return True
def task_parse_options(key, val, *_):
    """Act on one command line option.

    Must be defined for bibtask to create a task.

    key -- option name as passed on the command line
    val -- raw option value (ignored by the boolean switches)

    Returns True for every recognised option and raises StandardError
    for an unrecognised one.
    """
    if key in ("--all", "-a"):
        # The value is a comma separated list of rule names.
        for rule_name in val.split(","):
            reset_rule_last_run(rule_name)
        return True

    if key in ("--enable-rules", "-e"):
        task_set_option("enabled_rules", set(val.split(",")))
        return True

    if key in ("--id", "-i"):
        task_set_option("record_ids", intbitset(split_cli_ids_arg(val)))
        return True

    if key in ("--queue", "-q"):
        task_set_option("queue", val)
        return True

    if key in ("--no-tickets", "-t"):
        task_set_option("no_tickets", True)
        return True

    if key in ("--no-upload", "-b"):
        task_set_option("no_upload", True)
        return True

    if key in ("--dry-run", "-n"):
        # A dry run sets both the no-upload and the no-tickets options.
        for option_name in ("no_upload", "no_tickets"):
            task_set_option(option_name, True)
        return True

    if key in ("--config", "-c"):
        task_set_option("config", val)
        return True

    raise StandardError("Error: Unrecognised argument '%s'." % key)
def task_run_core():
    """Runs the task by fetching arguments from the BibSched task queue.

    This is what BibSched will be invoking via daemon call.

    For each format named in the comma separated 'format' option
    (default 'HB') the SQL statements, the search parameters and the
    explicit record ids are prepared and handed to bibreformat_task().

    Always returns True.
    """
    # Default value if no format option given.
    fmts = task_get_option('format') or 'HB'

    for fmt in fmts.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        # NOTE(review): the format name is interpolated straight into the
        # SQL text; confirm it can only come from a trusted source.
        all_sql = (
            "SELECT br.id FROM bibrec AS br, bibfmt AS bf "
            "WHERE bf.id_bibrec = br.id AND bf.format = '%s'"
        ) % fmt
        last_sql = (
            "SELECT br.id FROM bibrec AS br "
            "INNER JOIN bibfmt AS bf ON bf.id_bibrec = br.id "
            "WHERE br.modification_date >= '%(last_updated)s' "
            "AND bf.format='%(format)s' "
            "AND bf.last_updated < br.modification_date"
        ) % {
            'format': fmt,
            'last_updated': last_updated.strftime('%Y-%m-%d %H:%M:%S'),
        }
        # The doubled %%s survive formatting as %s placeholders.
        missing_sql = (
            "SELECT br.id FROM bibrec as br "
            "LEFT JOIN bibfmt as bf ON bf.id_bibrec = br.id "
            "AND bf.format ='%s' "
            "WHERE bf.id_bibrec IS NULL AND br.id BETWEEN %%s AND %%s "
        ) % fmt
        sql = {"all": all_sql, "last": last_sql, "missing": missing_sql}

        # Statements explicitly selected through task options.
        sql_queries = [sql[name] for name in ("all", "last")
                       if task_has_option(name)]

        # Search parameters; a missing option becomes the empty string.
        cds_query = {}
        for name in ("collection", "field", "pattern", "matching"):
            if task_has_option(name):
                cds_query[name] = task_get_option(name)
            else:
                cds_query[name] = ""

        recids = []
        if task_has_option("recids"):
            recids = list(split_cli_ids_arg(task_get_option('recids')))

        bibreformat_task(fmt,
                         sql,
                         sql_queries,
                         cds_query,
                         task_has_option('without'),
                         not task_has_option('noprocess'),
                         recids)

    return True
def test_complex(self):
    """Ranges and single ids mixed in any order give one merged set."""
    parsed = split_cli_ids_arg("1-1,7,10-11,4")
    self.assertEqual(parsed, set([1, 4, 7, 10, 11]))
def test_multiple(self):
    """Comma separated single ids are all returned."""
    parsed = split_cli_ids_arg("1,5,7")
    self.assertEqual(parsed, set([1, 5, 7]))
def test_range(self):
    """A dash separated range is expanded with both ends included."""
    parsed = split_cli_ids_arg("1-5")
    self.assertEqual(parsed, set([1, 2, 3, 4, 5]))
def test_one(self):
    """A single id gives a one-element set."""
    parsed = split_cli_ids_arg("1")
    self.assertEqual(parsed, set([1]))
recid = get_record_from_doi(doi) except APSHarvesterSearchError, e: write_message("Error while getting recid from %s: %s" % (doi, str(e))) continue if not recid: # Record not found on the system, we harvest from APS write_message("No recid found, we get record from APS") recid = None final_record_list.append(APSRecord(recid, doi)) if len(recids) > 0: write_message("Parsing record IDs...") # We are doing rec ids recids = split_cli_ids_arg(recids) for recid in recids: final_record_list.append(APSRecord(recid)) if query: write_message("Performing a search query...") # We are doing a search query, rg=0 allows the return of all results. result = perform_request_search(p=query, cc=CFG_APSHARVEST_SEARCH_COLLECTION, of='id', rg=0, wl=0) for recid in result: final_record_list.append(APSRecord(recid))
recid = get_record_from_doi(doi) except APSHarvesterSearchError, e: write_message("Error while getting recid from %s: %s" % (doi, str(e))) continue if not recid: # Record not found on the system, we harvest from APS write_message("No recid found, we get record from APS") recid = None final_record_list.append(APSRecord(recid, doi)) if len(parameters.get("recids")) > 0: write_message("Parsing record IDs...") # We are doing rec ids recids = split_cli_ids_arg(parameters.get("recids")) for recid in recids: final_record_list.append(APSRecord(recid)) if parameters.get("query"): write_message("Performing a search query...") # We are doing a search query, rg=0 allows the return of all results. result = perform_request_search(p=parameters.get("query"), cc=CFG_APSHARVEST_SEARCH_COLLECTION, of='id', rg=0, wl=0) for recid in result: final_record_list.append(APSRecord(recid))
recid = get_record_from_doi(doi) except APSHarvesterSearchError, e: write_message("Error while getting recid from %s: %s" % (doi, str(e))) continue if not recid: # Record not found on the system, we harvest from APS write_message("No recid found, we get record from APS") recid = None final_record_list.append(APSRecord(recid, doi)) if len(recids) > 0: write_message("Parsing record IDs...") # We are doing rec ids recids = split_cli_ids_arg(recids) for recid in recids: final_record_list.append(APSRecord(recid)) if query: write_message("Performing a search query...") # We are doing a search query, rg=0 allows the return of all results. result = perform_request_search( p=query, cc=CFG_APSHARVEST_SEARCH_COLLECTION, of='id', rg=0, wl=0) for recid in result: final_record_list.append(APSRecord(recid))