def parse_option(key, value, dummy, args):
    """Store one parsed command-line option in the task options.

    :param key: option flag exactly as it appeared on the command line.
    :param value: raw argument of the flag; only read for
        ``-c/--collections`` and ``-r/--recids``.
    :param dummy: not used by this function.
    :param args: leftover standalone arguments; must be empty.
    :returns: ``True``, including for keys that match none of the flags
        handled here.
    :raises StandardError: when at least one standalone argument is given.
    """
    if args:
        # No refextract job takes standalone arguments; failing here stops
        # the job before it is shipped to Bibsched.
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    def extend_id_option(option_name):
        # Reuse the set already stored under the option, or register a new
        # one first, then add the ids parsed from ``value`` to it.
        known_ids = task_get_option(option_name)
        if not known_ids:
            known_ids = set()
            task_set_option(option_name, known_ids)
        known_ids.update(split_cli_ids_arg(value))

    # Flags that only switch a boolean task option on.
    boolean_flags = (
        (('-a', '--new'), 'new'),
        (('-m', '--modified'), 'modified'),
        (('--rebuild',), 'rebuild'),
    )
    for aliases, option_name in boolean_flags:
        if key in aliases:
            task_set_option(option_name, True)
            return True

    if key in ('-c', '--collections'):
        extend_id_option('collections')
    elif key in ('-r', '--recids'):
        extend_id_option('recids')

    return True
def cb_parse_option(key, value, opts, args):
    """Handle one parsed command-line option.

    Only ``-i/--id`` has an effect: the ids parsed from ``value`` are added
    to the set stored under the ``recids`` task option, which is created
    and registered first when nothing truthy is stored yet.

    :param key: option flag as given on the command line.
    :param value: raw argument of the flag.
    :param opts: not used by this function.
    :param args: leftover standalone arguments; must be empty.
    :returns: ``True`` for every key.
    :raises StandardError: when a standalone argument is present.
    """
    if args:
        # Standalone arguments are never valid here.
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key not in ('-i', '--id'):
        return True

    selected_ids = task_get_option('recids')
    if not selected_ids:
        selected_ids = set()
        task_set_option('recids', selected_ids)
    selected_ids.update(split_cli_ids_arg(value))
    return True
def cb_parse_option(key, value, opts, args):
    """Process a single command-line option for the task.

    When ``key`` is ``-i`` or ``--id``, the ids parsed from ``value`` are
    merged into the set kept under the ``recids`` task option (a fresh set
    is stored first if the option is empty or missing). Other keys are
    accepted without any effect.

    :param key: option flag as given on the command line.
    :param value: raw argument of the flag.
    :param opts: not used by this function.
    :param args: leftover standalone arguments; must be empty.
    :returns: ``True`` in all non-error cases.
    :raises StandardError: when a standalone argument is present.
    """
    if args:
        # There should be no standalone arguments.
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    is_id_option = key == "-i" or key == "--id"
    if is_id_option:
        stored = task_get_option("recids")
        if not stored:
            stored = set()
            task_set_option("recids", stored)
        stored.update(split_cli_ids_arg(value))

    return True
def ids_from_input(ids_input, all_repr=ALL):
    """Translate a user-supplied id string into the ids to check.

    :param ids_input: comma-separated list of requested record IDs; any of
        its comma-separated items may be ``ALL``.
    :type ids_input: str
    :param all_repr: value handed back when ``ALL`` is requested.
    :returns: ``all_repr`` if one of the items equals ``ALL``, otherwise
        whatever ``split_cli_ids_arg`` produces for ``ids_input``.
    :raises: errors from ``split_cli_ids_arg`` propagate unchanged (the
        previous docstring listed ``ValueError`` -- confirm against it).
    """
    requested_items = ids_input.split(',')
    if ALL not in requested_items:
        # Imported lazily, only on the path that actually parses ids.
        from invenio.utils.shell import split_cli_ids_arg
        return split_cli_ids_arg(ids_input)
    return all_repr
def ids_from_input(ids_input, all_repr=ALL):
    """Translate a user-supplied id string into the ids to check.

    :param ids_input: comma-separated list of requested record IDs; any of
        its comma-separated items may be ``ALL``.
    :type ids_input: str
    :param all_repr: value handed back when ``ALL`` is requested.
    :returns: ``all_repr`` if one of the items equals ``ALL``, otherwise an
        ``intbitset`` built (with ``sanity_checks=True``) from the ids that
        ``split_cli_ids_arg`` parses out of ``ids_input``.
    :raises: errors from ``split_cli_ids_arg`` or ``intbitset`` propagate
        unchanged (the previous docstring listed ``ValueError`` -- confirm).
    """
    tokens = ids_input.split(',')
    if ALL in tokens:
        return all_repr

    # Imported lazily, only on the path that actually parses ids.
    from invenio.utils.shell import split_cli_ids_arg
    return intbitset(split_cli_ids_arg(ids_input), sanity_checks=True)
def task_run_core():
    """Collect record ids per output format and hand them to bibreformat_task.

    Per the original description, this is the function BibSched invokes via
    a daemon call, with arguments fetched from the BibSched task queue.

    For every comma-separated entry of the ``format`` task option (default
    ``'HB,RECJSON'``) the record set is the sum of:

    * ``all_records()`` when the ``all`` option is present;
    * ``outdated_caches(fmt, last_updated)`` when ``last`` is present;
    * ``missing_caches(fmt)`` unless ``ignore_without`` is present;
    * the ids parsed from the ``recids`` option;
    * the result of ``query_records`` for the collection/field/pattern/
      matching options (each defaulting to ``''``).

    :returns: ``True`` once every format has been processed.
    """
    requested_formats = task_get_option('format', 'HB,RECJSON').split(',')

    for fmt in requested_formats:
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        recids = intbitset()

        if task_has_option("all"):
            recids += all_records()

        if task_has_option("last"):
            recids += outdated_caches(fmt, last_updated)

        # An empty set stands in for "records without this format" when the
        # caller asked to ignore them.
        without_fmt = (intbitset() if task_has_option('ignore_without')
                       else missing_caches(fmt))
        recids += without_fmt

        recids += split_cli_ids_arg(task_get_option('recids', ''))

        query_params = dict(
            (name, task_get_option(name, ''))
            for name in ('collection', 'field', 'pattern', 'matching')
        )
        recids += query_records(query_params)

        process_records = not task_has_option('noprocess')
        bibreformat_task(fmt, recids, without_fmt, process_records)

    return True
def task_run_core():
    """Build the record set for each output format and run bibreformat_task.

    Per the original description, BibSched invokes this function via a
    daemon call and the arguments come from the BibSched task queue.

    The formats are the comma-separated entries of the ``format`` task
    option (default ``'HB,RECJSON'``). For each one, ids are accumulated
    from the ``all`` / ``last`` options, from the records missing the format
    (skipped when ``ignore_without`` is set), from the ``recids`` option and
    from a ``query_records`` search.

    :returns: ``True`` once every format has been processed.
    """
    format_list = task_get_option('format', 'HB,RECJSON')

    for fmt in format_list.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        recids = intbitset()

        if task_has_option("all"):
            recids += all_records()
        if task_has_option("last"):
            recids += outdated_caches(fmt, last_updated)

        if task_has_option('ignore_without'):
            without_fmt = intbitset()
        else:
            without_fmt = missing_caches(fmt)
        recids += without_fmt

        # Ids explicitly listed on the command line.
        raw_recids = task_get_option('recids', '')
        cli_recids = split_cli_ids_arg(raw_recids)
        recids += cli_recids

        # Search criteria; every missing option falls back to ''.
        query_params = {}
        for criterion in ('collection', 'field', 'pattern', 'matching'):
            query_params[criterion] = task_get_option(criterion, '')
        recids += query_records(query_params)

        bibreformat_task(fmt,
                         recids,
                         without_fmt,
                         not task_has_option('noprocess'))

    return True
def task_parse_options(key, val, *_):
    """Apply one command-line option to the task options.

    The original docstring notes that this function must be defined for
    bibtask to create a task.

    :param key: option flag as given on the command line.
    :param val: raw argument of the flag (ignored by the boolean flags).
    :param _: any further positional arguments are ignored.
    :returns: ``True`` for every recognised flag.
    :raises StandardError: when ``key`` is not one of the known flags.
    """
    # Flags whose raw value is stored untouched under a task option.
    stored_as_is = (
        (("--queue", "-q"), "queue"),
        (("--config", "-c"), "config"),
    )
    # Flags that switch one or more boolean task options on, in order.
    switches = (
        (("--no-tickets", "-t"), ("no_tickets",)),
        (("--no-upload", "-b"), ("no_upload",)),
        (("--dry-run", "-n"), ("no_upload", "no_tickets")),
    )

    if key in ("--all", "-a"):
        for rule_name in val.split(","):
            reset_rule_last_run(rule_name)
        return True

    if key in ("--enable-rules", "-e"):
        task_set_option("enabled_rules", set(val.split(",")))
        return True

    if key in ("--id", "-i"):
        task_set_option("record_ids", intbitset(split_cli_ids_arg(val)))
        return True

    for aliases, option_name in stored_as_is:
        if key in aliases:
            task_set_option(option_name, val)
            return True

    for aliases, option_names in switches:
        if key in aliases:
            for option_name in option_names:
                task_set_option(option_name, True)
            return True

    raise StandardError("Error: Unrecognised argument '%s'." % key)
def test_complex(self):
    """Mixed single ids and ranges, in any order, yield the combined ids."""
    parsed_ids = split_cli_ids_arg("1-1,7,10-11,4")
    expected_ids = set([1, 4, 7, 10, 11])
    self.assertEqual(parsed_ids, expected_ids)
def test_multiple(self):
    """A comma-separated list of ids yields exactly those ids."""
    parsed_ids = split_cli_ids_arg("1,5,7")
    expected_ids = set([1, 5, 7])
    self.assertEqual(parsed_ids, expected_ids)
def test_range(self):
    """A dash-separated range is expanded inclusively on both ends."""
    parsed_ids = split_cli_ids_arg("1-5")
    expected_ids = set([1, 2, 3, 4, 5])
    self.assertEqual(parsed_ids, expected_ids)
def test_one(self):
    """A single id yields a one-element result."""
    parsed_ids = split_cli_ids_arg("1")
    expected_ids = set([1])
    self.assertEqual(parsed_ids, expected_ids)
def task_run_core():
    """Prepare the queries for each output format and run bibreformat_task.

    Per the original description, BibSched invokes this function via a
    daemon call and the arguments come from the BibSched task queue.

    The formats are the comma-separated entries of the ``format`` task
    option, or ``'HB'`` when that option is missing or empty. For each
    format the function builds the three SQL statements (``all``, ``last``,
    ``missing``), selects the ones enabled by the ``all`` / ``last``
    options, gathers the collection/field/pattern/matching search criteria
    (``""`` when absent) and the explicit ``recids``, and passes everything
    to ``bibreformat_task``.

    :returns: ``True`` once every format has been processed.
    """
    # Default to 'HB' if no format option was given.
    fmts = task_get_option('format') or 'HB'

    for fmt in fmts.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        # SQL commands to be executed during the script run.
        # NOTE(review): fmt is interpolated directly into the SQL text;
        # presumably it is trusted operator input -- confirm.
        sql = {
            "all": (
                "SELECT br.id FROM bibrec AS br, bibfmt AS bf "
                "WHERE bf.id_bibrec = br.id AND bf.format = '%s'"
            ) % fmt,
            "last": (
                "SELECT br.id FROM bibrec AS br "
                "INNER JOIN bibfmt AS bf ON bf.id_bibrec = br.id "
                "WHERE br.modification_date >= '%(last_updated)s' "
                "AND bf.format='%(format)s' "
                "AND bf.last_updated < br.modification_date"
            ) % {
                'format': fmt,
                'last_updated': last_updated.strftime('%Y-%m-%d %H:%M:%S'),
            },
            # The doubled %% keeps two %s placeholders in the final text.
            "missing": (
                "SELECT br.id FROM bibrec as br "
                "LEFT JOIN bibfmt as bf ON bf.id_bibrec = br.id AND bf.format ='%s' "
                "WHERE bf.id_bibrec IS NULL "
                "AND br.id BETWEEN %%s AND %%s "
            ) % fmt,
        }

        # Only the statements explicitly requested are queued.
        sql_queries = [sql[name] for name in ("all", "last")
                       if task_has_option(name)]

        # Search criteria; an absent option is passed on as "".
        cds_query = {}
        for criterion in ("collection", "field", "pattern", "matching"):
            if task_has_option(criterion):
                cds_query[criterion] = task_get_option(criterion)
            else:
                cds_query[criterion] = ""

        recids = (list(split_cli_ids_arg(task_get_option('recids')))
                  if task_has_option("recids") else [])

        bibreformat_task(fmt,
                         sql,
                         sql_queries,
                         cds_query,
                         task_has_option('without'),
                         not task_has_option('noprocess'),
                         recids)

    return True