예제 #1
0
파일: task.py 프로젝트: chokribr/invenio-1
def get_recids_for_rules(rules):
    """
    Generate the final set of record IDs to load for each rule.

    Candidate records are selected with the rule's optional
    "filter_pattern", "filter_collection", "filter_field" and
    "filter_limit" keys. A rule defining neither a pattern nor a
    collection starts from ``intbitset(trailing_bits=True)``. The
    candidates are then intersected either with the "record_ids" task
    option, when it is set, or with the records modified since the rule
    was last run.

    @param rules: rules to resolve, as {rule_name: rule_dict}
    @type rules: dict

    @return: {rule_name: set of record IDs}
    @rtype: dict
    """
    override_record_ids = task_get_option("record_ids")
    recids = {}
    for rule_name, rule in rules.iteritems():
        # A missing or empty "filter_collection" yields an empty list,
        # i.e. no collection restriction.
        collections = (rule.get("filter_collection") or "").split()

        if "filter_pattern" in rule or collections:
            # A rule may restrict by collection only; it is then searched
            # with an empty pattern instead of having its collection
            # filter silently dropped.
            query = rule.get("filter_pattern", "")
            write_message("Performing given search query: '%s'" % query)
            if collections:
                result = perform_request_search(
                    p=query,
                    of='intbitset',
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field'),
                    c=collections,
                )
            else:
                result = search_pattern(
                    p=query,
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field'),
                )
        else:
            # No filter at all: start from the unrestricted set.
            result = intbitset(trailing_bits=True)

        if override_record_ids is not None:
            # Explicitly requested records take precedence over the
            # "modified since last run" logic.
            result.intersection_update(override_record_ids)
        else:
            last_run = get_rule_lastrun(rule_name)
            modified_recids = get_modified_records_since(last_run)
            if "consider_deleted_records" not in rule:
                # Unless the rule opts in, leave out records whose 980
                # field carries the DELETED marker.
                modified_recids -= search_unit_in_bibxxx(p='DELETED',
                                                         f='980__%',
                                                         type='e')
                if CFG_CERN_SITE:
                    # On CERN installations DUMMY records are left out too.
                    modified_recids -= search_unit_in_bibxxx(p='DUMMY',
                                                             f='980__%',
                                                             type='e')
            result.intersection_update(modified_recids)
        recids[rule_name] = result

    return recids
예제 #2
0
파일: task.py 프로젝트: SCOAP3/invenio
def get_recids_for_rules(rules):
    """
    Generate the final set of record IDs to load for each rule.

    For every rule, a candidate set is built from the optional
    "filter_pattern", "filter_collection", "filter_field" and
    "filter_limit" keys; when neither a pattern nor a collection is
    given the candidate set is ``intbitset(trailing_bits=True)``. That
    set is then narrowed to the "record_ids" task option if it was
    supplied, otherwise to the records modified since the rule's last
    run.

    @param rules: rules to resolve, as {rule_name: rule_dict}
    @type rules: dict

    @return: {rule_name: set of record IDs}
    @rtype: dict
    """
    override_record_ids = task_get_option("record_ids")
    recids = {}
    for rule_name, rule in rules.iteritems():
        # Missing/empty "filter_collection" -> [] -> no collection filter.
        collections = (rule.get("filter_collection") or "").split()
        has_pattern = "filter_pattern" in rule

        if not has_pattern and not collections:
            # Nothing to filter on: start from the unrestricted set.
            result = intbitset(trailing_bits=True)
        else:
            # Honour a collection-only rule by searching with an empty
            # pattern rather than ignoring "filter_collection".
            query = rule["filter_pattern"] if has_pattern else ""
            write_message("Performing given search query: '%s'" % query)
            search_limit = rule.get('filter_limit', 0)
            search_field = rule.get('filter_field')
            if collections:
                result = perform_request_search(
                    p=query,
                    of='intbitset',
                    wl=search_limit,
                    f=search_field,
                    c=collections
                )
            else:
                result = search_pattern(
                    p=query,
                    wl=search_limit,
                    f=search_field,
                )

        if override_record_ids is not None:
            # Records requested explicitly win over the incremental
            # "modified since last run" selection.
            result.intersection_update(override_record_ids)
        else:
            last_run = get_rule_lastrun(rule_name)
            modified_recids = get_modified_records_since(last_run)
            if "consider_deleted_records" not in rule:
                # By default skip records marked DELETED in field 980
                # (and DUMMY ones on CERN installations).
                modified_recids -= search_unit_in_bibxxx(p='DELETED', f='980__%', m='e')
                if CFG_CERN_SITE:
                    modified_recids -= search_unit_in_bibxxx(p='DUMMY', f='980__%', m='e')
            result.intersection_update(modified_recids)
        recids[rule_name] = result

    return recids
예제 #3
0
파일: daemon.py 프로젝트: dset0x/invenio
def _get_recids_foreach_ontology(recids=None, collections=None, taxonomy=None):
    """Return the (ontology, collection, records) groups to analyse.

    The groups are resolved in this order of precedence:

    1. ``recids`` given: a single group with those records, the given
       taxonomy and no collection.
    2. ``collections`` given: one group per non-empty collection, all
       using the given taxonomy.
    3. Otherwise: one group per (ontology, collection) pair stored in the
       clsMETHOD / collection_clsMETHOD tables, limited to the records
       modified since the ontology's ``last_updated`` date (all records
       of the collection if it never ran or the 'force' task option is
       set).

    @param recids: explicit record IDs to analyse
    @param collections: names of the collections to analyse
    @param taxonomy: ontology to use with ``recids`` / ``collections``

    @return: list of dicts with the keys 'ontology', 'collection' and
        'recIDs'
    """
    rec_onts = []

    # User specified record IDs.
    if recids:
        rec_onts.append({
            'ontology': taxonomy,
            'collection': None,
            'recIDs': recids,
        })
        return rec_onts

    # User specified collections.
    if collections:
        for collection in collections:
            records = get_collection_reclist(collection)
            if records:
                rec_onts.append({
                    'ontology': taxonomy,
                    'collection': collection,
                    'recIDs': records
                })
        return rec_onts

    # Use rules found in collection_clsMETHOD.
    # Single-quoted Python literals keep the SQL identifier quotes
    # readable; the join condition previously lacked the opening quote
    # of "clsMETHOD", which left the statement with unbalanced quotes.
    query = (
        'SELECT "clsMETHOD".name, "clsMETHOD".last_updated, collection.name '
        'FROM "clsMETHOD" '
        'JOIN "collection_clsMETHOD" ON "clsMETHOD".id="id_clsMETHOD" '
        'JOIN collection ON id_collection=collection.id'
    )
    result = run_sql(query)

    for ontology, date_last_run, collection in result:
        records = get_collection_reclist(collection)
        if records:
            if not date_last_run:
                bibtask.write_message(
                    "INFO: Collection %s has not been previously "
                    "analyzed." % collection, stream=sys.stderr, verbose=3)
                modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
            elif bibtask.task_get_option('force'):
                bibtask.write_message(
                    "INFO: Analysis is forced for collection %s." %
                    collection, stream=sys.stderr, verbose=3)
                modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
            else:
                modified_records = bibtask.get_modified_records_since(date_last_run)

            # Build a new set instead of intersecting in place, so the
            # object handed out by get_collection_reclist() is left
            # untouched (NOTE(review): it may be shared with other callers).
            records = records & modified_records
            if records:
                rec_onts.append({
                    'ontology': ontology,
                    'collection': collection,
                    'recIDs': records
                })
            else:
                bibtask.write_message(
                    "WARNING: All records from collection '%s' have "
                    "already been analyzed for keywords with ontology '%s' "
                    "on %s." % (collection, ontology, date_last_run),
                    stream=sys.stderr, verbose=2)
        else:
            bibtask.write_message(
                "ERROR: Collection '%s' doesn't contain any record. "
                "Cannot analyse keywords." % (collection,),
                stream=sys.stderr, verbose=0)

    return rec_onts
예제 #4
0
def _get_recids_foreach_ontology(recids=None, collections=None, taxonomy=None):
    """Return a list of dicts pairing an ontology with the records to analyse.

    Each dict has the keys 'ontology', 'collection' and 'recIDs'. The
    source of the records depends on the arguments:

    - ``recids``: those records are returned as one entry using
      ``taxonomy`` and no collection;
    - ``collections``: one entry per collection that contains records,
      using ``taxonomy``;
    - neither: the ontology/collection pairs are read from the
      clsMETHOD and collection_clsMETHOD tables, and each collection is
      reduced to the records modified since the ontology's
      ``last_updated`` value. Collections never analysed before, or runs
      with the 'force' task option, consider every record.

    @param recids: explicit record IDs to analyse
    @param collections: names of the collections to analyse
    @param taxonomy: ontology to use with ``recids`` / ``collections``

    @return: list of {'ontology': ..., 'collection': ..., 'recIDs': ...}
    """
    rec_onts = []

    # User specified record IDs.
    if recids:
        rec_onts.append({
            'ontology': taxonomy,
            'collection': None,
            'recIDs': recids,
        })
        return rec_onts

    # User specified collections.
    if collections:
        for collection in collections:
            records = get_collection_reclist(collection)
            if records:
                rec_onts.append({
                    'ontology': taxonomy,
                    'collection': collection,
                    'recIDs': records
                })
        return rec_onts

    # Use rules found in collection_clsMETHOD.
    # The ON clause used to start with an unquoted `clsMETHOD"`, leaving
    # the double quotes of the statement unbalanced; the identifier is now
    # quoted on both sides like everywhere else in the query.
    result = run_sql(
        'SELECT "clsMETHOD".name, "clsMETHOD".last_updated, '
        'collection.name FROM "clsMETHOD" JOIN "collection_clsMETHOD" ON '
        '"clsMETHOD".id="id_clsMETHOD" JOIN collection ON '
        'id_collection=collection.id')

    for ontology, date_last_run, collection in result:
        records = get_collection_reclist(collection)
        if not records:
            bibtask.write_message(
                "ERROR: Collection '%s' doesn't contain any record. "
                "Cannot analyse keywords." % (collection, ),
                stream=sys.stderr,
                verbose=0)
            continue

        if not date_last_run:
            bibtask.write_message(
                "INFO: Collection %s has not been previously "
                "analyzed." % collection,
                stream=sys.stderr,
                verbose=3)
            modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
        elif bibtask.task_get_option('force'):
            bibtask.write_message(
                "INFO: Analysis is forced for collection %s." % collection,
                stream=sys.stderr,
                verbose=3)
            modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
        else:
            modified_records = bibtask.get_modified_records_since(
                date_last_run)

        # Intersect into a fresh set: the collection record list returned
        # by get_collection_reclist() is not modified in place
        # (NOTE(review): it may be shared with other callers).
        records = records & modified_records
        if records:
            rec_onts.append({
                'ontology': ontology,
                'collection': collection,
                'recIDs': records
            })
        else:
            bibtask.write_message(
                "WARNING: All records from collection '%s' have "
                "already been analyzed for keywords with ontology '%s' "
                "on %s." % (collection, ontology, date_last_run),
                stream=sys.stderr,
                verbose=2)

    return rec_onts