Exemple #1
0
def fetch_concerned_records(name):
    """Return the (id, date) rows of the records the current task must handle.

    The selection depends on the task options:

      - ``new``: rows of ``bibrec`` whose ``creation_date`` is at or after
        the date, and whose ``id`` is greater than the record id, returned
        by ``fetch_last_updated(name)``; ordered by creation date.
      - ``modified``: the same filter applied to ``modification_date``.
      - otherwise: the ids in the ``recids`` option plus the members of
        every collection in the ``collections`` option, ordered by id, with
        NULL in place of the date.

    @param name: identifier handed to ``fetch_last_updated``
    @return: whatever ``run_sql`` returns for the selected query, or an
        empty list when no record id was selected in the last case
    """
    task_update_progress("Fetching record ids")

    last_recid, last_date = fetch_last_updated(name)

    if task_get_option('new'):
        # Fetch all records inserted since last run
        sql = "SELECT `id`, `creation_date` FROM `bibrec` " \
            "WHERE `creation_date` >= %s " \
            "AND `id` > %s " \
            "ORDER BY `creation_date`"
        records = run_sql(sql, (last_date.isoformat(), last_recid))
    elif task_get_option('modified'):
        # Fetch all records modified since last run
        sql = "SELECT `id`, `modification_date` FROM `bibrec` " \
            "WHERE `modification_date` >= %s " \
            "AND `id` > %s " \
            "ORDER BY `modification_date`"
        records = run_sql(sql, (last_date.isoformat(), last_recid))
    else:
        # Work on a copy so the stored option is left untouched, and cope
        # with an option lookup that yields nothing (e.g. None).
        given_recids = set(task_get_option('recids') or ())
        for collection in task_get_option('collections') or ():
            # update(), not add(): every member id has to become its own
            # element, otherwise the whole reclist object would be sent to
            # the database as one single parameter.
            given_recids.update(get_collection_reclist(collection))

        if given_recids:
            # One placeholder per id; the values themselves are passed
            # separately so the driver does the quoting.
            format_strings = ','.join(['%s'] * len(given_recids))
            records = run_sql("SELECT `id`, NULL FROM `bibrec` "
                              "WHERE `id` IN (%s) ORDER BY `id`" % format_strings,
                              list(given_recids))
        else:
            records = []

    task_update_progress("Done fetching record ids")

    return records
Exemple #2
0
def fetch_concerned_records(name):
    """Return the (id, date) rows of the records the current task must handle.

    The selection depends on the task options:

      - ``new``: rows of ``bibrec`` whose ``creation_date`` is at or after
        the date, and whose ``id`` is greater than the record id, returned
        by ``fetch_last_updated(name)``; ordered by creation date.
      - ``modified``: the same filter applied to ``modification_date``.
      - otherwise: the ids in the ``recids`` option plus the members of
        every collection in the ``collections`` option, ordered by id, with
        NULL in place of the date.

    @param name: identifier handed to ``fetch_last_updated``
    @return: whatever ``run_sql`` returns for the selected query, or an
        empty list when no record id was selected in the last case
    """
    task_update_progress("Fetching record ids")

    last_recid, last_date = fetch_last_updated(name)

    if task_get_option('new'):
        # Fetch all records inserted since last run
        sql = "SELECT `id`, `creation_date` FROM `bibrec` " \
            "WHERE `creation_date` >= %s " \
            "AND `id` > %s " \
            "ORDER BY `creation_date`"
        records = run_sql(sql, (last_date.isoformat(), last_recid))
    elif task_get_option('modified'):
        # Fetch all records modified since last run
        sql = "SELECT `id`, `modification_date` FROM `bibrec` " \
            "WHERE `modification_date` >= %s " \
            "AND `id` > %s " \
            "ORDER BY `modification_date`"
        records = run_sql(sql, (last_date.isoformat(), last_recid))
    else:
        # Work on a copy so the stored option is left untouched, and cope
        # with an option lookup that yields nothing (e.g. None).
        given_recids = set(task_get_option('recids') or ())
        for collection in task_get_option('collections') or ():
            # update(), not add(): every member id has to become its own
            # element, otherwise the whole reclist object would be sent to
            # the database as one single parameter.
            given_recids.update(get_collection_reclist(collection))

        if given_recids:
            # One placeholder per id; the values themselves are passed
            # separately so the driver does the quoting.
            format_strings = ','.join(['%s'] * len(given_recids))
            records = run_sql("SELECT `id`, NULL FROM `bibrec` "
                              "WHERE `id` IN (%s) ORDER BY `id`" % format_strings,
                              list(given_recids))
        else:
            records = []

    task_update_progress("Done fetching record ids")

    return records
Exemple #3
0
def get_all_public_records(collections):
    """Return the records of ``bibrec`` that belong to the given collections.

    The record ids of every collection in ``collections`` are merged, the
    whole ``bibrec`` table is read, and only rows whose id is in the merged
    set are kept.

    returns list of (recid, last_modification) tuples
    """
    wanted = intbitset()
    for coll_name in collections:
        wanted += get_collection_reclist(coll_name)

    rows = run_sql('SELECT id, modification_date FROM bibrec')

    selected = []
    for recid, lastmod in rows:
        if recid in wanted:
            selected.append((recid, lastmod))
    return selected
Exemple #4
0
def create_update_jobs_by_collection(batch_template_file, collection, job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS):
    """ Creates the job description files to update a whole collection.

    The record ids of the collection are looked up and handed over to
    create_update_jobs_by_recids, whose result is returned unchanged.

    @param batch_template_file: fullpath to the template for the update
    @type batch_template_file: string
    @param collection: name of the collection that should be updated
    @type collection: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    """
    return create_update_jobs_by_recids(
        get_collection_reclist(collection),
        batch_template_file,
        job_directory)
Exemple #5
0
def get_all_public_records(collections):
    """Return the non-restricted records of the given collections.

    The record ids of every collection in ``collections`` are merged, the
    ids reported by get_all_restricted_recids() are removed, and the rows
    of ``bibrec`` whose id remains are returned. Each modification date is
    raised to at least get_minimum_timestamp().

    returns list of (recid, last_modification) tuples
    """
    restricted = get_all_restricted_recids()
    public = intbitset()
    floor = get_minimum_timestamp()
    for coll_name in collections:
        public += get_collection_reclist(coll_name)
    public = public.difference(restricted)

    rows = run_sql('SELECT id, modification_date FROM bibrec')

    selected = []
    for recid, lastmod in rows:
        if recid not in public:
            continue
        selected.append((recid, max(lastmod, floor)))
    return selected
Exemple #6
0
def get_all_public_records(collections):
    """Return the non-restricted records of the given collections.

    The record ids of every collection in ``collections`` are merged, the
    ids reported by get_all_restricted_recids() are removed, and the rows
    of ``bibrec`` whose id remains are returned. Each modification date is
    raised to at least get_minimum_timestamp().

    returns list of (recid, last_modification) tuples
    """
    restricted = get_all_restricted_recids()
    public = intbitset()
    floor = get_minimum_timestamp()
    for coll_name in collections:
        public += get_collection_reclist(coll_name)
    public = public.difference(restricted)

    rows = run_sql("SELECT id, modification_date FROM bibrec")

    selected = []
    for recid, lastmod in rows:
        if recid not in public:
            continue
        selected.append((recid, max(lastmod, floor)))
    return selected
Exemple #7
0
def create_update_jobs_by_collection(
                            batch_template_file,
                            collection,
                            job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS):
    """ Creates the job description files to update a whole collection.

    The record ids of the collection are looked up and handed over to
    create_update_jobs_by_recids, whose result is returned unchanged.

    @param batch_template_file: fullpath to the template for the update
    @type batch_template_file: string
    @param collection: name of the collection that should be updated
    @type collection: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    """
    return create_update_jobs_by_recids(
        get_collection_reclist(collection),
        batch_template_file,
        job_directory)
Exemple #8
0
def get_all_public_records_modified_last_month(collections):
    """Return the non-restricted records of the given collections that were
    modified during the last 31 days.

    The record ids of every collection in ``collections`` are merged and
    the ids reported by get_all_restricted_recids() are removed. Only rows
    of ``bibrec`` with a modification date later than today minus 31 days
    are fetched, and those whose id is in the remaining set are returned.

    returns list of (recid, last_modification) tuples
    """
    restricted = get_all_restricted_recids()
    cutoff = datetime.date.today() - datetime.timedelta(days=31)

    candidates = intbitset()
    for coll_name in collections:
        candidates += get_collection_reclist(coll_name)
    candidates = candidates.difference(restricted)

    rows = run_sql(
        'SELECT id, modification_date FROM bibrec WHERE modification_date > %s',
        (cutoff, ))

    selected = []
    for recid, lastmod in rows:
        if recid in candidates:
            selected.append((recid, lastmod))
    return selected
Exemple #9
0
def get_all_public_records_modified_last_month(collections):
    """Return the non-restricted records of the given collections that were
    modified during the last 31 days.

    The record ids of every collection in ``collections`` are merged and
    the ids reported by get_all_restricted_recids() are removed. Only rows
    of ``bibrec`` with a modification date later than today minus 31 days
    are fetched, and those whose id is in the remaining set are returned.

    returns list of (recid, last_modification) tuples
    """
    restricted = get_all_restricted_recids()
    cutoff = datetime.date.today() - datetime.timedelta(days=31)

    candidates = intbitset()
    for coll_name in collections:
        candidates += get_collection_reclist(coll_name)
    candidates = candidates.difference(restricted)

    rows = run_sql(
        'SELECT id, modification_date FROM bibrec WHERE modification_date > %s',
        (cutoff,))

    selected = []
    for recid, lastmod in rows:
        if recid in candidates:
            selected.append((recid, lastmod))
    return selected
Exemple #10
0
    def lazy_parser(collection, left_tags, right_tags):
        """Yield (left_value, right_value) pairs for each record of a collection.

        For every record, each distinct value read from ``left_tags`` is
        paired with each distinct value read from ``right_tags``. A record
        whose left-hand lookup raises IndexError is skipped.
        """
        for recid in get_collection_reclist(collection):
            # Key tag
            # e.g. for journals database: 711__a
            try:
                keys = get_tag_values(recid, left_tags)
            except IndexError:
                continue

            # Value tags
            # e.g. for journals database: 130__a, 730__a and 030__a
            values = get_tag_values(recid, right_tags)

            for key in set(keys):
                for value in set(values):
                    yield key, value
Exemple #11
0
def find_book(citation_element):
    """Look up the record(s) of the 'Books' collection matching a citation.

    Records whose title matches ``citation_element['title']`` are
    intersected with the 'Books' collection. A single hit is returned as
    is. Otherwise, when the citation has a 'year', the first candidate with
    an equal '269__c' field value is returned in a one-element list.

    Returns the matching record ids, or an empty list when nothing matched.
    """
    books = get_collection_reclist('Books')
    title = citation_element['title']
    candidates = intbitset(get_recids_matching_query(title, 'title'))
    candidates &= books

    # Exactly one title hit inside Books: no disambiguation needed
    if len(candidates) == 1:
        return candidates

    if 'year' not in citation_element:
        return []

    for recid in candidates:
        if citation_element['year'] in get_fieldvalues(recid, '269__c'):
            return [recid]
    return []
Exemple #12
0
    def lazy_parser(collection, left_tags, right_tags):
        """Yield (left_value, right_value) pairs for each record of a collection.

        For every record, each distinct value read from ``left_tags`` is
        paired with each distinct value read from ``right_tags``. A record
        whose left-hand lookup raises IndexError is skipped.
        """
        for recid in get_collection_reclist(collection):
            # Key tag
            # e.g. for journals database: 711__a
            try:
                keys = get_tag_values(recid, left_tags)
            except IndexError:
                continue

            # Value tags
            # e.g. for journals database: 130__a, 730__a and 030__a
            values = get_tag_values(recid, right_tags)

            for key in set(keys):
                for value in set(values):
                    yield key, value
Exemple #13
0
def find_book(citation_element):
    """Look up the record(s) of the 'Books' collection matching a citation.

    Records whose title matches ``citation_element['title']`` are
    intersected with the 'Books' collection. A single hit is returned as
    is. Otherwise, when the citation has a 'year', the first candidate with
    an equal '269__c' field value is returned in a one-element list.

    Returns the matching record ids, or an empty list when nothing matched.
    """
    books = get_collection_reclist('Books')
    title = citation_element['title']
    candidates = intbitset(get_recids_matching_query(title, 'title'))
    candidates &= books

    # Exactly one title hit inside Books: no disambiguation needed
    if len(candidates) == 1:
        return candidates

    if 'year' not in citation_element:
        return []

    for recid in candidates:
        if citation_element['year'] in get_fieldvalues(recid, '269__c'):
            return [recid]
    return []
Exemple #14
0
def task_parse_options(key, value, opts, args):   # pylint: disable-msg=W0613
    """ Must be defined for bibtask to create a task.

    Records one command-line option into the task options. Flag options are
    stored as True; the others accumulate their values in a set kept under
    the option name. Always returns True; raises StandardError when any
    standalone argument is present.
    """
    def option_set(name):
        # Return the set stored under `name`, installing a new empty one
        # when nothing (or nothing truthy) is stored yet.
        stored = task_get_option(name)
        if stored:
            return stored
        fresh = set()
        task_set_option(name, fresh)
        return fresh

    if args:
        # There should be no standalone arguments for any bibcatalog job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
    elif key in ('-c', '--collections'):
        # NOTE(review): what gets stored here are the record ids of each
        # named collection, not the names -- confirm consumers expect that.
        members = option_set('collections')
        for coll_name in value.split(","):
            members.update(get_collection_reclist(coll_name))
    elif key in ('-i', '--recids'):
        option_set('recids').update(split_ids(value))
    elif key in ('--tickets',):
        option_set('tickets').update(
            [ticket.strip() for ticket in value.split(',')])
    elif key in ('--all-tickets',):
        task_set_option('all-tickets', True)
    elif key in ('-q', '--query'):
        option_set('query').add(value)
    elif key in ('-r', '--reportnumbers'):
        option_set('reportnumbers').add(value)
    return True
Exemple #15
0
def task_parse_options(key, value, opts, args):   # pylint: disable-msg=W0613
    """ Must be defined for bibtask to create a task.

    Records one command-line option into the task options. Flag options are
    stored as True; the others accumulate their values in a set kept under
    the option name. Always returns True; raises StandardError when any
    standalone argument is present.
    """
    def option_set(name):
        # Return the set stored under `name`, installing a new empty one
        # when nothing (or nothing truthy) is stored yet.
        stored = task_get_option(name)
        if stored:
            return stored
        fresh = set()
        task_set_option(name, fresh)
        return fresh

    if args:
        # There should be no standalone arguments for any bibcatalog job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
    elif key in ('-c', '--collections'):
        # NOTE(review): what gets stored here are the record ids of each
        # named collection, not the names -- confirm consumers expect that.
        members = option_set('collections')
        for coll_name in value.split(","):
            members.update(get_collection_reclist(coll_name))
    elif key in ('-i', '--recids'):
        option_set('recids').update(split_ids(value))
    elif key in ('--tickets',):
        option_set('tickets').update(
            [ticket.strip() for ticket in value.split(',')])
    elif key in ('--all-tickets',):
        task_set_option('all-tickets', True)
    elif key in ('-q', '--query'):
        option_set('query').add(value)
    elif key in ('-r', '--reportnumbers'):
        option_set('reportnumbers').add(value)
    return True
Exemple #16
0
def search_unit(query, f, m, wl=None):
    """Search for records in collection query.

    The record list of the collection named ``query`` is returned when it
    is non-empty; otherwise the query is passed on to
    ``search_unit_in_bibwords`` against the 'collection' field. An empty
    query gives an empty intbitset.

    Example:

    .. code-block:: text

        collection:"BOOK"
        collection:"Books"
    """
    from invenio.legacy.search_engine import get_collection_reclist
    from invenio.legacy.search_engine import search_unit_in_bibwords

    if not len(query):
        return intbitset([])

    hits = get_collection_reclist(query)
    if hits:
        return hits
    # No collection of that exact name (or it is empty): use the word index
    return search_unit_in_bibwords(query, 'collection', m, wl=wl)
Exemple #17
0
def search_unit(query, f, m, wl=None):
    """Search for records in collection query.

    The record list of the collection named ``query`` is returned when it
    is non-empty; otherwise the query is passed on to
    ``search_unit_in_bibwords`` against the 'collection' field. An empty
    query gives an empty intbitset.

    Example:

    .. code-block:: text

        collection:"BOOK"
        collection:"Books"
    """
    from invenio.legacy.search_engine import get_collection_reclist
    from invenio.legacy.search_engine import search_unit_in_bibwords

    if not len(query):
        return intbitset([])

    hits = get_collection_reclist(query)
    if hits:
        return hits
    # No collection of that exact name (or it is empty): use the word index
    return search_unit_in_bibwords(query, 'collection', m, wl=wl)
Exemple #18
0
    def get_all_collections_for_records(self, recreate_cache_if_needed=True):
        """Build a dict with recid as key and collection list as value.

        Each collection is listed by its full path: the names of its
        ancestors followed by its own name, joined with "/". The dict is
        stored in ``self._recids_collections``; nothing is returned.

        This replace existing Invenio function for performance reason.

        :param recreate_cache_if_needed: [bool] True if regenerate the cache
        """
        from invenio.legacy.search_engine import collection_reclist_cache, get_collection_reclist
        from invenio.legacy.websearch.webcoll import Collection

        # update the cache?
        if recreate_cache_if_needed:
            collection_reclist_cache.recreate_cache_if_needed()

        mapping = {}
        for name in collection_reclist_cache.cache.keys():
            # The cache was handled above, so do not refresh it per collection
            members = get_collection_reclist(name, recreate_cache_if_needed=False)
            path_parts = [ancestor.name for ancestor in Collection(name).get_ancestors()]
            path_parts.append(name)
            full_path_name = "/".join(path_parts)
            for recid in members:
                if recid in mapping:
                    mapping[recid].append(full_path_name)
                else:
                    mapping[recid] = [full_path_name]
        self._recids_collections = mapping
Exemple #19
0
    def get_all_collections_for_records(self, recreate_cache_if_needed=True):
        """Build a dict with recid as key and collection list as value.

        Each collection is listed by its full path: the names of its
        ancestors followed by its own name, joined with "/". The dict is
        stored in ``self._recids_collections``; nothing is returned.

        This replace existing Invenio function for performance reason.

        :param recreate_cache_if_needed: [bool] True if regenerate the cache
        """
        from invenio.legacy.search_engine import collection_reclist_cache, get_collection_reclist
        from invenio.legacy.websearch.webcoll import Collection

        # update the cache?
        if recreate_cache_if_needed:
            collection_reclist_cache.recreate_cache_if_needed()

        mapping = {}
        for name in collection_reclist_cache.cache.keys():
            # The cache was handled above, so do not refresh it per collection
            members = get_collection_reclist(name,
                                             recreate_cache_if_needed=False)
            path_parts = [
                ancestor.name for ancestor in Collection(name).get_ancestors()]
            path_parts.append(name)
            full_path_name = "/".join(path_parts)
            for recid in members:
                if recid in mapping:
                    mapping[recid].append(full_path_name)
                else:
                    mapping[recid] = [full_path_name]
        self._recids_collections = mapping
Exemple #20
0
def _get_recids_foreach_ontology(recids=None, collections=None, taxonomy=None):
    """Returns an array containing hash objects containing the
    collection, its corresponding ontology and the records belonging to
    the given collection."""
    rec_onts = []

    # User specified record IDs.
    if recids:
        rec_onts.append({
            'ontology': taxonomy,
            'collection': None,
            'recIDs': recids,
        })
        return rec_onts

    # User specified collections.
    if collections:
        for collection in collections:
            records = get_collection_reclist(collection)
            if records:
                rec_onts.append({
                    'ontology': taxonomy,
                    'collection': collection,
                    'recIDs': records
                })
        return rec_onts

    # Use rules found in collection_clsMETHOD.
    result = run_sql("""SELECT "clsMETHOD".name, "clsMETHOD".last_updated, """
                     """collection.name FROM "clsMETHOD" JOIN "collection_clsMETHOD" ON """
                     """clsMETHOD".id="id_clsMETHOD" JOIN collection ON """
                     "id_collection=collection.id")

    for ontology, date_last_run, collection in result:
        records = get_collection_reclist(collection)
        if records:
            if not date_last_run:
                bibtask.write_message(
                    "INFO: Collection %s has not been previously "
                    "analyzed." % collection, stream=sys.stderr, verbose=3)
                modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
            elif bibtask.task_get_option('force'):
                bibtask.write_message(
                    "INFO: Analysis is forced for collection %s." %
                    collection, stream=sys.stderr, verbose=3)
                modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
            else:
                modified_records = bibtask.get_modified_records_since(date_last_run)

            records &= modified_records
            if records:
                rec_onts.append({
                    'ontology': ontology,
                    'collection': collection,
                    'recIDs': records
                })
            else:
                bibtask.write_message(
                    "WARNING: All records from collection '%s' have "
                    "already been analyzed for keywords with ontology '%s' "
                    "on %s." % (collection, ontology, date_last_run),
                    stream=sys.stderr, verbose=2)
        else:
            bibtask.write_message(
                "ERROR: Collection '%s' doesn't contain any record. "
                "Cannot analyse keywords." % (collection,),
                stream=sys.stderr, verbose=0)

    return rec_onts
Exemple #21
0
def find_isbn(citation_element):
    """Return the list of 'Books' records matching the citation's ISBN.

    Records found for ``citation_element['ISBN']`` in the 'isbn' field are
    intersected with the 'Books' collection.
    """
    books = get_collection_reclist('Books')
    isbn_hits = get_recids_matching_query(citation_element['ISBN'], 'isbn')
    return list(intbitset(isbn_hits) & books)
def recids_cache(collections, cache={}):
    """Return the merged record ids of the given collections, memoized.

    @param collections: comma-separated collection names
    @type collections: string
    @param cache: memoization store; the mutable default is deliberate so
        that results are kept from one call to the next
    @type cache: dict
    @return: intbitset holding the record ids of all listed collections
    """
    # Key the store on the argument itself: with one fixed key, a later
    # call for different collections would get the first call's result.
    if collections not in cache:
        valid_recids = intbitset()
        for coll in collections.split(','):
            valid_recids += get_collection_reclist(coll)
        cache[collections] = valid_recids
    return cache[collections]
Exemple #23
0
def bst_create_icons(recid, icon_sizes, icon_format_mappings=None,
                     collection=None, docnames=None, add_default_icon=0, inherit_moreinfo=0):
    """BibTasklet for generating missing icons.
       @param recid: the record on which the action is being performed
       @type recid: int
       @param icon_sizes: a comma-separated list of icon sizes, ex 180,640
       @type icon_sizes: string
       @param collection: the collection name on which to run the task;
                          if recid is defined, collection will be ignored
       @type collection: string
       @param icon_format_mappings: defines for each "master" format in
                                   which format the icons should be
                                   created. If the master format is
                                   not specified here, then its icons
                                   will be created in the same format,
                                   if possible (for eg. the icons of a
                                   TIFF file would be created as TIFF,
                                   while icons of a PDF or DOC file
                                   would be created in JPG) and unless
                                   a default mapping is not provided in
                                   C{CFG_ICON_CREATION_FORMAT_MAPPINGS}.
                                   Use syntax masterextension-targetextension1,targetextension2
                                   (eg. "doc-png,jpg" or "png-jpg")
                                   Use '*' to target extensions not
                                   matched by other rules (if
                                   necessary set its value to empty ''
                                   in order to override/remove the
                                   default star rule set in
                                   C{CFG_ICON_CREATION_FORMAT_MAPPINGS}.
       @type icon_format_mappings: list (a single string is also accepted)
       @param docnames: the list of docnames for which we want to create an icon.
                        If not provided, consider all docnames.
                        Separate docnames using "/"
       @type docnames: list
       @param add_default_icon: if a default icon (i.e. without icon
                                size suffix, matching
                                CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT)
                                should be added (1) or not (0)
       @type add_default_icon: int
       @param inherit_moreinfo: if the added icons should also have
                                their description and comment set to
                                the same value as the "main" bibdoc
                                (1) or not (0)
       @type inherit_moreinfo: int
       @return: 1 when neither recid nor collection is given, 0 when
                icon_format_mappings cannot be parsed
    """
    if recid:
        recids = [int(recid)]
    elif collection:
        from invenio.legacy.search_engine import get_collection_reclist
        recids = get_collection_reclist(collection)
    else:
        write_message("Error: no recid found.", sys.stderr)
        return 1

    # Normalise both flags to booleans; a value int() cannot convert counts
    # as "off".
    try:
        add_default_icon = bool(int(add_default_icon))
    except (TypeError, ValueError, OverflowError):
        add_default_icon = False
    try:
        inherit_moreinfo = bool(int(inherit_moreinfo))
    except (TypeError, ValueError, OverflowError):
        inherit_moreinfo = False

    if icon_format_mappings is None:
        icon_format_mappings = []
    if isinstance(icon_format_mappings, str):
        icon_format_mappings = [icon_format_mappings]
    try:
        # "master-target1,target2" -> {master: [target1, target2]};
        # a part without a comma is kept as a plain string. A mapping
        # without "-" yields a single-item pair, which dict() rejects and
        # which is reported below.
        icon_format_mappings = dict(
            [[part.split(',') if ',' in part else part
              for part in mapping.split("-", 1)]
             for mapping in icon_format_mappings])
    except Exception as e:
        write_message("Error: parameter 'icon_format_mappings' not well-formed:\n%s" % e, sys.stderr)
        return 0
def recids_cache(collections, cache={}):
    """Return the merged record ids of the given collections, memoized.

    @param collections: comma-separated collection names
    @type collections: string
    @param cache: memoization store; the mutable default is deliberate so
        that results are kept from one call to the next
    @type cache: dict
    @return: intbitset holding the record ids of all listed collections
    """
    # Key the store on the argument itself: with one fixed key, a later
    # call for different collections would get the first call's result.
    if collections not in cache:
        valid_recids = intbitset()
        for coll in collections.split(","):
            valid_recids += get_collection_reclist(coll)
        cache[collections] = valid_recids
    return cache[collections]
def bst_create_icons(recid,
                     icon_sizes,
                     icon_format_mappings=None,
                     collection=None,
                     docnames=None,
                     add_default_icon=0,
                     inherit_moreinfo=0):
    """BibTasklet for generating missing icons.
       @param recid: the record on which the action is being performed
       @type recid: int
       @param icon_sizes: a comma-separated list of icon sizes, ex 180,640
       @type icon_sizes: string
       @param collection: the collection name on which to run the task;
                          if recid is defined, collection will be ignored
       @type collection: string
       @param icon_format_mappings: defines for each "master" format in
                                   which format the icons should be
                                   created. If the master format is
                                   not specified here, then its icons
                                   will be created in the same format,
                                   if possible (for eg. the icons of a
                                   TIFF file would be created as TIFF,
                                   while icons of a PDF or DOC file
                                   would be created in JPG) and unless
                                   a default mapping is not provided in
                                   C{CFG_ICON_CREATION_FORMAT_MAPPINGS}.
                                   Use syntax masterextension-targetextension1,targetextension2
                                   (eg. "doc-png,jpg" or "png-jpg")
                                   Use '*' to target extensions not
                                   matched by other rules (if
                                   necessary set its value to empty ''
                                   in order to override/remove the
                                   default star rule set in
                                   C{CFG_ICON_CREATION_FORMAT_MAPPINGS}.
       @type icon_format_mappings: list (a single string is also accepted)
       @param docnames: the list of docnames for which we want to create an icon.
                        If not provided, consider all docnames.
                        Separate docnames using "/"
       @type docnames: list
       @param add_default_icon: if a default icon (i.e. without icon
                                size suffix, matching
                                CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT)
                                should be added (1) or not (0)
       @type add_default_icon: int
       @param inherit_moreinfo: if the added icons should also have
                                their description and comment set to
                                the same value as the "main" bibdoc
                                (1) or not (0)
       @type inherit_moreinfo: int
       @return: 1 when neither recid nor collection is given, 0 when
                icon_format_mappings cannot be parsed
    """
    if recid:
        recids = [int(recid)]
    elif collection:
        from invenio.legacy.search_engine import get_collection_reclist
        recids = get_collection_reclist(collection)
    else:
        write_message("Error: no recid found.", sys.stderr)
        return 1

    # Normalise both flags to booleans; a value int() cannot convert counts
    # as "off".
    try:
        add_default_icon = bool(int(add_default_icon))
    except (TypeError, ValueError, OverflowError):
        add_default_icon = False
    try:
        inherit_moreinfo = bool(int(inherit_moreinfo))
    except (TypeError, ValueError, OverflowError):
        inherit_moreinfo = False

    if icon_format_mappings is None:
        icon_format_mappings = []
    if isinstance(icon_format_mappings, str):
        icon_format_mappings = [icon_format_mappings]
    try:
        # "master-target1,target2" -> {master: [target1, target2]};
        # a part without a comma is kept as a plain string. A mapping
        # without "-" yields a single-item pair, which dict() rejects and
        # which is reported below.
        icon_format_mappings = dict(
            [[part.split(',') if ',' in part else part
              for part in mapping.split("-", 1)]
             for mapping in icon_format_mappings])
    except Exception as e:
        write_message(
            "Error: parameter 'icon_format_mappings' not well-formed:\n%s" % e,
            sys.stderr)
        return 0
Exemple #26
0
def recids_cache(collections, cache={}):
    """Return the merged record ids of the given collections, memoized.

    @param collections: comma-separated collection names
    @type collections: string
    @param cache: memoization store; the mutable default is deliberate so
        that results are kept from one call to the next
    @type cache: dict
    @return: intbitset holding the record ids of all listed collections
    """
    # Key the store on the argument itself: with one fixed key, a later
    # call for different collections would get the first call's result.
    if collections not in cache:
        valid_recids = intbitset()
        for coll in collections.split(','):
            valid_recids += get_collection_reclist(coll)
        cache[collections] = valid_recids
    return cache[collections]
Exemple #27
0
def _get_recids_foreach_ontology(recids=None, collections=None, taxonomy=None):
    """Returns an array containing hash objects containing the
    collection, its corresponding ontology and the records belonging to
    the given collection."""
    rec_onts = []

    # User specified record IDs.
    if recids:
        rec_onts.append({
            'ontology': taxonomy,
            'collection': None,
            'recIDs': recids,
        })
        return rec_onts

    # User specified collections.
    if collections:
        for collection in collections:
            records = get_collection_reclist(collection)
            if records:
                rec_onts.append({
                    'ontology': taxonomy,
                    'collection': collection,
                    'recIDs': records
                })
        return rec_onts

    # Use rules found in collection_clsMETHOD.
    result = run_sql(
        """SELECT "clsMETHOD".name, "clsMETHOD".last_updated, """
        """collection.name FROM "clsMETHOD" JOIN "collection_clsMETHOD" ON """
        """clsMETHOD".id="id_clsMETHOD" JOIN collection ON """
        "id_collection=collection.id")

    for ontology, date_last_run, collection in result:
        records = get_collection_reclist(collection)
        if records:
            if not date_last_run:
                bibtask.write_message(
                    "INFO: Collection %s has not been previously "
                    "analyzed." % collection,
                    stream=sys.stderr,
                    verbose=3)
                modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
            elif bibtask.task_get_option('force'):
                bibtask.write_message(
                    "INFO: Analysis is forced for collection %s." % collection,
                    stream=sys.stderr,
                    verbose=3)
                modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
            else:
                modified_records = bibtask.get_modified_records_since(
                    date_last_run)

            records &= modified_records
            if records:
                rec_onts.append({
                    'ontology': ontology,
                    'collection': collection,
                    'recIDs': records
                })
            else:
                bibtask.write_message(
                    "WARNING: All records from collection '%s' have "
                    "already been analyzed for keywords with ontology '%s' "
                    "on %s." % (collection, ontology, date_last_run),
                    stream=sys.stderr,
                    verbose=2)
        else:
            bibtask.write_message(
                "ERROR: Collection '%s' doesn't contain any record. "
                "Cannot analyse keywords." % (collection, ),
                stream=sys.stderr,
                verbose=0)

    return rec_onts
Exemple #28
0
def find_isbn(citation_element):
    """Return the list of 'Books' records matching the citation's ISBN.

    Records found for ``citation_element['ISBN']`` in the 'isbn' field are
    intersected with the 'Books' collection.
    """
    books = get_collection_reclist('Books')
    isbn_hits = get_recids_matching_query(citation_element['ISBN'], 'isbn')
    return list(intbitset(isbn_hits) & books)