Python get_cited_byの例、invenio.bibrank_citation_searcher.get_cited_by Pythonの例

コード例 #1

0

ファイルを表示

ファイル: bibrank_selfcites_indexer.py プロジェクト: BessemAamira/invenio

def get_self_citations_count(recids, algorithm='simple',
                                  precompute=CFG_BIBRANK_SELFCITES_PRECOMPUTE):
    """Sum, over RECIDS, the citations that are not self-citations.

    For every record the number of self-citations is subtracted from the
    number of citing records returned by get_cited_by(); the differences
    are added up.

    @param recids: record ids to examine
    @param algorithm: key into ALL_ALGORITHMS selecting the function that
        computes the self-citations (only looked up when PRECOMPUTE is
        false)
    @param precompute: when true, the self-citation counts are read through
        get_precomputed_self_cites_list() instead of being computed
    @return: the summed number of citations left once self-citations have
        been subtracted
    """
    count = 0

    if precompute:
        # Index the pre-computed rows by their first column; records
        # without a row count as having no self-citations.
        precomputed = dict((row[0], row[1])
                           for row in get_precomputed_self_cites_list(recids))
        for recid in recids:
            citing = get_cited_by(recid)
            own = precomputed.get(recid, 0)
            count += len(citing) - own
        return count

    author_tags = get_authors_tags()
    find_self_cites = ALL_ALGORITHMS[algorithm]
    for recid in recids:
        citing = get_cited_by(recid)
        own = find_self_cites(recid, author_tags)
        count += len(citing) - len(own)
    return count

コード例 #2

0

ファイルを表示

ファイル: bibrank_selfcites_indexer.py プロジェクト: ppiotr/Invenio

def get_self_citations_count(recids, algorithm='simple',
                                  precompute=CFG_BIBRANK_SELFCITES_PRECOMPUTE):
    """Return how many citations of RECIDS remain without self-citations.

    The self-citations of each record are either computed with the
    function registered under ALGORITHM in ALL_ALGORITHMS, or, when
    PRECOMPUTE is true, fetched with get_precomputed_self_cites_list().
    Their number is subtracted from the number of records returned by
    get_cited_by() and the result is accumulated over all RECIDS.
    """
    if precompute:
        stored = {}
        for row in get_precomputed_self_cites_list(recids):
            stored[row[0]] = row[1]

        def nb_self_cites(recid):
            # Records missing from the pre-computed rows count as zero
            return stored.get(recid, 0)
    else:
        tags = get_authors_tags()
        selfcites_fun = ALL_ALGORITHMS[algorithm]

        def nb_self_cites(recid):
            return len(selfcites_fun(recid, tags))

    remaining = 0
    for recid in recids:
        citers = get_cited_by(recid)
        remaining += len(citers) - nb_self_cites(recid)

    return remaining

コード例 #3

0

ファイルを表示

ファイル: bibrank_citation_indexer_regression_tests.py プロジェクト: chokribr/inveniotest

 def test_db_adding_and_removing_records(self):
     """Check get_cited_by() after store_dicts() adds and clears data.

     Record 42222 is stored as referencing 43333 and as being cited by
     40000 and 40001. The lookups are taken, the record is stored again
     with empty sets, and only then are the assertions run.
     """
     from invenio.bibrank_citation_indexer import store_dicts
     from invenio.bibrank_citation_searcher import get_cited_by

     recid = 42222
     referenced = 43333

     store_dicts([recid],
                 refs={recid: set([referenced])},
                 cites={recid: set([40000, 40001])})
     citers_of_recid = get_cited_by(recid)
     citers_of_referenced = get_cited_by(referenced)

     # Reset the record before any assertion is evaluated
     store_dicts([recid], refs={recid: set()}, cites={recid: set()})

     self.assertEqual(citers_of_recid, set([40000, 40001]))
     self.assertEqual(citers_of_referenced, set([recid]))
     self.assertEqual(get_cited_by(recid), set())
     self.assertEqual(get_cited_by(referenced), set())

コード例 #4

0

ファイルを表示

ファイル: bibrank_citation_indexer_regression_tests.py プロジェクト: BessemAamira/invenio

 def test_db_adding_and_removing_records(self):
     """Store citation data for one record, clear it, and verify both states."""
     from invenio.bibrank_citation_searcher import get_cited_by
     from invenio.bibrank_citation_indexer import store_dicts

     # Record 42222 references 43333 and is cited by 40000 and 40001
     store_dicts([42222], refs={42222: set([43333])},
                 cites={42222: set([40000, 40001])})
     before_42222, before_43333 = get_cited_by(42222), get_cited_by(43333)

     # Store the same record again, this time without any links
     store_dicts([42222], refs={42222: set()}, cites={42222: set()})

     self.assertEqual(before_42222, set([40000, 40001]))
     self.assertEqual(before_43333, set([42222]))
     for recid in (42222, 43333):
         self.assertEqual(get_cited_by(recid), set())

コード例 #5

0

ファイルを表示

def calculate_citation_history_coordinates(recid):
    """Return the (year, citation count) pairs for RECID, sorted by year.

    The years are those given by calculate_citation_graphe_x_coordinates().
    An empty list is returned when there are fewer of them than
    CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS.
    """
    counts = dict.fromkeys(calculate_citation_graphe_x_coordinates(recid), 0)

    if len(counts) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
        # Too few points to be worth a graph
        return []

    for citer in get_cited_by(recid):
        rec_date = get_record_year(citer)
        if not rec_date:
            # Some records simply do not have these fields
            continue
        try:
            # The first four characters may contain a typo and not be
            # parsable as a year
            parsed = strptime(rec_date[0][:4], '%Y')
        except ValueError:
            continue
        # Only years that are already on the x axis are counted
        if parsed.year in counts:
            counts[parsed.year] += 1

    return sorted(counts.items())

コード例 #6

0

ファイルを表示

    def related_records(recids, recids_processed):
        """Extend RECIDS with related records and drop those already handled.

        For the "HDREF" format, the records of RECIDS whose modification
        date is older than latest_bibrank_run get their citing records
        (get_cited_by) added to RECIDS. Both arguments are updated in
        place: RECIDS loses the members of RECIDS_PROCESSED, and what is
        left is added to RECIDS_PROCESSED.

        `fmt` and `latest_bibrank_run` are read from the enclosing scope.

        @return: the updated RECIDS
        """
        if recids and fmt == "HDREF":
            # HDREF is the references tab: it has to be regenerated when
            # the record changes and also when one of its citations changes
            sql = """SELECT id, modification_date FROM bibrec
                     WHERE id in (%s)""" % ','.join(str(r) for r in recids)

            older = []
            for recid, mod_date in run_sql(sql):
                stamp = mod_date.strftime("%Y-%m-%d %H:%M:%S")
                if stamp < latest_bibrank_run:
                    older.append(recid)

            for r in intbitset(older):
                recids |= intbitset(get_cited_by(r))

        # Never process a record twice ...
        recids -= recids_processed
        # ... and remember the ones selected now
        recids_processed += recids

        return recids

コード例 #7

0

ファイルを表示

ファイル: bibrank_record_sorter.py プロジェクト: AlbertoPeon/invenio

def find_citations(rank_method_code, recID, hitset, verbose):
    """Rank by the amount of citations.

    If RECID can be converted to an integer, the records returned by
    get_cited_by() for it are weighted; otherwise the members of HITSET
    are weighted.

    @param rank_method_code: not used by this function
    @param recID: a record id, or a non-numeric value to rank HITSET
    @param hitset: records to weight when RECID is not numeric
    @param verbose: when greater than 0, debugging details are stored in
        the module-level `voutput`
    @return: tuple (ranked, prefix, postfix, message); `ranked` is the
        result of get_cited_by_weight() sorted ascending by the second
        member of each entry. ((), "", "", "") when nothing was found.
    """
    global voutput
    voutput = ""

    # Only the failures int() raises for unusable input are treated as
    # "recID is not numeric"; anything else is no longer silenced.
    try:
        recidint = int(recID)
    except (TypeError, ValueError, OverflowError):
        recidint = None
    recisint = recidint is not None

    if recisint:
        # return only stuff that cites the record
        myrecords = get_cited_by(recidint)
        ret = get_cited_by_weight(myrecords)
    else:
        # return stuff that cites hitset
        ret = get_cited_by_weight(hitset)
    # ascending by the second member of the tuples
    ret.sort(key=lambda entry: entry[1])

    if verbose > 0:
        voutput = voutput+"\nrecID "+str(recID)+" is int: "+str(recisint)+" hitset "+str(hitset)+"\n"+"find_citations retlist "+str(ret)

    if ret:
        return (ret,"(", ")", "")
    else:
        return ((),"", "", "")

コード例 #8

0

ファイルを表示

ファイル: bibrank_record_sorter.py プロジェクト: epfl-si/invenio-infoscience

def find_citations(rank_method_code, recID, hitset, verbose):
    """Rank by the amount of citations.

    The cited-by weights are computed either for the records citing RECID
    (when RECID converts to an integer) or for the members of HITSET.

    @param rank_method_code: not used by this function
    @param recID: a record id, or a non-numeric value to rank HITSET
    @param hitset: records to weight when RECID is not numeric
    @param verbose: when greater than 0, debugging details are stored in
        the module-level `voutput`
    @return: ((recordid, weight) list, prefix, postfix, message); the list
        is sorted ascending by weight. ((), "", "", "") when it is empty.
    """
    global voutput
    voutput = ""

    # Try to convert to int. Catching only the conversion errors keeps
    # KeyboardInterrupt and unrelated failures visible, which the former
    # bare "except:" swallowed.
    recisint = True
    recidint = 0
    try:
        recidint = int(recID)
    except (TypeError, ValueError, OverflowError):
        recisint = False

    if recisint:
        ret = get_cited_by_weight(get_cited_by(recidint))
    else:
        ret = get_cited_by_weight(hitset)

    # Stable ascending sort on the second member of each entry; gives the
    # same order as the former cmp-based comparison.
    ret.sort(key=lambda entry: entry[1])

    if verbose > 0:
        voutput = voutput+"\nrecID "+str(recID)+" is int: "+str(recisint)+" hitset "+str(hitset)+"\n"+"find_citations retlist "+str(ret)

    if not ret:
        return ((),"", "", "")
    return (ret,"(", ")", "")

コード例 #9

0

ファイルを表示

ファイル: bibrank_citation_grapher.py プロジェクト: aw-bib/tind-invenio

def calculate_citation_history_coordinates(recid):
    """Build the citation history of RECID as sorted (year, count) pairs.

    Every year returned by calculate_citation_graphe_x_coordinates() starts
    at zero and is incremented once per citing record dated in that year.
    Returns [] when there are fewer than
    CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS years.
    """
    per_year = dict((year, 0)
                    for year in calculate_citation_graphe_x_coordinates(recid))

    # do not generate graphs that have less than X points
    if len(per_year) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
        return []

    for citing_recid in get_cited_by(recid):
        year_values = get_record_year(citing_recid)
        # Some records simply do not have these fields
        if year_values:
            try:
                # The value may have a typo and fail to parse as a year
                citing_year = strptime(year_values[0][:4], '%Y').year
            except ValueError:
                continue
            if citing_year in per_year:
                per_year[citing_year] = per_year[citing_year] + 1

    return sorted(per_year.iteritems())

コード例 #10

0

ファイルを表示

ファイル: chart_author.py プロジェクト: arbitton/beacom

def find_cites(author):
   """
   Find and return all necessary components for plotting the data set.

   Returned values:
   1. year_dict: a dictionary keyed by years with values of the citations that occured in that year
   2. start_year: an integer that holds the year the first citation occured, used for calculating
      points to plot
   3. lifetime_cites: an integer holding the total amount of cites the author has in the present
      day, used to scale the final data set

   This definition first grabs a list of all the papers written by the author. From there,
   it iterates through the list, pulling the citations of each paper and incrementing the
   appropriate year in the year_dict dictionary for that citations year.

   Next, it iterates through the year ditionary to fill in values for missing years, setting them
   to zero. It also calculates the lifetime cites during this iteration.

   """

   print "# Author:", author
   papers = get_realauthor_data(author, 'bibrec_id')
   
   year_dict = {}
   lifetime_cites = 0

   for paper in papers:
      cites = get_cited_by(int(paper[1]))
      # print papers[i][1], cites 
      for cite in cites:
         fieldvalues_yearlist = get_fieldvalues(cite, '269__C')
         if len(fieldvalues_yearlist) > 0:
            year = year_re.search(fieldvalues_yearlist[0])
            if year:
               if int(year.group()) not in year_dict:
                  year_dict[int(year.group())] = 1
               else:
                  year_dict[int(year.group())] += 1
               # print year.group()

   if len(year_dict) > 0:

      start_year = min(year_dict.keys())
      end_year = max(year_dict.keys())

      for i in range(start_year, end_year + 1):
         if i not in year_dict:
            year_dict[i] = 0
         lifetime_cites += year_dict[i]
   else:
      print "# Author has no citations"

   # print year_dict

   return year_dict, start_year, float(lifetime_cites)

コード例 #11

0

ファイルを表示

ファイル: read_logs.py プロジェクト: GunioRobot/SLAC

def print_rec_ids(rec_ids):
   complete_paper_list = intbitset(perform_request_search(p='year:2009->2010'))

   print "Rec ID, Clicks, Citations:"

   for key in rec_ids:

      paper_citation_list = intbitset(get_cited_by(key))

      narrowed_citation_count = len(paper_citation_list & complete_paper_list)
      print "%d %d %d" % (key, rec_ids[key], narrowed_citation_count)

コード例 #12

0

ファイルを表示

ファイル: bibrank_selfcites_indexer.py プロジェクト: BessemAamira/invenio

def compute_self_citations(recid, tags, authors_fun):
    """Compute the self-citations of a record

    Among the records citing RECID, a record is a self-citation when
     - RECID has no authors or more than 20 of them, lists at least one
       collaboration, and the citing record shares a collaboration
       with it; or otherwise
     - one of the names returned by AUTHORS_FUN for the citing record is
       an author of RECID (citing records from a collaboration are left
       out in that case, see the loop below).

    Args:
     - recid: record id
     - tags: the tag number for author, coauthors, collaborations,
             required since it depends on how the marc was defined
     - authors_fun: callable invoked as authors_fun(citing_recid, tags);
                    its result is intersected with the authors of RECID

    Returns the set of citing record ids that are self-citations.
    """
    citing_records = get_cited_by(recid)
    if not citing_records:
        return set()

    record_authors = frozenset(get_authors_from_record(recid, tags))

    record_collabs = None
    if not record_authors or len(record_authors) > 20:
        record_collabs = frozenset(
            get_collaborations_from_record(recid, tags))

    if record_collabs:
        # Compare collaboration names
        return set(
            citer for citer in citing_records
            if record_collabs.intersection(
                frozenset(get_collaborations_from_record(citer, tags))))

    # Compare author names
    found = set()
    for citer in citing_records:
        citer_authors = get_authors_from_record(citer, tags)
        from_collaboration = \
            (not record_authors or len(citer_authors) > 20) and \
            get_collaborations_from_record(citer, tags)
        if from_collaboration:
            # A collaboration record citing a record from an author
            # is not counted
            continue
        if record_authors.intersection(frozenset(authors_fun(citer, tags))):
            found.add(citer)

    return found

コード例 #13

0

ファイルを表示

ファイル: bibrank_selfcites_indexer.py プロジェクト: ppiotr/Invenio

def compute_self_citations(recid, tags, authors_fun):
    """Return the set of records citing RECID that are self-citations

    Args:
     - recid: record id
     - tags: the tag number for author, coauthors, collaborations,
             required since it depends on how the marc was defined
     - authors_fun: called as authors_fun(citing_recid, tags); the names
                    it returns are matched against the authors of RECID

    When RECID has no authors or more than 20, and has collaborations,
    citing records are matched on collaboration names. In every other
    case they are matched on author names.
    """
    citers = get_cited_by(recid)
    if not citers:
        return set()

    authors = frozenset(get_authors_from_record(recid, tags))

    if authors and len(authors) <= 20:
        collaborations = None
    else:
        collaborations = frozenset(
            get_collaborations_from_record(recid, tags))

    def shares_collaboration(cit):
        names = frozenset(get_collaborations_from_record(cit, tags))
        return collaborations.intersection(names)

    def shares_author(cit):
        cit_authors = get_authors_from_record(cit, tags)
        if (not authors or len(cit_authors) > 20) and \
                get_collaborations_from_record(cit, tags):
            # Record from a collaboration that cites
            # a record from an author, it's fine
            return False
        return authors.intersection(frozenset(authors_fun(cit, tags)))

    if collaborations:
        is_self_citation = shares_collaboration
    else:
        is_self_citation = shares_author

    self_citations = set()
    for cit in citers:
        if is_self_citation(cit):
            self_citations.add(cit)
    return self_citations

コード例 #14

0

ファイルを表示

ファイル: read_logs_combo.py プロジェクト: traviscb/SLAC

def print_rec_ids(rec_ids,offset=365):


   print "Rec ID, Clicks,date, arXiv, Citations(1yr), Citations(6mo):"
   output = []
   for key in rec_ids:
      dates = get_fieldvalues(key, '269__c')
      if len(dates) > 0:
         date = dates[0]
      reps = get_fieldvalues(key, '037__a')
      if len(reps) > 0:
         rep = reps[0]
      cats = get_fieldvalues(key, '037__c')
      if len(cats) > 0:
         cat = cats[0]
      output.append([key, rec_ids[key], date, rep, cat])
   date1=''
   output.sort(key = lambda record:record[2])
   for record in output:
      if record[2] != date1:
         date = datetime.date(int(record[2].rsplit('-')[0]),int(record[2].rsplit('-')[1]),1)
         date2 = date + datetime.timedelta(offset/2)
         date3 = date + datetime.timedelta(offset)
         ## check and split across yearsdue to search bug.   assumes that
         ## if small offset splits the year, the big one does too (i.e. we
         ## don't go back or forward more than 6 mos
         if date.year != date2.year:
            join = str(date.year) +'-12-31 or year:' + str(date2.year) + '-01-01->'
         else:
            join = ''
         date1 = date.strftime("%Y-%m")
         date2 = date2.strftime("%Y-%m")
         date3 = date3.strftime("%Y-%m")

         print date1, date2, date3
         complete_paper_list = intbitset(perform_request_search(p='year:'+date1+'->' + join + date2))
         half_complete_paper_list = intbitset(perform_request_search(p='year:'+date1+'->' + join + date3))
      paper_citation_list = intbitset(get_cited_by(record[0]))
      narrowed_citation_count = len(paper_citation_list & complete_paper_list)
      half_narrowed_citation_count = len(paper_citation_list & half_complete_paper_list)

      print '%d,%d,%s,%s,%s,%d,%d' % (record[0],record[1],record[2],record[3],record[4], half_narrowed_citation_count,narrowed_citation_count)

コード例 #15

0

ファイルを表示

ファイル: bibreformat.py プロジェクト: BessemAamira/invenio

    def related_records(recids, recids_processed):
        """Add related records to RECIDS and filter out the processed ones.

        Only the "HDREF" format pulls in related records: for each record
        of RECIDS last modified before latest_bibrank_run, the records
        returned by get_cited_by() are merged into RECIDS. RECIDS and
        RECIDS_PROCESSED are both modified in place.

        `fmt` and `latest_bibrank_run` come from the enclosing scope.

        @return: RECIDS, without the records present in RECIDS_PROCESSED
        """
        if fmt == "HDREF" and recids:
            # HDREF represents the references tab; it must be recomputed
            # when the record changes and when one of the citations changes
            sql = """SELECT id, modification_date FROM bibrec
                     WHERE id in (%s)""" % ','.join(str(r) for r in recids)
            rows = run_sql(sql)

            rel_recids = intbitset([
                recid for recid, mod_date in rows
                if mod_date.strftime("%Y-%m-%d %H:%M:%S") < latest_bibrank_run
            ])
            for rel_recid in rel_recids:
                citing = intbitset(get_cited_by(rel_recid))
                recids |= citing

        # Skip what was handled before, then record the new selection
        recids -= recids_processed
        recids_processed += recids

        return recids

コード例 #16

0

ファイルを表示

ファイル: bibreformat.py プロジェクト: labordoc/labordoc-next

def bibreformat_task(fmt, sql, sql_queries, cds_query, process_format, process,
                     recids):
    """
    BibReformat main task

    Collects the record IDs to reformat from RECIDS, the CDS query and the
    SQL queries, runs the formatting and reports statistics through
    write_message().

    @param fmt: output format to use
    @param sql: dictionary with pre-created sql queries for various cases (for selecting records). Some of these queries will be picked depending on the case
    @param sql_queries: a list of sql queries to be executed to select records to reformat.
    @param cds_query: a search query to be executed to select records to reformat; its 'field', 'collection', 'pattern' and 'matching' entries are read
    @param process_format: if true, the records returned by without_fmt(sql) are processed as well
    @param process: if false, the selected records are only counted, not formatted
    @param recids: a list of record IDs to reformat
    @return: None
    """
    write_message("Processing format %s" % fmt)

    t1 = os.times()[4]

    start_date = datetime.now()

    ### Query the database
    ###
    task_update_progress('Fetching records to process')
    if process_format:  # '-without' parameter
        write_message("Querying database for records without cache...")
        without_format = without_fmt(sql)

    recIDs = intbitset(recids)

    if cds_query['field']      != "" or  \
       cds_query['collection'] != "" or  \
       cds_query['pattern']    != "":

        write_message("Querying database (CDS query)...")

        if cds_query['collection'] == "":
            # use search_pattern() whenever possible, as it can search
            # even in private collections
            res = search_pattern(p=cds_query['pattern'],
                                 f=cds_query['field'],
                                 m=cds_query['matching'])
        else:
            # use perform_request_search when '-c' argument has been
            # defined, as it is not supported by search_pattern()
            res = intbitset(
                perform_request_search(req=None,
                                       of='id',
                                       c=cds_query['collection'],
                                       p=cds_query['pattern'],
                                       f=cds_query['field']))

        recIDs |= res

    for sql_query in sql_queries:
        write_message("Querying database (%s) ..." % sql_query, verbose=2)
        recIDs |= intbitset(run_sql(sql_query))

    if fmt == "HDREF" and recIDs:
        # HDREF represents the references tab
        # the tab needs to be recomputed not only when the record changes
        # but also when one of the citations changes
        latest_bibrank_run = get_bibrankmethod_lastupdate('citation')
        start_date = latest_bibrank_run
        sql = """SELECT id, modification_date FROM bibrec
                 WHERE id in (%s)""" % ','.join(str(r) for r in recIDs)

        def check_date(mod_date):
            return mod_date < latest_bibrank_run
        outdated = [recid for recid, mod_date in run_sql(sql)
                    if check_date(mod_date)]
        recIDs = intbitset(outdated)
        # Loop over the fixed list: recIDs grows inside the loop and must
        # not be the collection being iterated.
        for r in outdated:
            recIDs |= intbitset(get_cited_by(r))

    ### list of corresponding record IDs was retrieved
    ### now format the selected records

    if process_format:
        write_message("Records to be processed: %d" % (len(recIDs) \
                                               + len(without_format)))
        write_message("Out of it records without existing cache: %d" %
                      len(without_format))
    else:
        write_message("Records to be processed: %d" % (len(recIDs)))

    ### Initialize main loop

    total_rec = 0  # Total number of records
    tbibformat = 0  # time taken up by external call
    tbibupload = 0  # time taken up by external call

    ### Iterate over all records prepared in lists I (option)
    if process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT:  # FIXME: remove this
            # when migration from php to
            # python bibformat is done
            (total_rec_1, tbibformat_1,
             tbibupload_1) = iterate_over_old(recIDs, fmt)
        else:
            (total_rec_1, tbibformat_1,
             tbibupload_1) = iterate_over_new(recIDs, fmt)
        total_rec += total_rec_1
        tbibformat += tbibformat_1
        tbibupload += tbibupload_1

    ### Iterate over all records prepared in list II (no_format)
    if process_format and process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT:  # FIXME: remove this
            # when migration from php to
            # python bibformat is done
            (total_rec_2, tbibformat_2,
             tbibupload_2) = iterate_over_old(without_format, fmt)
        else:
            (total_rec_2, tbibformat_2,
             tbibupload_2) = iterate_over_new(without_format, fmt)
        total_rec += total_rec_2
        tbibformat += tbibformat_2
        tbibupload += tbibupload_2

    ### Store last run time
    if task_has_option("last"):
        write_message("storing run date to %s" % start_date)
        store_last_updated(fmt, start_date)


    ### Final statistics

    t2 = os.times()[4]

    elapsed = t2 - t1
    message = "total records processed: %d" % total_rec
    write_message(message)

    message = "total processing time: %2f sec" % elapsed
    write_message(message)

    message = "Time spent on external call (os.system):"
    write_message(message)

    message = " bibformat: %2f sec" % tbibformat
    write_message(message)

    message = " bibupload: %2f sec" % tbibupload
    write_message(message)

コード例 #17

0

ファイルを表示

ファイル: chart_author.py プロジェクト: arbitton/beacom

def find_citesb(author):

   """
   This defition plays the same role as the above defition of a similar name. However,
   it creates a different dictionary, as this definition is used only when the user
   wants to plot citations that occured in the past five years from papers published
   in the past five years only.

   The year dictionary in rather keyed by year of paper published. The values of the 
   keys are then another dictionary that holds years as the keys (of citations)
   with the values as the number of citations.
   Ex: {paper year: {citation year, count}}

   All other return values are the same, with the addition of 'end_year', which
   is an integer denoting the final year the author had a paper cited.

   """

   print "# Author:", author
   papers = get_realauthor_data(author, 'bibrec_id')
  
   year_dict = {}
   # print papers, "Papers"
   # print 'Number of papers:', len(papers)

   lifetime_cites = 0
   end_year = 0

   for paper in papers:
      paper_yearlist = get_fieldvalues(int(paper[1]), '269__C')
      # print paper_yearlist, "Paper year list"
      # print paper[1]
      if len(paper_yearlist) > 0:
         paper_year_match = year_re.search(paper_yearlist[0])
         if paper_year_match:
            paper_year = int(paper_year_match.group())
            # print paper_year
            cites = get_cited_by(int(paper[1]))
            # print cites
            for cite in cites:
               fieldvalues_yearlist = get_fieldvalues(cite, '269__C')
               if len(fieldvalues_yearlist) > 0:
                  cite_year_match = year_re.search(fieldvalues_yearlist[0])
                  if cite_year_match:
                     cite_year = int(cite_year_match.group())
                     if cite_year > end_year:
                        end_year = cite_year
                     # print "Years:", paper_year, cite_year
                     if paper_year not in year_dict:
                        year_dict[paper_year] = {cite_year: 1}
                     elif cite_year not in year_dict[paper_year]:
                        year_dict[paper_year][cite_year] = 1
                     else:
                        year_dict[paper_year][cite_year] += 1

   if len(year_dict) > 0:

      start_year = min(year_dict.keys())
      for i in year_dict:
         for j in year_dict[i]:
            lifetime_cites += year_dict[i][j]
   else:
      print "# Author has no citations"

   # print year_dict

   return year_dict, start_year, end_year, float(lifetime_cites)

コード例 #18

0

ファイルを表示

def _find_citations(bib):
    """Return get_cited_by() for the third element of BIB."""
    recid = bib[2]
    return get_cited_by(recid)

コード例 #19

0

ファイルを表示

ファイル: bibauthorid_comparison.py プロジェクト: aw-bib/tind-invenio

def _find_citations(bib):
    """Look up the records citing BIB[2] with get_cited_by()."""
    # presumably bib[2] is the record id of the paper; verify against callers
    cited = bib[2]
    citers = get_cited_by(cited)
    return citers

コード例 #20

0

ファイルを表示

ファイル: bibreformat.py プロジェクト: benavidez/invenio

def bibreformat_task(fmt, sql, sql_queries, cds_query, process_format, process, recids):
    """
    BibReformat main task

    Collects the record IDs to reformat from RECIDS, the CDS query and the
    SQL queries, runs the formatting and reports statistics through
    write_message().

    @param fmt: output format to use
    @param sql: dictionary with pre-created sql queries for various cases (for selecting records). Some of these queries will be picked depending on the case
    @param sql_queries: a list of sql queries to be executed to select records to reformat.
    @param cds_query: a search query to be executed to select records to reformat; its 'field', 'collection', 'pattern' and 'matching' entries are read
    @param process_format: if true, the records returned by without_fmt(sql) are processed as well
    @param process: if false, the selected records are only counted, not formatted
    @param recids: a list of record IDs to reformat
    @return: None
    """
    t1 = os.times()[4]


    ### Query the database
    ###
    task_update_progress('Fetching records to process')
    if process_format: # '-without' parameter
        write_message("Querying database for records without cache...")
        without_format = without_fmt(sql)

    # Work on a private intbitset: the caller's RECIDS is left untouched and
    # the in-place unions below also work when a plain list was passed.
    recIDs = intbitset(recids)

    if cds_query['field']      != "" or  \
       cds_query['collection'] != "" or  \
       cds_query['pattern']    != "":

        write_message("Querying database (CDS query)...")

        if cds_query['collection'] == "":
            # use search_pattern() whenever possible, as it can search
            # even in private collections
            res = search_pattern(p=cds_query['pattern'],
                                 f=cds_query['field'],
                                 m=cds_query['matching'])
        else:
            # use perform_request_search when '-c' argument has been
            # defined, as it is not supported by search_pattern()
            res = intbitset(perform_request_search(req=None, of='id',
                                         c=cds_query['collection'],
                                         p=cds_query['pattern'],
                                         f=cds_query['field']))

        recIDs |= res

    for sql_query in sql_queries:
        write_message("Querying database (%s) ..." % sql_query, verbose=2)
        recIDs |= intbitset(run_sql(sql_query))

    # With no record selected the query below would end in "in ()",
    # which is not valid SQL, so the branch is skipped.
    if fmt == "HDREF" and recIDs:
        # HDREF represents the references tab
        # the tab needs to be recomputed not only when the record changes
        # but also when one of the citations changes
        latest_bibrank_run = get_bibrankmethod_lastupdate('citation')
        sql = """SELECT id, modification_date FROM bibrec
                 WHERE id in (%s)""" % ','.join(str(r) for r in recIDs)

        def check_date(mod_date):
            return mod_date < latest_bibrank_run
        outdated = [recid for recid, mod_date in run_sql(sql)
                    if check_date(mod_date)]
        recIDs = intbitset(outdated)
        # Loop over the fixed list: recIDs grows inside the loop and must
        # not be the collection being iterated.
        for r in outdated:
            recIDs |= intbitset(get_cited_by(r))

    ### list of corresponding record IDs was retrieved
    ### now format the selected records

    if process_format:
        write_message("Records to be processed: %d" % (len(recIDs) \
                                               + len(without_format)))
        write_message("Out of it records without existing cache: %d" % len(without_format))
    else:
        write_message("Records to be processed: %d" % (len(recIDs)))

    ### Initialize main loop

    total_rec   = 0     # Total number of records
    tbibformat  = 0     # time taken up by external call
    tbibupload  = 0     # time taken up by external call


    ### Iterate over all records prepared in lists I (option)
    if process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this
                                            # when migration from php to
                                            # python bibformat is done
            (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_old(recIDs,
                                                                         fmt)
        else:
            (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_new(recIDs,
                                                                         fmt)
        total_rec += total_rec_1
        tbibformat += tbibformat_1
        tbibupload += tbibupload_1

    ### Iterate over all records prepared in list II (no_format)
    if process_format and process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this
                                            # when migration from php to
                                            # python bibformat is done
            (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_old(without_format,
                                                                         fmt)
        else:
            (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_new(without_format,
                                                                         fmt)
        total_rec += total_rec_2
        tbibformat += tbibformat_2
        tbibupload += tbibupload_2

    ### Final statistics

    t2 = os.times()[4]

    elapsed = t2 - t1
    message = "total records processed: %d" % total_rec
    write_message(message)

    message = "total processing time: %2f sec" % elapsed
    write_message(message)

    message = "Time spent on external call (os.system):"
    write_message(message)

    message = " bibformat: %2f sec" % tbibformat
    write_message(message)

    message = " bibupload: %2f sec" % tbibupload
    write_message(message)