Code example #1
File: dbquery_unit_tests.py Project: adsabs/invenio
 def _check_table_update_time(self, tablename):
     """Helper function to check update time of TABLENAME."""
     # detect MySQL version number:
     res = dbquery.run_sql("SELECT VERSION()")
     mysql_server_version = res[0][0]
     if mysql_server_version.startswith("5."):
         # MySQL-5 provides INFORMATION_SCHEMA:
         query = """SELECT UPDATE_TIME FROM INFORMATION_SCHEMA.TABLES
                     WHERE table_name='%s' AND table_schema='%s'""" % (
             tablename,
             dbquery.CFG_DATABASE_NAME,
         )
         tablename_update_time = str(dbquery.run_sql(query)[0][0])
     elif mysql_server_version.startswith("4.1"):
         # MySQL-4.1 has it on 12th position:
         query = """SHOW TABLE STATUS LIKE '%s'""" % tablename
         tablename_update_time = str(dbquery.run_sql(query)[0][12])
     elif mysql_server_version.startswith("4.0"):
         # MySQL-4.0 has it on 11th position:
         query = """SHOW TABLE STATUS LIKE '%s'""" % tablename
         tablename_update_time = str(dbquery.run_sql(query)[0][11])
     else:
         tablename_update_time = "MYSQL SERVER VERSION NOT DETECTED"
     # compare it with the one detected by the function:
     self.assertEqual(tablename_update_time, dbquery.get_table_update_time(tablename))
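The helper above interpolates values straight into the SQL string; run_sql also accepts bound parameters, as most of the later examples do. A minimal sketch of the MySQL-5 branch in that style (the import path and helper name here are assumptions):

from invenio.dbquery import run_sql, CFG_DATABASE_NAME  # assumed import path

def get_update_time_via_information_schema(tablename):
    """Hypothetical helper: UPDATE_TIME of TABLENAME on MySQL >= 5,
    with bound parameters instead of string interpolation."""
    res = run_sql("""SELECT UPDATE_TIME FROM INFORMATION_SCHEMA.TABLES
                     WHERE table_name=%s AND table_schema=%s""",
                  (tablename, CFG_DATABASE_NAME))
    if res:
        return str(res[0][0])
    return None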
Code example #2
File: chk_add_inspireid.py Project: Dziolas/scoap3-1
def check_records(records):
    """
    Add INSPIRE ID if missing
    """
    _init_db()
    for record in records:
        if 'INSPIRE' in record_get_field_values(record, '035', code='9'):
            ## Already has the link. Good! Let's move on.
            continue
        doi = record_get_field_value(record, '024', ind1='7', code='a')
        arxiv = record_get_field_value(record, '037', code='a')
        query = 'doi:"%s"' % doi
        if arxiv:
            query += ' or %s' % arxiv
        inspireid = run_sql("SELECT inspireid FROM doi2inspireid WHERE doi=%s", (doi,))
        if inspireid:
            inspireid = inspireid[0][0]
        else:
            sleep(2)
            inspireid = [int(elem.strip()) for elem in urlopen(create_url("http://inspirehep.net/search", {'cc': 'HEP', 'of': 'id', 'p': query})).read().strip()[1:-1].split(',') if elem.strip()]
            if len(inspireid) == 1:
                inspireid = inspireid[0]
                try:
                    run_sql("INSERT INTO doi2inspireid(doi, inspireid, creation_date) VALUES(%s, %s, NOW())", (doi, inspireid))
                except IntegrityError, err:
                    other_doi = run_sql("SELECT doi FROM doi2inspireid WHERE inspireid=%s", (inspireid, ))[0][0]
                    record.warn("This record with doi %s is connected with INSPIRE id %s which is already connected to doi %s" % (doi, inspireid, other_doi))
                    continue
            else:
                record.warn("More than one inspire ID matches this record: %s" % inspireid)
                continue
Code example #3
def send_message(uids_to, msgid, status=CFG_WEBMESSAGE_STATUS_CODE['NEW']):
    """
    Send message to uids
    @param uids: sequence of user ids
    @param msg_id: id of message
    @param status: status of the message. (single char, see webmessage_config.py).
    @return: a list of users having their mailbox full
    """
    if not((type(uids_to) is list) or (type(uids_to) is tuple)):
        uids_to = [uids_to]
    user_problem = []
    if len(uids_to) > 0:
        users_quotas = check_quota(CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES - 1)
        query = """INSERT INTO user_msgMESSAGE (id_user_to, id_msgMESSAGE,
                    status) VALUES """
        fixed_value = ",%s,%s)"
        query_params = []
        def not_users_quotas_has_key(key):
            """ not(is key in users over  quota?)"""
            return not(users_quotas.has_key(key))
        user_ids_to = filter(not_users_quotas_has_key, uids_to)
        user_problem = filter(users_quotas.has_key, uids_to)
        if len(user_ids_to) > 0:
            for uid_to in user_ids_to[0:-1]:
                query += "(%%s%s," % fixed_value
                query_params += [uid_to, msgid, status]
            query += "(%%s%s" % fixed_value
            query_params += [user_ids_to[-1], msgid, status]
            run_sql(query, tuple(query_params))
    return user_problem
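A hypothetical usage sketch for the function above (the uids and message id are made up):

over_quota = send_message([101, 102, 103], 42)
for uid in over_quota:
    print "mailbox full for user %s, message 42 not delivered" % uid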
Code example #4
File: webstat.py Project: pombredanne/invenio
def basket_display():
    """
    Display basket statistics.
    """
    tbl_name = get_customevent_table("baskets")
    if not tbl_name:
        # custom event baskets not defined, so return empty output:
        return []
    try:
        res = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
        days = (res[-1][0] - res[0][0]).days + 1
        public = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display_public'" % tbl_name)[0][0]
        users = run_sql("SELECT COUNT(DISTINCT user) FROM %s" % tbl_name)[0][0]
        adds = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'add'" % tbl_name)[0][0]
        displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display' OR action = 'display_public'" % tbl_name)[0][0]
        hits = adds + displays
        average = hits / days

        res = [("Basket page hits", hits)]
        res.append(("   Average per day", average))
        res.append(("   Unique users", users))
        res.append(("   Additions", adds))
        res.append(("   Public", public))
    except IndexError:
        res = []

    return res
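The function returns (label, value) pairs ready for tabular display; a hypothetical usage sketch:

for label, value in basket_display():
    print "%s: %s" % (label, value)   # e.g. "Basket page hits: 1234"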
Code example #5
def update_user_inbox_for_reminders(uid):
    """
    Updates user's inbox with any reminders that should have arrived
    @param uid: user id
    @return: integer number of new expired reminders
    """
    now = convert_datestruct_to_datetext(localtime())
    reminder_status = CFG_WEBMESSAGE_STATUS_CODE['REMINDER']
    new_status = CFG_WEBMESSAGE_STATUS_CODE['NEW']
    query1 = """SELECT m.id
                FROM   msgMESSAGE m,
                       user_msgMESSAGE um
                WHERE  um.id_user_to=%s AND
                       um.id_msgMESSAGE=m.id AND
                       m.received_date<=%s AND
                       um.status like binary %s
                """
    params1 = (uid, now, reminder_status)
    res_ids = run_sql(query1, params1)
    out = len(res_ids)
    if (out>0):
        query2 = """UPDATE user_msgMESSAGE
                    SET    status=%s
                    WHERE  id_user_to=%s AND ("""
        query_params = [new_status, uid]
        for msg_id in res_ids[0:-1]:
            query2 += "id_msgMESSAGE=%s OR "
            query_params.append(msg_id[0])
        query2 += "id_msgMESSAGE=%s)"
        query_params.append(res_ids[-1][0])
        run_sql(query2, tuple(query_params))
    return out
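A hypothetical usage sketch, e.g. when user 42 opens the inbox:

new_reminders = update_user_inbox_for_reminders(42)
if new_reminders > 0:
    print "%d reminder(s) just became visible as new messages" % new_reminders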
Code example #6
def filter_out_based_on_date_range(recids, fromdate="", untildate="", set_spec=None):
    """ Filter out recids based on date range."""
    if fromdate:
        fromdate = normalize_date(fromdate, "T00:00:00Z")
    else:
        fromdate = get_earliest_datestamp()
    fromdate = utc_to_localtime(fromdate)

    if untildate:
        untildate = normalize_date(untildate, "T23:59:59Z")
    else:
        untildate = get_latest_datestamp()
    untildate = utc_to_localtime(untildate)

    if set_spec is not None: ## either it has a value or it is empty, thus meaning all records
        last_updated = get_set_last_update(set_spec)
        if last_updated is not None:
            last_updated = utc_to_localtime(last_updated)
            if last_updated > fromdate:
                fromdate = utc_to_localtime(get_earliest_datestamp())

    recids = intbitset(recids) ## Let's clone :-)

    if fromdate and untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s", (fromdate, untildate)))
    elif fromdate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date >= %s", (fromdate, )))
    elif untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date <= %s", (untildate, )))
    return recids - get_all_restricted_recids()
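A hypothetical usage sketch with made-up record IDs and datestamps (the intbitset import path is assumed):

from invenio.intbitset import intbitset  # assumed import path

candidates = intbitset([1, 2, 3, 4])
visible = filter_out_based_on_date_range(candidates,
                                         fromdate="2012-01-01",
                                         untildate="2012-12-31")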
Code example #7
def check_if_need_to_delete_message_permanently(msg_ids):
    """
    Checks if a list of messages exist in anyone's inbox, if not,
    delete them permanently
    @param msg_id: sequence of message ids
    @return: number of deleted messages
    """
    if not((type(msg_ids) is list) or (type(msg_ids) is tuple)):
        msg_ids = [msg_ids]
    query1 = """SELECT count(id_msgMESSAGE)
                FROM user_msgMESSAGE
                WHERE id_msgMESSAGE=%s"""
    messages_to_delete = []
    for msg_id in msg_ids:
        nb_users = int(run_sql(query1, (msg_id,))[0][0])
        if nb_users == 0:
            messages_to_delete.append(int(msg_id))

    if len(messages_to_delete) > 0:
        query2 = """DELETE FROM msgMESSAGE
                    WHERE"""
        params2 = []
        for msg_id in messages_to_delete[0:-1]:
            query2 += " id=%s OR"
            params2.append(msg_id)
        query2 += " id=%s"
        params2.append(messages_to_delete[-1])

        run_sql(query2, tuple(params2))
    return len(messages_to_delete)
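A hypothetical usage sketch (the message ids are made up):

deleted = check_if_need_to_delete_message_permanently([11, 12, 13])
print "%d message(s) were no longer in any inbox and were purged" % deleted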
Code example #8
File: webaccount.py Project: aw-bib/tind-invenio
def external_user_warning(uid):
    """
    Returns 'email_auto_generated' if the email of the user is auto-generated.

    @param uid: user id
    @type uid: int

    @rtype: ''|'email_auto_generated'
    """
    from invenio.access_control_config import CFG_TEMP_EMAIL_ADDRESS

    query = """
        SELECT      email
        FROM        user
        WHERE       id=%s
    """

    params = (uid, )
    email = run_sql(query, params)[0][0]

    regexp = re.compile(CFG_TEMP_EMAIL_ADDRESS % "\w+", re.IGNORECASE)

    query = """
        SELECT      *
        FROM        userEXT
        WHERE       id_user=%s
    """

    if run_sql(query, params) and re.match(regexp, email):
        return 'email_auto_generated'

    return ''
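A hypothetical usage sketch:

if external_user_warning(42) == 'email_auto_generated':
    print "prompt user 42 to replace the auto-generated email address"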
Code example #9
def store_citation_warning(warning_type, cit_info):
    """Store a citation warning in rnkCITATIONDATAERR, unless an
    identical entry is already present."""
    r = run_sql("""SELECT 1 FROM rnkCITATIONDATAERR
                   WHERE type = %s
                   AND citinfo = %s""", (warning_type, cit_info))
    if not r:
        run_sql("""INSERT INTO rnkCITATIONDATAERR (type, citinfo)
                   VALUES (%s, %s)""", (warning_type, cit_info))
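The SELECT-then-INSERT pair above can race when two processes store the same warning at once. A single-statement variant, sketched here under the assumption that (type, citinfo) carries a UNIQUE index:

from invenio.dbquery import run_sql  # assumed import path

def store_citation_warning_atomic(warning_type, cit_info):
    """Hypothetical variant: INSERT IGNORE silently skips duplicates,
    provided a UNIQUE index exists on (type, citinfo)."""
    run_sql("""INSERT IGNORE INTO rnkCITATIONDATAERR (type, citinfo)
               VALUES (%s, %s)""", (warning_type, cit_info))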
Code example #10
def is_method_valid(colID, rank_method_code):
    """
    Check if RANK_METHOD_CODE method is valid for the collection given.
    If colID is None, then check for existence regardless of collection.
    """

    if colID is None:
        return run_sql("SELECT COUNT(*) FROM rnkMETHOD WHERE name=%s", (rank_method_code,))[0][0]

    enabled_colls = dict(
        run_sql(
            "SELECT id_collection, score from collection_rnkMETHOD,rnkMETHOD WHERE id_rnkMETHOD=rnkMETHOD.id AND name=%s",
            (rank_method_code,),
        )
    )

    try:
        colID = int(colID)
    except TypeError:
        return 0

    if colID in enabled_colls:
        return 1
    else:
        while colID:
            colID = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s", (colID,))
            if colID and colID[0][0] in enabled_colls:
                return 1
            elif colID:
                colID = colID[0][0]
    return 0
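A hypothetical usage sketch; 'wrd' stands in for whatever method name exists in rnkMETHOD:

if is_method_valid(5, "wrd"):
    print "method 'wrd' is enabled for collection 5 or one of its ancestors"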
Code example #11
def iterate_over_new(recids, fmt):
    "Iterate over list of record IDs"
    global total_rec

    formatted_records = ''      # (string-)list of formatted records of an iteration
    tbibformat  = 0     # time taken up by external call
    tbibupload  = 0     # time taken up by external call
    start_date = task_get_task_param('task_starting_time') # time at which the record was formatted

    tot = len(recids)
    count = 0
    for recID in recids:
        t1 = os.times()[4]
        start_date = time.strftime('%Y-%m-%d %H:%M:%S')
        formatted_record = zlib.compress(format_record(recID, fmt, on_the_fly=True))
        if run_sql('SELECT id FROM bibfmt WHERE id_bibrec=%s AND format=%s', (recID, fmt)):
            run_sql('UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s', (start_date, formatted_record, recID, fmt))
        else:
            run_sql('INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, %s, %s, %s)', (recID, fmt, start_date, formatted_record))
        t2 = os.times()[4]
        tbibformat += (t2 - t1)
        count += 1
        if (count % 100) == 0:
            write_message("   ... formatted %s records out of %s" % (count, tot))
            task_update_progress('Formatted %s out of %s' % (count, tot))
            task_sleep_now_if_required(can_stop_too=True)
    if (tot % 100) != 0:
        write_message("   ... formatted %s records out of %s" % (count, tot))
    return (tot, tbibformat, tbibupload)
Code example #12
def get_visible_group_list(uid, pattern=""):
    """List the group the user can join (not already member
    of the group regardless user's status).
     @return: groups {id : name} whose name matches pattern
    """
    grpID = []
    groups = {}
    # list the groups the user is a member of
    query = """SELECT distinct(id_usergroup)
               FROM user_usergroup
               WHERE id_user=%s """
    uid = int(uid)
    res = run_sql(query, (uid,))
    map(lambda x: grpID.append(int(x[0])), res)
    query2 = """SELECT id,name
                FROM usergroup
                WHERE (join_policy='%s' OR join_policy='%s')""" % (
                        CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEOPEN'],
                        CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEMAIL'])

    if len(grpID) == 1:
        query2 += """ AND id!=%i""" % grpID[0]
    elif len(grpID) > 1:
        query2 += """ AND id NOT IN %s""" % str(tuple(grpID))

    if pattern:
        try:
            res2 = run_sql(query2 + """ AND name RLIKE %s ORDER BY name""", (pattern,))
        except OperationalError:
            res2 = ()
    else:
        res2 = run_sql(query2 + """ ORDER BY name""")

    map(lambda x: groups.setdefault(x[0], x[1]), res2)
    return groups
Code example #13
def insert_new_group(uid,
                     new_group_name,
                     new_group_description,
                     join_policy,
                     login_method='INTERNAL'):
    """Create a new group and affiliate a user."""
    query1 = """INSERT INTO usergroup (id, name, description, join_policy,
                   login_method)
                VALUES (NULL,%s,%s,%s,%s)
                """
    params1 = (new_group_name,
               new_group_description,
               join_policy,
               login_method)
    res1 = run_sql(query1, params1)

    date = convert_datestruct_to_datetext(localtime())
    uid = int(uid)
    query2 = """INSERT INTO user_usergroup (id_user, id_usergroup, user_status,
                   user_status_date)
                VALUES (%s,%s,'A',%s)
                """
    params2 = (uid, res1, date)
    res2 = run_sql(query2, params2)
    return res1
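A hypothetical usage sketch, reusing the CFG_WEBSESSION_GROUP_JOIN_POLICY mapping seen in code example #12:

group_id = insert_new_group(42, "demo group", "a group for testing",
                            CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEOPEN'])
print "created group %s with user 42 as admin ('A' status)" % group_id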
Code example #14
 def save(self):
     """
     Save the session to the database.
     """
     if not self._invalid:
         session_dict = {"_data" : self.copy(),
                 "_created" : self._created,
                 "_accessed": self._accessed,
                 "_timeout" : self._timeout,
                 "_http_ip" : self._http_ip,
                 "_https_ip" : self._https_ip,
                 "_remember_me" : self._remember_me
         }
         session_key = self._sid
         session_object = cPickle.dumps(session_dict, -1)
         session_expiry = time.time() + self._timeout + \
             CFG_WEBSESSION_ONE_DAY
         uid = self.get('uid', -1)
         run_sql("""
             INSERT session(
                 session_key,
                 session_expiry,
                 session_object,
                 uid
             ) VALUE(%s,
                 %s,
                 %s,
                 %s
             ) ON DUPLICATE KEY UPDATE
                 session_expiry=%s,
                 session_object=%s,
                 uid=%s
         """, (session_key, session_expiry, session_object, uid,
             session_expiry, session_object, uid))
Code example #15
File: bst_doi_timestamp.py Project: GiorgosPa/scoap3
def prepate_doi_table():
    run_sql("""CREATE TABLE IF NOT EXISTS doi (
        doi varchar(255) NOT NULL,
        creation_date datetime NOT NULL,
        PRIMARY KEY doi(doi),
        KEY (creation_date)
    ) ENGINE=MyISAM;""")
Code example #16
def remove_kb_mapping(kb_name, key):
    """Removes mapping with given key from given kb"""
    k_id = get_kb_id(kb_name)
    run_sql("""DELETE FROM knwKBRVAL
                WHERE m_key = %s AND id_knwKB = %s""",
            (key, k_id))
    return True
Code example #17
def get_external_links_from_db(ref, dict_of_ids, reference_indicator):
    """returns a dictionary containing the number of
    external links for each recid
    external link=citation that is not in our database """
    ext_links = {}
    dict_all_ref = {}
    for recid in dict_of_ids:
        dict_all_ref[recid] = 0
        ext_links[dict_of_ids[recid]] = 0
    reference_db_id = reference_indicator[0:2]
    reference_tag_regex = reference_indicator + "[a-z]"
    tag_list = run_sql("select id from bib" + reference_db_id + \
                         "x where tag RLIKE %s", (reference_tag_regex, ))
    tag_set = set()
    for tag in tag_list:
        tag_set.add(tag[0])
    ref_list = run_sql("select id_bibrec, id_bibxxx, field_number from \
                       bibrec_bib" + reference_db_id + "x group by \
                       id_bibrec, field_number")
    for item in ref_list:
        recid = int(item[0])
        id_bib = int(item[1])
        if recid in dict_of_ids and id_bib in tag_set:
            dict_all_ref[recid] += 1
    for recid in dict_of_ids:
        total_links = dict_all_ref[recid]
        internal_links = ref[dict_of_ids[recid]]
        ext_links[dict_of_ids[recid]] = total_links - internal_links
        if ext_links[dict_of_ids[recid]] < 0:
            ext_links[dict_of_ids[recid]] = 0
    write_message("External link information extracted", verbose=2)
    write_message("External links: %s" % str(ext_links), verbose=9)
    return ext_links
Code example #18
def update_submission_status(id_record, status, remote_id=''):
    '''
        Update the submission row with the new status of the submission.
        @param id_record: id of the row to update
        @param status: new value to set in the status field
        @param remote_id: remote record id, stored when the status is published
        @return: true if update done, else false
    '''

    current_date = time.strftime("%Y-%m-%d %H:%M:%S")

    if status == CFG_SUBMISSION_STATUS_PUBLISHED and remote_id != '':
        qstr = '''UPDATE swrCLIENTDATA SET status=%s, id_remote=%s, ''' \
                 '''publication_date=%s, last_update=%s WHERE id=%s '''
        qres = run_sql(qstr, (status, remote_id, current_date, current_date,
                            id_record, ))

    # note: 'elif' here, so the generic branch below does not run a second
    # UPDATE (and overwrite qres) after the published case above
    elif status == CFG_SUBMISSION_STATUS_REMOVED:
        qstr = '''UPDATE swrCLIENTDATA SET status=%s, removal_date=%s, ''' \
                 '''last_update=%s WHERE id=%s '''
        qres = run_sql(qstr, (status, current_date, current_date, id_record, ))

    else:
        qstr = '''UPDATE swrCLIENTDATA SET status=%s, last_update=%s ''' \
                 '''WHERE id=%s '''
        qres = run_sql(qstr, (status, current_date, id_record, ))

    return qres
Code example #19
def update_kb(kb_name, new_name, new_description):
    """Updates given kb with new name and new description"""
    k_id = get_kb_id(kb_name)
    run_sql("""UPDATE knwKB
                  SET name = %s , description = %s
                WHERE id = %s""", (new_name, new_description, k_id))
    return True
Code example #20
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection,
                oai_set_description, oai_set_p1, oai_set_f1,oai_set_m1,
                oai_set_p2, oai_set_f2,oai_set_m2, oai_set_p3,
                oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2):
    """Add a definition into the OAI Repository"""
    try:
        if not oai_set_spec:
            oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        set_definition = 'c=' + oai_set_collection + ';' + \
                         'p1=' + oai_set_p1  + ';' + \
                         'f1=' + oai_set_f1  + ';' + \
                         'm1=' + oai_set_m1  + ';' + \
                         'op1='+ oai_set_op1 + ';' + \
                         'p2=' + oai_set_p2  + ';' + \
                         'f2=' + oai_set_f2  + ';' + \
                         'm2=' + oai_set_m2  + ';' + \
                         'op2='+ oai_set_op2 + ';' + \
                         'p3=' + oai_set_p3  + ';' + \
                         'f3=' + oai_set_f3  + ';' + \
                         'm3=' + oai_set_m3  + ';'

        run_sql("""INSERT INTO oaiREPOSITORY (id, setName, setSpec,
                           setCollection, setDescription, setDefinition,
                           setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3)
                         VALUES (0, %s, %s, %s, %s, %s, NULL, %s, %s, %s,
                           %s, %s, %s, %s, %s, %s)""",
                      (oai_set_name, oai_set_spec, oai_set_collection,
                       oai_set_description, set_definition, oai_set_p1,
                       oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2,
                       oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3))
        return (1, "")
    except StandardError, e:
        register_exception(alert_admin=True)
        return (0, e)
Code example #21
def get_all_remote_server(id_server):
    '''
        This function selects the name of all remote servers implementing the
        SWORD protocol. It returns a list of dictionaries containing three
        fields: id, name and host.
        @return (remote_servers): list of dictionaries (id - name - host), one
                                          per remote server
    '''

    remote_servers = []

    if id_server == '':
        qstr = '''SELECT id, name, host FROM swrREMOTESERVER'''
        qres = run_sql(qstr)
    else :
        qstr = ''' SELECT id, name, host FROM swrREMOTESERVER WHERE id=%s'''
        qres = run_sql(qstr, (id_server, ))


    for res in qres:
        remote_server = {}
        remote_server['id'] = res[0]
        remote_server['name'] = res[1]
        remote_server['host'] = res[2]
        remote_servers.append(remote_server)

    return remote_servers
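A hypothetical usage sketch: pass an empty id to list every server, or a concrete id for a single one:

for server in get_all_remote_server(''):
    print "%(id)s: %(name)s (%(host)s)" % server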
Code example #22
File: bibeditcli.py Project: aw-bib/tind-invenio
def cli_clean_revisions(recid, dry_run=True, verbose=True):
    """Clean revisions of the given recid, by removing duplicate revisions
    that do not change the content of the record."""
    if recid == '*':
        recids = intbitset(run_sql("SELECT DISTINCT id_bibrec FROM hstRECORD"))
    else:
        try:
            recids = [int(recid)]
        except ValueError:
            print 'ERROR: record ID must be integer, not %s.' % recid
            sys.exit(1)
    for recid in recids:
        all_revisions = run_sql("SELECT marcxml, job_id, job_name, job_person, job_date FROM hstRECORD WHERE id_bibrec=%s ORDER BY job_date ASC", (recid,))
        previous_rec = {}
        deleted_revisions = 0
        for marcxml, job_id, job_name, job_person, job_date in all_revisions:
            try:
                current_rec = create_record(zlib.decompress(marcxml))[0]
            except Exception:
                print >> sys.stderr, "ERROR: corrupted revisions found. Please run %s --fix-revisions '*'" % sys.argv[0]
                sys.exit(1)
            if records_identical(current_rec, previous_rec):
                deleted_revisions += 1
                if not dry_run:
                    run_sql("DELETE FROM hstRECORD WHERE id_bibrec=%s AND job_id=%s AND job_name=%s AND job_person=%s AND job_date=%s", (recid, job_id, job_name, job_person, job_date))
            previous_rec = current_rec
        if verbose and deleted_revisions:
            print "record %s: deleted %s duplicate revisions out of %s" % (recid, deleted_revisions, len(all_revisions))
    if verbose:
        print "DONE"
Code example #23
def acc_firerole_extract_emails(firerole_def_obj):
    """
    Best effort function to extract all the possible email addresses
    authorized by the given firerole.
    """
    authorized_emails = set()
    try:
        default_allow_p, rules = firerole_def_obj
        for (allow_p, not_p, field, expressions_list) in rules: # for every rule
            if not_p:
                continue
            if field == 'group':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    if CFG_CERN_SITE and expr.endswith(' [CERN]'):
                        authorized_emails.add(expr[:-len(' [CERN]')].lower().strip() + '@cern.ch')
                    emails = run_sql("SELECT user.email FROM usergroup JOIN user_usergroup ON usergroup.id=user_usergroup.id_usergroup JOIN user ON user.id=user_usergroup.id_user WHERE usergroup.name=%s", (expr, ))
                    for email in emails:
                        authorized_emails.add(email[0].lower().strip())
            elif field == 'email':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    authorized_emails.add(expr.lower().strip())
            elif field == 'uid':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    email = run_sql("SELECT email FROM user WHERE id=%s", (expr, ))
                    if email:
                        authorized_emails.add(email[0][0].lower().strip())
        return authorized_emails
    except Exception, msg:
        raise InvenioWebAccessFireroleError, msg
Code example #24
def do_upgrade():
    run_sql("""CREATE TABLE IF NOT EXISTS schSTATUS (
  name varchar(50),
  value mediumblob,
  PRIMARY KEY (name)
) ENGINE=MyISAM
    """)
Code example #25
def repair_role_definitions():
    """ Try to rebuild compiled serialized definitions from their respectives
    sources. This is needed in case Python break back compatibility.
    """
    definitions = run_sql("SELECT id, firerole_def_src FROM accROLE")
    for role_id, firerole_def_src in definitions:
        run_sql("UPDATE accROLE SET firerole_def_ser=%s WHERE id=%s", (serialize(compile_role_definition(firerole_def_src)), role_id))
Code example #26
File: webstat_engine.py Project: pombredanne/invenio
def get_keyevent_snapshot_sessions():
    """
    A specific implementation of get_current_event().

    @return: The current number of website visitors (guests, logged in)
    @type: (int, int)
    """
    # SQL to retrieve sessions of guest users
    sql = (
        "SELECT COUNT(session_expiry) FROM session INNER JOIN user ON uid=id "
        + "WHERE email = '' AND "
        + "session_expiry-%d < unix_timestamp() AND " % WEBSTAT_SESSION_LENGTH
        + "unix_timestamp() < session_expiry"
    )
    guests = run_sql(sql)[0][0]

    # SQL to retrieve sessions of logged-in users
    sql = (
        "SELECT COUNT(session_expiry) FROM session INNER JOIN user ON uid=id "
        + "WHERE email <> '' AND "
        + "session_expiry-%d < unix_timestamp() AND " % WEBSTAT_SESSION_LENGTH
        + "unix_timestamp() < session_expiry"
    )
    logged_ins = run_sql(sql)[0][0]

    # Assemble, according to return type
    return (guests, logged_ins)
Code example #27
def precache_element(name, key):
    '''
    Updates the last_updated flag of a cache to prevent parallel recomputation of the same cache.
    '''
    run_sql("insert into wapCACHE  (object_name,object_key,last_updated) values (%s,%s,now()) "
            "on duplicate key update last_updated=now(),object_status=%s",
            (str(name), str(key), 'Precached'))
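A companion read helper might look as follows; this is a sketch that assumes nothing about the wapCACHE schema beyond the columns used above:

from invenio.dbquery import run_sql  # assumed import path

def get_cache_status(name, key):
    """Hypothetical helper: return object_status for a cache entry, or None."""
    res = run_sql("""SELECT object_status FROM wapCACHE
                     WHERE object_name=%s AND object_key=%s""",
                  (str(name), str(key)))
    if res:
        return res[0][0]
    return None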
Code example #28
File: inveniogc.py Project: chezjohnny/invenio
def clean_bibxxx():
    """
    Clean unreferenced bibliographic values from bibXXx tables.
    This is useful to prettify browse results, as it removes
    old, no longer used values.

    WARNING: this function must be run only when no bibupload is
    running and/or sleeping.
    """
    write_message("""CLEANING OF UNREFERENCED bibXXx VALUES STARTED""")
    for xx in range(0, 100):
        bibxxx = 'bib%02dx' % xx
        bibrec_bibxxx = 'bibrec_bib%02dx' % xx
        if task_get_option('verbose') >= 9:
            num_unref_values = run_sql("""SELECT COUNT(*) FROM %(bibxxx)s
                     LEFT JOIN %(bibrec_bibxxx)s
                            ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
                     WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
                        {'bibxxx': bibxxx,
                         'bibrec_bibxxx': bibrec_bibxxx, })[0][0]
        run_sql("""DELETE %(bibxxx)s FROM %(bibxxx)s
                     LEFT JOIN %(bibrec_bibxxx)s
                            ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
                     WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
                        {'bibxxx': bibxxx,
                         'bibrec_bibxxx': bibrec_bibxxx, })
        if task_get_option('verbose') >= 9:
            write_message(""" - %d unreferenced %s values cleaned""" % \
                          (num_unref_values, bibxxx))
    write_message("""CLEANING OF UNREFERENCED bibXXx VALUES FINISHED""")
Code example #29
File: webstat.py Project: pombredanne/invenio
def alert_display():
    """
    Display alert statistics.
    """
    tbl_name = get_customevent_table("alerts")
    if not tbl_name:
        # custom event alerts not defined, so return empty output:
        return []
    try:
        res = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
        days = (res[-1][0] - res[0][0]).days + 1
        res = run_sql("SELECT COUNT(DISTINCT user),COUNT(*) FROM %s" % tbl_name)
        users = res[0][0]
        hits = res[0][1]
        displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'list'" % tbl_name)[0][0]
        search = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display'" % tbl_name)[0][0]
        average = hits / days

        res = [("Alerts page hits", hits)]
        res.append(("   Average per day", average))
        res.append(("   Unique users", users))
        res.append(("   Displays", displays))
        res.append(("   Searches history display", search))
    except IndexError:
        res = []

    return res
Code example #30
def save_references(paper_id, data):
    """
    Saves the references of the passed data dictionary, using the standard
    authorlist_config keys, for the paper data set with the given id. Should
    NOT be used on its own unless you are sure of what you are doing. Refer to
    save() instead. Returns the paper id.
    """
    reference_ids = data[cfg.JSON.REFERENCE_IDS]

    # Insert or update old references
    for index, reference in enumerate(reference_ids):
        data_tuple = (# insert values
                      index,
                      reference,
                      paper_id,
                      
                      # update values
                      reference)
    
        run_sql("""INSERT INTO 
                   aulREFERENCES (item, reference, paper_id)
                   VALUES (%s, %s, %s)
                   ON DUPLICATE KEY UPDATE
                   reference = %s;""", data_tuple)
                   
    # Delete old references that are out of bounds - i.e. have a higher index 
    # than the length of the reference list
    run_sql("""DELETE FROM aulREFERENCES WHERE item >= %s AND paper_id = %s;""", 
            (len(reference_ids), paper_id))
            
    return paper_id
Code example #31
def store_last_updated(format, update_date):
    sql = "UPDATE format SET last_updated = %s " \
           "WHERE code = %s AND (last_updated < %s or last_updated IS NULL)"
    iso_date = update_date.strftime("%Y-%m-%d %H:%M:%S")
    run_sql(sql, (iso_date, format.lower(), iso_date))
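A hypothetical usage sketch; 'HB' is one of the output format codes appearing elsewhere on this page:

from datetime import datetime

store_last_updated("HB", datetime.now())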
Code example #32
File: arxiv_pdf_checker.py Project: tsgit/invenio
def fetch_records_with_arxiv_fulltext():
    """
    Returns all the record IDs for records that have an arXiv bibdocfile
    attached.
    """
    return intbitset(run_sql("select id_bibrec from bibrec_bibdoc join bibdoc on id_bibdoc=id where (bibrec_bibdoc.type='arXiv' or bibdoc.doctype='arXiv') and bibdoc.status <> 'DELETED'"))
Code example #33
File: arxiv_pdf_checker.py Project: tsgit/invenio
def fetch_arxiv_pdf_status(recid):
    """Fetch from the database the harvest status of given recid"""
    ret = run_sql("""SELECT status, version FROM bibARXIVPDF
                     WHERE id_bibrec = %s""", [recid])
    return ret and ret[0] or (None, None)
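A hypothetical usage sketch; the helper returns (None, None) for records never harvested:

status, version = fetch_arxiv_pdf_status(12345)
if status is None:
    print "record 12345 has no harvest status yet"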
Code example #34
                                            msg)
                                except InvenioBibDocFileError, e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()

    # Update the MARC
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s",
                      (str(sysno), ))

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql(
        "DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s",
        (sysno, ))

    return ""


def get_pa_tag_content(pa_content):
    """Get content for <PA>XXX</PA>.
    @param pa_content: MatchObject for <PA>(.*)</PA>.
    @return: the content of the file possibly filtered by a regular expression
    if pa_content=file[re]:a_file => first line of file a_file matching re
    if pa_content=file*p[re]:a_file => all lines of file a_file, matching re,
    separated by - (dash) char.
    """
    pa_content = pa_content.groupdict()['content']
    sep = '-'
Code example #35
def word_similarity(rank_method_code, lwords, hitset, rank_limit_relevance,
                    verbose, methods):
    """Ranking a records containing specified words and returns a sorted list.
    input:
    rank_method_code - the code of the method, from the name field in rnkMETHOD
    lwords - a list of words from the query
    hitset - a list of hits for the query found by search_engine
    rank_limit_relevance - show only records with a rank value above this
    verbose - verbose value
    output:
    reclist - a list of sorted records: [[23,34], [344,24], [1,01]]
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value"""
    voutput = ""
    startCreate = time.time()

    if verbose > 0:
        voutput += "<br />Running rank method: %s, using word_frequency function in bibrank_record_sorter<br />" % rank_method_code

    lwords_old = lwords
    lwords = []
    #Check terms, remove non alphanumeric characters. Use both unstemmed and stemmed version of all terms.
    for i in range(0, len(lwords_old)):
        term = string.lower(lwords_old[i])
        if not methods[rank_method_code]["stopwords"] == "True" or methods[
                rank_method_code]["stopwords"] and not is_stopword(term):
            lwords.append((term, methods[rank_method_code]["rnkWORD_table"]))
            terms = string.split(
                string.lower(
                    re.sub(
                        methods[rank_method_code]
                        ["chars_alphanumericseparators"], ' ', term)))
            for term in terms:
                if methods[rank_method_code].has_key("stemmer"):  # stem word
                    term = stem(string.replace(term, ' ', ''),
                                methods[rank_method_code]["stemmer"])
                if lwords_old[
                        i] != term:  #add if stemmed word is different than original word
                    lwords.append(
                        (term, methods[rank_method_code]["rnkWORD_table"]))

    (recdict, rec_termcount, lrecIDs_remove) = ({}, {}, {})
    #For each term, if accepted, get a list of the records using the term
    #calculate then relevance for each term before sorting the list of records
    for (term, table) in lwords:
        term_recs = run_sql(
            """SELECT term, hitlist FROM %s WHERE term=%%s""" %
            methods[rank_method_code]["rnkWORD_table"], (term, ))
        if term_recs:  #if term exists in database, use for ranking
            term_recs = deserialize_via_marshal(term_recs[0][1])
            (recdict, rec_termcount) = calculate_record_relevance(
                (term, int(term_recs["Gi"][1])),
                term_recs,
                hitset,
                recdict,
                rec_termcount,
                verbose,
                quick=None)
            del term_recs

    if len(recdict) == 0 or (len(lwords) == 1 and lwords[0] == ""):
        return (
            None,
            "Records not ranked. The query is not detailed enough, or not enough records found, for ranking to be possible.",
            "", voutput)
    else:  #sort if we got something to sort
        (reclist, hitset) = sort_record_relevance(recdict, rec_termcount,
                                                  hitset, rank_limit_relevance,
                                                  verbose)

    #Add any documents not ranked to the end of the list
    if hitset:
        lrecIDs = list(hitset)  #using 2-3mb
        reclist = zip(lrecIDs, [0] * len(lrecIDs)) + reclist  #using 6mb

    if verbose > 0:
        voutput += "<br />Current number of recIDs: %s<br />" % (
            methods[rank_method_code]["col_size"])
        voutput += "Number of terms: %s<br />" % run_sql(
            "SELECT count(id) FROM %s" %
            methods[rank_method_code]["rnkWORD_table"])[0][0]
        voutput += "Terms: %s<br />" % lwords
        voutput += "Prepare and pre calculate time: %s<br />" % (
            str(time.time() - startCreate))
        voutput += "Total time used: %s<br />" % (str(time.time() -
                                                      startCreate))
        voutput += str(reclist) + "<br />"
        rank_method_stat(rank_method_code, reclist, lwords)
    return (reclist, methods[rank_method_code]["prefix"],
            methods[rank_method_code]["postfix"], voutput)
Code example #36
    voutput = ""

    if verbose > 0:
        voutput += "<br />Running rank method: %s, using find_similar/word_frequency in bibrank_record_sorter<br />" % rank_method_code
    rank_limit_relevance = methods[rank_method_code]["default_min_relevance"]

    try:
        recID = int(recID)
    except Exception, e:
        return (
            None,
            "Warning: Error in record ID, please check that a number is given.",
            "", voutput)

    rec_terms = run_sql(
        """SELECT termlist FROM %sR WHERE id_bibrec=%%s""" %
        methods[rank_method_code]["rnkWORD_table"][:-1], (recID, ))
    if not rec_terms:
        return (None, "Warning: Requested record does not seem to exist.", "",
                voutput)
    rec_terms = deserialize_via_marshal(rec_terms[0][0])

    #Get all documents using terms from the selected documents
    if len(rec_terms) == 0:
        return (
            None,
            "Warning: Record specified has no content indexed for use with this method.",
            "", voutput)
    else:
        terms = "%s" % rec_terms.keys()
        terms_recs = dict(
Code example #37
def bibreformat_task(fmt, sql, sql_queries, cds_query, process_format, process,
                     recids):
    """
    BibReformat main task

    @param fmt: output format to use
    @param sql: dictionary with pre-created sql queries for various cases (for selecting records). Some of these queries will be picked depending on the case
    @param sql_queries: a list of sql queries to be executed to select records to reformat.
    @param cds_query: a search query to be executed to select records to reformat
    @param process_format:
    @param process:
    @param recids: a list of record IDs to reformat
    @return: None
    """
    write_message("Processing format %s" % fmt)

    t1 = os.times()[4]

    start_date = datetime.now()

    ### Query the database
    ###
    task_update_progress('Fetching records to process')
    if process_format:  # '-without' parameter
        write_message("Querying database for records without cache...")
        without_format = without_fmt(sql)

    recIDs = intbitset(recids)

    if cds_query['field']      != "" or  \
       cds_query['collection'] != "" or  \
       cds_query['pattern']    != "":

        write_message("Querying database (CDS query)...")

        if cds_query['collection'] == "":
            # use search_pattern() whenever possible, as it can search
            # even in private collections
            res = search_pattern(p=cds_query['pattern'],
                                 f=cds_query['field'],
                                 m=cds_query['matching'])
        else:
            # use perform_request_search when '-c' argument has been
            # defined, as it is not supported by search_pattern()
            res = intbitset(
                perform_request_search(req=None,
                                       of='id',
                                       c=cds_query['collection'],
                                       p=cds_query['pattern'],
                                       f=cds_query['field']))

        recIDs |= res

    for sql_query in sql_queries:
        write_message("Querying database (%s) ..." % sql_query, verbose=2)
        recIDs |= intbitset(run_sql(sql_query))

    if fmt == "HDREF" and recIDs:
        # HDREF represents the references tab
        # the tab needs to be recomputed not only when the record changes
        # but also when one of the citations changes
        latest_bibrank_run = get_bibrankmethod_lastupdate('citation')
        sql = """SELECT id, modification_date FROM bibrec
                 WHERE id in (%s)""" % ','.join(str(r) for r in recIDs)

        def check_date(mod_date):
            return mod_date < latest_bibrank_run
        recIDs = intbitset([recid for recid, mod_date in run_sql(sql) \
                                                    if check_date(mod_date)])
        for r in recIDs:
            recIDs |= intbitset(get_cited_by(r))

### list of corresponding record IDs was retrieved
### now format the selected records

    if process_format:
        write_message("Records to be processed: %d" % (len(recIDs) \
                                               + len(without_format)))
        write_message("Out of it records without existing cache: %d" %
                      len(without_format))
    else:
        write_message("Records to be processed: %d" % (len(recIDs)))

### Initialize main loop

    total_rec = 0  # Total number of records
    tbibformat = 0  # time taken up by external call
    tbibupload = 0  # time taken up by external call

    ### Iterate over all records prepared in lists I (option)
    if process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT:  # FIXME: remove this
            # when migration from php to
            # python bibformat is done
            (total_rec_1, tbibformat_1,
             tbibupload_1) = iterate_over_old(recIDs, fmt)
        else:
            (total_rec_1, tbibformat_1,
             tbibupload_1) = iterate_over_new(recIDs, fmt)
        total_rec += total_rec_1
        tbibformat += tbibformat_1
        tbibupload += tbibupload_1

### Iterate over all records prepared in list II (no_format)
    if process_format and process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT:  # FIXME: remove this
            # when migration from php to
            # python bibformat is done
            (total_rec_2, tbibformat_2,
             tbibupload_2) = iterate_over_old(without_format, fmt)
        else:
            (total_rec_2, tbibformat_2,
             tbibupload_2) = iterate_over_new(without_format, fmt)
        total_rec += total_rec_2
        tbibformat += tbibformat_2
        tbibupload += tbibupload_2

### Store last run time
    if task_has_option("last"):
        write_message("storing run date to %s" % start_date)
        store_last_updated(fmt, start_date)


### Final statistics

    t2 = os.times()[4]

    elapsed = t2 - t1
    message = "total records processed: %d" % total_rec
    write_message(message)

    message = "total processing time: %2f sec" % elapsed
    write_message(message)

    message = "Time spent on external call (os.system):"
    write_message(message)

    message = " bibformat: %2f sec" % tbibformat
    write_message(message)

    message = " bibupload: %2f sec" % tbibupload
    write_message(message)
Code example #38
def do_upgrade_atlantis():
    field_id = run_sql("""INSERT INTO field SET name='note', code='note'""")
    tag_id = run_sql("""INSERT INTO tag SET name='note', value='500__a'""")
    run_sql("""INSERT INTO field_tag VALUES (%s, %s, 10)""",
            (field_id, tag_id))
Code example #39
def retrieve_bibdoc_bibdoc():
    return run_sql('SELECT id_bibdoc1, id_bibdoc2 from bibdoc_bibdoc')
Code example #40
def guest_user_garbage_collector():
    """Session Garbage Collector

    program flow/tasks:
    1: delete expired sessions
    1b:delete guest users without session
    2: delete queries not attached to any user
    3: delete baskets not attached to any user
    4: delete alerts not attached to any user
    5: delete expired mailcookies
    5b: delete expired not confirmed email address
    6: delete expired roles memberships

    verbose - level of program output.
              0 - nothing
              1 - default
              9 - max, debug"""

    # dictionary used to keep track of number of deleted entries
    delcount = {
        'session': 0,
        'user': 0,
        'user_query': 0,
        'query': 0,
        'bskBASKET': 0,
        'user_bskBASKET': 0,
        'bskREC': 0,
        'bskRECORDCOMMENT': 0,
        'bskEXTREC': 0,
        'bskEXTFMT': 0,
        'user_query_basket': 0,
        'mail_cookie': 0,
        'email_addresses': 0,
        'role_membership': 0
    }

    write_message("CLEANING OF GUEST SESSIONS STARTED")

    # 1 - DELETE EXPIRED SESSIONS
    write_message("- deleting expired sessions")
    timelimit = time.time()
    write_message("  DELETE FROM session WHERE"
                  " session_expiry < %d \n" % (timelimit, ),
                  verbose=9)
    delcount['session'] += run_sql("DELETE FROM session WHERE"
                                   " session_expiry < %s "
                                   "" % (timelimit, ))

    # 1b - DELETE GUEST USERS WITHOUT SESSION
    write_message("- deleting guest users without session")

    # get uids
    write_message(
        """  SELECT u.id\n  FROM user AS u LEFT JOIN session AS s\n  ON u.id = s.uid\n  WHERE s.uid IS NULL AND u.email = ''""",
        verbose=9)

    result = run_sql("""SELECT u.id
    FROM user AS u LEFT JOIN session AS s
    ON u.id = s.uid
    WHERE s.uid IS NULL AND u.email = ''""")
    write_message(result, verbose=9)

    if result:
        # work on slices of result list in case of big result
        for i in range(0, len(result), CFG_MYSQL_ARGUMENTLIST_SIZE):
            # create string of uids
            uidstr = ''
            for (id_user, ) in result[i:i + CFG_MYSQL_ARGUMENTLIST_SIZE]:
                if uidstr: uidstr += ','
                uidstr += "%s" % (id_user, )

            # delete users
            write_message(
                "  DELETE FROM user WHERE"
                " id IN (TRAVERSE LAST RESULT) AND email = '' \n",
                verbose=9)
            delcount['user'] += run_sql("DELETE FROM user WHERE"
                                        " id IN (%s) AND email = ''" %
                                        (uidstr, ))

    # 2 - DELETE QUERIES NOT ATTACHED TO ANY USER

    # first step, delete from user_query
    write_message("- deleting user_queries referencing" " non-existent users")

    # find user_queries referencing non-existent users
    write_message(
        "  SELECT DISTINCT uq.id_user\n"
        "  FROM user_query AS uq LEFT JOIN user AS u\n"
        "  ON uq.id_user = u.id\n  WHERE u.id IS NULL",
        verbose=9)
    result = run_sql("""SELECT DISTINCT uq.id_user
        FROM user_query AS uq LEFT JOIN user AS u
        ON uq.id_user = u.id
        WHERE u.id IS NULL""")
    write_message(result, verbose=9)

    # delete in user_query one by one
    write_message(
        "  DELETE FROM user_query WHERE"
        " id_user = '******' \n",
        verbose=9)
    for (id_user, ) in result:
        delcount['user_query'] += run_sql("""DELETE FROM user_query
            WHERE id_user = %s""" % (id_user, ))

    # delete the actual queries
    write_message("- deleting queries not attached to any user")

    # select queries that must be deleted
    write_message(
        """  SELECT DISTINCT q.id\n  FROM query AS q LEFT JOIN user_query AS uq\n  ON uq.id_query = q.id\n  WHERE uq.id_query IS NULL AND\n  q.type <> 'p' """,
        verbose=9)
    result = run_sql("""SELECT DISTINCT q.id
                        FROM query AS q LEFT JOIN user_query AS uq
                        ON uq.id_query = q.id
                        WHERE uq.id_query IS NULL AND
                              q.type <> 'p'""")
    write_message(result, verbose=9)

    # delete queries one by one
    write_message(
        """  DELETE FROM query WHERE id = 'TRAVERSE LAST RESULT \n""",
        verbose=9)
    for (id_user, ) in result:
        delcount['query'] += run_sql("""DELETE FROM query WHERE id = %s""",
                                     (id_user, ))

    # 3 - DELETE BASKETS NOT OWNED BY ANY USER
    write_message("- deleting baskets not owned by any user")

    # select basket ids
    write_message(
        """ SELECT ub.id_bskBASKET\n  FROM user_bskBASKET AS ub LEFT JOIN user AS u\n  ON u.id = ub.id_user\n  WHERE u.id IS NULL""",
        verbose=9)
    try:
        result = run_sql("""SELECT ub.id_bskBASKET
                              FROM user_bskBASKET AS ub LEFT JOIN user AS u
                                ON u.id = ub.id_user
                             WHERE u.id IS NULL""")
    except:
        result = []
    write_message(result, verbose=9)

    # delete from user_basket and basket one by one
    write_message(
        """  DELETE FROM user_bskBASKET WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' """,
        verbose=9)
    write_message(
        """  DELETE FROM bskBASKET WHERE id = 'TRAVERSE LAST RESULT' """,
        verbose=9)
    write_message(
        """  DELETE FROM bskREC WHERE id_bskBASKET = 'TRAVERSE LAST RESULT'""",
        verbose=9)
    write_message(
        """  DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' \n""",
        verbose=9)
    for (id_basket, ) in result:
        delcount['user_bskBASKET'] += run_sql(
            """DELETE FROM user_bskBASKET WHERE id_bskBASKET = %s""",
            (id_basket, ))
        delcount['bskBASKET'] += run_sql(
            """DELETE FROM bskBASKET WHERE id = %s""", (id_basket, ))
        delcount['bskREC'] += run_sql(
            """DELETE FROM bskREC WHERE id_bskBASKET = %s""", (id_basket, ))
        delcount['bskRECORDCOMMENT'] += run_sql(
            """DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = %s""",
            (id_basket, ))
    write_message(
        """ SELECT DISTINCT ext.id, rec.id_bibrec_or_bskEXTREC FROM bskEXTREC AS ext \nLEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC WHERE id_bibrec_or_bskEXTREC is NULL""",
        verbose=9)
    try:
        result = run_sql("""SELECT DISTINCT ext.id FROM bskEXTREC AS ext
                            LEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC
                            WHERE id_bibrec_or_bskEXTREC is NULL""")
    except:
        result = []
    write_message(result, verbose=9)
    write_message(
        """  DELETE FROM bskEXTREC WHERE id = 'TRAVERSE LAST RESULT' """,
        verbose=9)
    write_message(
        """  DELETE FROM bskEXTFMT WHERE id_bskEXTREC = 'TRAVERSE LAST RESULT' \n""",
        verbose=9)
    for (id_basket, ) in result:
        delcount['bskEXTREC'] += run_sql(
            """DELETE FROM bskEXTREC WHERE id=%s""", (id_basket, ))
        delcount['bskEXTFMT'] += run_sql(
            """DELETE FROM bskEXTFMT WHERE id_bskEXTREC=%s""", (id_basket, ))

    # 4 - DELETE ALERTS NOT OWNED BY ANY USER
    write_message('- deleting alerts not owned by any user')

    # select user ids in uqb that reference non-existent users
    write_message(
        """SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""",
        verbose=9)
    result = run_sql(
        """SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL"""
    )
    write_message(result, verbose=9)

    # delete all these entries
    for (id_user, ) in result:
        write_message(
            """DELETE FROM user_query_basket WHERE id_user = 'TRAVERSE LAST RESULT' """,
            verbose=9)
        delcount['user_query_basket'] += run_sql(
            """DELETE FROM user_query_basket WHERE id_user = %s """,
            (id_user, ))

    # 5 - delete expired mailcookies
    write_message("""mail_cookie_gc()""", verbose=9)
    delcount['mail_cookie'] = mail_cookie_gc()

    ## 5b - delete expired not confirmed email address
    write_message(
        """DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')"""
        % CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS,
        verbose=9)
    delcount['email_addresses'] = run_sql(
        """DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')"""
        % CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS)

    # 6 - delete expired roles memberships
    write_message("""DELETE FROM user_accROLE WHERE expiration<NOW()""",
                  verbose=9)
    delcount['role_membership'] = run_sql(
        """DELETE FROM user_accROLE WHERE expiration<NOW()""")

    # print STATISTICS
    write_message("""- statistics about deleted data: """)
    write_message("""  %7s sessions.""" % (delcount['session'], ))
    write_message("""  %7s users.""" % (delcount['user'], ))
    write_message("""  %7s user_queries.""" % (delcount['user_query'], ))
    write_message("""  %7s queries.""" % (delcount['query'], ))
    write_message("""  %7s baskets.""" % (delcount['bskBASKET'], ))
    write_message("""  %7s user_baskets.""" % (delcount['user_bskBASKET'], ))
    write_message("""  %7s basket_records.""" % (delcount['bskREC'], ))
    write_message("""  %7s basket_external_records.""" %
                  (delcount['bskEXTREC'], ))
    write_message("""  %7s basket_external_formats.""" %
                  (delcount['bskEXTFMT'], ))
    write_message("""  %7s basket_comments.""" %
                  (delcount['bskRECORDCOMMENT'], ))
    write_message("""  %7s user_query_baskets.""" %
                  (delcount['user_query_basket'], ))
    write_message("""  %7s mail_cookies.""" % (delcount['mail_cookie'], ))
    write_message("""  %7s non confirmed email addresses.""" %
                  delcount['email_addresses'])
    write_message("""  %7s role_memberships.""" %
                  (delcount['role_membership'], ))
    write_message("""CLEANING OF GUEST SESSIONS FINISHED""")
Code example #41
 def tearDown(self):
     # Clean DB entries
     run_sql(""" DELETE FROM seqSTORE
                 WHERE seq_name="texkey"
                 AND seq_value IN ("%s", "%s", "%s") """ %
             (self.texkey1, self.texkey2, self.texkey3))
Code example #42
def get_recid_from_docid(docid):
    return run_sql('SELECT id_bibrec FROM bibrec_bibdoc WHERE id_bibdoc=%s',
                   (docid, ))
Code example #43
File: sequtils_texkey.py Project: tsgit/invenio
def wait_for_task(task_id):
    sql = 'select status from schTASK where id = %s'
    while run_sql(sql, [task_id])[0][0] not in ('DONE', 'ACK', 'ACK DONE'):
        task_sleep_now_if_required(True)
        time.sleep(5)
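A hypothetical usage sketch; task_low_level_submission is assumed here as the entry point that queues a task and returns its id:

task_id = task_low_level_submission('bibindex', 'admin')  # assumed helper
wait_for_task(task_id)  # returns once schTASK status is DONE/ACK/ACK DONE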
Code example #44
    except Exception, err:
        msg = "WARNING: when opening docid %s: %s" % (id_bibdoc1, err)
        print >> logfile, msg
        print msg
        return True
    try:
        msg = "Fixing icon for the document %s" % (id_bibdoc1, )
        print msg,
        print >> logfile, msg,
        the_icon = BibDoc.create_instance(id_bibdoc2)
        for a_file in the_icon.list_latest_files():
            the_bibdoc.add_icon(a_file.get_full_path(),
                                format=a_file.get_format())
        the_icon.delete()
        run_sql(
            "DELETE FROM bibdoc_bibdoc WHERE id_bibdoc1=%s AND id_bibdoc2=%s",
            (id_bibdoc1, id_bibdoc2))
        print "OK"
        print >> logfile, "OK"
        return True
    except Exception, err:
        print "ERROR: %s" % err
        print >> logfile, "ERROR: %s" % err
        register_exception()
        return False


def main():
    """Core loop."""
    check_running_process_user()
    logfilename = '%s/fulltext_files_migration_kit-%s.log' % (
Code example #45
def Send_APP_Mail(parameters, curdir, form, user_info=None):
    """
    This function sends an email informing the original submitter of a
    document that the referee has approved or rejected the document. The
    email is also sent to the referee for checking.

    Parameters:

       * addressesAPP: email addresses of the people who will receive
         this email (comma separated list). This parameter may contain
         the <CATEG> string, in which case the category computed from
         the [categformatAPP] parameter replaces this string.
         eg.: "<CATEG>[email protected]"

       * categformatAPP: contains a regular expression used to compute
         the category of the document given the reference of the
         document.
         eg.: if [categformatAPP]="TEST-<CATEG>-.*" and the reference
         of the document is "TEST-CATEGORY1-2001-001", then the computed
         category equals "CATEGORY1"

       * newrnin: Name of the file containing the 2nd reference of the
                  approved document (if any).

       * edsrn: Name of the file containing the reference of the
                approved document.
    """
    global titlevalue, authorvalue, emailvalue, sysno, rn
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,
                                              CFG_SITE_SUPPORT_EMAIL)
    sequence_id = bibtask_allocate_sequenceid(curdir)
    doctype = form['doctype']
    titlevalue = titlevalue.replace("\n", " ")
    authorvalue = authorvalue.replace("\n", "; ")
    # variables declaration
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""
    ## Get the name of the comments file:
    try:
        comments_filename = parameters['comments_file']
    except KeyError:
        comments_filename = ""

    ## Now try to read the comments from the comments_filename:
    if comments_filename in (None, "", "NULL"):
        ## We don't have a name for the comments file.
        ## For backward compatibility reasons, try to read the comments from
        ## a file called 'COM' in curdir:
        if os.path.exists("%s/COM" % curdir):
            try:
                fh_comments = open("%s/COM" % curdir, "r")
                comment = fh_comments.read()
                fh_comments.close()
            except IOError:
                ## Unable to open the comments file
                exception_prefix = "Error in WebSubmit function " \
                                   "Send_APP_Mail. Tried to open " \
                                   "comments file [%s/COM] but was " \
                                   "unable to." % curdir
                register_exception(prefix=exception_prefix)
                comment = ""
            else:
                comment = comment.strip()
        else:
            comment = ""
    else:
        ## Try to read the comments from the comments file:
        if os.path.exists("%s/%s" % (curdir, comments_filename)):
            try:
                fh_comments = open("%s/%s" % (curdir, comments_filename), "r")
                comment = fh_comments.read()
                fh_comments.close()
            except IOError:
                ## Oops, unable to open the comments file.
                comment = ""
                exception_prefix = "Error in WebSubmit function " \
                                "Send_APP_Mail. Tried to open comments " \
                                "file [%s/%s] but was unable to." \
                                % (curdir, comments_filename)
                register_exception(prefix=exception_prefix)
            else:
                comment = comment.strip()
        else:
            comment = ""

    ## Now try to read the decision from the decision_filename:
    if decision_filename in (None, "", "NULL"):
        ## We don't have a name for the decision file.
        ## For backward compatibility reasons, try to read the decision from
        ## a file called 'decision' in curdir:
        if os.path.exists("%s/decision" % curdir):
            try:
                fh_decision = open("%s/decision" % curdir, "r")
                decision = fh_decision.read()
                fh_decision.close()
            except IOError:
                ## Unable to open the decision file
                exception_prefix = "Error in WebSubmit function " \
                                   "Send_APP_Mail. Tried to open " \
                                   "decision file [%s/decision] but was " \
                                   "unable to." % curdir
                register_exception(prefix=exception_prefix)
                decision = ""
            else:
                decision = decision.strip()
        else:
            decision = ""
    else:
        ## Try to read the decision from the decision file:
        try:
            fh_decision = open("%s/%s" % (curdir, decision_filename), "r")
            decision = fh_decision.read()
            fh_decision.close()
        except IOError:
            ## Oops, unable to open the decision file.
            decision = ""
            exception_prefix = "Error in WebSubmit function " \
                               "Send_APP_Mail. Tried to open decision " \
                               "file [%s/%s] but was unable to." \
                               % (curdir, decision_filename)
            register_exception(prefix=exception_prefix)
        else:
            decision = decision.strip()

    if os.path.exists("%s/%s" % (curdir, newrnpath)):
        fp = open("%s/%s" % (curdir, newrnpath), "r")
        newrn = fp.read()
        fp.close()
    else:
        newrn = ""
    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",
                  (doctype, ))
    docname = res[0][0]
    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None:
        if len(m_categ_search.groups()) > 0:
            ## Found a match for the category of this document. Get it:
            category = m_categ_search.group(1)
        else:
            ## This document has no category.
            category = "unknown"
    else:
        category = "unknown"
    ## Get the referee email address:
    if CFG_CERN_SITE:
        ## The referees system at CERN now works with listbox membership.
        ## List names should take the format
        ## "service-cds-referee-<doctype>-<category>@cern.ch"
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        referee_listname = "service-cds-referee-%s" % doctype.lower()
        if category != "":
            referee_listname += "-%s" % category.lower()
        referee_listname += "@cern.ch"
        addresses = referee_listname
    else:
        # Build referee's email address
        refereeaddress = ""
        # Try to retrieve the referee's email from the referee's database
        for user in acc_get_role_users(
                acc_get_role_id("referee_%s_%s" % (doctype, category))):
            refereeaddress += user[1] + ","
        # And if there is a general referee
        for user in acc_get_role_users(
                acc_get_role_id("referee_%s_*" % doctype)):
            refereeaddress += user[1] + ","
        refereeaddress = re.sub(",$", "", refereeaddress)
        # Creation of the mail for the referee
        otheraddresses = otheraddresses.replace("<CATEG>", category)
        addresses = ""
        if refereeaddress != "":
            addresses = refereeaddress + ","
        if otheraddresses != "":
            addresses += otheraddresses
        else:
            addresses = re.sub(",$", "", addresses)
    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        record_owners_list = record_owners.split("\n")
        record_owners_list = [email.lower().strip() \
                              for email in record_owners_list]
    else:
        # If the record owner cannot be retrieved from the metadata
        # (in case the record has not been inserted yet),
        # try to use the global variable emailvalue
        try:
            record_owners_list = [emailvalue]
        except NameError:
            record_owners_list = []
    record_owners = ",".join([owner for owner in record_owners_list])
    if record_owners != "":
        addresses += ",%s" % record_owners

    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The %s %s has been approved." % (docname, rn)
        mailbody += "\nIt will soon be accessible here:\n\n<%s/%s/%s>" % (
            CFG_SITE_URL, CFG_SITE_RECORD, sysno)
    else:
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname, rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue,
                                                        authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment
    # Send mail to referee if any recipients or copy to admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(
            FROMADDR,
            addresses,
            mailtitle,
            mailbody,
            copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,
            other_bibtasklet_arguments=['-I', str(sequence_id)])
    return ""
Code example #46
File: sequtils_texkey.py Project: tsgit/invenio
def fetch_records_modified_since(last_date):
    """Fetch all the recids of records modified since last_date in the system
    """
    return intbitset(run_sql("SELECT id FROM bibrec WHERE"
                             " modification_date>=%s", (last_date, )))
Code example #47
File: bibauthorid_hoover.py Project: tsgit/invenio
def hoover(authors=None,
           check_db_consistency=False,
           dry_run=False,
           packet_size=1000,
           dry_hepnames_run=False,
           open_tickets=False,
           queue='Test'):
    """The actions that hoover performs are the following:
    1. Find out the identifiers that belong to the authors(pids) in the database
    2. Find and pull all the signatures that have the same identifier as the author to the author
    3. Connect the profile of the author with the hepnames collection entry
    (optional) check the database to see if it is in a consistent state

    Keyword arguments:
    authors -- an iterable of authors to be hoovered
    check_db_consistency -- perform checks for the consistency of the database
    dry_run -- do not alter the database tables
    packet_size -- squeeze together the marcxml. This there are fewer bibupload
                   processes for the bibsched to run.
    dry_hepnames_run -- do not alter the hepnames collection
    queue -- the name of the queue to be used in the rt system for the tickets
    """
    global rt_ticket_report
    rt_ticket_report = open_tickets
    write_message("Packet size {0}".format(packet_size), verbose=1)
    write_message("Initializing hoover", verbose=1)
    write_message("Selecting records with identifiers...", verbose=1)
    recs = get_records_with_tag('100__i')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('100__j')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('700__i')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('700__j')
    task_sleep_now_if_required(can_stop_too=True)
    write_message("Found {0} records".format(len(set(recs))), verbose=2)
    recs = set(recs) & set(
        run_sql("select DISTINCT(bibrec) from aidPERSONIDPAPERS"))
    write_message("   out of which {0} are in BibAuthorID".format(len(recs)),
                  verbose=2)
    task_sleep_now_if_required(can_stop_too=True)

    records_with_id = set(rec[0] for rec in recs)

    destroy_partial_marc_caches()
    populate_partial_marc_caches(records_with_id, create_inverted_dicts=True)

    if rt_ticket_report:
        global ticket_hashes
        write_message("Ticketing system rt is used", verbose=9)
        write_message("Building hash cache for tickets for queue %s" % queue,
                      verbose=9)
        ticket_ids = BIBCATALOG_SYSTEM.ticket_search(None,
                                                     subject='[Hoover]',
                                                     queue=queue)
        write_message("Found %s existing tickets" % len(ticket_ids), verbose=9)
        for ticket_id in ticket_ids:
            task_sleep_now_if_required(can_stop_too=True)
            try:
                ticket_data = BIBCATALOG_SYSTEM.ticket_get_info(
                    None, ticket_id)
                ticket_hashes[ticket_data['subject'].split()
                              [-1]] = ticket_data, ticket_id, False
            except IndexError:
                write_message(
                    "Problem in subject of ticket {0}".format(ticket_id),
                    verbose=5)
        write_message("Found {0} tickets".format(len(ticket_hashes)),
                      verbose=2)

    task_sleep_now_if_required(can_stop_too=True)
    fdict_id_getters = {
        "INSPIREID": {
            'reliable': [
                get_inspire_id_of_author, get_inspireID_from_hepnames,
                lambda pid: get_inspireID_from_claimed_papers(
                    pid, intersection_set=records_with_id, queue=queue)
            ],
            'unreliable': [
                lambda pid: get_inspireID_from_unclaimed_papers(
                    pid, intersection_set=records_with_id, queue=queue)
            ],
            'signatures_getter':
            get_signatures_with_inspireID,
            'connection':
            dict_entry_for_hepnames_connector,
            'data_dicts': {
                'pid_mapping': defaultdict(set),
                'id_mapping': defaultdict(set)
            }
        },
        "ORCID": {
            'reliable': [  # get_orcid_id_of_author,
                # get_inspireID_from_hepnames,
                # lambda pid: get_inspireID_from_claimed_papers(pid,
                # intersection_set=records_with_id)]
            ],
            'unreliable': [
                # get_inspireID_from_hepnames,
                # lambda pid: get_inspireID_from_claimed_papers(pid,
                # intersection_set=records_with_id)]
            ],
            'signatures_getter': lambda x: list(),
            'connection': lambda pid, _id: None,
            'data_dicts': {
                'pid_mapping': defaultdict(set),
                'id_mapping': defaultdict(set)
            }
        }
    }

    if not authors:
        authors = get_existing_authors()

    write_message("Running on {0}".format(len(authors)), verbose=2)

    unclaimed_authors = defaultdict(set)
    hep_connector = HepnamesConnector(packet_size=packet_size,
                                      dry_hepnames_run=dry_hepnames_run)

    for index, pid in enumerate(authors):
        task_sleep_now_if_required(can_stop_too=True)
        write_message("Searching for reliable ids of person {0}".format(pid),
                      verbose=2)
        for identifier_type, functions in fdict_id_getters.iteritems():
            write_message("    Type: {0}".format(identifier_type, ), verbose=9)

            try:
                G = (func(pid) for func in functions['reliable'])
                if check_db_consistency:
                    results = filter(None, (func for func in G if func))
                    try:
                        # check if this is redundant
                        if len(results) == 1:
                            consistent_db = True
                        else:
                            consistent_db = len(set(results)) <= 1
                        res = results[0]
                    except IndexError:
                        res = None
                    else:
                        if not consistent_db:
                            res = None
                            raise InconsistentIdentifiersException(
                                'Inconsistent database', pid, identifier_type,
                                set(results))
                else:
                    res = next((func for func in G if func), None)
            except MultipleIdsOnSingleAuthorException as e:
                open_rt_ticket(e, queue=queue)
            except BrokenHepNamesRecordException as e:
                continue
            except InconsistentIdentifiersException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            else:
                if res:
                    HooverStats.new_ids_found += 1
                    write_message("   Found reliable id {0}".format(res, ),
                                  verbose=9)
                    fdict_id_getters[identifier_type]['data_dicts'][
                        'pid_mapping'][pid].add(res)
                    fdict_id_getters[identifier_type]['data_dicts'][
                        'id_mapping'][res].add(pid)
                else:
                    write_message("   No reliable id found", verbose=9)
                    unclaimed_authors[identifier_type].add(pid)

    write_message("Vacuuming reliable ids...", verbose=2)

    for identifier_type, data in fdict_id_getters.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        hep_connector.produce_connection_entry = fdict_id_getters[
            identifier_type]['connection']
        for pid, identifiers in data['data_dicts']['pid_mapping'].iteritems():
            write_message(
                "   Person {0} has reliable identifier(s) {1} ".format(
                    str(pid), str(identifiers)),
                verbose=9)
            try:
                if len(identifiers) == 1:
                    identifier = list(identifiers)[0]
                    write_message(
                        "        Considering  {0}".format(identifier),
                        verbose=9)

                    if len(data['data_dicts']['id_mapping'][identifier]) == 1:
                        if not dry_run:
                            rowenta = Vacuumer(pid)
                            signatures = data['signatures_getter'](identifier)
                            write_message(
                                "        Vacuuming {0} signatures! ".format(
                                    str(len(signatures))),
                                verbose=4)
                            for sig in signatures:
                                try:
                                    rowenta.vacuum_signature(sig)
                                except DuplicateClaimedPaperException as e:
                                    open_rt_ticket(e, queue=queue)
                                except DuplicateUnclaimedPaperException as e:
                                    unclaimed_authors[identifier_type].add(
                                        e.pid)
                            write_message(
                                "        Adding inspireid {0} to pid {1}".
                                format(identifier, pid),
                                verbose=3)
                            add_external_id_to_author(pid, identifier_type,
                                                      identifier)
                            hep_connector.add_connection(pid, identifier)

                    else:
                        raise MultipleAuthorsWithSameIdException(
                            "More than one authors with the same identifier",
                            data['data_dicts']['id_mapping'][identifier],
                            identifier)
                else:
                    raise MultipleIdsOnSingleAuthorException(
                        "More than one identifier on a single author ", pid,
                        'INSPIREID', identifiers)

            except MultipleAuthorsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleIdsOnSingleAuthorException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            write_message("   Done with {0}".format(pid, ), verbose=3)

    write_message("Vacuuming unreliable ids...", verbose=2)

    for identifier_type, functions in fdict_id_getters.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        hep_connector.produce_connection_entry = fdict_id_getters[
            identifier_type]['connection']
        for index, pid in enumerate(unclaimed_authors[identifier_type]):
            write_message(
                "Searching for unreliable ids of person {0}".format(pid),
                verbose=9)
            try:
                G = (func(pid) for func in functions['unreliable'])
                res = next((func for func in G if func), None)
                if res is None:
                    continue
            except MultipleIdsOnSingleAuthorException as e:
                continue
            except BrokenHepNamesRecordException as e:
                continue
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)

            HooverStats.new_ids_found += 1
            write_message(
                "   Person {0} has unreliable identifier {1} ".format(
                    str(pid), str(res)),
                verbose=9)

            if res in fdict_id_getters[identifier_type]['data_dicts'][
                    'id_mapping']:
                write_message(
                    "        Id {0} is already assigned to another person, skipping person {1} "
                    .format(str(res), pid))
                continue

            if not dry_run:
                rowenta = Vacuumer(pid)
                signatures = functions['signatures_getter'](res)
                for sig in signatures:
                    try:
                        rowenta.vacuum_signature(sig)
                    except DuplicateClaimedPaperException as e:
                        open_rt_ticket(e, queue=queue)
                    except DuplicateUnclaimedPaperException as e:
                        pass

                write_message("     Adding inspireid {0} to pid {1}".format(
                    res, pid),
                              verbose=3)
                add_external_id_to_author(pid, identifier_type, res)
                hep_connector.add_connection(pid, res)
            write_message("   Done with {0}".format(pid), verbose=3)
    hep_connector.execute_connection()
    # ticket_hashes maps subject hash -> (ticket_data, ticket_id, seen_flag),
    # so iterate over the values, not the keys.
    for ticket in ticket_hashes.itervalues():
        if ticket[2] == False:
            BIBCATALOG_SYSTEM.ticket_set_attribute(None, ticket[1], 'status',
                                                   'resolved')

    HooverStats.report_results()
    write_message("Terminating hoover", verbose=1)
Code example #48
        return errorMsg(str(e), req, ln=ln)

    if u_email == "guest" or u_email == "":
        return warningMsg(websubmit_templates.tmpl_warning_message(
            ln=ln,
            msg=_("Sorry, you must log in to perform this action."),
        ),
                          req,
                          ln=ln)

    if deletedId != "":
        t += deleteSubmission(deletedId, deletedAction, deletedDoctype,
                              u_email)

    # doctypes
    res = run_sql("select ldocname,sdocname from sbmDOCTYPE order by ldocname")
    doctypes = []
    for row in res:
        doctypes.append({
            'id': row[1],
            'name': row[0],
            'selected': (doctype == row[1]),
        })

    # submissions
    # request order default value
    reqorder = "sbmSUBMISSIONS.md DESC, lactname"
    # requested value
    if order == "actiondown":
        reqorder = "lactname ASC, sbmSUBMISSIONS.md DESC"
    elif order == "actionup":
Code example #49
def all_records():
    """Produce record IDs for all available records."""
    return intbitset(run_sql("SELECT id FROM bibrec"))
Code example #50
def CONFSUBMIT_Send_Approval_Request(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the referee in order to start the
    simple approval process.  This function is very CERN-specific and
    should be changed in case of external use.  Must be called after
    the Get_Report_Number function.

    Parameters:

       * addressesDAM: email addresses of the people who will receive
                       this email (comma separated list). this
                       parameter may contain the <CATEG> string. In
                       which case the variable computed from the
                       [categformatDAM] parameter replaces this
                       string.
                       eg.:"<CATEG>[email protected]"

       * categformatDAM: contains a regular expression used to compute
                         the category of the document given the
                         reference of the document.

                         eg.: if [categformatDAM]="TEST-<CATEG>-.*"
                         and the reference of the document is
                         "TEST-CATEGORY1-2001-001", then the computed
                         category equals "CATEGORY1"

       * titleFile: name of the file in which the title is stored.

       * submitteremailfile: name of the file in which the submitter's
                             email is stored.

       * submitternamefile: name of the file in which the submitter's
                            name is stored.

       * contactnamefile: name of the file in which the contact name is
                          stored.

       * contactemailfile: name of the file in which the contact email
                           is stored.

       * referencefile: name of the file in which the reference is stored.

       * affiliationfile: name of the file in which the affiliation is
                          stored.

       * regionfile: name of the file in which the region is stored.

       * rankfile: name of the file in which the rank is stored.

       * fieldfile: name of the file in which the field(s) are stored.

       * experimentsfile: name of the file in which the experiments are
                          stored.

       * urlfile: name of the file in which the URL is stored.

       * datefile: name of the file in which the date is stored.

       * abstractfile: name of the file in which the abstract is stored.

       * seriesnamefile: name of the file where the series name is stored.

       * seriesnumberfile: name of the file where the series number is stored.

       * directory: parameter used to create the URL to access the
                    files.
    """
    global rn, sysno
    # variables declaration
    doctype = re.search(".*/([^/]*)/([^/]*)/[^/]*$", curdir).group(2)
    otheraddresses = parameters['addressesDAM']
    categformat = parameters['categformatDAM']
    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None:
        if len(m_categ_search.groups()) > 0:
            ## Found a match for the category of this document. Get it:
            category = m_categ_search.group(1)
        else:
            ## This document has no category.
            category = "unknown"
    else:
        category = "unknown"

    # get record data
    date = get_file_contents(curdir, "date")
    title = get_file_contents(curdir,
                              parameters['titleFile']).replace("\n", "")
    title += " - %s" % date
    submitteremail = get_file_contents(
        curdir, parameters['submitteremailfile']).replace("\n", ", ")
    submittername = get_file_contents(curdir,
                                      parameters['submitternamefile']).replace(
                                          "\n", ", ")
    contactname = get_file_contents(curdir,
                                    parameters['contactnamefile']).replace(
                                        "\n", ", ")
    contactemail = get_file_contents(curdir,
                                     parameters['contactemailfile']).replace(
                                         "\n", ", ")
    subtitle = get_file_contents(curdir,
                                 parameters['subtitle']).replace("\n", ", ")
    city = get_file_contents(curdir,
                             parameters['cityfile']).replace("\n", ", ")
    country = get_file_contents(curdir,
                                parameters['countryfile']).replace("\n", ", ")
    state = get_file_contents(curdir,
                              parameters['statefile']).replace("\n", ", ")
    stdate = get_file_contents(curdir,
                               parameters['stdatefile']).replace("\n", ", ")
    fndate = get_file_contents(curdir,
                               parameters['fndatefile']).replace("\n", ", ")
    field = get_file_contents(curdir,
                              parameters['fieldfile']).replace("\n", ", ")
    url = get_file_contents(curdir, parameters['urlfile']).replace("\n", " ")
    shorttitle = get_file_contents(curdir, parameters['shorttitle']).replace(
        "\n", " ")
    keywords = get_file_contents(curdir,
                                 parameters['keywords']).replace("\n", " ")
    proceedings = get_file_contents(curdir, parameters['proceedings']).replace(
        "\n", " ")
    seriesname = get_file_contents(curdir,
                                   parameters['seriesnamefile']).replace(
                                       "\n", " ")
    seriesnumber = get_file_contents(curdir,
                                     parameters['seriesnumberfile']).replace(
                                         "\n", " ")
    abstract = get_file_contents(curdir, parameters['abstractfile'])

    # retrieve the access key used to build the referee's approval URL
    sth = run_sql("SELECT access FROM sbmAPPROVAL WHERE rn=%s", (rn, ))
    if len(sth) > 0:
        access = sth[0][0]
    else:
        access = ""  # no pending approval found; avoid a NameError below
    # Build referee's email address
    refereeaddress = ""
    # Try to retrieve the referee's email from the referee's database
    for user in acc_get_role_users(
            acc_get_role_id("referee_%s_%s" % (doctype, category))):
        refereeaddress += user[1] + ","
    # And if there are general referees
    for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
        refereeaddress += user[1] + ","
    refereeaddress = re.sub(",$", "", refereeaddress)
    # Creation of the mail for the referee
    addresses = ""
    if refereeaddress != "":
        addresses = refereeaddress + ","
    if otheraddresses != "":
        addresses += otheraddresses
    else:
        addresses = re.sub(",$", "", addresses)
    record_url = "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, sysno)
    title_referee = "Request for approval of %s" % rn
    mail_referee = """
The document %(rn)s has been submitted to the Conferences database and it will appear here:\n%(recordlink)s.
To approve/reject the document, you should go to this URL:\n%(access)s\n

Title: %(title)s
Date: from %(stdate)s to %(fndate)s
Place: %(city)s, %(state)s, %(country)s
Series name: %(seriesname)s
Series number: %(seriesnumber)s

URL: %(url)s

Field(s): %(field)s

Description:
%(abstract)s

Contact name(s): %(contactname)s
Contact email(s): %(contactemail)s
Submitter name(s): %(submittername)s
Submitter email(s): %(submitteremail)s
    """ % {
        'rn': rn,
        'title': title,
        'submitteremail': submitteremail,
        'submittername': submittername,
        'contactname': contactname,
        'contactemail': contactemail,
        'field': field,
        'city': city,
        'state': state,
        'country': country,
        'stdate': stdate,
        'fndate': fndate,
        'url': url,
        'subtitle': subtitle,
        'shorttitle': shorttitle,
        'proceedings': proceedings,
        'keywords': keywords,
        'access': "%s/approve.py?access=%s" % (CFG_SITE_URL, access),
        'recordlink': record_url,
        'abstract': abstract,
        'seriesname': seriesname,
        'seriesnumber': seriesnumber
    }
    # Send mail to referee
    send_email(fromaddr=CFG_WEBSUBMIT_CONF_FROMADDR, toaddr=CFG_WEBSUBMIT_CONF_SUPPORT_EMAIL, subject=title_referee, \
               content=mail_referee, footer=email_footer(support_email=CFG_WEBSUBMIT_CONF_SUPPORT_EMAIL),
               copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN, bccaddr=addresses, replytoaddr=contactemail)
    return ""
Code example #51
def get_expired_person_ids(expire_delay_days=CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_BIBSCHED):
    """ Returns pids with expired caches. """
    keys = run_sql("select object_key from wapCACHE where object_status=%s or last_updated < "
                   "timestampadd(day, -%s, now())", ('Expired', expire_delay_days))
    keys = [int(x[0].split(':')[1]) for x in set(keys) if ':' in x[0]]
    return keys
Code example #52
def store_last_updated(fmt, iso_date):
    sql = "UPDATE format SET last_updated = %s " \
          "WHERE code = %s AND (last_updated < %s or last_updated IS NULL)"
    run_sql(sql, (iso_date, fmt.lower(), iso_date))
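
Because of the WHERE clause, store_last_updated() only ever moves the timestamp forward, so callers can replay dates in any order without checking first; a sketch, where the format code and the dates are illustrative:

store_last_updated('hx', '2013-05-02 10:00:00')  # sets last_updated
store_last_updated('hx', '2013-05-01 09:00:00')  # older date: row left untouched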
Code example #53
def expire_cache_element(name, key):
    """ Sets cache element status to 'Expired'. """
    run_sql("update wapCACHE set object_status=%s where "
            "object_name=%s and object_key=%s", ('Expired', str(name), str(key)))
Code example #54
 def tearDown(self):
     """Remove inserted comments"""
     run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""",
             (self.public_comid, ))
     run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""",
             (self.restr_comid_1, ))
     run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""",
             (self.restr_comid_2, ))
     if self.restr_comid_3:
         run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""",
                 (self.restr_comid_3, ))
     run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""",
             (self.restr_comid_4, ))
     run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""",
             (self.restr_comid_5, ))
     run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""",
             (self.deleted_comid, ))
Code example #55
def precache_element(name, key):
    """ Updates the last_updated flag of a cache to prevent parallel recomputation of the same cache. """
    run_sql("insert into wapCACHE (object_name,object_key,last_updated,object_status) values (%s,%s,now(),%s) "
            "on duplicate key update last_updated=now(),object_status=%s" ,
            (str(name), str(key), 'Precached', 'Precached'))
Code example #56
def expire_all_cache_for_person(person_id):
    """ Expires all caches for person n.canonical.1 """
    run_sql("DELETE FROM wapCACHE WHERE object_key=%s", ('pid:' + str(person_id),))
Code example #57
    def test_process_one(self):
        from invenio import arxiv_pdf_checker
        from invenio.arxiv_pdf_checker import process_one, \
                                              FoundExistingPdf, \
                                              fetch_arxiv_pdf_status, \
                                              STATUS_OK, \
                                              AlreadyHarvested
        arxiv_pdf_checker.CFG_ARXIV_URL_PATTERN = EXAMPLE_PDF_URL_1 + "?%s%s"

        # Make sure there is no harvesting state stored or this test will fail
        run_sql('DELETE FROM bibARXIVPDF WHERE id_bibrec = %s', [self.recid])

        def look_for_fulltext(recid):
            """Look for fulltext pdf (bibdocfile) for a given recid"""
            rec_info = BibRecDocs(recid)
            docs = rec_info.list_bibdocs()

            for doc in docs:
                for d in doc.list_all_files():
                    if d.get_format().strip('.') in ['pdf', 'pdfa', 'PDF']:
                        try:
                            yield doc, d
                        except InvenioBibDocFileError:
                            pass

        # Remove all pdfs from record 3
        for doc, docfile in look_for_fulltext(self.recid):
            doc.delete_file(docfile.get_format(), docfile.get_version())
            if not doc.list_all_files():
                doc.expunge()

        try:
            process_one(self.recid)
        finally:
            self.clean_bibtask()

        # Check for existing pdf
        docs = list(look_for_fulltext(self.recid))
        if not docs:
            self.fail()

        # Check that harvesting state is stored
        status, version = fetch_arxiv_pdf_status(self.recid)
        self.assertEqual(status, STATUS_OK)
        self.assertEqual(version, 1)

        try:
            process_one(self.recid)
            self.fail()
        except AlreadyHarvested:
            pass

        # Even though the version is changed the md5 is the same
        self.arxiv_version = 2
        try:
            process_one(self.recid)
            self.fail()
        except FoundExistingPdf:
            pass

        arxiv_pdf_checker.CFG_ARXIV_URL_PATTERN = EXAMPLE_PDF_URL_2 + "?%s%s"
        self.arxiv_version = 3
        try:
            process_one(self.recid)
        finally:
            self.clean_bibtask()

        # The PDF is already attached, so running process_one again
        # must raise AlreadyHarvested
        try:
            process_one(self.recid)
            self.fail()
        except AlreadyHarvested:
            run_sql('DELETE FROM bibARXIVPDF WHERE id_bibrec = %s',
                    [self.recid])

        # Restore state
        for doc, docfile in docs:
            doc.delete_file(docfile.get_format(), docfile.get_version())
            if not doc.list_all_files():
                doc.expunge()

        self.clean_bibupload_fft()
Code example #58
def cache_element(name, key, value):
    """ Insert an element into cache or update already present element. """
    run_sql("insert into wapCACHE (object_name,object_key,object_value,object_status,last_updated) values (%s,%s,%s,%s,now()) "
            "on duplicate key update object_value=%s,last_updated=now(),object_status=%s" ,
            (str(name), str(key), str(value), 'UpToDate', str(value), 'UpToDate'))
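
Taken together, precache_element(), cache_element() and expire_cache_element() implement a small state machine over wapCACHE ('Precached' -> 'UpToDate' -> 'Expired'). A sketch of the life cycle follows; compute_value() is a hypothetical stand-in for the real computation.

def refresh_cache(name, key):
    precache_element(name, key)        # claim the slot so parallel workers skip it
    value = compute_value(name, key)   # hypothetical expensive computation
    cache_element(name, key, value)    # store the result, status 'UpToDate'

def invalidate(name, key):
    expire_cache_element(name, key)    # status 'Expired'; later picked up by
                                       # get_expired_person_ids() and recomputed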
Code example #59
 def clean_bibupload_fft(self):
     run_sql("""DELETE FROM schTASK
                WHERE proc = 'bibupload:FFT'
                ORDER BY id DESC LIMIT 1""")
Code example #60
def rank_by_method(rank_method_code, lwords, hitset, rank_limit_relevance,
                   verbose):
    """Ranking of records based on predetermined values.
    input:
    rank_method_code - the code of the method, from the name field in
                       rnkMETHOD, used to get predetermined values from
                       rnkMETHODDATA
    lwords - a list of words from the query
    hitset - a list of hits for the query found by search_engine
    rank_limit_relevance - show only records with a rank value above this
    verbose - verbose value
    output:
    reclist - a list of sorted records, with unsorted added to the end: [[23,34], [344,24], [1,01]]
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value"""

    global voutput
    voutput = ""
    rnkdict = run_sql(
        "SELECT relevance_data FROM rnkMETHODDATA,rnkMETHOD where rnkMETHOD.id=id_rnkMETHOD and rnkMETHOD.name=%s",
        (rank_method_code, ))

    if not rnkdict:
        return (None, "Warning: Could not load ranking data for method %s." %
                rank_method_code, "", voutput)

    max_recid = 0
    res = run_sql("SELECT max(id) FROM bibrec")
    if res and res[0][0]:
        max_recid = int(res[0][0])

    lwords_hitset = None
    # find which docs to search based on ranges; should be done in search_engine
    for j in range(0, len(lwords)):
        if lwords[j] and lwords[j][:6] == "recid:":
            if not lwords_hitset:
                lwords_hitset = intbitset()
            lword = lwords[j][6:]
            if "->" in lword:
                lword = lword.split("->")
                if int(lword[0]) >= max_recid or int(
                        lword[1]) >= max_recid + 1:
                    return (None,
                            "Warning: Given record IDs are out of range.", "",
                            voutput)
                for i in range(int(lword[0]), int(lword[1])):
                    lwords_hitset.add(int(i))
            elif int(lword) < max_recid + 1:
                lwords_hitset.add(int(lword))
            else:
                return (None, "Warning: Given record IDs are out of range.",
                        "", voutput)

    rnkdict = deserialize_via_marshal(rnkdict[0][0])
    if verbose > 0:
        voutput += "<br />Running rank method: %s, using rank_by_method function in bibrank_record_sorter<br />" % rank_method_code
        voutput += "Ranking data loaded, size of structure: %s<br />" % len(
            rnkdict)
    lrecIDs = list(hitset)

    if verbose > 0:
        voutput += "Number of records to rank: %s<br />" % len(lrecIDs)
    reclist = []
    reclist_addend = []

    if not lwords_hitset:  # rank all docs; could this be sped up with something other than a for loop?
        for recID in lrecIDs:
            if recID in rnkdict:
                reclist.append((recID, rnkdict[recID]))
                del rnkdict[recID]
            else:
                reclist_addend.append((recID, 0))
    else:  # rank docs in hitset; could this be sped up with something other than a for loop?
        for recID in lwords_hitset:
            if recID in rnkdict and recID in hitset:
                reclist.append((recID, rnkdict[recID]))
                del rnkdict[recID]
            elif recID in hitset:
                reclist_addend.append((recID, 0))

    if verbose > 0:
        voutput += "Number of records ranked: %s<br />" % len(reclist)
        voutput += "Number of records not ranked: %s<br />" % len(
            reclist_addend)

    reclist.sort(lambda x, y: cmp(x[1], y[1]))
    return (reclist_addend + reclist, methods[rank_method_code]["prefix"],
            methods[rank_method_code]["postfix"], voutput)
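
A hedged usage sketch for rank_by_method(): the rank method name must exist both in rnkMETHOD and in the module-level methods dict, and the 'citation' name, query words, and hitset here are illustrative assumptions.

from invenio.intbitset import intbitset

def rank_range(hitset):
    # Rank only recids 1..99 of the given hitset (any iterable of recids)
    # with an assumed 'citation' rank method.
    reclist, prefix, postfix, voutput = rank_by_method(
        'citation', ['recid:1->100'], intbitset(hitset),
        rank_limit_relevance=0, verbose=0)
    return reclist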