def _check_table_update_time(self, tablename):
    """Helper function to check update time of TABLENAME."""
    # detect MySQL version number:
    res = dbquery.run_sql("SELECT VERSION()")
    mysql_server_version = res[0][0]
    if mysql_server_version.startswith("5."):
        # MySQL-5 provides INFORMATION_SCHEMA:
        query = """SELECT UPDATE_TIME FROM INFORMATION_SCHEMA.TABLES
                    WHERE table_name='%s' AND table_schema='%s'""" % \
                (tablename, dbquery.CFG_DATABASE_NAME)
        tablename_update_time = str(dbquery.run_sql(query)[0][0])
    elif mysql_server_version.startswith("4.1"):
        # MySQL-4.1 has it on 12th position:
        query = """SHOW TABLE STATUS LIKE '%s'""" % tablename
        tablename_update_time = str(dbquery.run_sql(query)[0][12])
    elif mysql_server_version.startswith("4.0"):
        # MySQL-4.0 has it on 11th position:
        query = """SHOW TABLE STATUS LIKE '%s'""" % tablename
        tablename_update_time = str(dbquery.run_sql(query)[0][11])
    else:
        tablename_update_time = "MYSQL SERVER VERSION NOT DETECTED"
    # compare it with the one detected by the function:
    self.assertEqual(tablename_update_time,
                     dbquery.get_table_update_time(tablename))
def check_records(records):
    """ Add INSPIRE ID if missing """
    _init_db()
    for record in records:
        if 'INSPIRE' in record_get_field_values(record, '035', code='9'):
            ## Already has the link. Good! Let's go on.
            continue
        doi = record_get_field_value(record, '024', ind1='7', code='a')
        arxiv = record_get_field_value(record, '037', code='a')
        query = 'doi:"%s"' % doi
        if arxiv:
            query += ' or %s' % arxiv
        inspireid = run_sql("SELECT inspireid FROM doi2inspireid WHERE doi=%s", (doi,))
        if inspireid:
            inspireid = inspireid[0][0]
        else:
            sleep(2)
            inspireid = [int(elem.strip()) for elem in urlopen(
                create_url("http://inspirehep.net/search",
                           {'cc': 'HEP', 'of': 'id', 'p': query})
                ).read().strip()[1:-1].split(',') if elem.strip()]
            if len(inspireid) == 1:
                inspireid = inspireid[0]
                try:
                    run_sql("INSERT INTO doi2inspireid(doi, inspireid, creation_date) VALUES(%s, %s, NOW())", (doi, inspireid))
                except IntegrityError, err:
                    other_doi = run_sql("SELECT doi FROM doi2inspireid WHERE inspireid=%s", (inspireid, ))[0][0]
                    record.warn("This record with doi %s is connected with INSPIRE id %s which is already connected to doi %s" % (doi, inspireid, other_doi))
                    continue
            else:
                record.warn("More than one INSPIRE id matches this record: %s" % inspireid)
                continue
def send_message(uids_to, msgid, status=CFG_WEBMESSAGE_STATUS_CODE['NEW']):
    """
    Send message to uids
    @param uids_to: sequence of user ids
    @param msgid: id of message
    @param status: status of the message (single char, see webmessage_config.py).
    @return: a list of users having their mailbox full
    """
    if not((type(uids_to) is list) or (type(uids_to) is tuple)):
        uids_to = [uids_to]
    user_problem = []
    if len(uids_to) > 0:
        users_quotas = check_quota(CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES - 1)
        query = """INSERT INTO user_msgMESSAGE
                      (id_user_to, id_msgMESSAGE, status)
                   VALUES """
        fixed_value = ",%s,%s)"
        query_params = []

        def not_users_quotas_has_key(key):
            """ not(is key in users over quota?)"""
            return not(users_quotas.has_key(key))

        user_ids_to = filter(not_users_quotas_has_key, uids_to)
        user_problem = filter(users_quotas.has_key, uids_to)
        if len(user_ids_to) > 0:
            for uid_to in user_ids_to[0:-1]:
                query += "(%%s%s," % fixed_value
                query_params += [uid_to, msgid, status]
            query += "(%%s%s" % fixed_value
            query_params += [user_ids_to[-1], msgid, status]
            run_sql(query, tuple(query_params))
    return user_problem
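# Usage sketch for send_message() (illustrative only: the uids and message id
# below are made up, and the webmessage module context above is assumed):
full_mailboxes = send_message([4, 7], 42)
if full_mailboxes:
    print "Could not deliver to users over quota: %s" % full_mailboxes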
def basket_display():
    """
    Display basket statistics.
    """
    tbl_name = get_customevent_table("baskets")
    if not tbl_name:
        # custom event baskets not defined, so return empty output:
        return []
    try:
        res = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
        days = (res[-1][0] - res[0][0]).days + 1
        public = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display_public'" % tbl_name)[0][0]
        users = run_sql("SELECT COUNT(DISTINCT user) FROM %s" % tbl_name)[0][0]
        adds = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'add'" % tbl_name)[0][0]
        displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display' OR action = 'display_public'" % tbl_name)[0][0]
        hits = adds + displays
        average = hits / days
        res = [("Basket page hits", hits)]
        res.append((" Average per day", average))
        res.append((" Unique users", users))
        res.append((" Additions", adds))
        res.append((" Public", public))
    except IndexError:
        res = []
    return res
def update_user_inbox_for_reminders(uid):
    """
    Updates user's inbox with any reminders that should have arrived
    @param uid: user id
    @return: integer number of new expired reminders
    """
    now = convert_datestruct_to_datetext(localtime())
    reminder_status = CFG_WEBMESSAGE_STATUS_CODE['REMINDER']
    new_status = CFG_WEBMESSAGE_STATUS_CODE['NEW']
    query1 = """SELECT m.id
                FROM msgMESSAGE m, user_msgMESSAGE um
                WHERE um.id_user_to=%s AND
                      um.id_msgMESSAGE=m.id AND
                      m.received_date<=%s AND
                      um.status like binary %s
             """
    params1 = (uid, now, reminder_status)
    res_ids = run_sql(query1, params1)
    out = len(res_ids)
    if (out > 0):
        query2 = """UPDATE user_msgMESSAGE
                    SET status=%s
                    WHERE id_user_to=%s AND ("""
        query_params = [new_status, uid]
        for msg_id in res_ids[0:-1]:
            query2 += "id_msgMESSAGE=%s OR "
            query_params.append(msg_id[0])
        query2 += "id_msgMESSAGE=%s)"
        query_params.append(res_ids[-1][0])
        run_sql(query2, tuple(query_params))
    return out
def filter_out_based_on_date_range(recids, fromdate="", untildate="", set_spec=None):
    """ Filter out recids based on date range."""
    if fromdate:
        fromdate = normalize_date(fromdate, "T00:00:00Z")
    else:
        fromdate = get_earliest_datestamp()
    fromdate = utc_to_localtime(fromdate)

    if untildate:
        untildate = normalize_date(untildate, "T23:59:59Z")
    else:
        untildate = get_latest_datestamp()
    untildate = utc_to_localtime(untildate)

    if set_spec is not None:
        ## either it has a value or it is empty, thus meaning all records
        last_updated = get_set_last_update(set_spec)
        if last_updated is not None:
            last_updated = utc_to_localtime(last_updated)
            if last_updated > fromdate:
                fromdate = utc_to_localtime(get_earliest_datestamp())

    recids = intbitset(recids)  ## Let's clone :-)

    if fromdate and untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s", (fromdate, untildate)))
    elif fromdate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date >= %s", (fromdate, )))
    elif untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date <= %s", (untildate, )))
    return recids - get_all_restricted_recids()
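# Usage sketch for filter_out_based_on_date_range() (illustrative only: the
# record ids and dates are made up; intbitset and the OAI helpers used above
# are assumed to be available in this module):
visible_recids = filter_out_based_on_date_range(intbitset([1, 2, 3]),
                                                "2012-01-01", "2012-12-31")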
def check_if_need_to_delete_message_permanently(msg_ids):
    """
    Checks if a list of messages exist in anyone's inbox, if not,
    delete them permanently
    @param msg_ids: sequence of message ids
    @return: number of deleted messages
    """
    if not((type(msg_ids) is list) or (type(msg_ids) is tuple)):
        msg_ids = [msg_ids]
    query1 = """SELECT count(id_msgMESSAGE)
                FROM user_msgMESSAGE
                WHERE id_msgMESSAGE=%s"""
    messages_to_delete = []
    for msg_id in msg_ids:
        nb_users = int(run_sql(query1, (msg_id,))[0][0])
        if nb_users == 0:
            messages_to_delete.append(int(msg_id))
    if len(messages_to_delete) > 0:
        query2 = """DELETE FROM msgMESSAGE WHERE"""
        params2 = []
        for msg_id in messages_to_delete[0:-1]:
            query2 += " id=%s OR"
            params2.append(msg_id)
        query2 += " id=%s"
        params2.append(messages_to_delete[-1])
        run_sql(query2, tuple(params2))
    return len(messages_to_delete)
def external_user_warning(uid):
    """
    Returns 'email_auto_generated' if the email of the user is auto-generated.
    @param uid: user id
    @type uid: int
    @rtype: ''|'email_auto_generated'
    """
    from invenio.access_control_config import CFG_TEMP_EMAIL_ADDRESS

    query = """
        SELECT email
        FROM user
        WHERE id=%s
    """
    params = (uid, )
    email = run_sql(query, params)[0][0]

    regexp = re.compile(CFG_TEMP_EMAIL_ADDRESS % "\w+", re.IGNORECASE)

    query = """
        SELECT *
        FROM userEXT
        WHERE id_user=%s
    """

    if run_sql(query, params) and re.match(regexp, email):
        return 'email_auto_generated'

    return ''
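# Usage sketch for external_user_warning() (illustrative only: uid 123 is made up):
if external_user_warning(123) == 'email_auto_generated':
    print "User 123 still has an auto-generated email address"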
def store_citation_warning(warning_type, cit_info):
    r = run_sql("""SELECT 1 FROM rnkCITATIONDATAERR
                   WHERE type = %s
                   AND citinfo = %s""", (warning_type, cit_info))
    if not r:
        run_sql("""INSERT INTO rnkCITATIONDATAERR (type, citinfo)
                   VALUES (%s, %s)""", (warning_type, cit_info))
def is_method_valid(colID, rank_method_code):
    """
    Check if RANK_METHOD_CODE method is valid for the collection given.
    If colID is None, then check for existence regardless of collection.
    """
    if colID is None:
        return run_sql("SELECT COUNT(*) FROM rnkMETHOD WHERE name=%s",
                       (rank_method_code,))[0][0]
    enabled_colls = dict(run_sql(
        "SELECT id_collection, score FROM collection_rnkMETHOD, rnkMETHOD WHERE id_rnkMETHOD=rnkMETHOD.id AND name=%s",
        (rank_method_code,)))
    try:
        colID = int(colID)
    except TypeError:
        return 0
    if colID in enabled_colls:
        return 1
    else:
        while colID:
            colID = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s",
                            (colID,))
            if colID and colID[0][0] in enabled_colls:
                return 1
            elif colID:
                colID = colID[0][0]
    return 0
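# Usage sketch for is_method_valid() (illustrative only: the collection id and
# rank method code below are made up):
if is_method_valid(2, "wrd"):
    print "ranking method 'wrd' is enabled for collection 2"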
def iterate_over_new(list, fmt):
    "Iterate over list of IDs"
    global total_rec

    formatted_records = ''  # (string-)List of formatted record of an iteration
    tbibformat = 0  # time taken up by external call
    tbibupload = 0  # time taken up by external call
    start_date = task_get_task_param('task_starting_time')  # Time at which the record was formatted

    tot = len(list)
    count = 0
    for recID in list:
        t1 = os.times()[4]
        start_date = time.strftime('%Y-%m-%d %H:%M:%S')
        formatted_record = zlib.compress(format_record(recID, fmt, on_the_fly=True))
        if run_sql('SELECT id FROM bibfmt WHERE id_bibrec=%s AND format=%s', (recID, fmt)):
            run_sql('UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s',
                    (start_date, formatted_record, recID, fmt))
        else:
            run_sql('INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, %s, %s, %s)',
                    (recID, fmt, start_date, formatted_record))
        t2 = os.times()[4]
        tbibformat += (t2 - t1)
        count += 1
        if (count % 100) == 0:
            write_message(" ... formatted %s records out of %s" % (count, tot))
            task_update_progress('Formatted %s out of %s' % (count, tot))
            task_sleep_now_if_required(can_stop_too=True)
    if (tot % 100) != 0:
        write_message(" ... formatted %s records out of %s" % (count, tot))
    return (tot, tbibformat, tbibupload)
def get_visible_group_list(uid, pattern=""):
    """List the groups the user can join (i.e. groups the user is not already
    a member of, regardless of the user's status).
    @return: groups {id : name} whose name matches pattern
    """
    grpID = []
    groups = {}
    # list the groups the user is a member of
    query = """SELECT distinct(id_usergroup)
               FROM user_usergroup
               WHERE id_user=%s """
    uid = int(uid)
    res = run_sql(query, (uid,))
    map(lambda x: grpID.append(int(x[0])), res)
    query2 = """SELECT id,name
                FROM usergroup
                WHERE (join_policy='%s' OR join_policy='%s')""" % (
        CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEOPEN'],
        CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEMAIL'])
    if len(grpID) == 1:
        query2 += """ AND id!=%i""" % grpID[0]
    elif len(grpID) > 1:
        query2 += """ AND id NOT IN %s""" % str(tuple(grpID))
    if pattern:
        try:
            res2 = run_sql(query2 + """ AND name RLIKE %s ORDER BY name""", (pattern,))
        except OperationalError:
            res2 = ()
    else:
        res2 = run_sql(query2 + """ ORDER BY name""")
    map(lambda x: groups.setdefault(x[0], x[1]), res2)
    return groups
def insert_new_group(uid, new_group_name, new_group_description, join_policy,
                     login_method='INTERNAL'):
    """Create a new group and affiliate a user."""
    query1 = """INSERT INTO usergroup (id, name, description, join_policy,
                                       login_method)
                VALUES (NULL,%s,%s,%s,%s)
             """
    params1 = (new_group_name, new_group_description, join_policy, login_method)
    res1 = run_sql(query1, params1)

    date = convert_datestruct_to_datetext(localtime())
    uid = int(uid)
    query2 = """INSERT INTO user_usergroup (id_user, id_usergroup, user_status,
                                            user_status_date)
                VALUES (%s,%s,'A',%s)
             """
    params2 = (uid, res1, date)
    res2 = run_sql(query2, params2)
    return res1
def save(self):
    """ Save the session to the database. """
    if not self._invalid:
        session_dict = {"_data": self.copy(),
                        "_created": self._created,
                        "_accessed": self._accessed,
                        "_timeout": self._timeout,
                        "_http_ip": self._http_ip,
                        "_https_ip": self._https_ip,
                        "_remember_me": self._remember_me}
        session_key = self._sid
        session_object = cPickle.dumps(session_dict, -1)
        session_expiry = time.time() + self._timeout + \
            CFG_WEBSESSION_ONE_DAY
        uid = self.get('uid', -1)
        run_sql("""
            INSERT session(
                session_key,
                session_expiry,
                session_object,
                uid
            ) VALUE(%s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                session_expiry=%s,
                session_object=%s,
                uid=%s
        """, (session_key, session_expiry, session_object, uid,
              session_expiry, session_object, uid))
def prepate_doi_table():
    run_sql("""CREATE TABLE IF NOT EXISTS doi (
                 doi varchar(255) NOT NULL,
                 creation_date datetime NOT NULL,
                 PRIMARY KEY doi(doi),
                 KEY (creation_date)
               ) ENGINE=MyISAM;""")
def remove_kb_mapping(kb_name, key):
    """Removes mapping with given key from given kb"""
    k_id = get_kb_id(kb_name)
    run_sql("""DELETE FROM knwKBRVAL
               WHERE m_key = %s AND id_knwKB = %s""",
            (key, k_id))
    return True
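# Usage sketch for remove_kb_mapping() (illustrative only: the knowledge base
# name and key below are made up):
remove_kb_mapping("JOURNALS", "Phys. Rev. D")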
def get_external_links_from_db(ref, dict_of_ids, reference_indicator):
    """returns a dictionary containing the number of
    external links for each recid
    external link=citation that is not in our database """
    ext_links = {}
    dict_all_ref = {}
    for recid in dict_of_ids:
        dict_all_ref[recid] = 0
        ext_links[dict_of_ids[recid]] = 0
    reference_db_id = reference_indicator[0:2]
    reference_tag_regex = reference_indicator + "[a-z]"
    tag_list = run_sql("select id from bib" + reference_db_id + \
                       "x where tag RLIKE %s", (reference_tag_regex, ))
    tag_set = set()
    for tag in tag_list:
        tag_set.add(tag[0])
    ref_list = run_sql("select id_bibrec, id_bibxxx, field_number from \
                       bibrec_bib" + reference_db_id + "x group by \
                       id_bibrec, field_number")
    for item in ref_list:
        recid = int(item[0])
        id_bib = int(item[1])
        if recid in dict_of_ids and id_bib in tag_set:
            dict_all_ref[recid] += 1
    for recid in dict_of_ids:
        total_links = dict_all_ref[recid]
        internal_links = ref[dict_of_ids[recid]]
        ext_links[dict_of_ids[recid]] = total_links - internal_links
        if ext_links[dict_of_ids[recid]] < 0:
            ext_links[dict_of_ids[recid]] = 0
    write_message("External link information extracted", verbose=2)
    write_message("External links: %s" % str(ext_links), verbose=9)
    return ext_links
def update_submission_status(id_record, status, remote_id=''):
    '''
    update the submission field with the new status of the submission
    @param (id_record) : id of the row to update
    @param (status) : new value to set in the status field
    @return : true if update done, else, false
    '''
    current_date = time.strftime("%Y-%m-%d %H:%M:%S")

    if status == CFG_SUBMISSION_STATUS_PUBLISHED and remote_id != '':
        qstr = '''UPDATE swrCLIENTDATA SET status=%s, id_remote=%s, ''' \
               '''publication_date=%s, last_update=%s WHERE id=%s '''
        qres = run_sql(qstr, (status, remote_id, current_date, current_date,
                              id_record, ))

    if status == CFG_SUBMISSION_STATUS_REMOVED:
        qstr = '''UPDATE swrCLIENTDATA SET status=%s, removal_date=%s, ''' \
               '''last_update=%s WHERE id=%s '''
        qres = run_sql(qstr, (status, current_date, current_date, id_record, ))
    else:
        qstr = '''UPDATE swrCLIENTDATA SET status=%s, last_update=%s ''' \
               '''WHERE id=%s '''
        qres = run_sql(qstr, (status, current_date, id_record, ))

    return qres
def update_kb(kb_name, new_name, new_description):
    """Updates given kb with new name and new description"""
    k_id = get_kb_id(kb_name)
    run_sql("""UPDATE knwKB
               SET name = %s , description = %s
               WHERE id = %s""", (new_name, new_description, k_id))
    return True
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection,
                oai_set_description, oai_set_p1, oai_set_f1, oai_set_m1,
                oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3,
                oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2):
    """Add a definition into the OAI Repository"""
    try:
        if not oai_set_spec:
            oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        set_definition = 'c=' + oai_set_collection + ';' + \
                         'p1=' + oai_set_p1 + ';' + \
                         'f1=' + oai_set_f1 + ';' + \
                         'm1=' + oai_set_m1 + ';' + \
                         'op1=' + oai_set_op1 + ';' + \
                         'p2=' + oai_set_p2 + ';' + \
                         'f2=' + oai_set_f2 + ';' + \
                         'm2=' + oai_set_m2 + ';' + \
                         'op2=' + oai_set_op2 + ';' + \
                         'p3=' + oai_set_p3 + ';' + \
                         'f3=' + oai_set_f3 + ';' + \
                         'm3=' + oai_set_m3 + ';'

        run_sql("""INSERT INTO oaiREPOSITORY (id, setName, setSpec,
                       setCollection, setDescription, setDefinition,
                       setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3)
                   VALUES (0, %s, %s, %s, %s, %s, NULL,
                       %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                (oai_set_name, oai_set_spec, oai_set_collection,
                 oai_set_description, set_definition, oai_set_p1,
                 oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2,
                 oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3))
        return (1, "")
    except StandardError, e:
        register_exception(alert_admin=True)
        return (0, e)
def get_all_remote_server(id_server):
    '''
    This function selects the name of all remote services implementing
    the SWORD protocol. It returns a list of dictionaries containing
    three fields: id, name and host
    @return (remote_server) : list of dictionaries (id - name - host)
                              of each remote server
    '''
    remote_servers = []

    if id_server == '':
        qstr = '''SELECT id, name, host FROM swrREMOTESERVER'''
        qres = run_sql(qstr)
    else:
        qstr = '''SELECT id, name, host FROM swrREMOTESERVER WHERE id=%s'''
        qres = run_sql(qstr, (id_server, ))

    for res in qres:
        remote_server = {}
        remote_server['id'] = res[0]
        remote_server['name'] = res[1]
        remote_server['host'] = res[2]
        remote_servers.append(remote_server)

    return remote_servers
def cli_clean_revisions(recid, dry_run=True, verbose=True):
    """Clean revisions of the given recid, by removing duplicate revisions
    that do not change the content of the record."""
    if recid == '*':
        recids = intbitset(run_sql("SELECT DISTINCT id_bibrec FROM hstRECORD"))
    else:
        try:
            recids = [int(recid)]
        except ValueError:
            print 'ERROR: record ID must be integer, not %s.' % recid
            sys.exit(1)
    for recid in recids:
        all_revisions = run_sql("SELECT marcxml, job_id, job_name, job_person, job_date FROM hstRECORD WHERE id_bibrec=%s ORDER BY job_date ASC",
                                (recid,))
        previous_rec = {}
        deleted_revisions = 0
        for marcxml, job_id, job_name, job_person, job_date in all_revisions:
            try:
                current_rec = create_record(zlib.decompress(marcxml))[0]
            except Exception:
                print >> sys.stderr, "ERROR: corrupted revisions found. Please run %s --fix-revisions '*'" % sys.argv[0]
                sys.exit(1)
            if records_identical(current_rec, previous_rec):
                deleted_revisions += 1
                if not dry_run:
                    run_sql("DELETE FROM hstRECORD WHERE id_bibrec=%s AND job_id=%s AND job_name=%s AND job_person=%s AND job_date=%s",
                            (recid, job_id, job_name, job_person, job_date))
            previous_rec = current_rec
        if verbose and deleted_revisions:
            print "record %s: deleted %s duplicate revisions out of %s" % (recid, deleted_revisions, len(all_revisions))
    if verbose:
        print "DONE"
def acc_firerole_extract_emails(firerole_def_obj):
    """
    Best effort function to extract all the possible email addresses
    authorized by the given firerole.
    """
    authorized_emails = set()
    try:
        default_allow_p, rules = firerole_def_obj
        for (allow_p, not_p, field, expressions_list) in rules:  # for every rule
            if not_p:
                continue
            if field == 'group':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    if CFG_CERN_SITE and expr.endswith(' [CERN]'):
                        authorized_emails.add(expr[:-len(' [CERN]')].lower().strip() + '@cern.ch')
                    emails = run_sql("SELECT user.email FROM usergroup JOIN user_usergroup ON usergroup.id=user_usergroup.id_usergroup JOIN user ON user.id=user_usergroup.id_user WHERE usergroup.name=%s", (expr, ))
                    for email in emails:
                        authorized_emails.add(email[0].lower().strip())
            elif field == 'email':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    authorized_emails.add(expr.lower().strip())
            elif field == 'uid':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    email = run_sql("SELECT email FROM user WHERE id=%s", (expr, ))
                    if email:
                        authorized_emails.add(email[0][0].lower().strip())
        return authorized_emails
    except Exception, msg:
        raise InvenioWebAccessFireroleError, msg
def do_upgrade():
    run_sql("""CREATE TABLE IF NOT EXISTS schSTATUS (
                 name varchar(50),
                 value mediumblob,
                 PRIMARY KEY (name)
               ) ENGINE=MyISAM
            """)
def repair_role_definitions():
    """ Try to rebuild compiled serialized definitions from their respective
    sources. This is needed in case Python breaks backward compatibility.
    """
    definitions = run_sql("SELECT id, firerole_def_src FROM accROLE")
    for role_id, firerole_def_src in definitions:
        run_sql("UPDATE accROLE SET firerole_def_ser=%s WHERE id=%s",
                (serialize(compile_role_definition(firerole_def_src)), role_id))
def get_keyevent_snapshot_sessions():
    """
    A specific implementation of get_current_event().

    @return: The current number of website visitors (guests, logged in)
    @type: (int, int)
    """
    # SQL to retrieve sessions in the Guests
    sql = ("SELECT COUNT(session_expiry) FROM session INNER JOIN user ON uid=id "
           + "WHERE email = '' AND "
           + "session_expiry-%d < unix_timestamp() AND " % WEBSTAT_SESSION_LENGTH
           + "unix_timestamp() < session_expiry")
    guests = run_sql(sql)[0][0]

    # SQL to retrieve sessions in the Logged in users
    sql = ("SELECT COUNT(session_expiry) FROM session INNER JOIN user ON uid=id "
           + "WHERE email <> '' AND "
           + "session_expiry-%d < unix_timestamp() AND " % WEBSTAT_SESSION_LENGTH
           + "unix_timestamp() < session_expiry")
    logged_ins = run_sql(sql)[0][0]

    # Assemble, according to return type
    return (guests, logged_ins)
def precache_element(name, key):
    '''
    Updates the last_updated flag of a cache to prevent parallel
    recomputation of the same cache.
    '''
    run_sql("insert into wapCACHE (object_name,object_key,last_updated) values (%s,%s,now()) "
            "on duplicate key update last_updated=now(),object_status=%s",
            (str(name), str(key), 'Precached'))
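# Usage sketch for precache_element() (illustrative only: the cache name and
# key below are made up):
precache_element("pid_data", "pid:1234")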
def clean_bibxxx():
    """
    Clean unreferenced bibliographic values from bibXXx tables.
    This is useful to prettify browse results, as it removes
    old, no longer used values.

    WARNING: this function must be run only when no bibupload is
    running and/or sleeping.
    """
    write_message("""CLEANING OF UNREFERENCED bibXXx VALUES STARTED""")
    for xx in range(0, 100):
        bibxxx = 'bib%02dx' % xx
        bibrec_bibxxx = 'bibrec_bib%02dx' % xx
        if task_get_option('verbose') >= 9:
            num_unref_values = run_sql("""SELECT COUNT(*) FROM %(bibxxx)s
                    LEFT JOIN %(bibrec_bibxxx)s
                           ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
                    WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
                    {'bibxxx': bibxxx,
                     'bibrec_bibxxx': bibrec_bibxxx, })[0][0]
        run_sql("""DELETE %(bibxxx)s FROM %(bibxxx)s
                    LEFT JOIN %(bibrec_bibxxx)s
                           ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
                    WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
                {'bibxxx': bibxxx,
                 'bibrec_bibxxx': bibrec_bibxxx, })
        if task_get_option('verbose') >= 9:
            write_message(""" - %d unreferenced %s values cleaned""" % \
                          (num_unref_values, bibxxx))
    write_message("""CLEANING OF UNREFERENCED bibXXx VALUES FINISHED""")
def alert_display():
    """
    Display alert statistics.
    """
    tbl_name = get_customevent_table("alerts")
    if not tbl_name:
        # custom event alerts not defined, so return empty output:
        return []
    try:
        res = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
        days = (res[-1][0] - res[0][0]).days + 1
        res = run_sql("SELECT COUNT(DISTINCT user),COUNT(*) FROM %s" % tbl_name)
        users = res[0][0]
        hits = res[0][1]
        displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'list'" % tbl_name)[0][0]
        search = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display'" % tbl_name)[0][0]
        average = hits / days
        res = [("Alerts page hits", hits)]
        res.append((" Average per day", average))
        res.append((" Unique users", users))
        res.append((" Displays", displays))
        res.append((" Searches history display", search))
    except IndexError:
        res = []
    return res
def save_references(paper_id, data):
    """
    Saves the references of the passed data dictionary using the standard
    authorlist_config keys of the paper data set with the given id. Should
    NOT be used alone as long as you are not sure what you are doing. Refer
    to save() instead.
    Returns the paper id.
    """
    reference_ids = data[cfg.JSON.REFERENCE_IDS]

    # Insert or update old references
    for index, reference in enumerate(reference_ids):
        data_tuple = (  # insert values
                      index, reference, paper_id,
                        # update values
                      reference)
        run_sql("""INSERT INTO aulREFERENCES
                   (item, reference, paper_id)
                   VALUES (%s, %s, %s)
                   ON DUPLICATE KEY UPDATE reference = %s;""",
                data_tuple)

    # Delete old references that are out of bounds - i.e. have a higher index
    # than the length of the reference list
    run_sql("""DELETE FROM aulREFERENCES
               WHERE item >= %s AND paper_id = %s;""",
            (len(reference_ids), paper_id))

    return paper_id
def store_last_updated(format, update_date):
    sql = "UPDATE format SET last_updated = %s " \
          "WHERE code = %s AND (last_updated < %s or last_updated IS NULL)"
    iso_date = update_date.strftime("%Y-%m-%d %H:%M:%S")
    run_sql(sql, (iso_date, format.lower(), iso_date))
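# Usage sketch for store_last_updated() (illustrative only: the format code is
# made up and datetime is assumed to be imported in the surrounding module):
store_last_updated("HB", datetime.now())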
def fetch_records_with_arxiv_fulltext():
    """
    Returns all the record IDs for records that have an arXiv
    bibdocfile attached.
    """
    return intbitset(run_sql("select id_bibrec from bibrec_bibdoc join bibdoc on id_bibdoc=id where (bibrec_bibdoc.type='arXiv' or bibdoc.doctype='arXiv') and bibdoc.status <> 'DELETED'"))
def fetch_arxiv_pdf_status(recid):
    """Fetch from the database the harvest status of given recid"""
    ret = run_sql("""SELECT status, version FROM bibARXIVPDF
                     WHERE id_bibrec = %s""", [recid])
    return ret and ret[0] or (None, None)
                msg)
        except InvenioBibDocFileError, e:
            # Most probably icon already existed.
            pass
    elif mybibdoc is not None:
        mybibdoc.delete_icon()

    # Update the MARC
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s", (str(sysno), ))

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s",
            (sysno, ))

    return ""


def get_pa_tag_content(pa_content):
    """Get content for <PA>XXX</PA>.
    @param pa_content: MatchObject for <PA>(.*)</PA>.
    return: the content of the file possibly filtered by a regular expression
            if pa_content=file[re]:a_file => first line of file a_file matching re
            if pa_content=file*p[re]:a_file => all lines of file a_file,
            matching re, separated by - (dash) char.
    """
    pa_content = pa_content.groupdict()['content']
    sep = '-'
def word_similarity(rank_method_code, lwords, hitset, rank_limit_relevance, verbose, methods): """Ranking a records containing specified words and returns a sorted list. input: rank_method_code - the code of the method, from the name field in rnkMETHOD lwords - a list of words from the query hitset - a list of hits for the query found by search_engine rank_limit_relevance - show only records with a rank value above this verbose - verbose value output: reclist - a list of sorted records: [[23,34], [344,24], [1,01]] prefix - what to show before the rank value postfix - what to show after the rank value voutput - contains extra information, content dependent on verbose value""" voutput = "" startCreate = time.time() if verbose > 0: voutput += "<br />Running rank method: %s, using word_frequency function in bibrank_record_sorter<br />" % rank_method_code lwords_old = lwords lwords = [] #Check terms, remove non alphanumeric characters. Use both unstemmed and stemmed version of all terms. for i in range(0, len(lwords_old)): term = string.lower(lwords_old[i]) if not methods[rank_method_code]["stopwords"] == "True" or methods[ rank_method_code]["stopwords"] and not is_stopword(term): lwords.append((term, methods[rank_method_code]["rnkWORD_table"])) terms = string.split( string.lower( re.sub( methods[rank_method_code] ["chars_alphanumericseparators"], ' ', term))) for term in terms: if methods[rank_method_code].has_key("stemmer"): # stem word term = stem(string.replace(term, ' ', ''), methods[rank_method_code]["stemmer"]) if lwords_old[ i] != term: #add if stemmed word is different than original word lwords.append( (term, methods[rank_method_code]["rnkWORD_table"])) (recdict, rec_termcount, lrecIDs_remove) = ({}, {}, {}) #For each term, if accepted, get a list of the records using the term #calculate then relevance for each term before sorting the list of records for (term, table) in lwords: term_recs = run_sql( """SELECT term, hitlist FROM %s WHERE term=%%s""" % methods[rank_method_code]["rnkWORD_table"], (term, )) if term_recs: #if term exists in database, use for ranking term_recs = deserialize_via_marshal(term_recs[0][1]) (recdict, rec_termcount) = calculate_record_relevance( (term, int(term_recs["Gi"][1])), term_recs, hitset, recdict, rec_termcount, verbose, quick=None) del term_recs if len(recdict) == 0 or (len(lwords) == 1 and lwords[0] == ""): return ( None, "Records not ranked. The query is not detailed enough, or not enough records found, for ranking to be possible.", "", voutput) else: #sort if we got something to sort (reclist, hitset) = sort_record_relevance(recdict, rec_termcount, hitset, rank_limit_relevance, verbose) #Add any documents not ranked to the end of the list if hitset: lrecIDs = list(hitset) #using 2-3mb reclist = zip(lrecIDs, [0] * len(lrecIDs)) + reclist #using 6mb if verbose > 0: voutput += "<br />Current number of recIDs: %s<br />" % ( methods[rank_method_code]["col_size"]) voutput += "Number of terms: %s<br />" % run_sql( "SELECT count(id) FROM %s" % methods[rank_method_code]["rnkWORD_table"])[0][0] voutput += "Terms: %s<br />" % lwords voutput += "Prepare and pre calculate time: %s<br />" % ( str(time.time() - startCreate)) voutput += "Total time used: %s<br />" % (str(time.time() - startCreate)) voutput += str(reclist) + "<br />" rank_method_stat(rank_method_code, reclist, lwords) return (reclist, methods[rank_method_code]["prefix"], methods[rank_method_code]["postfix"], voutput)
voutput = "" if verbose > 0: voutput += "<br />Running rank method: %s, using find_similar/word_frequency in bibrank_record_sorter<br />" % rank_method_code rank_limit_relevance = methods[rank_method_code]["default_min_relevance"] try: recID = int(recID) except Exception, e: return ( None, "Warning: Error in record ID, please check that a number is given.", "", voutput) rec_terms = run_sql( """SELECT termlist FROM %sR WHERE id_bibrec=%%s""" % methods[rank_method_code]["rnkWORD_table"][:-1], (recID, )) if not rec_terms: return (None, "Warning: Requested record does not seem to exist.", "", voutput) rec_terms = deserialize_via_marshal(rec_terms[0][0]) #Get all documents using terms from the selected documents if len(rec_terms) == 0: return ( None, "Warning: Record specified has no content indexed for use with this method.", "", voutput) else: terms = "%s" % rec_terms.keys() terms_recs = dict(
def bibreformat_task(fmt, sql, sql_queries, cds_query, process_format, process, recids): """ BibReformat main task @param fmt: output format to use @param sql: dictionary with pre-created sql queries for various cases (for selecting records). Some of these queries will be picked depending on the case @param sql_queries: a list of sql queries to be executed to select records to reformat. @param cds_query: a search query to be executed to select records to reformat @param process_format: @param process: @param recids: a list of record IDs to reformat @return: None """ write_message("Processing format %s" % fmt) t1 = os.times()[4] start_date = datetime.now() ### Query the database ### task_update_progress('Fetching records to process') if process_format: # '-without' parameter write_message("Querying database for records without cache...") without_format = without_fmt(sql) recIDs = intbitset(recids) if cds_query['field'] != "" or \ cds_query['collection'] != "" or \ cds_query['pattern'] != "": write_message("Querying database (CDS query)...") if cds_query['collection'] == "": # use search_pattern() whenever possible, as it can search # even in private collections res = search_pattern(p=cds_query['pattern'], f=cds_query['field'], m=cds_query['matching']) else: # use perform_request_search when '-c' argument has been # defined, as it is not supported by search_pattern() res = intbitset( perform_request_search(req=None, of='id', c=cds_query['collection'], p=cds_query['pattern'], f=cds_query['field'])) recIDs |= res for sql_query in sql_queries: write_message("Querying database (%s) ..." % sql_query, verbose=2) recIDs |= intbitset(run_sql(sql_query)) if fmt == "HDREF" and recIDs: # HDREF represents the references tab # the tab needs to be recomputed not only when the record changes # but also when one of the citations changes latest_bibrank_run = get_bibrankmethod_lastupdate('citation') sql = """SELECT id, modification_date FROM bibrec WHERE id in (%s)""" % ','.join(str(r) for r in recIDs) def check_date(mod_date): return mod_date < latest_bibrank_run recIDs = intbitset([recid for recid, mod_date in run_sql(sql) \ if check_date(mod_date)]) for r in recIDs: recIDs |= intbitset(get_cited_by(r)) ### list of corresponding record IDs was retrieved ### now format the selected records if process_format: write_message("Records to be processed: %d" % (len(recIDs) \ + len(without_format))) write_message("Out of it records without existing cache: %d" % len(without_format)) else: write_message("Records to be processed: %d" % (len(recIDs))) ### Initialize main loop total_rec = 0 # Total number of records tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call ### Iterate over all records prepared in lists I (option) if process: if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this # when migration from php to # python bibformat is done (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_old(recIDs, fmt) else: (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_new(recIDs, fmt) total_rec += total_rec_1 tbibformat += tbibformat_1 tbibupload += tbibupload_1 ### Iterate over all records prepared in list II (no_format) if process_format and process: if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this # when migration from php to # python bibformat is done (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_old(without_format, fmt) else: (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_new(without_format, fmt) total_rec += total_rec_2 tbibformat 
+= tbibformat_2 tbibupload += tbibupload_2 ### Store last run time if task_has_option("last"): write_message("storing run date to %s" % start_date) store_last_updated(fmt, start_date) ### Final statistics t2 = os.times()[4] elapsed = t2 - t1 message = "total records processed: %d" % total_rec write_message(message) message = "total processing time: %2f sec" % elapsed write_message(message) message = "Time spent on external call (os.system):" write_message(message) message = " bibformat: %2f sec" % tbibformat write_message(message) message = " bibupload: %2f sec" % tbibupload write_message(message)
def do_upgrade_atlantis():
    field_id = run_sql("""INSERT INTO field SET name='note', code='note'""")
    tag_id = run_sql("""INSERT INTO tag SET name='note', value='500__a'""")
    run_sql("""INSERT INTO field_tag VALUES (%s, %s, 10)""",
            (field_id, tag_id))
def retrieve_bibdoc_bibdoc():
    return run_sql('SELECT id_bibdoc1, id_bibdoc2 from bibdoc_bibdoc')
def guest_user_garbage_collector(): """Session Garbage Collector program flow/tasks: 1: delete expired sessions 1b:delete guest users without session 2: delete queries not attached to any user 3: delete baskets not attached to any user 4: delete alerts not attached to any user 5: delete expired mailcookies 5b: delete expired not confirmed email address 6: delete expired roles memberships verbose - level of program output. 0 - nothing 1 - default 9 - max, debug""" # dictionary used to keep track of number of deleted entries delcount = { 'session': 0, 'user': 0, 'user_query': 0, 'query': 0, 'bskBASKET': 0, 'user_bskBASKET': 0, 'bskREC': 0, 'bskRECORDCOMMENT': 0, 'bskEXTREC': 0, 'bskEXTFMT': 0, 'user_query_basket': 0, 'mail_cookie': 0, 'email_addresses': 0, 'role_membership': 0 } write_message("CLEANING OF GUEST SESSIONS STARTED") # 1 - DELETE EXPIRED SESSIONS write_message("- deleting expired sessions") timelimit = time.time() write_message(" DELETE FROM session WHERE" " session_expiry < %d \n" % (timelimit, ), verbose=9) delcount['session'] += run_sql("DELETE FROM session WHERE" " session_expiry < %s " "" % (timelimit, )) # 1b - DELETE GUEST USERS WITHOUT SESSION write_message("- deleting guest users without session") # get uids write_message( """ SELECT u.id\n FROM user AS u LEFT JOIN session AS s\n ON u.id = s.uid\n WHERE s.uid IS NULL AND u.email = ''""", verbose=9) result = run_sql("""SELECT u.id FROM user AS u LEFT JOIN session AS s ON u.id = s.uid WHERE s.uid IS NULL AND u.email = ''""") write_message(result, verbose=9) if result: # work on slices of result list in case of big result for i in range(0, len(result), CFG_MYSQL_ARGUMENTLIST_SIZE): # create string of uids uidstr = '' for (id_user, ) in result[i:i + CFG_MYSQL_ARGUMENTLIST_SIZE]: if uidstr: uidstr += ',' uidstr += "%s" % (id_user, ) # delete users write_message( " DELETE FROM user WHERE" " id IN (TRAVERSE LAST RESULT) AND email = '' \n", verbose=9) delcount['user'] += run_sql("DELETE FROM user WHERE" " id IN (%s) AND email = ''" % (uidstr, )) # 2 - DELETE QUERIES NOT ATTACHED TO ANY USER # first step, delete from user_query write_message("- deleting user_queries referencing" " non-existent users") # find user_queries referencing non-existent users write_message( " SELECT DISTINCT uq.id_user\n" " FROM user_query AS uq LEFT JOIN user AS u\n" " ON uq.id_user = u.id\n WHERE u.id IS NULL", verbose=9) result = run_sql("""SELECT DISTINCT uq.id_user FROM user_query AS uq LEFT JOIN user AS u ON uq.id_user = u.id WHERE u.id IS NULL""") write_message(result, verbose=9) # delete in user_query one by one write_message( " DELETE FROM user_query WHERE" " id_user = '******' \n", verbose=9) for (id_user, ) in result: delcount['user_query'] += run_sql("""DELETE FROM user_query WHERE id_user = %s""" % (id_user, )) # delete the actual queries write_message("- deleting queries not attached to any user") # select queries that must be deleted write_message( """ SELECT DISTINCT q.id\n FROM query AS q LEFT JOIN user_query AS uq\n ON uq.id_query = q.id\n WHERE uq.id_query IS NULL AND\n q.type <> 'p' """, verbose=9) result = run_sql("""SELECT DISTINCT q.id FROM query AS q LEFT JOIN user_query AS uq ON uq.id_query = q.id WHERE uq.id_query IS NULL AND q.type <> 'p'""") write_message(result, verbose=9) # delete queries one by one write_message( """ DELETE FROM query WHERE id = 'TRAVERSE LAST RESULT \n""", verbose=9) for (id_user, ) in result: delcount['query'] += run_sql("""DELETE FROM query WHERE id = %s""", (id_user, )) # 3 - DELETE BASKETS NOT OWNED 
BY ANY USER write_message("- deleting baskets not owned by any user") # select basket ids write_message( """ SELECT ub.id_bskBASKET\n FROM user_bskBASKET AS ub LEFT JOIN user AS u\n ON u.id = ub.id_user\n WHERE u.id IS NULL""", verbose=9) try: result = run_sql("""SELECT ub.id_bskBASKET FROM user_bskBASKET AS ub LEFT JOIN user AS u ON u.id = ub.id_user WHERE u.id IS NULL""") except: result = [] write_message(result, verbose=9) # delete from user_basket and basket one by one write_message( """ DELETE FROM user_bskBASKET WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' """, verbose=9) write_message( """ DELETE FROM bskBASKET WHERE id = 'TRAVERSE LAST RESULT' """, verbose=9) write_message( """ DELETE FROM bskREC WHERE id_bskBASKET = 'TRAVERSE LAST RESULT'""", verbose=9) write_message( """ DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' \n""", verbose=9) for (id_basket, ) in result: delcount['user_bskBASKET'] += run_sql( """DELETE FROM user_bskBASKET WHERE id_bskBASKET = %s""", (id_basket, )) delcount['bskBASKET'] += run_sql( """DELETE FROM bskBASKET WHERE id = %s""", (id_basket, )) delcount['bskREC'] += run_sql( """DELETE FROM bskREC WHERE id_bskBASKET = %s""", (id_basket, )) delcount['bskRECORDCOMMENT'] += run_sql( """DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = %s""", (id_basket, )) write_message( """ SELECT DISTINCT ext.id, rec.id_bibrec_or_bskEXTREC FROM bskEXTREC AS ext \nLEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC WHERE id_bibrec_or_bskEXTREC is NULL""", verbose=9) try: result = run_sql("""SELECT DISTINCT ext.id FROM bskEXTREC AS ext LEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC WHERE id_bibrec_or_bskEXTREC is NULL""") except: result = [] write_message(result, verbose=9) write_message( """ DELETE FROM bskEXTREC WHERE id = 'TRAVERSE LAST RESULT' """, verbose=9) write_message( """ DELETE FROM bskEXTFMT WHERE id_bskEXTREC = 'TRAVERSE LAST RESULT' \n""", verbose=9) for (id_basket, ) in result: delcount['bskEXTREC'] += run_sql( """DELETE FROM bskEXTREC WHERE id=%s""", (id_basket, )) delcount['bskEXTFMT'] += run_sql( """DELETE FROM bskEXTFMT WHERE id_bskEXTREC=%s""", (id_basket, )) # 4 - DELETE ALERTS NOT OWNED BY ANY USER write_message('- deleting alerts not owned by any user') # select user ids in uqb that reference non-existent users write_message( """SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""", verbose=9) result = run_sql( """SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""" ) write_message(result, verbose=9) # delete all these entries for (id_user, ) in result: write_message( """DELETE FROM user_query_basket WHERE id_user = '******'user_query_basket'] += run_sql( """DELETE FROM user_query_basket WHERE id_user = %s """, (id_user, )) # 5 - delete expired mailcookies write_message("""mail_cookie_gc()""", verbose=9) delcount['mail_cookie'] = mail_cookie_gc() ## 5b - delete expired not confirmed email address write_message( """DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')""" % CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS, verbose=9) delcount['email_addresses'] = run_sql( """DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')""" % CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS) # 6 - delete expired roles memberships write_message("""DELETE FROM user_accROLE WHERE expiration<NOW()""", verbose=9) 
delcount['role_membership'] = run_sql( """DELETE FROM user_accROLE WHERE expiration<NOW()""") # print STATISTICS write_message("""- statistics about deleted data: """) write_message(""" %7s sessions.""" % (delcount['session'], )) write_message(""" %7s users.""" % (delcount['user'], )) write_message(""" %7s user_queries.""" % (delcount['user_query'], )) write_message(""" %7s queries.""" % (delcount['query'], )) write_message(""" %7s baskets.""" % (delcount['bskBASKET'], )) write_message(""" %7s user_baskets.""" % (delcount['user_bskBASKET'], )) write_message(""" %7s basket_records.""" % (delcount['bskREC'], )) write_message(""" %7s basket_external_records.""" % (delcount['bskEXTREC'], )) write_message(""" %7s basket_external_formats.""" % (delcount['bskEXTFMT'], )) write_message(""" %7s basket_comments.""" % (delcount['bskRECORDCOMMENT'], )) write_message(""" %7s user_query_baskets.""" % (delcount['user_query_basket'], )) write_message(""" %7s mail_cookies.""" % (delcount['mail_cookie'], )) write_message(""" %7s non confirmed email addresses.""" % delcount['email_addresses']) write_message(""" %7s role_memberships.""" % (delcount['role_membership'], )) write_message("""CLEANING OF GUEST SESSIONS FINISHED""")
def tearDown(self):
    # Clean DB entries
    run_sql(""" DELETE FROM seqSTORE
                WHERE seq_name="texkey"
                AND seq_value IN ("%s", "%s", "%s")
            """ % (self.texkey1, self.texkey2, self.texkey3))
def get_recid_from_docid(docid):
    return run_sql('SELECT id_bibrec FROM bibrec_bibdoc WHERE id_bibdoc=%s',
                   (docid, ))
def wait_for_task(task_id):
    sql = 'select status from schTASK where id = %s'
    while run_sql(sql, [task_id])[0][0] not in ('DONE', 'ACK', 'ACK DONE'):
        task_sleep_now_if_required(True)
        time.sleep(5)
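# Usage sketch for wait_for_task() (illustrative only: task id 1234 is made up;
# this blocks until the corresponding bibsched task reaches DONE/ACK):
wait_for_task(1234)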
    except Exception, err:
        msg = "WARNING: when opening docid %s: %s" % (id_bibdoc1, err)
        print >> logfile, msg
        print msg
        return True
    try:
        msg = "Fixing icon for the document %s" % (id_bibdoc1, )
        print msg,
        print >> logfile, msg,
        the_icon = BibDoc.create_instance(id_bibdoc2)
        for a_file in the_icon.list_latest_files():
            the_bibdoc.add_icon(a_file.get_full_path(), format=a_file.get_format())
        the_icon.delete()
        run_sql("DELETE FROM bibdoc_bibdoc WHERE id_bibdoc1=%s AND id_bibdoc2=%s",
                (id_bibdoc1, id_bibdoc2))
        print "OK"
        print >> logfile, "OK"
        return True
    except Exception, err:
        print "ERROR: %s" % err
        print >> logfile, "ERROR: %s" % err
        register_exception()
        return False


def main():
    """Core loop."""
    check_running_process_user()
    logfilename = '%s/fulltext_files_migration_kit-%s.log' % (
def Send_APP_Mail(parameters, curdir, form, user_info=None): """ This function send an email informing the original submitter of a document that the referee has approved/ rejected the document. The email is also sent to the referee for checking. Parameters: * addressesAPP: email addresses of the people who will receive this email (comma separated list). this parameter may contain the <CATEG> string. In which case the variable computed from the [categformatAFP] parameter replaces this string. eg.: "<CATEG>[email protected]" * categformatAPP contains a regular expression used to compute the category of the document given the reference of the document. eg.: if [categformatAFP]="TEST-<CATEG>-.*" and the reference of the document is "TEST-CATEGORY1-2001-001", then the computed category equals "CATEGORY1" * newrnin: Name of the file containing the 2nd reference of the approved document (if any). * edsrn: Name of the file containing the reference of the approved document. """ global titlevalue, authorvalue, emailvalue, sysno, rn FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL) sequence_id = bibtask_allocate_sequenceid(curdir) doctype = form['doctype'] titlevalue = titlevalue.replace("\n", " ") authorvalue = authorvalue.replace("\n", "; ") # variables declaration categformat = parameters['categformatAPP'] otheraddresses = parameters['addressesAPP'] newrnpath = parameters['newrnin'] ## Get the name of the decision file: try: decision_filename = parameters['decision_file'] except KeyError: decision_filename = "" ## Get the name of the comments file: try: comments_filename = parameters['comments_file'] except KeyError: comments_filename = "" ## Now try to read the comments from the comments_filename: if comments_filename in (None, "", "NULL"): ## We don't have a name for the comments file. ## For backward compatibility reasons, try to read the comments from ## a file called 'COM' in curdir: if os.path.exists("%s/COM" % curdir): try: fh_comments = open("%s/COM" % curdir, "r") comment = fh_comments.read() fh_comments.close() except IOError: ## Unable to open the comments file exception_prefix = "Error in WebSubmit function " \ "Send_APP_Mail. Tried to open " \ "comments file [%s/COM] but was " \ "unable to." % curdir register_exception(prefix=exception_prefix) comment = "" else: comment = comment.strip() else: comment = "" else: ## Try to read the comments from the comments file: if os.path.exists("%s/%s" % (curdir, comments_filename)): try: fh_comments = open("%s/%s" % (curdir, comments_filename), "r") comment = fh_comments.read() fh_comments.close() except IOError: ## Oops, unable to open the comments file. comment = "" exception_prefix = "Error in WebSubmit function " \ "Send_APP_Mail. Tried to open comments " \ "file [%s/%s] but was unable to." \ % (curdir, comments_filename) register_exception(prefix=exception_prefix) else: comment = comment.strip() else: comment = "" ## Now try to read the decision from the decision_filename: if decision_filename in (None, "", "NULL"): ## We don't have a name for the decision file. ## For backward compatibility reasons, try to read the decision from ## a file called 'decision' in curdir: if os.path.exists("%s/decision" % curdir): try: fh_decision = open("%s/decision" % curdir, "r") decision = fh_decision.read() fh_decision.close() except IOError: ## Unable to open the decision file exception_prefix = "Error in WebSubmit function " \ "Send_APP_Mail. Tried to open " \ "decision file [%s/decision] but was " \ "unable to." 
% curdir register_exception(prefix=exception_prefix) decision = "" else: decision = decision.strip() else: decision = "" else: ## Try to read the decision from the decision file: try: fh_decision = open("%s/%s" % (curdir, decision_filename), "r") decision = fh_decision.read() fh_decision.close() except IOError: ## Oops, unable to open the decision file. decision = "" exception_prefix = "Error in WebSubmit function " \ "Send_APP_Mail. Tried to open decision " \ "file [%s/%s] but was unable to." \ % (curdir, decision_filename) register_exception(prefix=exception_prefix) else: decision = decision.strip() if os.path.exists("%s/%s" % (curdir, newrnpath)): fp = open("%s/%s" % (curdir, newrnpath), "r") newrn = fp.read() fp.close() else: newrn = "" # Document name res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s", (doctype, )) docname = res[0][0] # retrieve category categformat = categformat.replace("<CATEG>", "([^-]*)") m_categ_search = re.match(categformat, rn) if m_categ_search is not None: if len(m_categ_search.groups()) > 0: ## Found a match for the category of this document. Get it: category = m_categ_search.group(1) else: ## This document has no category. category = "unknown" else: category = "unknown" ## Get the referee email address: if CFG_CERN_SITE: ## The referees system in CERN now works with listbox membership. ## List names should take the format ## "*****@*****.**" ## Make sure that your list exists! ## FIXME - to be replaced by a mailing alias in webaccess in the ## future. referee_listname = "service-cds-referee-%s" % doctype.lower() if category != "": referee_listname += "-%s" % category.lower() referee_listname += "@cern.ch" addresses = referee_listname else: # Build referee's email address refereeaddress = "" # Try to retrieve the referee's email from the referee's database for user in acc_get_role_users( acc_get_role_id("referee_%s_%s" % (doctype, category))): refereeaddress += user[1] + "," # And if there is a general referee for user in acc_get_role_users( acc_get_role_id("referee_%s_*" % doctype)): refereeaddress += user[1] + "," refereeaddress = re.sub(",$", "", refereeaddress) # Creation of the mail for the referee otheraddresses = otheraddresses.replace("<CATEG>", category) addresses = "" if refereeaddress != "": addresses = refereeaddress + "," if otheraddresses != "": addresses += otheraddresses else: addresses = re.sub(",$", "", addresses) ## Add the record's submitter(s) into the list of recipients: ## Get the email address(es) of the record submitter(s)/owner(s) from ## the record itself: record_owners = print_record(sysno, 'tm', \ [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip() if record_owners != "": record_owners_list = record_owners.split("\n") record_owners_list = [email.lower().strip() \ for email in record_owners_list] else: #if the record owner can not be retrieved from the metadata #(in case the record has not been inserted yet), #try to use the global variable emailvalue try: record_owners_list = [emailvalue] except NameError: record_owners_list = [] record_owners = ",".join([owner for owner in record_owners_list]) if record_owners != "": addresses += ",%s" % record_owners if decision == "approve": mailtitle = "%s has been approved" % rn mailbody = "The %s %s has been approved." % (docname, rn) mailbody += "\nIt will soon be accessible here:\n\n<%s/%s/%s>" % ( CFG_SITE_URL, CFG_SITE_RECORD, sysno) else: mailtitle = "%s has been rejected" % rn mailbody = "The %s %s has been rejected." 
% (docname, rn) if rn != newrn and decision == "approve" and newrn != "": mailbody += "\n\nIts new reference number is: %s" % newrn mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue, authorvalue) if comment != "": mailbody += "Comments from the referee:\n%s\n" % comment # Send mail to referee if any recipients or copy to admin if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN: scheduled_send_email( FROMADDR, addresses, mailtitle, mailbody, copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN, other_bibtasklet_arguments=['-I', str(sequence_id)]) return ""
def fetch_records_modified_since(last_date):
    """
    Fetch all the recids of records modified since last_date in the system
    """
    return intbitset(run_sql("SELECT id FROM bibrec WHERE"
                             " modification_date>=%s", (last_date, )))
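# Usage sketch for fetch_records_modified_since() (illustrative only: the
# cut-off date below is made up):
recently_modified = fetch_records_modified_since("2013-01-01 00:00:00")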
def hoover(authors=None, check_db_consistency=False, dry_run=False, packet_size=1000, dry_hepnames_run=False, open_tickets=False, queue='Test'): """The actions that hoover performs are the following: 1. Find out the identifiers that belong to the authors(pids) in the database 2. Find and pull all the signatures that have the same identifier as the author to the author 3. Connect the profile of the author with the hepnames collection entry (optional) check the database to see if it is in a consistent state Keyword arguments: authors -- an iterable of authors to be hoovered check_db_consistency -- perform checks for the consistency of the database dry_run -- do not alter the database tables packet_size -- squeeze together the marcxml. This there are fewer bibupload processes for the bibsched to run. dry_hepnames_run -- do not alter the hepnames collection queue -- the name of the queue to be used in the rt system for the tickets """ global rt_ticket_report rt_ticket_report = open_tickets write_message("Packet size {0}".format(packet_size), verbose=1) write_message("Initializing hoover", verbose=1) write_message("Selecting records with identifiers...", verbose=1) recs = get_records_with_tag('100__i') task_sleep_now_if_required(can_stop_too=True) recs += get_records_with_tag('100__j') task_sleep_now_if_required(can_stop_too=True) recs += get_records_with_tag('700__i') task_sleep_now_if_required(can_stop_too=True) recs += get_records_with_tag('700__j') task_sleep_now_if_required(can_stop_too=True) write_message("Found {0} records".format(len(set(recs))), verbose=2) recs = set(recs) & set( run_sql("select DISTINCT(bibrec) from aidPERSONIDPAPERS")) write_message(" out of which {0} are in BibAuthorID".format(len(recs)), verbose=2) task_sleep_now_if_required(can_stop_too=True) records_with_id = set(rec[0] for rec in recs) destroy_partial_marc_caches() populate_partial_marc_caches(records_with_id, create_inverted_dicts=True) if rt_ticket_report: global ticket_hashes write_message("Ticketing system rt is used", verbose=9) write_message("Building hash cache for tickets for queue %s" % queue, verbose=9) ticket_ids = BIBCATALOG_SYSTEM.ticket_search(None, subject='[Hoover]', queue=queue) write_message("Found %s existing tickets" % len(ticket_ids), verbose=9) for ticket_id in ticket_ids: task_sleep_now_if_required(can_stop_too=True) try: ticket_data = BIBCATALOG_SYSTEM.ticket_get_info( None, ticket_id) ticket_hashes[ticket_data['subject'].split() [-1]] = ticket_data, ticket_id, False except IndexError: write_message( "Problem in subject of ticket {0}".format(ticket_id), verbose=5) write_message("Found {0} tickets".format(len(ticket_hashes)), verbose=2) task_sleep_now_if_required(can_stop_too=True) fdict_id_getters = { "INSPIREID": { 'reliable': [ get_inspire_id_of_author, get_inspireID_from_hepnames, lambda pid: get_inspireID_from_claimed_papers( pid, intersection_set=records_with_id, queue=queue) ], 'unreliable': [ lambda pid: get_inspireID_from_unclaimed_papers( pid, intersection_set=records_with_id, queue=queue) ], 'signatures_getter': get_signatures_with_inspireID, 'connection': dict_entry_for_hepnames_connector, 'data_dicts': { 'pid_mapping': defaultdict(set), 'id_mapping': defaultdict(set) } }, "ORCID": { 'reliable': [ # get_orcid_id_of_author, # get_inspireID_from_hepnames, # lambda pid: get_inspireID_from_claimed_papers(pid, # intersection_set=records_with_id)] ], 'unreliable': [ # get_inspireID_from_hepnames, # lambda pid: get_inspireID_from_claimed_papers(pid, # 
intersection_set=records_with_id)] ], 'signatures_getter': lambda x: list(), 'connection': lambda pid, _id: None, 'data_dicts': { 'pid_mapping': defaultdict(set), 'id_mapping': defaultdict(set) } } } if not authors: authors = get_existing_authors() write_message("Running on {0}".format(len(authors)), verbose=2) unclaimed_authors = defaultdict(set) hep_connector = HepnamesConnector(packet_size=packet_size, dry_hepnames_run=dry_hepnames_run) for index, pid in enumerate(authors): task_sleep_now_if_required(can_stop_too=True) write_message("Searching for reliable ids of person {0}".format(pid), verbose=2) for identifier_type, functions in fdict_id_getters.iteritems(): write_message(" Type: {0}".format(identifier_type, ), verbose=9) try: G = (func(pid) for func in functions['reliable']) if check_db_consistency: results = filter(None, (func for func in G if func)) try: # check if this is reduntant if len(results) == 1: consistent_db = True else: consistent_db = len(set(results)) <= 1 res = results[0] except IndexError: res = None else: if not consistent_db: res = None raise InconsistentIdentifiersException( 'Inconsistent database', pid, identifier_type, set(results)) else: res = next((func for func in G if func), None) except MultipleIdsOnSingleAuthorException as e: open_rt_ticket(e, queue=queue) except BrokenHepNamesRecordException as e: continue except InconsistentIdentifiersException as e: open_rt_ticket(e, queue=queue) except MultipleHepnamesRecordsWithSameIdException as e: open_rt_ticket(e, queue=queue) else: if res: HooverStats.new_ids_found += 1 write_message(" Found reliable id {0}".format(res, ), verbose=9) fdict_id_getters[identifier_type]['data_dicts'][ 'pid_mapping'][pid].add(res) fdict_id_getters[identifier_type]['data_dicts'][ 'id_mapping'][res].add(pid) else: write_message(" No reliable id found", verbose=9) unclaimed_authors[identifier_type].add(pid) write_message("Vacuuming reliable ids...", verbose=2) for identifier_type, data in fdict_id_getters.iteritems(): task_sleep_now_if_required(can_stop_too=True) hep_connector.produce_connection_entry = fdict_id_getters[ identifier_type]['connection'] for pid, identifiers in data['data_dicts']['pid_mapping'].iteritems(): write_message( " Person {0} has reliable identifier(s) {1} ".format( str(pid), str(identifiers)), verbose=9) try: if len(identifiers) == 1: identifier = list(identifiers)[0] write_message( " Considering {0}".format(identifier), verbose=9) if len(data['data_dicts']['id_mapping'][identifier]) == 1: if not dry_run: rowenta = Vacuumer(pid) signatures = data['signatures_getter'](identifier) write_message( " Vacuuming {0} signatures! ".format( str(len(signatures))), verbose=4) for sig in signatures: try: rowenta.vacuum_signature(sig) except DuplicateClaimedPaperException as e: open_rt_ticket(e, queue=queue) except DuplicateUnclaimedPaperException as e: unclaimed_authors[identifier_type].add( e.pid) write_message( " Adding inspireid {0} to pid {1}". 
                                format(identifier, pid), verbose=3)
                            add_external_id_to_author(pid, identifier_type, identifier)
                            hep_connector.add_connection(pid, identifier)
                    else:
                        raise MultipleAuthorsWithSameIdException(
                            "More than one author with the same identifier",
                            data['data_dicts']['id_mapping'][identifier],
                            identifier)
                else:
                    raise MultipleIdsOnSingleAuthorException(
                        "More than one identifier on a single author",
                        pid, 'INSPIREID', identifiers)
            except MultipleAuthorsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleIdsOnSingleAuthorException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            write_message(" Done with {0}".format(pid, ), verbose=3)

    write_message("Vacuuming unreliable ids...", verbose=2)
    for identifier_type, functions in fdict_id_getters.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        hep_connector.produce_connection_entry = fdict_id_getters[
            identifier_type]['connection']
        for index, pid in enumerate(unclaimed_authors[identifier_type]):
            write_message(
                "Searching for unreliable ids of person {0}".format(pid),
                verbose=9)
            try:
                G = (func(pid) for func in functions['unreliable'])
                res = next((func for func in G if func), None)
                if res is None:
                    continue
            except MultipleIdsOnSingleAuthorException as e:
                continue
            except BrokenHepNamesRecordException as e:
                continue
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
                # res is undefined when this exception is raised, so move on
                # to the next person instead of falling through.
                continue

            HooverStats.new_ids_found += 1
            write_message(
                " Person {0} has unreliable identifier {1} ".format(
                    str(pid), str(res)), verbose=9)

            if res in fdict_id_getters[identifier_type]['data_dicts'][
                    'id_mapping']:
                write_message(
                    " Id {0} is already assigned to another person, skipping person {1} "
                    .format(str(res), pid))
                continue

            if not dry_run:
                rowenta = Vacuumer(pid)
                signatures = functions['signatures_getter'](res)
                for sig in signatures:
                    try:
                        rowenta.vacuum_signature(sig)
                    except DuplicateClaimedPaperException as e:
                        open_rt_ticket(e, queue=queue)
                    except DuplicateUnclaimedPaperException as e:
                        pass
                write_message(" Adding inspireid {0} to pid {1}".format(
                    res, pid), verbose=3)
                add_external_id_to_author(pid, identifier_type, res)
                hep_connector.add_connection(pid, res)

            write_message(" Done with {0}".format(pid), verbose=3)

    hep_connector.execute_connection()

    # ticket_hashes maps a subject hash to a (ticket_data, ticket_id, flag)
    # tuple, so iterate over the stored tuples rather than over the keys.
    for ticket in ticket_hashes.itervalues():
        if not ticket[2]:
            BIBCATALOG_SYSTEM.ticket_set_attribute(None, ticket[1],
                                                   'status', 'resolved')

    HooverStats.report_results()
    write_message("Terminating hoover", verbose=1)
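
# Usage sketch for hoover() above -- a minimal, hedged example, not part of the
# task code itself. It assumes it runs inside a bibsched task context (so that
# write_message() and task_sleep_now_if_required() behave as expected); the
# author pids used below are hypothetical.
def _hoover_dry_run_example():
    # Dry run over two specific authors: neither the database tables nor the
    # HepNames collection are altered, and ticket reporting stays disabled.
    hoover(authors=[1234, 5678],
           dry_run=True,
           dry_hepnames_run=True,
           open_tickets=False,
           queue='Test')
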
return errorMsg(str(e), req, ln=ln) if u_email == "guest" or u_email == "": return warningMsg(websubmit_templates.tmpl_warning_message( ln=ln, msg=_("Sorry, you must log in to perform this action."), ), req, ln=ln) if deletedId != "": t += deleteSubmission(deletedId, deletedAction, deletedDoctype, u_email) # doctypes res = run_sql("select ldocname,sdocname from sbmDOCTYPE order by ldocname") doctypes = [] for row in res: doctypes.append({ 'id': row[1], 'name': row[0], 'selected': (doctype == row[1]), }) # submissions # request order default value reqorder = "sbmSUBMISSIONS.md DESC, lactname" # requested value if order == "actiondown": reqorder = "lactname ASC, sbmSUBMISSIONS.md DESC" elif order == "actionup":
def all_records(): """Produce record IDs for all available records.""" return intbitset(run_sql("SELECT id FROM bibrec"))
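
# Example use of all_records() -- a minimal sketch relying on the module-level
# run_sql/intbitset imports used by all_records() itself. Because the result is
# an intbitset, it supports fast set algebra with other record-id sets; here it
# is intersected with the records known to BibAuthorID (the same query used by
# hoover() above).
def _records_known_to_bibauthorid_example():
    with_authors = intbitset(
        run_sql("select DISTINCT(bibrec) from aidPERSONIDPAPERS"))
    return all_records() & with_authors
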
def CONFSUBMIT_Send_Approval_Request(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the referee in order to start the simple
    approval process. This function is very CERN-specific and should be
    changed in case of external use. Must be called after the
    Get_Report_Number function.

    Parameters:

       * addressesDAM: email addresses of the people who will receive this
         email (comma separated list). This parameter may contain the <CATEG>
         string, in which case the category computed from the [categformatDAM]
         parameter replaces this string.
         eg.: "<CATEG>[email protected]"

       * categformatDAM: contains a regular expression used to compute the
         category of the document given the reference of the document.
         eg.: if [categformatDAM]="TEST-<CATEG>-.*" and the reference of the
         document is "TEST-CATEGORY1-2001-001", then the computed category
         equals "CATEGORY1"

       * titleFile: name of the file in which the title is stored.
       * submitteremailfile: name of the file in which the submitter's email is stored.
       * submitternamefile: name of the file in which the submitter's name is stored.
       * contactnamefile: name of the file in which the contact name is stored.
       * contactemailfile: name of the file in which the contact email is stored.
       * referencefile: name of the file in which the reference is stored.
       * affiliationfile: name of the file in which the affiliation is stored.
       * regionfile: name of the file in which the region is stored.
       * rankfile: name of the file in which the rank is stored.
       * fieldfile: name of the file in which the field(s) are stored.
       * experimentsfile: name of the file in which the experiments are stored.
       * urlfile: name of the file in which the URL is stored.
       * datefile: name of the file in which the date is stored.
       * abstractfile: name of the file in which the abstract is stored.
       * seriesnamefile: name of the file where the series name is stored.
       * seriesnumberfile: name of the file where the series number is stored.
       * directory: parameter used to create the URL to access the files.
    """
    global rn, sysno
    # variables declaration
    doctype = re.search(".*/([^/]*)/([^/]*)/[^/]*$", curdir).group(2)
    otheraddresses = parameters['addressesDAM']
    categformat = parameters['categformatDAM']
    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None:
        if len(m_categ_search.groups()) > 0:
            ## Found a match for the category of this document. Get it:
            category = m_categ_search.group(1)
        else:
            ## This document has no category.
category = "unknown" else: category = "unknown" # get record data date = get_file_contents(curdir, "date") title = get_file_contents(curdir, parameters['titleFile']).replace("\n", "") title += " - %s" % date submitteremail = get_file_contents( curdir, parameters['submitteremailfile']).replace("\n", ", ") submittername = get_file_contents(curdir, parameters['submitternamefile']).replace( "\n", ", ") contactname = get_file_contents(curdir, parameters['contactnamefile']).replace( "\n", ", ") contactemail = get_file_contents(curdir, parameters['contactemailfile']).replace( "\n", ", ") subtitle = get_file_contents(curdir, parameters['subtitle']).replace("\n", ", ") city = get_file_contents(curdir, parameters['cityfile']).replace("\n", ", ") country = get_file_contents(curdir, parameters['countryfile']).replace("\n", ", ") state = get_file_contents(curdir, parameters['statefile']).replace("\n", ", ") stdate = get_file_contents(curdir, parameters['stdatefile']).replace("\n", ", ") fndate = get_file_contents(curdir, parameters['fndatefile']).replace("\n", ", ") field = get_file_contents(curdir, parameters['fieldfile']).replace("\n", ", ") url = get_file_contents(curdir, parameters['urlfile']).replace("\n", " ") shorttitle = get_file_contents(curdir, parameters['shorttitle']).replace( "\n", " ") keywords = get_file_contents(curdir, parameters['keywords']).replace("\n", " ") proceedings = get_file_contents(curdir, parameters['proceedings']).replace( "\n", " ") seriesname = get_file_contents(curdir, parameters['seriesnamefile']).replace( "\n", " ") seriesnumber = get_file_contents(curdir, parameters['seriesnumberfile']).replace( "\n", " ") abstract = get_file_contents(curdir, parameters['abstractfile']) # we get the referee password sth = run_sql("SELECT access FROM sbmAPPROVAL WHERE rn=%s", (rn, )) if len(sth) > 0: access = sth[0][0] # Build referee's email address refereeaddress = "" # Try to retrieve the referee's email from the referee's database for user in acc_get_role_users( acc_get_role_id("referee_%s_%s" % (doctype, category))): refereeaddress += user[1] + "," # And if there are general referees for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)): refereeaddress += user[1] + "," refereeaddress = re.sub(",$", "", refereeaddress) # Creation of the mail for the referee addresses = "" if refereeaddress != "": addresses = refereeaddress + "," if otheraddresses != "": addresses += otheraddresses else: addresses = re.sub(",$", "", addresses) record_url = "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, sysno) title_referee = "Request for approval of %s" % rn mail_referee = """ The document %(rn)s has been submitted to the Conferences database and it will appear here:\n%(recordlink)s. 
To approve/reject the document, you should go to this URL:\n%(access)s\n Title: %(title)s Date: from %(stdate)s to %(fndate)s Place: %(city)s, %(state)s, %(country)s Series name: %(seriesname)s Series number: %(seriesnumber)s URL: %(url)s Field(s): %(field)s Description: %(abstract)s Contact name(s): %(contactname)s Contact email(s): %(contactemail)s Submitter name(s): %(submittername)s Submitter email(s): %(submitteremail)s """ % { 'rn': rn, 'title': title, 'submitteremail': submitteremail, 'submittername': submittername, 'contactname': contactname, 'contactemail': contactemail, 'field': field, 'city': city, 'state': state, 'country': country, 'stdate': stdate, 'fndate': fndate, 'url': url, 'subtitle': subtitle, 'shorttitle': shorttitle, 'proceedings': proceedings, 'keywords': keywords, 'access': "%s/approve.py?access=%s" % (CFG_SITE_URL, access), 'recordlink': record_url, 'abstract': abstract, 'seriesname': seriesname, 'seriesnumber': seriesnumber } #Send mail to referee send_email(fromaddr=CFG_WEBSUBMIT_CONF_FROMADDR, toaddr=CFG_WEBSUBMIT_CONF_SUPPORT_EMAIL, subject=title_referee, \ content=mail_referee, footer=email_footer(support_email=CFG_WEBSUBMIT_CONF_SUPPORT_EMAIL), copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN, bccaddr=addresses, replytoaddr=contactemail) return ""
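
# Worked example of the categformatDAM logic documented above -- a standalone
# sketch that mirrors the substitution done in the function body; the default
# values are the ones quoted in the docstring.
def _category_from_report_number_example(categformat="TEST-<CATEG>-.*",
                                          rn="TEST-CATEGORY1-2001-001"):
    import re
    # "<CATEG>" is replaced by a capturing group, the pattern is matched
    # against the report number, and the first group is the category
    # ("CATEGORY1" here).
    pattern = categformat.replace("<CATEG>", "([^-]*)")
    m = re.match(pattern, rn)
    if m is not None and m.groups():
        return m.group(1)
    return "unknown"
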
def get_expired_person_ids(expire_delay_days=CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_BIBSCHED): """ Returns pids with expired caches. """ keys = run_sql("select object_key from wapCACHE where object_status=%s or last_updated < " "timestampadd(day, -%s, now())", ('Expired', expire_delay_days)) keys = [int(x[0].split(':')[1]) for x in set(keys) if ':' in x[0]] return keys
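
# Usage sketch: refresh the profiles whose caches have expired.
# recompute_cache_for_person is hypothetical -- the real recomputation entry
# point depends on the webauthorprofile deployment; the 'pid:<person_id>' key
# format is the one parsed by get_expired_person_ids() above.
def _refresh_expired_profiles_example(recompute_cache_for_person):
    for person_id in get_expired_person_ids(expire_delay_days=7):
        recompute_cache_for_person(person_id)
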
def store_last_updated(fmt, iso_date):
    """Set the last_updated timestamp of output format FMT to ISO_DATE,
    unless the stored timestamp is already more recent."""
    sql = "UPDATE format SET last_updated = %s " \
          "WHERE code = %s AND (last_updated < %s or last_updated IS NULL)"
    run_sql(sql, (iso_date, fmt.lower(), iso_date))
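
# Usage sketch for store_last_updated(): the WHERE clause guarantees that the
# timestamp only moves forward, so replaying an older date is a no-op. The
# format code 'hb' and the dates are illustrative values only.
def _bump_format_timestamp_example():
    store_last_updated('hb', '2014-01-02 12:00:00')   # recorded
    store_last_updated('hb', '2013-12-31 23:59:59')   # ignored: older than stored value
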
def expire_cache_element(name, key): """ Sets cache element status to 'Expired'. """ run_sql("update wapCACHE set object_status=%s where " "object_name=%s and object_key=%s", ('Expired', str(name), str(key)))
def tearDown(self):
    """Remove inserted comments"""
    run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""", (self.public_comid, ))
    run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""", (self.restr_comid_1, ))
    run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""", (self.restr_comid_2, ))
    if self.restr_comid_3:
        run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""", (self.restr_comid_3, ))
    run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""", (self.restr_comid_4, ))
    run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""", (self.restr_comid_5, ))
    run_sql("""DELETE FROM cmtRECORDCOMMENT WHERE id=%s""", (self.deleted_comid, ))
def precache_element(name, key):
    """ Insert or refresh a cache element and mark it 'Precached', so that
    parallel workers do not recompute the same cache. """
    run_sql("insert into wapCACHE (object_name,object_key,last_updated,object_status) values (%s,%s,now(),%s) "
            "on duplicate key update last_updated=now(),object_status=%s",
            (str(name), str(key), 'Precached', 'Precached'))
def expire_all_cache_for_person(person_id):
    """ Remove all cache entries belonging to the given person
    (object_key 'pid:<person_id>'). """
    run_sql("DELETE FROM wapCACHE WHERE object_key=%s", ('pid:' + str(person_id),))
def test_process_one(self): from invenio import arxiv_pdf_checker from invenio.arxiv_pdf_checker import process_one, \ FoundExistingPdf, \ fetch_arxiv_pdf_status, \ STATUS_OK, \ AlreadyHarvested arxiv_pdf_checker.CFG_ARXIV_URL_PATTERN = EXAMPLE_PDF_URL_1 + "?%s%s" # Make sure there is no harvesting state stored or this test will fail run_sql('DELETE FROM bibARXIVPDF WHERE id_bibrec = %s', [self.recid]) def look_for_fulltext(recid): """Look for fulltext pdf (bibdocfile) for a given recid""" rec_info = BibRecDocs(recid) docs = rec_info.list_bibdocs() for doc in docs: for d in doc.list_all_files(): if d.get_format().strip('.') in ['pdf', 'pdfa', 'PDF']: try: yield doc, d except InvenioBibDocFileError: pass # Remove all pdfs from record 3 for doc, docfile in look_for_fulltext(self.recid): doc.delete_file(docfile.get_format(), docfile.get_version()) if not doc.list_all_files(): doc.expunge() try: process_one(self.recid) finally: self.clean_bibtask() # Check for existing pdf docs = list(look_for_fulltext(self.recid)) if not docs: self.fail() # Check that harvesting state is stored status, version = fetch_arxiv_pdf_status(self.recid) self.assertEqual(status, STATUS_OK) self.assertEqual(version, 1) try: process_one(self.recid) self.fail() except AlreadyHarvested: pass # Even though the version is changed the md5 is the same self.arxiv_version = 2 try: process_one(self.recid) self.fail() except FoundExistingPdf: pass arxiv_pdf_checker.CFG_ARXIV_URL_PATTERN = EXAMPLE_PDF_URL_2 + "?%s%s" self.arxiv_version = 3 try: process_one(self.recid) finally: self.clean_bibtask() # We know the PDF is attached, run process_one again # and it needs to raise an error try: process_one(self.recid) self.fail() except AlreadyHarvested: run_sql('DELETE FROM bibARXIVPDF WHERE id_bibrec = %s', [self.recid]) # Restore state for doc, docfile in docs: doc.delete_file(docfile.get_format(), docfile.get_version()) if not doc.list_all_files(): doc.expunge() self.clean_bibupload_fft()
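
# Sketch of the calling pattern exercised by the test above, written as a
# standalone helper rather than a test method. process_one() raises
# AlreadyHarvested when the stored harvesting state already covers the current
# arXiv version, and FoundExistingPdf when the downloaded file matches a PDF
# that is already attached, so a periodic task can simply skip such records.
def _harvest_record_example(recid):
    from invenio.arxiv_pdf_checker import process_one, \
        AlreadyHarvested, \
        FoundExistingPdf
    try:
        process_one(recid)
    except AlreadyHarvested:
        pass    # nothing new on arXiv for this record
    except FoundExistingPdf:
        pass    # an identical PDF is already attached to the record
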
def cache_element(name, key, value): """ Insert an element into cache or update already present element. """ run_sql("insert into wapCACHE (object_name,object_key,object_value,object_status,last_updated) values (%s,%s,%s,%s,now()) " "on duplicate key update object_value=%s,last_updated=now(),object_status=%s" , (str(name), str(key), str(value), 'UpToDate', str(value), 'UpToDate'))
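
# Putting the wapCACHE helpers above together -- a minimal lifecycle sketch.
# compute_profile() and the 'profile' object_name are hypothetical; the
# 'pid:<person_id>' key format and the helper functions are the ones defined
# in this module.
def _cache_lifecycle_example(person_id, compute_profile):
    key = 'pid:' + str(person_id)
    # 1. Mark the element as being computed so parallel workers skip it.
    precache_element('profile', key)
    # 2. Compute and store the value; the row status becomes 'UpToDate'.
    cache_element('profile', key, compute_profile(person_id))
    # 3. Later, invalidate this single element...
    expire_cache_element('profile', key)
    # ...or drop every cached object for the person at once.
    expire_all_cache_for_person(person_id)
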
def clean_bibupload_fft(self):
    """Remove the most recent 'bibupload:FFT' task left behind in the
    scheduler queue."""
    run_sql("""DELETE FROM schTASK WHERE proc = 'bibupload:FFT'
               ORDER BY id DESC LIMIT 1""")
def rank_by_method(rank_method_code, lwords, hitset, rank_limit_relevance, verbose):
    """Ranking of records based on predetermined values.
    input:
    rank_method_code - the code of the method, from the name field in rnkMETHOD,
      used to get predetermined values from rnkMETHODDATA
    lwords - a list of words from the query
    hitset - a list of hits for the query found by search_engine
    rank_limit_relevance - show only records with a rank value above this
    verbose - verbose value
    output:
    reclist - a list of records sorted by rank value in ascending order, with
      unranked records prepended: [[23, 34], [344, 24], [1, 0]]
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value"""

    global voutput
    voutput = ""
    rnkdict = run_sql(
        "SELECT relevance_data FROM rnkMETHODDATA,rnkMETHOD where rnkMETHOD.id=id_rnkMETHOD and rnkMETHOD.name=%s",
        (rank_method_code, ))

    if not rnkdict:
        return (None, "Warning: Could not load ranking data for method %s." %
                rank_method_code, "", voutput)

    max_recid = 0
    res = run_sql("SELECT max(id) FROM bibrec")
    if res and res[0][0]:
        max_recid = int(res[0][0])

    lwords_hitset = None
    # find which docs to search based on ranges... should be done in search_engine...
    for j in range(0, len(lwords)):
        if lwords[j] and lwords[j][:6] == "recid:":
            if not lwords_hitset:
                lwords_hitset = intbitset()
            lword = lwords[j][6:]
            if string.find(lword, "->") > -1:
                lword = string.split(lword, "->")
                if int(lword[0]) >= max_recid or int(
                        lword[1]) >= max_recid + 1:
                    return (None, "Warning: Given record IDs are out of range.",
                            "", voutput)
                for i in range(int(lword[0]), int(lword[1])):
                    lwords_hitset.add(int(i))
            elif int(lword) < max_recid + 1:
                lwords_hitset.add(int(lword))
            else:
                return (None, "Warning: Given record IDs are out of range.",
                        "", voutput)
    rnkdict = deserialize_via_marshal(rnkdict[0][0])

    if verbose > 0:
        voutput += "<br />Running rank method: %s, using rank_by_method function in bibrank_record_sorter<br />" % rank_method_code
        voutput += "Ranking data loaded, size of structure: %s<br />" % len(
            rnkdict)
    lrecIDs = list(hitset)

    if verbose > 0:
        voutput += "Number of records to rank: %s<br />" % len(lrecIDs)
    reclist = []
    reclist_addend = []

    if not lwords_hitset:  # rank all docs; can this be sped up using something other than a for loop?
        for recID in lrecIDs:
            if rnkdict.has_key(recID):
                reclist.append((recID, rnkdict[recID]))
                del rnkdict[recID]
            else:
                reclist_addend.append((recID, 0))
    else:  # rank docs in hitset; can this be sped up using something other than a for loop?
        for recID in lwords_hitset:
            if rnkdict.has_key(recID) and recID in hitset:
                reclist.append((recID, rnkdict[recID]))
                del rnkdict[recID]
            elif recID in hitset:
                reclist_addend.append((recID, 0))

    if verbose > 0:
        voutput += "Number of records ranked: %s<br />" % len(reclist)
        voutput += "Number of records not ranked: %s<br />" % len(
            reclist_addend)

    reclist.sort(lambda x, y: cmp(x[1], y[1]))
    return (reclist_addend + reclist, methods[rank_method_code]["prefix"],
            methods[rank_method_code]["postfix"], voutput)
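
# Usage sketch for rank_by_method() -- hedged: 'citation' is only an
# illustrative rank_method_code, and the method must exist in
# rnkMETHOD/rnkMETHODDATA and in the module-level `methods` configuration for
# the prefix/postfix lookup to succeed.
def _rank_hitset_example(hitset):
    reclist, prefix, postfix, debug = rank_by_method('citation',
                                                     lwords=[],
                                                     hitset=hitset,
                                                     rank_limit_relevance=0,
                                                     verbose=0)
    if reclist is None:
        return []          # on failure, `prefix` carries the warning message
    # Records come back least relevant first, with unranked ones prepended,
    # so reverse the list to get the most relevant records first.
    return [recid for recid, score in reversed(reclist)]
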