def template_context_function(id_bibrec, pattern, qid):
    """
    Return an HTML fulltext snippet for a record matching a search pattern.

    @param id_bibrec: ID of record
    @param pattern: search pattern (recovered from the query cache via
        qid when empty)
    @param qid: query id
    @return: HTML containing the snippet, '' when no snippet could be
        produced, or None when record/pattern are missing
    """
    if not pattern:
        pattern = get_pattern_from_cache(qid)
    nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get('', 0)
    max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get('', 0)
    if id_bibrec and pattern:
        if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and 'fulltext:' in pattern:
            terms = get_fulltext_terms_from_search_pattern(pattern)
            if terms:
                snippets = ''
                try:
                    snippets = solr_get_snippet(terms, id_bibrec, nb_chars,
                                                max_snippets).decode('utf8')
                    if snippets:
                        return ' ... ' + snippets + ' ... '
                except Exception:
                    # Solr may be unreachable or return malformed data;
                    # log and degrade gracefully to an empty snippet.
                    register_exception()
                    return ''
            else:
                return ''
    else:
        return None
def hocr2pdf(input_file, output_file=None, working_dir=None, font="Courier",
             author=None, keywords=None, subject=None, title=None,
             draft=False, pdfopt=True, **dummy):
    """
    Build a PDF out of an hOCR file plus the images it references.

    @param input_file the hOCR source file.
    @param output_file the desired output path (derived from input_file
        when not given).
    @param working_dir the directory containing images to build the PDF.
    @param font the default font (e.g. Courier, Times-Roman).
    @param author the author name.
    @param keywords the keywords of the document.
    @param subject the subject of the document.
    @param title the title of the document.
    @param draft whether to enable debug information in the output.
    @param pdfopt whether to linearize the resulting PDF.
    @return the path of the generated PDF.
    """
    if working_dir:
        working_dir = os.path.abspath(working_dir)
    else:
        working_dir = os.path.abspath(os.path.dirname(input_file))
    if pdfopt:
        input_file, tmp_output_file, dummy = prepare_io(
            input_file, output_ext='.pdf', need_working_dir=False)
    else:
        input_file, output_file, dummy = prepare_io(
            input_file, output_file=output_file, need_working_dir=False)
        tmp_output_file = output_file
    try:
        # Read the hOCR source with a guaranteed close (the original
        # leaked the file handle until garbage collection).
        hocr_file = open(input_file)
        try:
            hocr_content = hocr_file.read()
        finally:
            hocr_file.close()
        create_pdf(extract_hocr(hocr_content), tmp_output_file, font=font,
                   author=author, keywords=keywords, subject=subject,
                   title=title, image_path=working_dir, draft=draft)
    except:
        # Record the failure, then propagate it to the caller.
        register_exception()
        raise
    if pdfopt:
        output_file = pdf2pdfopt(tmp_output_file, output_file)
        os.remove(tmp_output_file)
        return output_file
    else:
        return tmp_output_file
def _download_tars(self, check_integrity=True):
    """
    Download every retrieved tar package into CFG_TAR_FILES.

    @param check_integrity: verify the packages on the server before
        downloading them.
    @return: list of local target paths of the packages.
    """
    if check_integrity:
        self.ftp.check_pkgs_integrity(self.retrieved_packages, self.logger)
    print("Downloading %i tar packages." % (len(self.retrieved_packages)))
    # Create progress bar
    total_count = len(self.files_list)
    for i, filename in enumerate(self.retrieved_packages.iterkeys(),
                                 start=1):
        self.logger.info("Downloading tar package %s of %s: %s"
                         % (i, total_count, filename,))
        unpack_path = join(CFG_TAR_FILES, filename)
        # Record the target path exactly once; the original appended it a
        # second time after a successful download, producing duplicates.
        self.retrieved_packages_unpacked.append(unpack_path)
        try:
            self.ftp.download(filename, CFG_TAR_FILES)
            self.packages_delivery.append((filename[0:-4], datetime.now()))
        except Exception:
            register_exception(alert_admin=True,
                               prefix="Elsevier package download failed.")
            self.logger.error("Error downloading tar file %s of %s: %s"
                              % (i, total_count, filename,))
            print(sys.exc_info())
    return self.retrieved_packages_unpacked
def calculate_RFC2104_HMAC(data, _amazon_secret_access_key):
    """
    Computes a RFC 2104 compliant HMAC Signature and then Base64 encodes it.

    Module hashlib must be installed if Python < 2.5
    <http://pypi.python.org/pypi/hashlib/20081119>

    @param data: data to sign
    @param _amazon_secret_access_key: your Amazon secret key
    @type data: string
    @type _amazon_secret_access_key: string. Empty if hashlib module not
        installed
    """
    if not HASHLIB_IMPORTED:
        # Raise-and-catch so register_exception() has a live traceback to
        # record; the signature is then returned empty.
        try:
            raise Exception(
                "Module hashlib not installed. Please install it.")
        except:
            from invenio.errorlib import register_exception
            register_exception(stream='warning', alert_admin=True,
                               subject='Cannot create AWS signature')
            return ""
    else:
        if sys.version_info < (2, 5):
            # compatibility mode for Python < 2.5 and hashlib
            my_digest_algo = _MySHA256(sha256())
        else:
            my_digest_algo = sha256
        return base64.encodestring(
            hmac.new(_amazon_secret_access_key, data,
                     my_digest_algo).digest()).strip()
def _crawl_elsevier_and_find_main_xml(self):
    """
    A package contains several subdirectory corresponding to each article.
    An article is actually identified by the existence of a main.pdf and
    a main.xml in a given directory.
    """
    self.found_articles = []
    if not self.path and not self.package_name:
        for doc in self.conn.found_articles:
            # Remove the exact '/main.xml' suffix.  The original used
            # str.rstrip(), which strips a *character set* and could eat
            # trailing characters of the directory name itself.
            dirname = doc['xml']
            if dirname.endswith('/main.xml'):
                dirname = dirname[:-len('/main.xml')]
            try:
                self._normalize_article_dir_with_dtd(dirname)
                self.found_articles.append(dirname)
            except Exception as err:
                register_exception()
                print("ERROR: can't normalize %s: %s" % (dirname, err))
    else:
        def visit(dummy, dirname, names):
            # os.path.walk-style visitor: collect directories holding
            # both a main.xml and a main.pdf.
            if "main.xml" in names and "main.pdf" in names:
                try:
                    self._normalize_article_dir_with_dtd(dirname)
                    self.found_articles.append(dirname)
                except Exception as err:
                    register_exception()
                    print("ERROR: can't normalize %s: %s" % (dirname, err))
        walk(self.path, visit, None)
def get_pdfa_record(self, path=None):
    """
    Assemble a record dict attaching the PDF/A full text of an article.

    @param path: directory of the unpacked article.

    NOTE(review): no return statement is visible in this excerpt — the
    full source presumably ends with 'return rec'; confirm before reuse.
    """
    from invenio.search_engine import search_pattern
    xml = self.get_article(path)
    rec = {}
    journal, issn, volume, issue, first_page, last_page, year, \
        start_date, doi = self.get_publication_information(xml)
    # Look up an existing (non-deleted) record by DOI.
    recid = search_pattern(p='0247_a:"%s" AND NOT 980:"DELETED"' % (doi,))
    if recid:
        record_add_field(rec, '001', controlfield_value=recid[0])
    else:
        # Unknown DOI: create a stub carrying the DOI in 024__7 and
        # alert the admin.
        record_add_field(rec, '024', ind1='7',
                         subfields=[('a', doi), ('2', 'DOI')])
        self.logger.error('Adding PDF/A. No paper with this DOI: %s. Trying to add it anyway.' % (doi,))
        register_exception(alert_admin=True, prefix="'Adding PDF/A. No paper with this DOI: %s. Trying to add it anyway.." % (doi,))
    try:
        if exists(join(path, 'main_a-2b.pdf')):
            # Prefer the dedicated PDF/A file when present.
            record_add_field(rec, 'FFT',
                             subfields=[('a', join(path, 'main_a-2b.pdf')),
                                        ('n', 'main'),
                                        ('f', '.pdf;pdfa')])
            self.logger.debug('Adding PDF/A to record: %s' % (doi,))
        elif exists(join(path, 'main.pdf')):
            record_add_field(rec, 'FFT',
                             subfields=[('a', join(path, 'main.pdf'))])
            self.logger.debug('No PDF/A in VTEX package for record: %s' % (doi,))
        else:
            raise MissingFFTError("Record %s doesn't contain PDF file." % (doi,))
    except MissingFFTError, err:
        # Missing fulltext is logged and alerted, but non-fatal.
        register_exception(alert_admin=True,
                           prefix="Elsevier paper: %s is missing PDF." % (doi,))
        self.logger.warning("Record %s doesn't contain PDF file." % (doi,))
def _create_icon(file_path, icon_size, format='gif', verbosity=9):
    """
    Creates icon of given file.

    Returns path to the icon. If creation fails, return None, and
    register exception (send email to admin).

    Parameters:

     - file_path : *str* full path to icon

     - icon_size : *int* the scaling information to be used for the
                   creation of the new icon.

     - verbosity : *int* the verbosity level under which the program
                   is to run;
    """
    icon_path = None
    try:
        filename = os.path.splitext(os.path.basename(file_path))[0]
        (icon_dir, icon_name) = create_icon({
            'input-file': file_path,
            'icon-name': "icon-%s" % filename,
            'multipage-icon': False,
            'multipage-icon-delay': 0,
            'icon-scale': icon_size,
            'icon-file-format': format,
            'verbosity': verbosity})
        icon_path = icon_dir + os.sep + icon_name
    except InvenioWebSubmitIconCreatorError, e:
        # Creation failure is non-fatal: log without alerting the admin
        # and leave icon_path as None.
        register_exception(prefix='Icon for file %s could not be created: %s' % \
                           (file_path, str(e)),
                           alert_admin=False)
    # NOTE(review): excerpt ends without 'return icon_path' — the full
    # source presumably returns it; confirm before reuse.
def bst_openaire_altmetric(): """ """ recids = search_pattern(p="0->Z", f="0247_a") a = Altmetric() for recid in recids: try: # Check if we already have an Altmetric id sysno_inst = get_fieldvalues(recid, "035__9") if ['Altmetric'] in sysno_inst: continue doi_val = get_fieldvalues(recid, "0247_a")[0] json_res = a.doi(doi_val) rec = {} record_add_field(rec, "001", controlfield_value=str(recid)) if json_res: record_add_field(rec, '035', subfields=[('a', str(json_res['altmetric_id'])), ('9', 'Altmetric')]) bibupload(rec, opt_mode='correct') except AltmetricHTTPException, e: register_exception(prefix='Altmetric error (status code %s): %s' % (e.status_code, str(e)), alert_admin=False)
def alert(req, journal_name="", ln=CFG_SITE_LANG, sent="False", plainText=u"",
          htmlMail="", recipients="", subject="", issue="", force="False"):
    """
    Sends an email alert, in HTML/PlainText or only PlainText to a mailing
    list to alert for new journal releases.
    """
    navtrail_previous_links = wjn.getnavtrail(' > <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> > <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        # Cannot identify the user: generic error page.
        return error_page('Error', req)
    try:
        # Sanitize every form argument before use.
        journal_name = wash_journal_name(ln, journal_name)
        issue = wash_issue_number(ln, journal_name, issue)
        plain_text = wash_url_argument(plainText, 'str')
        html_mail = wash_url_argument(htmlMail, 'str')
        recipients = wash_url_argument(recipients, 'str')
        subject = wash_url_argument(subject, 'str')
        sent = wash_url_argument(sent, 'str')
        force = wash_url_argument(force, 'str')
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
    # NOTE(review): excerpt truncated — further error handling and the
    # actual alert sending follow in the full source.
def article(self, req, form):
    """
    Article page.
    Washes all the parameters and stores them in journal_defaults dict
    for subsequent format_elements.
    Passes on to logic function and eventually returns HTML.
    """
    argd = wash_urlargd(form, {'name': (str, ""),
                               'issue': (str, ""),
                               'category': (str, ""),
                               'number': (str, ""),
                               'ln': (str, ""),
                               }
                        )
    try:
        ln = wash_journal_language(argd['ln'])
        journal_name = wash_journal_name(ln, argd['name'])
        issue = wash_issue_number(ln, journal_name, argd['issue'])
        # Issues are formatted as 'number/year'.
        issue_year = issue.split('/')[1]
        issue_number = issue.split('/')[0]
        category = wash_category(ln, argd['category'], journal_name,
                                 issue_number)
        number = wash_article_number(ln, argd['number'], journal_name)
        recid = get_recid_from_legacy_number(issue, category, int(number))
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box(req)
    # NOTE(review): excerpt truncated — further washing errors and the
    # page rendering follow in the full source.
def _extract_packages(self):
    """
    Extract a package in a new directory.

    @return: path of the last extraction directory, when any package was
        processed.
    """
    if not hasattr(self, "retrieved_packages_unpacked"):
        self.retrieved_packages_unpacked = [self.package_name]
    for path in self.retrieved_packages_unpacked:
        package_name = basename(path)
        self.path_unpacked = join(CFG_UNPACKED_FILES,
                                  package_name.split('.')[0])
        self.logger.debug(
            "Extracting package: %s" % (path.split("/")[-1], ))
        try:
            if self.path_unpacked.endswith("_archival_pdf"):
                # Drop the exact suffix.  The original used str.rstrip(),
                # which strips a *character set* and could eat trailing
                # characters of the package name itself.
                self.path_unpacked = \
                    self.path_unpacked[:-len("_archival_pdf")]
                ZipFile(path).extractall(
                    join(self.path_unpacked, "archival_pdfs"))
            else:
                ZipFile(path).extractall(self.path_unpacked)
        except Exception:
            register_exception(
                alert_admin=True,
                prefix="OUP error extracting package.")
            self.logger.error(
                "Error extraction package file: %s" % (path, ))
    if hasattr(self, "path_unpacked"):
        return self.path_unpacked
def search(self, req, form):
    """
    Display search interface
    """
    argd = wash_urlargd(form, {'name': (str, ""),
                               'issue': (str, ""),
                               'archive_year': (str, ""),
                               'archive_issue': (str, ""),
                               'archive_select': (str, "False"),
                               'archive_date': (str, ""),
                               'archive_search': (str, "False"),
                               'ln': (str, CFG_SITE_LANG),
                               'verbose': (int, 0)})
    try:
        # FIXME: if journal_name is empty, redirect
        ln = wash_journal_language(argd['ln'])
        washed_journal_name = wash_journal_name(ln, argd['name'])
        archive_issue = wash_issue_number(ln, washed_journal_name,
                                          argd['archive_issue'])
        archive_date = wash_archive_date(ln, washed_journal_name,
                                         argd['archive_date'])
        archive_select = argd['archive_select']
        archive_search = argd['archive_search']
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box(req)
    # NOTE(review): excerpt truncated — further error handling and the
    # interface rendering follow in the full source.
def Generate_Group_File(parameters, curdir, form, user_info=None): """ Generates a group file (stored in 'curdir/Group') for use with publiline. @param parameters: (dictionary) - must contain: + group_name: (string) - the id of the Group for use in the complex approval refereeing workflow @param curdir: (string) - the current submission's working directory. @param form: (dictionary) - form fields. @param user_info: (dictionary) - various information about the submitting user (includes the apache req object). @return: (string) - empty string. @Exceptions raised: InvenioWebSubmitFunctionError when an unexpected error is encountered. """ try: group_file = open("%s/%s" % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME), "w") group_file.write(parameters['group_name']) group_file.flush() group_file.close() except IOError, err: ## Unable to write the Group file to curdir. err_msg = "Error: Unable to create Group file [%s/%s]. " \ "Perhaps check directory permissions. " \ % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME) register_exception(req=req_obj, prefix=err_msg) raise InvenioWebSubmitFunctionError(err_msg)
def run(self, run_localy=False):
    """
    Harvest packages, either from the remote Elsevier server or from the
    already-downloaded local files.

    @param run_localy: when True, skip the FTP stage and process the
        packages already present in CFG_TAR_FILES.
    """
    if not run_localy:
        try:
            self.connect()
            self._get_file_listing('.ready')
            self._download_file_listing()
        except LoginException as err:
            register_exception(alert_admin=True,
                               prefix=('Failed to connect to '
                                       'the Elsevier server. %s') % (err,))
            return
        except Exception:
            # No fresh listing available: nothing to do this run.
            self.logger.info('No new packages to process')
            return
        self._get_packages()
        self._download_tars()
        self._check_md5()
    else:
        self.logger.info("Running on local files.")
        self.retrieved_packages_unpacked = []
        self.files_list = []
        for p in listdir(CFG_TAR_FILES):
            self.retrieved_packages_unpacked.append(join(CFG_TAR_FILES, p))
        for p in listdir(CFG_READY_PACKAGES):
            # Remove the exact '.ready.xml' suffix.  The original used
            # str.strip(), which removes *characters* from both ends and
            # could corrupt package names.
            if p.endswith(".ready.xml"):
                p = p[:-len(".ready.xml")]
            self.files_list.append(p)
    self._extract_packages()
    self._get_metadata_and_fulltex_dir()
def run(self, run_localy=False):
    """
    Harvest packages, either from the remote Elsevier server or from the
    already-downloaded local files.

    @param run_localy: when True, skip the FTP stage and process the
        packages already present in CFG_TAR_FILES.
    """
    if not run_localy:
        try:
            self.connect()
            self._get_file_listing('.ready')
            self._download_file_listing()
        except LoginException as err:
            register_exception(alert_admin=True,
                               prefix=('Failed to connect to '
                                       'the Elsevier server. %s') % (err, ))
            return
        except Exception as e:
            # No fresh listing available: log the cause and stop.
            self.logger.info('No new packages to process')
            self.logger.info('Registered error: %s' % e)
            return
        self._get_packages()
        self._download_tars()
        self._check_md5()
    else:
        self.logger.info("Running on local files.")
        self.retrieved_packages_unpacked = []
        self.files_list = []
        for p in listdir(CFG_TAR_FILES):
            self.retrieved_packages_unpacked.append(join(CFG_TAR_FILES, p))
        for p in listdir(CFG_READY_PACKAGES):
            # Remove the exact '.ready.xml' suffix.  The original used
            # str.strip(), which removes *characters* from both ends and
            # could corrupt package names.
            if p.endswith(".ready.xml"):
                p = p[:-len(".ready.xml")]
            self.files_list.append(p)
    self._extract_packages()
    self._get_metadata_and_fulltex_dir()
def regenerate(req, journal_name="", issue="", ln=CFG_SITE_LANG,
               confirmed_p="", publish_draft_articles_p=""):
    """
    Clears the cache for the given issue.
    """
    navtrail_previous_links = wjn.getnavtrail(' > <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> > <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        # Cannot identify the user: generic error page.
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
        issue_number = wash_issue_number(ln, journal_name, issue)
        # 'confirmed' / 'move' are plain string flags from the form.
        confirmed_p = wash_url_argument(confirmed_p, 'str') == "confirmed"
        publish_draft_articles_p = wash_url_argument(publish_draft_articles_p, 'str') == "move"
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
    # NOTE(review): excerpt truncated — further error handling and the
    # cache clearing follow in the full source.
def issue_control(req, journal_name="", issue=[], ln=CFG_SITE_LANG,
                  action="cfg"):
    """
    Page that allows full control over creating, backtracing, adding to,
    removing from issues.
    """
    navtrail_previous_links = wjn.getnavtrail(' > <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> > <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        # Cannot identify the user: generic error page.
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
        action = wash_url_argument(action, 'str')
        issue = wash_url_argument(issue, 'list')
        # Drop the 'ww/YYYY' placeholder entries before washing.
        issues = [wash_issue_number(ln,journal_name, _issue) \
                  for _issue in issue \
                  if _issue != "ww/YYYY"]
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
    # NOTE(review): excerpt truncated — the per-action handling follows
    # in the full source.
def format_record_for_bibedit_global(record, field):
    """
    Process the record to be manipulated by bibedit

    :param record: The record you want to manipulate.
    :param field: The field which is interesting you.
    :return: Dict with the values needed by bibedit to work.
    """
    final_shape = {}
    final_shape["fields"] = {}
    try:
        # Mapping describing which MARC (sub)fields feed the output:
        # 'main' maps the primary tag, 'sub' the complementary ones.
        fields = CFG_ARBITRARY_AUTOSUGGEST_FIELD[field]
        # First value of the main tag is used as the printable label.
        final_shape["print"] = record_get_field(
            record, fields["main"].keys()[0])[0]
        final_shape["fields"][field[:3]] = {}
        final_shape["fields"][field[:3]][fields["main"][
            fields["main"].keys()[0]]] = [
            record_get_field(record, fields["main"].keys()[0])[0]
        ]
        for field_to_add in fields["sub"]:
            final_shape["fields"][field[:3]][field_to_add.values()[0]] = [
                record_get_field(record, field_to_add.keys()[0])[0]
            ]
    except:
        # Any lookup failure (unknown field, missing tag/subfield) yields
        # an empty label; partially-filled "fields" content may remain.
        register_exception()
        final_shape["print"] = ""
    return final_shape
def rank_records_obelix(user_info, hitset, rg=10, jrec=0, settings=None):
    """
    Public method

    Rank a search result using Obelix recommendations.  The hitset is
    expected sorted latest-last (e.g. [1, 2, 3, 4, 5] of recids); it is
    reversed before ranking.  On any failure the original ordering is
    returned with zero scores.
    """
    hitset = list(hitset)
    hitset.reverse()
    jrec = max(jrec - 1, 0)
    window = slice(jrec, jrec + rg)
    try:
        settings = settings or ObelixSearchEngineSettings()
        uid = ""
        if CFG_WEBSEARCH_OBELIX_USER_KEY and \
                CFG_WEBSEARCH_OBELIX_USER_KEY in user_info:
            uid = user_info[CFG_WEBSEARCH_OBELIX_USER_KEY]
        # Anonymous users (or disabled recommendations) get the plain
        # ordering with neutral scores.
        if settings.recommendations_impact == 0 or uid in (0, "", "0"):
            records, scores = hitset, [0] * len(hitset)
        else:
            records, scores = ObelixSearchEngine(
                uid, hitset, redis=settings.redis).rank()
        return records[window], scores[window]
    except Exception:
        register_exception(alert_admin=True)
        return hitset[window], [0] * len(hitset[window])
def get_oai_set(id=''): """Returns a row parameters for a given id""" sets = [] sql = "SELECT id, setSpec, setName, setCollection, setDescription, p1,f1,m1, p2,f2,m2, p3,f3,m3, setDefinition FROM oaiREPOSITORY" try: if id: sql += " WHERE id=%s" % id sql += " ORDER BY setSpec asc" res = run_sql(sql) for row in res: set = ['']*16 set[0] = row[0] set[1] = row[1] set[2] = row[2] params = parse_set_definition(row[14]) set[3] = params.get('c', '') set[5] = params.get('p1', '') set[6] = params.get('f1', '') set[7] = params.get('m1', '') set[8] = params.get('p2', '') set[9] = params.get('f2', '') set[10] = params.get('m2', '') set[11] = params.get('p3', '') set[12] = params.get('f3', '') set[13] = params.get('m3', '') set[14] = params.get('op1', 'a') set[15] = params.get('op2', 'a') sets.append(set) return sets except StandardError, e: register_exception(alert_admin=True) return str(e)
def log_search_result_obelix(user_info, original_result_ordered, record_ids,
                             results_final_colls_scores,
                             cols_in_result_ordered,
                             seconds_to_rank_and_print, jrec, rg, rm, cc):
    """
    Public method

    Used to log search_results.  Thin fire-and-forget wrapper around
    ObelixSearchEngineLogger().search_result(): any failure is registered
    (with admin alert) but never propagated, so logging cannot break the
    search request itself.

    :param user_info: current user information dict
    :param original_result_ordered: result ordering before re-ranking
    :param record_ids: records shown to the user
    :param results_final_colls_scores: per-collection scores
    :param cols_in_result_ordered: collections in display order
    :param seconds_to_rank_and_print: timing information
    :param jrec: index of the first displayed record
    :param rg: number of records per page
    :param rm: ranking method
    :param cc: current collection
    :return:
    """
    try:
        ObelixSearchEngineLogger().search_result(user_info,
                                                 original_result_ordered,
                                                 record_ids,
                                                 results_final_colls_scores,
                                                 cols_in_result_ordered,
                                                 seconds_to_rank_and_print,
                                                 jrec, rg, rm, cc)
    except Exception:
        register_exception(alert_admin=True)
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2): """Add a definition into the OAI Repository""" try: if not oai_set_spec: oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC set_definition = 'c=' + oai_set_collection + ';' + \ 'p1=' + oai_set_p1 + ';' + \ 'f1=' + oai_set_f1 + ';' + \ 'm1=' + oai_set_m1 + ';' + \ 'op1='+ oai_set_op1 + ';' + \ 'p2=' + oai_set_p2 + ';' + \ 'f2=' + oai_set_f2 + ';' + \ 'm2=' + oai_set_m2 + ';' + \ 'op2='+ oai_set_op2 + ';' + \ 'p3=' + oai_set_p3 + ';' + \ 'f3=' + oai_set_f3 + ';' + \ 'm3=' + oai_set_m3 + ';' run_sql( """INSERT INTO oaiREPOSITORY (id, setName, setSpec, setCollection, setDescription, setDefinition, setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3) VALUES (0, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", (oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, set_definition, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3)) return (1, "") except StandardError, e: register_exception(alert_admin=True) return (0, e)
def sendfile(self, path, offset=0, the_len=-1): try: self.send_http_header() file_to_send = open(path) file_to_send.seek(offset) file_wrapper = FileWrapper(file_to_send) count = 0 if the_len < 0: for chunk in file_wrapper: count += len(chunk) self.__bytes_sent += len(chunk) self.__write(chunk) else: for chunk in file_wrapper: if the_len >= len(chunk): the_len -= len(chunk) count += len(chunk) self.__bytes_sent += len(chunk) self.__write(chunk) else: count += the_len self.__bytes_sent += the_len self.__write(chunk[:the_len]) break except IOError, err: if "failed to write data" in str(err) or "client connection closed" in str(err): ## Let's just log this exception without alerting the admin: register_exception(req=self) else: raise
def _extract_packages(self):
    """
    Extract a package in a new directory.

    Each retrieved package is unpacked into its own freshly-created
    temporary directory under CFG_TMPSHAREDDIR; the list of those
    directories is returned.
    """
    self.path_unpacked = []
    if not hasattr(self, "retrieved_packages_unpacked"):
        self.retrieved_packages_unpacked = [self.package_name]
    for package_path in self.retrieved_packages_unpacked:
        self.logger.debug("Extracting package: %s" % (package_path,))
        # The publisher tag is part of the temp-dir name for traceability.
        publisher = 'EPJC' if 'EPJC' in package_path else 'JHEP'
        dir_prefix = 'scoap3_package_%s_%s_' % (publisher, datetime.now())
        target_dir = mkdtemp(prefix=dir_prefix, dir=CFG_TMPSHAREDDIR)
        self.path_unpacked.append(target_dir)
        try:
            ZipFile(package_path).extractall(target_dir)
        except Exception:
            register_exception(alert_admin=True,
                               prefix="Springer error extracting package.")
            self.logger.error("Error extraction package file: %s"
                              % (package_path,))
    return self.path_unpacked
def perform_request_article(req, journal_name, issue_number, ln,
                            category, recid, editor=False, verbose=0):
    """
    Central logic function for article pages.
    Loads the format template for article display and displays the
    requested article using BibFormat.
    'Editor' mode generates edit links on the article view page and
    disables caching.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and \
           (unreleased_issues_mode == 'all' or \
            (unreleased_issues_mode == 'future' and \
             issue_is_later_than(issue_number, current_issue))):
            # Redirect to the latest released issue (issue is 'nb/year',
            # the URL wants year then number).
            redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % \
                            (CFG_SITE_URL, journal_name,
                             current_issue.split('/')[1],
                             current_issue.split('/')[0], ln))
    try:
        index_page_template = get_journal_template('detailed',
                                                   journal_name, ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box()
    # NOTE(review): excerpt truncated — the template rendering follows
    # in the full source.
def insert_cit_ref_list_intodb(citation_dic, reference_dic, selfcbdic,
                               selfdic, authorcitdic):
    """Insert the reference and citation list into the database"""
    insert_into_cit_db(reference_dic, "reversedict")
    insert_into_cit_db(citation_dic, "citationdict")
    insert_into_cit_db(selfcbdic, "selfcitedbydict")
    insert_into_cit_db(selfdic, "selfcitdict")
    for a in authorcitdic.keys():
        lserarr = (serialize_via_marshal(authorcitdic[a]))
        # author name: replace " with something else.  Strings are
        # immutable, so the result must be re-assigned — the original
        # discarded it, leaving the quotes in place.
        a = a.replace('"', '\'')
        a = unicode(a, 'utf-8')
        try:
            ablob = run_sql(
                "select hitlist from rnkAUTHORDATA where aterm = %s", (a, ))
            if not (ablob):
                run_sql(
                    "insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)",
                    (a, lserarr))
            else:
                run_sql(
                    "UPDATE rnkAUTHORDATA SET hitlist = %s where aterm=%s",
                    (lserarr, a))
        except Exception:
            register_exception(
                prefix="could not read/write rnkAUTHORDATA aterm=" +
                a + " hitlist=" + str(lserarr),
                alert_admin=True)
def perform_request_index(req, journal_name, issue_number, ln,
                          category, editor=False, verbose=0):
    """
    Central logic function for index pages.
    Brings together format templates and MARC rules from the config, with
    the requested index page, given by the url parameters.
    From config:
        - page template for index pages -> formatting
        - MARC rule list -> Category Navigation
        - MARC tag used for issue numbers -> search (later in the
          format elements)
    Uses BibFormatObject and format_with_format_template to produce the
    required HTML.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and \
           (unreleased_issues_mode == 'all' or \
            (unreleased_issues_mode == 'future' and \
             issue_is_later_than(issue_number, current_issue))):
            # Redirect to the latest released issue (issue is 'nb/year',
            # the URL wants year then number).
            redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % \
                            (CFG_SITE_URL, journal_name,
                             current_issue.split('/')[1],
                             current_issue.split('/')[0], ln))
    try:
        index_page_template = get_journal_template('index',
                                                   journal_name, ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box()
    # NOTE(review): excerpt truncated — the template rendering follows
    # in the full source.
def render_self_citations(d_recids, d_total_recs, ln):
    """
    Render the self-citations display.

    NOTE(review): only the tag-loading prologue is visible in this
    excerpt; the actual rendering presumably follows in the full source.
    """
    try:
        tags = get_authors_tags()
    except IndexError, e:
        # A missing author tag in the configuration is reported to the
        # admin and the display degrades to an empty string.
        register_exception(prefix="attribute " + \
                           str(e) + " missing in config",
                           alert_admin=True)
        return ""
def perform_request_create_group(uid, group_name, group_description,
                                 join_policy, ln=CFG_SITE_LANG):
    """Create new group.
    @param group_name: name of the group entered
    @param group_description: description of the group entered
    @param join_policy: join policy of the group entered
    @param ln: language
    @return: body with warnings
        warning != [] if group_name or join_policy are not valid
        or if the name already exists in the database
        body="1" if succeed in order to display info on the main page
    """
    _ = gettext_set_language(ln)
    body = ""
    warnings = []
    infos = []
    if group_name == "":
        # Empty name: raise-and-catch so the warning is registered with a
        # traceback, then redisplay the input form carrying the warning.
        try:
            raise InvenioWebSessionWarning(_('Please enter a group name.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_input_create_group(group_name,
                                                  group_description,
                                                  join_policy,
                                                  warnings=warnings)
    # NOTE(review): excerpt truncated — join-policy validation, the
    # duplicate-name check and the success path follow in the full source.
def _download_tars(self, check_integrity=True):
    """
    Download every retrieved tar package into CFG_TAR_FILES.

    @param check_integrity: verify the packages on the server before
        downloading them.
    @return: list of local target paths of the packages.
    """
    if check_integrity:
        self.ftp.check_pkgs_integrity(self.retrieved_packages, self.logger)
    print("Downloading %i tar packages." % (len(self.retrieved_packages)))
    # Create progress bar
    total_count = len(self.files_list)
    for i, filename in enumerate(self.retrieved_packages.iterkeys(),
                                 start=1):
        self.logger.info("Downloading tar package %s of %s: %s" % (
            i, total_count, filename, ))
        unpack_path = join(CFG_TAR_FILES, filename)
        # Record the target path exactly once; the original appended it a
        # second time after a successful download, producing duplicates.
        self.retrieved_packages_unpacked.append(unpack_path)
        try:
            self.ftp.download(filename, CFG_TAR_FILES)
            self.packages_delivery.append((filename[0:-4], datetime.now()))
        except Exception:
            register_exception(alert_admin=True,
                               prefix="Elsevier package download failed.")
            self.logger.error("Error downloading tar file %s of %s: %s" % (
                i, total_count, filename, ))
            print(sys.exc_info())
    return self.retrieved_packages_unpacked
def perform_request_leave_group(uid, grpID, confirmed=0, ln=CFG_SITE_LANG):
    """Leave group.
    @param uid: user ID
    @param grpID: ID of the group the user wants to leave
    @param warnings: warnings != [] if 0 group is selected
    @param confirmed: a confirmed page is first displayed
    @param ln: language
    @return: body with warnings
    """
    _ = gettext_set_language(ln)
    body = ""
    warnings = []
    infos = []
    if not grpID == -1:
        if confirmed:
            # Confirmed: actually leave and show the refreshed overview.
            db.leave_group(grpID, uid)
            infos.append(CFG_WEBSESSION_INFO_MESSAGES["LEAVE_GROUP"])
            body = perform_request_groups_display(uid, infos=infos,
                                                  warnings=warnings, ln=ln)
        else:
            # First pass: ask for confirmation.
            body = websession_templates.tmpl_confirm_leave(uid, grpID, ln)
    else:
        # No group selected: raise-and-catch so the warning is registered
        # with a traceback, then redisplay the selection form.
        try:
            raise InvenioWebSessionWarning(_('Please select one group.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_input_leave_group(uid, warnings=warnings,
                                                 ln=ln)
    # NOTE(review): excerpt ends here; the full source presumably
    # returns 'body'.
def _create_icon(file_path, icon_size, format='gif', verbosity=9):
    """
    Creates icon of given file.

    Returns path to the icon. If creation fails, return None, and
    register exception (send email to admin).

    Parameters:

     - file_path : *str* full path to icon

     - icon_size : *int* the scaling information to be used for the
                   creation of the new icon.

     - verbosity : *int* the verbosity level under which the program
                   is to run;
    """
    icon_path = None
    try:
        filename = os.path.splitext(os.path.basename(file_path))[0]
        (icon_dir, icon_name) = create_icon(
            {'input-file':file_path,
             'icon-name': "icon-%s" % filename,
             'multipage-icon': False,
             'multipage-icon-delay': 0,
             'icon-scale': icon_size,
             'icon-file-format': format,
             'verbosity': verbosity})
        icon_path = icon_dir + os.sep + icon_name
    except InvenioWebSubmitIconCreatorError, e:
        # Creation failure is non-fatal: log without alerting the admin
        # and leave icon_path as None.
        register_exception(prefix='Icon for file %s could not be created: %s' % \
                           (file_path, str(e)),
                           alert_admin=False)
    # NOTE(review): excerpt ends without 'return icon_path' — the full
    # source presumably returns it; confirm before reuse.
def perform_request_update_group(uid, grpID, group_name, group_description,
                                 join_policy, ln=CFG_SITE_LANG):
    """Update group datas in database.
    @param uid: user ID
    @param grpID: ID of the group
    @param group_name: name of the group
    @param group_description: description of the group
    @param join_policy: join policy of the group
    @param ln: language
    @return: body with warnings
    """
    body = ''
    warnings = []
    infos = []
    _ = gettext_set_language(ln)
    # Checked here so later validation can reject duplicate names.
    group_name_available = db.group_name_exist(group_name)
    if group_name == "":
        # Empty name: raise-and-catch so the warning is registered with a
        # traceback, then redisplay the edit form carrying the warning.
        try:
            raise InvenioWebSessionWarning(_('Please enter a group name.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_edit_group(uid, grpID, warnings=warnings,
                                          ln=ln)
    # NOTE(review): excerpt truncated — join-policy/duplicate-name
    # validation and the actual update follow in the full source.
def calculate_RFC2104_HMAC(data, _amazon_secret_access_key):
    """
    Computes a RFC 2104 compliant HMAC Signature and then Base64 encodes it.

    Module hashlib must be installed if Python < 2.5
    <http://pypi.python.org/pypi/hashlib/20081119>

    @param data: data to sign
    @param _amazon_secret_access_key: your Amazon secret key
    @type data: string
    @type _amazon_secret_access_key: string. Empty if hashlib module not
        installed
    """
    if not HASHLIB_IMPORTED:
        # Raise-and-catch so register_exception() records a traceback.
        try:
            raise Exception("Module hashlib not installed. Please install it.")
        except:
            from invenio.errorlib import register_exception
            register_exception(stream='warning', alert_admin=True,
                               subject='Cannot create AWS signature')
        return ""
    # hashlib is available: select the digest constructor.
    if sys.version_info < (2, 5):
        # compatibility mode for Python < 2.5 and hashlib
        digest_constructor = _MySHA256(sha256())
    else:
        digest_constructor = sha256
    raw_signature = hmac.new(_amazon_secret_access_key, data,
                             digest_constructor).digest()
    return base64.encodestring(raw_signature).strip()
def perform_request_delete_group(uid, grpID, confirmed=0, ln=CFG_SITE_LANG):
    """First display confirm message(confirmed=0).
    then(confirmed=1) delete group and all its members
    @param uid: user ID
    @param grpID: ID of the group
    @param confirmed: =1 if confirmed message has been previously displayed
    @param ln: language
    @return: body with warnings
    """
    body = ""
    warnings = []
    infos = []
    _ = gettext_set_language(ln)
    group_infos = db.get_group_infos(grpID)
    user_status = db.get_user_status(uid, grpID)
    if not group_infos:
        # Group vanished (e.g. deleted concurrently): raise-and-catch so
        # the warning is registered with a traceback, then show the
        # groups overview with the warning.
        try:
            raise InvenioWebSessionWarning(
                _('The group has already been deleted.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_groups_display(uid, infos=infos,
                                              warnings=warnings,
                                              ln=CFG_SITE_LANG)
    # NOTE(review): excerpt truncated — the admin check, confirmation
    # page and actual deletion follow in the full source.
def get_publication_information(self, xml):
    """
    Extract journal/issue publication metadata from an article XML DOM.

    @param xml: DOM document of the article.
    @return: tuple (journal, issn, volume, issue, first_page, last_page,
        year, doi).
    """
    jid = get_value_in_tag(xml, "journal-title")
    journal = ""
    if "European Physical Journal" in jid:
        journal = "EPJC"
    try:
        art = xml.getElementsByTagName('article-meta')[0]
    except IndexError as err:
        register_exception()
        print >> sys.stderr, "ERROR: XML corrupted: %s" % err
        pass
    except Exception as err:
        register_exception()
        print >> sys.stderr, "ERROR: Exception captured: %s" % err
        pass
    # NOTE(review): if 'article-meta' is absent, 'art' is unbound here
    # and the next call raises NameError — confirm intended behaviour.
    issn = self.get_issn(art)
    volume = get_value_in_tag(art, "volume")
    issue = get_value_in_tag(art, "issue")
    year = self.get_date(art)
    first_page = get_value_in_tag(art, "fpage")
    last_page = get_value_in_tag(art, "lpage")
    doi = self.get_doi(art)
    return (journal, issn, volume, issue, first_page, last_page, year, doi)
def process_alerts(alerts):
    """Process the given alerts and store the records found to the user
    defined baskets and/or notify them by e-mail"""
    # TBD: do not generate the email each time, forge it once and then
    # send it to all appropriate people
    for alert in alerts['alerts']:
        if alert_use_basket_p(alert):
            add_records_to_basket(alerts['records'], alert[2])
        if alert_use_notification_p(alert):
            argstr = update_arguments(alerts['argstr'], alerts['date_from'],
                                      alerts['date_until'])
            try:
                email_notify(alert, alerts['records'], argstr)
            except Exception:
                # There were troubles sending this alert, so register
                # this exception and continue with other alerts:
                register_exception(alert_admin=True,
                                   prefix="Error when sending alert %s, %s\n." % \
                                   (repr(alert), repr(argstr)))
            # Inform the admin when external collections time out
            if len(alerts['records'][1][1]) > 0:
                register_exception(alert_admin=True,
                                   prefix="External collections %s timed out when sending alert %s, %s\n." % \
                                   (", ".join(alerts['records'][1][1]),
                                    repr(alert), repr(argstr)))
        # Remember when this alert last ran, whatever its delivery mode.
        update_date_lastrun(alert)
def record_get_field(record, field):
    """Collect values from ``record`` matching a field specification.

    ``field`` is a tag string like ``"100"``, ``"100__"`` or ``"100__a"``:
    three-digit tag, optional two indicators ('_' stands for blank), and an
    optional subfield code. Controlfields (tag starting with '00') yield
    their raw value; datafields yield either the first matching subfield
    value or all subfield values joined with spaces.
    """
    values = []
    tag = field[:3]
    # Optional components; missing positions stay None (as in a short spec).
    ind1 = field[3] if len(field) > 3 else None
    ind2 = field[4] if len(field) > 4 else None
    code = field[5] if len(field) > 5 else None
    if ind1 == "_":
        ind1 = " "
    if ind2 == "_":
        ind2 = " "
    candidates = record.get(tag)
    if not candidates:
        return values
    for candidate in candidates:
        if tag[:2] == "00":
            # Controlfield: no indicators or subfields, just the value.
            values.append(candidate.value)
            continue
        try:
            matches = ((ind1 and ind2 and
                        ind1 == candidate.ind1 and ind2 == candidate.ind2)
                       or (not ind1 and not ind2))
            if matches:
                if code:
                    values.append(candidate.find_subfields(code)[0].value)
                else:
                    values.append(" ".join(sub.value
                                           for sub in candidate.subfields))
        except:
            register_exception()
    return values
def _crawl_elsevier_and_find_issue_xml(self): """ Information about the current volume, issue, etc. is available in a file called issue.xml that is available in a higher directory. """ self._found_issues = [] if not self.path and not self.package_name: for issue in self.conn._get_issues(): dirname = issue.rstrip('/issue.xml') try: self._normalize_issue_dir_with_dtd(dirname) self._found_issues.append(dirname) except Exception as err: register_exception() print("ERROR: can't normalize %s: %s" % (dirname, err)) else: def visit(dummy, dirname, names): if "issue.xml" in names: try: self._normalize_issue_dir_with_dtd(dirname) self._found_issues.append(dirname) except Exception as err: register_exception() print("ERROR: can't normalize %s: %s" % (dirname, err)) walk(self.path, visit, None)
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_p1, oai_set_f1,oai_set_m1, oai_set_p2, oai_set_f2,oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2): """Add a definition into the OAI Repository""" try: if not oai_set_spec: oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC set_definition = 'c=' + oai_set_collection + ';' + \ 'p1=' + oai_set_p1 + ';' + \ 'f1=' + oai_set_f1 + ';' + \ 'm1=' + oai_set_m1 + ';' + \ 'op1='+ oai_set_op1 + ';' + \ 'p2=' + oai_set_p2 + ';' + \ 'f2=' + oai_set_f2 + ';' + \ 'm2=' + oai_set_m2 + ';' + \ 'op2='+ oai_set_op2 + ';' + \ 'p3=' + oai_set_p3 + ';' + \ 'f3=' + oai_set_f3 + ';' + \ 'm3=' + oai_set_m3 + ';' run_sql("""INSERT INTO oaiREPOSITORY (id, setName, setSpec, setCollection, setDescription, setDefinition, setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3) VALUES (0, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", (oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, set_definition, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3)) return (1, "") except StandardError, e: register_exception(alert_admin=True) return (0, e)
def get_oai_set(id=''): """Returns a row parameters for a given id""" sets = [] sql = "SELECT id, setSpec, setName, setCollection, setDescription, p1,f1,m1, p2,f2,m2, p3,f3,m3, setDefinition FROM oaiREPOSITORY" try: if id: sql += " WHERE id=%s" % id sql += " ORDER BY setSpec asc" res = run_sql(sql) for row in res: set = [''] * 16 set[0] = row[0] set[1] = row[1] set[2] = row[2] params = parse_set_definition(row[14]) set[3] = params.get('c', '') set[5] = params.get('p1', '') set[6] = params.get('f1', '') set[7] = params.get('m1', '') set[8] = params.get('p2', '') set[9] = params.get('f2', '') set[10] = params.get('m2', '') set[11] = params.get('p3', '') set[12] = params.get('f3', '') set[13] = params.get('m3', '') set[14] = params.get('op1', 'a') set[15] = params.get('op2', 'a') sets.append(set) return sets except StandardError, e: register_exception(alert_admin=True) return str(e)
def log_search_result_obelix(user_info, original_result_ordered, record_ids,
                             results_final_colls_scores,
                             cols_in_result_ordered,
                             seconds_to_rank_and_print, jrec, rg, rm, cc):
    """Public method. Log a search result to the Obelix search-engine logger.

    Logging is strictly best-effort: any failure is reported to the admin
    via register_exception() and never propagates to the caller.

    :param user_info:
    :param original_result_ordered:
    :param record_ids:
    :param results_final_colls_scores:
    :param cols_in_result_ordered:
    :param seconds_to_rank_and_print:
    :param jrec:
    :param rg:
    :param rm:
    :param cc:
    :return:
    """
    try:
        obelix_logger = ObelixSearchEngineLogger()
        obelix_logger.search_result(user_info, original_result_ordered,
                                    record_ids, results_final_colls_scores,
                                    cols_in_result_ordered,
                                    seconds_to_rank_and_print,
                                    jrec, rg, rm, cc)
    except Exception:
        register_exception(alert_admin=True)
def search(self, req, form):
    """ Display search interface """
    # Wash the raw request arguments into typed values with defaults.
    argd = wash_urlargd(
        form,
        {
            "name": (str, ""),
            "issue": (str, ""),
            "archive_year": (str, ""),
            "archive_issue": (str, ""),
            "archive_select": (str, "False"),
            "archive_date": (str, ""),
            "archive_search": (str, "False"),
            "ln": (str, CFG_SITE_LANG),
            "verbose": (int, 0),
        },
    )
    try:
        # FIXME: if journal_name is empty, redirect
        # Validate language first, then journal name, then the
        # journal-dependent archive parameters (order matters: each wash
        # step needs the previous washed value).
        ln = wash_journal_language(argd["ln"])
        washed_journal_name = wash_journal_name(ln, argd["name"])
        archive_issue = wash_issue_number(ln, washed_journal_name, argd["archive_issue"])
        archive_date = wash_archive_date(ln, washed_journal_name, argd["archive_date"])
        # Raw flags; presumably "True"/"False" strings -- TODO confirm
        # against the template that submits this form.
        archive_select = argd["archive_select"]
        archive_search = argd["archive_search"]
    except InvenioWebJournalNoJournalOnServerError, e:
        # No journal configured on this server: show the error box.
        register_exception(req=req)
        return e.user_box(req)
def insert_cit_ref_list_intodb(citation_dic, reference_dic, selfcbdic,
                               selfdic, authorcitdic):
    """Insert the reference and citation list into the database.

    The four citation dictionaries go to rnkCITATIONDATA via
    insert_into_cit_db(); per-author hitlists are upserted into
    rnkAUTHORDATA. Database errors on individual authors are logged and
    skipped.
    """
    insert_into_cit_db(reference_dic, "reversedict")
    insert_into_cit_db(citation_dic, "citationdict")
    insert_into_cit_db(selfcbdic, "selfcitedbydict")
    insert_into_cit_db(selfdic, "selfcitdict")
    for a in authorcitdic.keys():
        # Serialize from the original key before it is re-written below.
        lserarr = (serialize_via_marshal(authorcitdic[a]))
        # author name: replace " with ' -- BUGFIX: str.replace returns a
        # new string, the original code discarded the result.
        a = a.replace('"', '\'')
        a = unicode(a, 'utf-8')
        try:
            ablob = run_sql("select hitlist from rnkAUTHORDATA where aterm = %s", (a,))
            if not (ablob):
                #print "insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)" , (a,lserarr)
                run_sql("insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)",
                        (a, lserarr))
            else:
                #print "UPDATE rnkAUTHORDATA SET hitlist = %s where aterm=%s""" , (lserarr,a)
                run_sql("UPDATE rnkAUTHORDATA SET hitlist = %s where aterm=%s",
                        (lserarr, a))
        except:
            register_exception(prefix="could not read/write rnkAUTHORDATA aterm="+a+" hitlist="+str(lserarr),
                               alert_admin=True)
def sub(self, req, form):
    """DEPRECATED: /submit/sub is deprecated now, so raise email to the admin (but allow submission to continue anyway)"""
    args = wash_urlargd(form, {'password': (str, '')})
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../sub/", navmenuid='submit')
    # Raise-and-catch so register_exception() emails the admin with a full
    # traceback, while the submission itself is still allowed through.
    try:
        raise DeprecationWarning, 'submit/sub handler has been used. Please use submit/direct. e.g. "submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"'
    except DeprecationWarning:
        register_exception(req=req, alert_admin=True)
    ln = args['ln']
    _ = gettext_set_language(ln)
    #DEMOBOO_RN=DEMO-BOOK-2008-001&ln=en&password=1223993532.26572%40APPDEMOBOO
    params = dict(form)
    password = args['password']
    if password:
        # The legacy 'password' carries "access@doctype" (or just doctype);
        # translate it into the submit/direct parameters.
        del params['password']
        if "@" in password:
            params['access'], params['sub'] = password.split('@', 1)
        else:
            params['sub'] = password
    else:
        # No password argument: fall back to parsing the raw query string,
        # whose last '@'-separated component is the doctype.
        # NOTE: `args` is deliberately reused here for the split pieces.
        args = str(req.args).split('@')
        if len(args) > 1:
            params = {'sub': args[-1]}
            args = '@'.join(args[:-1])
            params.update(cgi.parse_qs(args))
        else:
            return warning_page(_("Sorry, invalid URL..."), req, ln=ln)
    # Forward everything to the supported submit/direct endpoint.
    url = "%s/submit/direct?%s" % (CFG_SITE_SECURE_URL, urlencode(params, doseq=True))
    redirect_to_url(req, url)
def perform_request_index(req, journal_name, issue_number, ln, category, editor=False, verbose=0):
    """
    Central logic function for index pages.
    Brings together format templates and MARC rules from the config, with
    the requested index page, given by the url parameters.
    From config:
        - page template for index pages -> formatting
        - MARC rule list -> Category Navigation
        - MARC tag used for issue numbers -> search (later in the format
          elements)
    Uses BibFormatObject and format_with_format_template to produce the
    required HTML.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        # Editors may always preview unreleased issues; readers are
        # redirected depending on the configured hiding mode ("all" hides
        # every unreleased issue, "future" only those after the current one).
        if not editor and (
            unreleased_issues_mode == "all"
            or (unreleased_issues_mode == "future" and issue_is_later_than(issue_number, current_issue))
        ):
            # Issue numbers look like "number/year"; the URL wants
            # /journal/<name>/<year>/<number> -- presumably, hence the
            # swapped split() indices. TODO confirm issue format.
            redirect_to_url(
                req,
                "%s/journal/%s/%s/%s?ln=%s"
                % (CFG_SITE_URL, journal_name, current_issue.split("/")[1], current_issue.split("/")[0], ln),
            )
    try:
        index_page_template = get_journal_template("index", journal_name, ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        # Misconfigured journal: show the template-not-found error box.
        register_exception(req=req)
        return e.user_box(req)
def insert_into_cit_db(dic, name):
    """Serialize ``dic`` and upsert it into rnkCITATIONDATA under ``name``.

    Shared helper for the citation-ranking writers; any error is swallowed
    after being reported via register_exception().
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    try:
        payload = serialize_via_marshal(dic)
        write_message("size of " + name + " " + str(len(payload)))
        # Manual upsert: update when the row already exists, else insert.
        existing = run_sql(
            "select object_name from rnkCITATIONDATA where object_name = %s",
            (name, ))
        if existing:
            run_sql(
                "UPDATE rnkCITATIONDATA SET object_value = %s where object_name = %s",
                (payload, name))
        else:
            #there was no entry for name, let's force..
            run_sql(
                "INSERT INTO rnkCITATIONDATA(object_name,object_value) values (%s,%s)",
                (name, payload))
        # Stamp the row with the time of this write.
        run_sql(
            "UPDATE rnkCITATIONDATA SET last_updated = %s where object_name = %s",
            (timestamp, name))
    except:
        register_exception(prefix="could not write " + name + " into db",
                           alert_admin=True)
def perform_request_article(req, journal_name, issue_number, ln, category, recid, editor=False, verbose=0):
    """
    Central logic function for article pages.
    Loads the format template for article display and displays the
    requested article using BibFormat. 'Editor' mode generates edit links
    on the article view page and disables caching.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        # Editors may always preview unreleased issues; readers are
        # redirected depending on the configured hiding mode ("all" hides
        # every unreleased issue, "future" only those after the current one).
        if not editor and (
            unreleased_issues_mode == "all"
            or (unreleased_issues_mode == "future" and issue_is_later_than(issue_number, current_issue))
        ):
            # Issue numbers look like "number/year"; the URL wants
            # /journal/<name>/<year>/<number> -- presumably, hence the
            # swapped split() indices. TODO confirm issue format.
            redirect_to_url(
                req,
                "%s/journal/%s/%s/%s?ln=%s"
                % (CFG_SITE_URL, journal_name, current_issue.split("/")[1], current_issue.split("/")[0], ln),
            )
    try:
        index_page_template = get_journal_template("detailed", journal_name, ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        # Misconfigured journal: show the template-not-found error box.
        register_exception(req=req)
        return e.user_box(req)
def sub(self, req, form):
    """DEPRECATED: /submit/sub is deprecated now, so raise email to the admin (but allow submission to continue anyway)"""
    args = wash_urlargd(form, {"password": (str, "")})
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../sub/", navmenuid="submit")
    # Raise-and-catch so register_exception() emails the admin with a full
    # traceback, while the submission itself is still allowed through.
    try:
        raise DeprecationWarning, 'submit/sub handler has been used. Please use submit/direct. e.g. "submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"'
    except DeprecationWarning:
        register_exception(req=req, alert_admin=True)
    ln = args["ln"]
    _ = gettext_set_language(ln)
    # DEMOBOO_RN=DEMO-BOOK-2008-001&ln=en&password=1223993532.26572%40APPDEMOBOO
    params = dict(form)
    password = args["password"]
    if password:
        # The legacy 'password' carries "access@doctype" (or just doctype);
        # translate it into the submit/direct parameters.
        del params["password"]
        if "@" in password:
            params["access"], params["sub"] = password.split("@", 1)
        else:
            params["sub"] = password
    else:
        # No password argument: fall back to parsing the raw query string,
        # whose last '@'-separated component is the doctype.
        # NOTE: `args` is deliberately reused here for the split pieces.
        args = str(req.args).split("@")
        if len(args) > 1:
            params = {"sub": args[-1]}
            args = "@".join(args[:-1])
            params.update(cgi.parse_qs(args))
        else:
            return warningMsg(_("Sorry, invalid URL..."), req, ln=ln)
    # Forward everything to the supported submit/direct endpoint.
    url = "%s/submit/direct?%s" % (CFG_SITE_URL, urlencode(params, doseq=True))
    redirect_to_url(req, url)
def _get_metadata_and_fulltex_dir(self):
    # For every downloaded Elsevier package in self.files_list, parse its
    # dataset.xml manifest and record, per journal item, the paths of the
    # metadata XML ('ml') and the web PDF into self.found_articles.
    print >> sys.stdout, "\nRetrieving journal items directories."
    # Create progress bar (one step per package in self.files_list)
    p_bar = progress_bar(len(self.files_list))
    sys.stdout.write(p_bar.next())
    sys.stdout.flush()
    print self.path_unpacked
    print self.files_list
    for name in self.files_list:
        # dataset.xml lives in the directory named after the package
        # (file name minus its extension).
        dataset_link = join(self.path_unpacked, name.split('.')[0],
                            'dataset.xml')
        try:
            dataset_xml = parse(dataset_link)
        except Exception, err:
            # Unreadable manifest: report and skip this package.
            register_exception(alert_admin=True,
                               prefix="Elsevier package download failed.")
            self.logger.error("Error reading dataset.xml file: %s"
                              % (dataset_link,))
            print >> sys.stdout, "\nError reading dataset.xml file: %s" % (dataset_link,)
            continue
        # created = get_value_in_tag(dataset_xml.getElementsByTagName('dataset-unique-ids')[0], 'timestamp')
        journal_items = dataset_xml.getElementsByTagName('journal-item')
        self.logger.info("Getting metadata and fulltex directories for %i journal items." % (len(journal_items),))
        for journal_item in journal_items:
            # 'ml' holds the article metadata path, 'web-pdf' the fulltext.
            xml_pathname = join(self.path_unpacked, name.split('.')[0],
                                xml_to_text(journal_item.getElementsByTagName('ml')[0].getElementsByTagName('pathname')[0]))
            pdf_pathname = join(self.path_unpacked, name.split('.')[0],
                                xml_to_text(journal_item.getElementsByTagName('web-pdf')[0].getElementsByTagName('pathname')[0]))
            self.found_articles.append(dict(xml=xml_pathname, pdf=pdf_pathname))
        self.logger.info("Got metadata and fulltex directories of %i journals." % (len(self.found_articles),))
        # Advance the progress bar after each processed package.
        sys.stdout.write(p_bar.next())
        sys.stdout.flush()