def template_context_function(id_bibrec, pattern, qid):
    """
    @param id_bibrec ID of the record
    @param pattern search pattern
    @param qid query id, used to look the pattern up in the cache when no
        pattern is given
    @return HTML containing the fulltext snippet
    """

    if not pattern:
        pattern = get_pattern_from_cache(qid)

    nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get('', 0)
    max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get('', 0)

    if id_bibrec and pattern:
        if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and 'fulltext:' in pattern:
            terms = get_fulltext_terms_from_search_pattern(pattern)
            if terms:
                snippets = ''
                try:
                    snippets = solr_get_snippet(terms, id_bibrec, nb_chars,
                                                max_snippets).decode('utf8')
                    if snippets: return ' ... ' + snippets + ' ... '
                except:
                    register_exception()
                return ''
        else:
            return ''
    else:
        return None
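A short illustration of the return contract implied by the branches above; the record id and patterns are placeholders, not values from the original code.

# Illustrative return values of template_context_function:
#   template_context_function(None, '', qid)              -> None  (no record or pattern)
#   template_context_function(12, 'title:ellis', qid)     -> ''    (no fulltext terms in the pattern)
#   template_context_function(12, 'fulltext:ellis', qid)  -> ' ... <matched text> ... '  (when Solr returns snippets)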
def hocr2pdf(input_file, output_file=None, working_dir=None, font="Courier", author=None, keywords=None, subject=None, title=None, draft=False, pdfopt=True, **dummy):
    """
    @param input_file the path of the hOCR file to convert.
    @param output_file the path of the resulting PDF (derived automatically
        when not specified).
    @param working_dir the directory containing images to build the PDF.
    @param font the default font (e.g. Courier, Times-Roman).
    @param author the author name.
    @param keywords the keywords of the document.
    @param subject the subject of the document.
    @param title the title of the document.
    @param draft whether to enable debug information in the output.
    @param pdfopt whether to run the PDF optimizer (pdf2pdfopt) on the result.
    """
    if working_dir:
        working_dir = os.path.abspath(working_dir)
    else:
        working_dir = os.path.abspath(os.path.dirname(input_file))

    if pdfopt:
        input_file, tmp_output_file, dummy = prepare_io(input_file, output_ext='.pdf', need_working_dir=False)
    else:
        input_file, output_file, dummy = prepare_io(input_file, output_file=output_file, need_working_dir=False)
        tmp_output_file = output_file

    try:
        create_pdf(extract_hocr(open(input_file).read()), tmp_output_file, font=font, author=author, keywords=keywords, subject=subject, title=title, image_path=working_dir, draft=draft)
    except:
        register_exception()
        raise

    if pdfopt:
        output_file = pdf2pdfopt(tmp_output_file, output_file)
        os.remove(tmp_output_file)
        return output_file
    else:
        return tmp_output_file
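A hypothetical call to hocr2pdf as defined above; the paths and metadata are placeholders rather than values taken from the original code.

# Convert an hOCR file into a PDF without running the optimizer step.
pdf_path = hocr2pdf('/tmp/page.hocr',
                    output_file='/tmp/page.pdf',
                    title='Scanned page',
                    author='J. Doe',
                    pdfopt=False)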
Example #3
    def _download_tars(self, check_integrity=True):
        if check_integrity:
            self.ftp.check_pkgs_integrity(self.retrieved_packages, self.logger)

        print("Downloading %i tar packages." % (len(self.retrieved_packages)))
        # Create progress bar
        total_count = len(self.files_list)

        for i, filename in enumerate(self.retrieved_packages.iterkeys(),
                                     start=1):
            self.logger.info("Downloading tar package %s of %s: %s"
                             % (i, total_count, filename,))
            unpack_path = join(CFG_TAR_FILES, filename)
            self.retrieved_packages_unpacked.append(unpack_path)
            try:
                self.ftp.download(filename, CFG_TAR_FILES)
                self.retrieved_packages_unpacked.append(unpack_path)
                self.packages_delivery.append((filename[0:-4], datetime.now()))
            except:
                register_exception(alert_admin=True,
                                   prefix="Elsevier package download failed.")
                self.logger.error("Error downloading tar file %s of %s: %s"
                                  % (i, total_count, filename,))
                print(sys.exc_info())

        return self.retrieved_packages_unpacked
Example #4
    def calculate_RFC2104_HMAC(data, _amazon_secret_access_key):
        """
        Computes a RFC 2104 compliant HMAC Signature and then Base64
        encodes it.

        Module hashlib must be installed if Python < 2.5
        <http://pypi.python.org/pypi/hashlib/20081119>

        @param data: data to sign
        @param _amazon_secret_access_key: your Amazon secret key

        @type data: string
        @type _amazon_secret_access_key: string. Empty if hashlib module not installed
        """
        if not HASHLIB_IMPORTED:
            try:
                raise Exception(
                    "Module hashlib not installed. Please install it.")
            except:
                from invenio.errorlib import register_exception
                register_exception(stream='warning',
                                   alert_admin=True,
                                   subject='Cannot create AWS signature')
                return ""
        else:
            if sys.version_info < (2, 5):
                # compatibility mode for Python < 2.5 and hashlib
                my_digest_algo = _MySHA256(sha256())
            else:
                my_digest_algo = sha256

        return base64.encodestring(
            hmac.new(_amazon_secret_access_key, data,
                     my_digest_algo).digest()).strip()
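For comparison, a minimal stand-alone sketch of the same RFC 2104 HMAC-SHA256 + Base64 pattern using only the standard library; it assumes hashlib is available and skips the Invenio error handling.

# Minimal HMAC-SHA256 + Base64 signing sketch; key and data are placeholders.
import base64
import hashlib
import hmac

def sign_rfc2104(data, secret_key):
    digest = hmac.new(secret_key, data, hashlib.sha256).digest()
    return base64.b64encode(digest).strip()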
 def _crawl_elsevier_and_find_main_xml(self):
     """
     A package contains several subdirectories, each corresponding to an
     article. An article is identified by the presence of both a main.pdf
     and a main.xml file in a given directory.
     """
     self.found_articles = []
     if not self.path and not self.package_name:
         for doc in self.conn.found_articles:
             dirname = doc['xml'].rstrip('/main.xml')
             try:
                 self._normalize_article_dir_with_dtd(dirname)
                 self.found_articles.append(dirname)
             except Exception as err:
                 register_exception()
                 print("ERROR: can't normalize %s: %s" % (dirname, err))
     else:
         def visit(dummy, dirname, names):
             if "main.xml" in names and "main.pdf" in names:
                 try:
                     self._normalize_article_dir_with_dtd(dirname)
                     self.found_articles.append(dirname)
                 except Exception as err:
                     register_exception()
                     print("ERROR: can't normalize %s: %s" % (dirname, err))
         walk(self.path, visit, None)
    def get_pdfa_record(self, path=None):
        from invenio.search_engine import search_pattern
        xml = self.get_article(path)
        rec = {}
        journal, issn, volume, issue, first_page, last_page, year, start_date, doi = self.get_publication_information(xml)

        recid = search_pattern(p='0247_a:"%s" AND NOT 980:"DELETED"' % (doi,))
        if recid:
            record_add_field(rec, '001', controlfield_value=recid[0])
        else:
            record_add_field(rec, '024', ind1='7', subfields=[('a', doi), ('2', 'DOI')])
            self.logger.error('Adding PDF/A. No paper with this DOI: %s. Trying to add it anyway.' % (doi,))
            register_exception(alert_admin=True, prefix="Adding PDF/A. No paper with this DOI: %s. Trying to add it anyway." % (doi,))

        try:
            if exists(join(path, 'main_a-2b.pdf')):
                record_add_field(rec, 'FFT', subfields=[('a', join(path, 'main_a-2b.pdf')), ('n', 'main'), ('f', '.pdf;pdfa')])
                self.logger.debug('Adding PDF/A to record: %s' % (doi,))
            elif exists(join(path, 'main.pdf')):
                record_add_field(rec, 'FFT', subfields=[('a', join(path, 'main.pdf'))])
                self.logger.debug('No PDF/A in VTEX package for record: %s' % (doi,))
            else:
                raise MissingFFTError("Record %s doesn't contain PDF file." % (doi,))
        except MissingFFTError, err:
            register_exception(alert_admin=True, prefix="Elsevier paper: %s is missing PDF." % (doi,))
            self.logger.warning("Record %s doesn't contain PDF file." % (doi,))
def _create_icon(file_path, icon_size, format='gif', verbosity=9):
    """
    Creates an icon for the given file.

    Returns the path to the icon. If creation fails, returns None and
    registers the exception.

    Parameters:

       - file_path : *str* full path to the file for which the icon is
                     to be created.

       - icon_size : *int* the scaling information to be used for the
                     creation of the new icon.

       - format : *str* the file format of the icon (e.g. gif).

       - verbosity : *int* the verbosity level under which the program
                     is to run.
    """
    icon_path = None
    try:
        filename = os.path.splitext(os.path.basename(file_path))[0]
        (icon_dir, icon_name) = create_icon({
            'input-file': file_path,
            'icon-name': "icon-%s" % filename,
            'multipage-icon': False,
            'multipage-icon-delay': 0,
            'icon-scale': icon_size,
            'icon-file-format': format,
            'verbosity': verbosity
        })
        icon_path = icon_dir + os.sep + icon_name
    except InvenioWebSubmitIconCreatorError, e:
        register_exception(prefix='Icon for file %s could not be created: %s' % \
                           (file_path, str(e)),
                           alert_admin=False)
    return icon_path
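A hypothetical call to _create_icon as defined above; the input path is a placeholder.

# Create a 180px GIF icon for a file; on failure the function registers the
# exception and returns None.
icon = _create_icon('/path/to/figure.pdf', icon_size=180, format='gif')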
Example #8
def bst_openaire_altmetric():
    """
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    a = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if ['Altmetric'] in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            json_res = a.doi(doi_val)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec, '035', subfields=[('a',
                    str(json_res['altmetric_id'])), ('9', 'Altmetric')])
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException, e:
            register_exception(prefix='Altmetric error (status code %s): %s' %
                (e.status_code, str(e)), alert_admin=False)
Example #9
def alert(req, journal_name="", ln=CFG_SITE_LANG, sent="False", plainText=u"",
          htmlMail="", recipients="", subject="", issue="", force="False"):
    """
    Sends an email alert, in HTML/PlainText or only PlainText to a mailing
    list to alert for new journal releases.
    """
    navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))

    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)

    try:
        journal_name = wash_journal_name(ln, journal_name)
        issue = wash_issue_number(ln,
                                         journal_name,
                                         issue)
        plain_text = wash_url_argument(plainText, 'str')
        html_mail = wash_url_argument(htmlMail, 'str')
        recipients = wash_url_argument(recipients, 'str')
        subject = wash_url_argument(subject, 'str')
        sent = wash_url_argument(sent, 'str')
        force = wash_url_argument(force, 'str')
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
 def article(self, req, form):
     """
     Article page.
     Washes all the parameters and stores them in journal_defaults dict
     for subsequent format_elements.
     Passes on to logic function and eventually returns HTML.
     """
     argd = wash_urlargd(form, {'name': (str, ""),
                                 'issue': (str, ""),
                                 'category': (str, ""),
                                 'number': (str, ""),
                                 'ln': (str, ""),
                                }
                         )
     try:
         ln = wash_journal_language(argd['ln'])
         journal_name = wash_journal_name(ln, argd['name'])
         issue = wash_issue_number(ln, journal_name,
                                   argd['issue'])
         issue_year = issue.split('/')[1]
         issue_number = issue.split('/')[0]
         category = wash_category(ln, argd['category'], journal_name, issue_number)
         number = wash_article_number(ln, argd['number'], journal_name)
         recid = get_recid_from_legacy_number(issue, category, int(number))
     except InvenioWebJournalNoJournalOnServerError, e:
         register_exception(req=req)
         return e.user_box(req)
Example #11
    def _extract_packages(self):
        """
        Extract a package in a new directory.
        """
        if not hasattr(self, "retrieved_packages_unpacked"):
            self.retrieved_packages_unpacked = [self.package_name]
        for path in self.retrieved_packages_unpacked:
            package_name = basename(path)
            self.path_unpacked = join(CFG_UNPACKED_FILES,
                                      package_name.split('.')[0])
            self.logger.debug(
                "Extracting package: %s" % (path.split("/")[-1], ))
            try:
                if "_archival_pdf" in self.path_unpacked:
                    self.path_unpacked = (
                        self.path_unpacked.rstrip("_archival_pdf"))
                    ZipFile(path).extractall(
                        join(self.path_unpacked, "archival_pdfs"))
                else:
                    ZipFile(path).extractall(self.path_unpacked)
                #TarFile.open(path).extractall(self.path_unpacked)
            except Exception:
                register_exception(
                    alert_admin=True, prefix="OUP error extracting package.")
                self.logger.error(
                    "Error extraction package file: %s" % (path, ))

        if hasattr(self, "path_unpacked"):
            return self.path_unpacked
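The extraction step itself relies only on the standard zipfile module; here is a minimal stand-alone sketch of the same pattern, with placeholder names and without the Invenio-specific error reporting.

# Extract a ZIP package into a target directory, logging failures instead of
# aborting the whole run.
from zipfile import ZipFile

def extract_package(package_path, target_dir, logger):
    try:
        ZipFile(package_path).extractall(target_dir)
    except Exception:
        logger.error("Error extracting package file: %s" % (package_path,))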
 def search(self, req, form):
     """
     Display search interface
     """
     argd = wash_urlargd(form, {'name': (str, ""),
                                'issue': (str, ""),
                                'archive_year': (str, ""),
                                'archive_issue': (str, ""),
                                'archive_select': (str, "False"),
                                'archive_date': (str, ""),
                                'archive_search': (str, "False"),
                                'ln': (str, CFG_SITE_LANG),
                                'verbose': (int, 0)})
     try:
         # FIXME: if journal_name is empty, redirect
         ln = wash_journal_language(argd['ln'])
         washed_journal_name = wash_journal_name(ln, argd['name'])
         archive_issue = wash_issue_number(ln, washed_journal_name,
                                           argd['archive_issue'])
         archive_date = wash_archive_date(ln, washed_journal_name,
                                          argd['archive_date'])
         archive_select = argd['archive_select']
         archive_search = argd['archive_search']
     except InvenioWebJournalNoJournalOnServerError, e:
         register_exception(req=req)
         return e.user_box(req)
def Generate_Group_File(parameters, curdir, form, user_info=None):
    """
    Generates a group file (stored in 'curdir/Group') for use with
    publiline.

    @param parameters: (dictionary) - must contain:
                      + group_name: (string) - the id of the Group for
                      use in the complex approval refereeing workflow

    @param curdir: (string) - the current submission's working
    directory.

    @param form: (dictionary) - form fields.

    @param user_info: (dictionary) - various information about the
                                     submitting user (includes the
                                     apache req object).

    @return: (string) - empty string.

    @Exceptions raised: InvenioWebSubmitFunctionError when an
                        unexpected error is encountered.
    """
    try:
        group_file = open("%s/%s" % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME), "w")
        group_file.write(parameters['group_name'])
        group_file.flush()
        group_file.close()
    except IOError, err:
        ## Unable to write the Group file to curdir.
        err_msg = "Error: Unable to create Group file [%s/%s]. " \
          "Perhaps check directory permissions. " \
          % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME)
        register_exception(req=req_obj, prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)
Example #14
 def run(self, run_localy=False):
     if not run_localy:
         try:
             self.connect()
             self._get_file_listing('.ready')
             self._download_file_listing()
         except LoginException as err:
             register_exception(alert_admin=True,
                                prefix=('Failed to connect to '
                                        'the Elsevier server. %s') % (err,))
             return
         except:
             self.logger.info('No new packages to process')
             return
         self._get_packages()
         self._download_tars()
         self._check_md5()
     else:
         self.logger.info("Running on local files.")
         self.retrieved_packages_unpacked = []
         self.files_list = []
         for p in listdir(CFG_TAR_FILES):
             self.retrieved_packages_unpacked.append(join(CFG_TAR_FILES, p))
         for p in listdir(CFG_READY_PACKAGES):
             self.files_list.append(p.strip(".ready.xml"))
     self._extract_packages()
     self._get_metadata_and_fulltex_dir()
Example #15
 def run(self, run_localy=False):
     if not run_localy:
         try:
             self.connect()
             self._get_file_listing('.ready')
             self._download_file_listing()
         except LoginException as err:
             register_exception(alert_admin=True,
                                prefix=('Failed to connect to '
                                        'the Elsevier server. %s') %
                                (err, ))
             return
         except Exception as e:
             self.logger.info('No new packages to process')
             self.logger.info('Registered error: %s' % e)
             return
         self._get_packages()
         self._download_tars()
         self._check_md5()
     else:
         self.logger.info("Running on local files.")
         self.retrieved_packages_unpacked = []
         self.files_list = []
         for p in listdir(CFG_TAR_FILES):
             self.retrieved_packages_unpacked.append(join(CFG_TAR_FILES, p))
         for p in listdir(CFG_READY_PACKAGES):
             self.files_list.append(p.strip(".ready.xml"))
     self._extract_packages()
     self._get_metadata_and_fulltex_dir()
Example #16
def regenerate(req, journal_name="", issue="", ln=CFG_SITE_LANG,
               confirmed_p="", publish_draft_articles_p=""):
    """
    Clears the cache for the given issue.
    """
    navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))

    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)

    try:
        journal_name = wash_journal_name(ln, journal_name)
        issue_number = wash_issue_number(ln, journal_name,
                                         issue)
        confirmed_p = wash_url_argument(confirmed_p, 'str') == "confirmed"
        publish_draft_articles_p = wash_url_argument(publish_draft_articles_p, 'str') == "move"

    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
Example #18
def issue_control(req, journal_name="", issue=[],
                  ln=CFG_SITE_LANG, action="cfg"):
    """
    Page that allows full control over creating, backtracing, adding to,
    removing from issues.
    """
    navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))

    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
        action = wash_url_argument(action, 'str')
        issue = wash_url_argument(issue, 'list')
        issues = [wash_issue_number(ln,journal_name, _issue) \
                  for _issue in issue \
                  if _issue != "ww/YYYY"]
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
Example #19
def format_record_for_bibedit_global(record, field):
    """
    Process the record to be manipulated by bibedit

    :param record: The record you want to manipulate.
    :param field: The field you are interested in.
    :return: Dict with the values needed by bibedit to work.
    """
    final_shape = {}
    final_shape["fields"] = {}
    try:
        fields = CFG_ARBITRARY_AUTOSUGGEST_FIELD[field]
        final_shape["print"] = record_get_field(record,
                                                fields["main"].keys()[0])[0]
        final_shape["fields"][field[:3]] = {}
        final_shape["fields"][field[:3]][fields["main"][
            fields["main"].keys()[0]]] = [
                record_get_field(record, fields["main"].keys()[0])[0]
            ]

        for field_to_add in fields["sub"]:
            final_shape["fields"][field[:3]][field_to_add.values()[0]] = [
                record_get_field(record,
                                 field_to_add.keys()[0])[0]
            ]

    except:
        register_exception()
        final_shape["print"] = ""

    return final_shape
Example #20
def rank_records_obelix(user_info, hitset, rg=10, jrec=0, settings=None):
    """
    Public method
    Ranks a given search result based on recommendations
    Expects the hitset to be sorted by latest last [1,2,3,4,5] (recids)
    """
    hitset = list(hitset)
    hitset.reverse()

    jrec = max(jrec - 1, 0)

    try:
        if not settings:
            settings = ObelixSearchEngineSettings()

        uid = ""

        if CFG_WEBSEARCH_OBELIX_USER_KEY:
            if CFG_WEBSEARCH_OBELIX_USER_KEY in user_info:
                uid = user_info[CFG_WEBSEARCH_OBELIX_USER_KEY]

        if settings.recommendations_impact == 0 or uid == 0 or uid == "" or uid == "0":
            records, scores = hitset, [0] * len(hitset)

        else:
            records, scores = ObelixSearchEngine(uid, hitset, redis=settings.redis).rank()

        return records[jrec:jrec + rg], scores[jrec:jrec + rg]

    except Exception:
        register_exception(alert_admin=True)
        return hitset[jrec:jrec + rg], [0] * len(hitset[jrec:jrec + rg])
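A small sketch of the jrec/rg paging convention used above: jrec is 1-based and rg is the page size (the values are purely illustrative).

# jrec/rg slicing as done in rank_records_obelix.
hits = [101, 102, 103, 104, 105]
jrec, rg = 2, 2
start = max(jrec - 1, 0)
page = hits[start:start + rg]    # -> [102, 103]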
def get_oai_set(id=''):
    """Returns a row parameters for a given id"""
    sets = []
    sql = "SELECT id, setSpec, setName, setCollection, setDescription, p1,f1,m1, p2,f2,m2, p3,f3,m3, setDefinition FROM oaiREPOSITORY"
    try:
        # Bind id as a query parameter instead of interpolating it into the SQL.
        if id:
            res = run_sql(sql + " WHERE id=%s ORDER BY setSpec asc", (id, ))
        else:
            res = run_sql(sql + " ORDER BY setSpec asc")
        for row in res:
            set = ['']*16
            set[0] = row[0]
            set[1] = row[1]
            set[2] = row[2]
            params = parse_set_definition(row[14])
            set[3] = params.get('c', '')
            set[5] = params.get('p1', '')
            set[6] = params.get('f1', '')
            set[7] = params.get('m1', '')
            set[8] = params.get('p2', '')
            set[9] = params.get('f2', '')
            set[10] = params.get('m2', '')
            set[11] = params.get('p3', '')
            set[12] = params.get('f3', '')
            set[13] = params.get('m3', '')
            set[14] = params.get('op1', 'a')
            set[15] = params.get('op2', 'a')
            sets.append(set)
        return sets
    except StandardError, e:
        register_exception(alert_admin=True)
        return str(e)
Example #22
def log_search_result_obelix(user_info, original_result_ordered, record_ids,
                             results_final_colls_scores, cols_in_result_ordered,
                             seconds_to_rank_and_print, jrec, rg, rm, cc):
    """
    Public method
    Used to log search_results
    :param user_info:
    :param original_result_ordered:
    :param record_ids:
    :param results_final_colls_scores:
    :param cols_in_result_ordered:
    :param seconds_to_rank_and_print:
    :param jrec:
    :param rg:
    :param rm:
    :param cc:
    :return:
    """
    try:
        ObelixSearchEngineLogger().search_result(user_info, original_result_ordered,
                                                 record_ids, results_final_colls_scores,
                                                 cols_in_result_ordered, seconds_to_rank_and_print,
                                                 jrec, rg, rm, cc)
    except Exception:
        register_exception(alert_admin=True)
Example #23
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection,
                oai_set_description, oai_set_p1, oai_set_f1, oai_set_m1,
                oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3,
                oai_set_m3, oai_set_op1, oai_set_op2):
    """Add a definition into the OAI Repository"""
    try:
        if not oai_set_spec:
            oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        set_definition = 'c=' + oai_set_collection + ';' + \
                         'p1=' + oai_set_p1  + ';' + \
                         'f1=' + oai_set_f1  + ';' + \
                         'm1=' + oai_set_m1  + ';' + \
                         'op1='+ oai_set_op1 + ';' + \
                         'p2=' + oai_set_p2  + ';' + \
                         'f2=' + oai_set_f2  + ';' + \
                         'm2=' + oai_set_m2  + ';' + \
                         'op2='+ oai_set_op2 + ';' + \
                         'p3=' + oai_set_p3  + ';' + \
                         'f3=' + oai_set_f3  + ';' + \
                         'm3=' + oai_set_m3  + ';'

        run_sql(
            """INSERT INTO oaiREPOSITORY (id, setName, setSpec,
                           setCollection, setDescription, setDefinition,
                           setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3)
                         VALUES (0, %s, %s, %s, %s, %s, NULL, %s, %s, %s,
                           %s, %s, %s, %s, %s, %s)""",
            (oai_set_name, oai_set_spec, oai_set_collection,
             oai_set_description, set_definition, oai_set_p1, oai_set_f1,
             oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3,
             oai_set_f3, oai_set_m3))
        return (1, "")
    except StandardError, e:
        register_exception(alert_admin=True)
        return (0, e)
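The setDefinition column stores a simple 'key=value;' string, which parse_set_definition (used in get_oai_set above) reads back. A minimal illustration of the format follows; it is not the Invenio helper itself.

# Round-trip sketch for the 'key=value;' setDefinition format.
def parse_definition(definition):
    params = {}
    for chunk in definition.split(';'):
        if '=' in chunk:
            key, value = chunk.split('=', 1)
            params[key] = value
    return params

parse_definition('c=Articles;p1=ellis;f1=author;m1=e;op1=a;')
# -> {'c': 'Articles', 'p1': 'ellis', 'f1': 'author', 'm1': 'e', 'op1': 'a'}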
 def sendfile(self, path, offset=0, the_len=-1):
     try:
         self.send_http_header()
         file_to_send = open(path)
         file_to_send.seek(offset)
         file_wrapper = FileWrapper(file_to_send)
         count = 0
         if the_len < 0:
             for chunk in file_wrapper:
                 count += len(chunk)
                 self.__bytes_sent += len(chunk)
                 self.__write(chunk)
         else:
             for chunk in file_wrapper:
                 if the_len >= len(chunk):
                     the_len -= len(chunk)
                     count += len(chunk)
                     self.__bytes_sent += len(chunk)
                     self.__write(chunk)
                 else:
                     count += the_len
                     self.__bytes_sent += the_len
                     self.__write(chunk[:the_len])
                     break
     except IOError, err:
         if "failed to write data" in str(err) or "client connection closed" in str(err):
             ## Let's just log this exception without alerting the admin:
             register_exception(req=self)
         else:
             raise
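A stand-alone sketch of the "send at most the_len bytes in chunks" logic above, written against generic file-like objects; the names are placeholders.

# Copy up to `limit` bytes from src to sink in chunks; limit < 0 means "no limit".
def copy_limited(src, sink, limit=-1, chunk_size=8192):
    sent = 0
    while limit != 0:
        to_read = chunk_size if limit < 0 else min(chunk_size, limit)
        chunk = src.read(to_read)
        if not chunk:
            break
        sink.write(chunk)
        sent += len(chunk)
        if limit > 0:
            limit -= len(chunk)
    return sent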
Example #25
    def _extract_packages(self):
        """
        Extract a package in a new directory.
        """
        self.path_unpacked = []
        if not hasattr(self, "retrieved_packages_unpacked"):
            self.retrieved_packages_unpacked = [self.package_name]
        for path in self.retrieved_packages_unpacked:
            self.logger.debug("Extracting package: %s" % (path,))

            p_name = 'EPJC' if 'EPJC' in path else 'JHEP'
            p_message = 'scoap3_package_%s_%s_' % (p_name, datetime.now())

            self.path_unpacked.append(mkdtemp(prefix=p_message,
                                              dir=CFG_TMPSHAREDDIR))

            try:
                ZipFile(path).extractall(self.path_unpacked[-1])
            except Exception:
                register_exception(alert_admin=True,
                                   prefix="Springer error extracting package.")
                self.logger.error("Error extraction package file: %s"
                                  % (path,))

        return self.path_unpacked
Example #26
def perform_request_article(req, journal_name, issue_number, ln,
                            category, recid, editor=False, verbose=0):
    """
    Central logic function for article pages.
    Loads the format template for article display and displays the requested
    article using BibFormat.
    'Editor' mode generates edit links on the article view page and disables
    caching.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and \
               (unreleased_issues_mode == 'all' or \
                (unreleased_issues_mode == 'future' and \
                 issue_is_later_than(issue_number, current_issue))):
            redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % \
                            (CFG_SITE_URL,
                             journal_name,
                             current_issue.split('/')[1],
                             current_issue.split('/')[0],
                             ln))

    try:
        index_page_template = get_journal_template('detailed',
                                                   journal_name,
                                                   ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box()
def insert_cit_ref_list_intodb(citation_dic, reference_dic, selfcbdic, selfdic,
                               authorcitdic):
    """Insert the reference and citation list into the database"""
    insert_into_cit_db(reference_dic, "reversedict")
    insert_into_cit_db(citation_dic, "citationdict")
    insert_into_cit_db(selfcbdic, "selfcitedbydict")
    insert_into_cit_db(selfdic, "selfcitdict")

    for a in authorcitdic.keys():
        lserarr = (serialize_via_marshal(authorcitdic[a]))
        #author name: replace " with something else
        a = a.replace('"', '\'')
        a = unicode(a, 'utf-8')
        try:
            ablob = run_sql(
                "select hitlist from rnkAUTHORDATA where aterm = %s", (a, ))
            if not (ablob):
                #print "insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)" , (a,lserarr)
                run_sql(
                    "insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)",
                    (a, lserarr))
            else:
                #print "UPDATE rnkAUTHORDATA SET hitlist  = %s where aterm=%s""" , (lserarr,a)
                run_sql(
                    "UPDATE rnkAUTHORDATA SET hitlist  = %s where aterm=%s",
                    (lserarr, a))
        except:
            register_exception(
                prefix="could not read/write rnkAUTHORDATA aterm=" + a +
                " hitlist=" + str(lserarr),
                alert_admin=True)
Example #28
def perform_request_index(req, journal_name, issue_number, ln,
                          category, editor=False, verbose=0):
    """
    Central logic function for index pages.
    Brings together format templates and MARC rules from the config, with
    the requested index page, given by the url parameters.
    From config:
        - page template for index pages -> formatting
        - MARC rule list -> Category Navigation
        - MARC tag used for issue numbers -> search (later in the format
          elements)
    Uses BibFormatObject and format_with_format_template to produce the
    required HTML.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and \
               (unreleased_issues_mode == 'all' or \
                (unreleased_issues_mode == 'future' and \
                 issue_is_later_than(issue_number, current_issue))):
            redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % \
                            (CFG_SITE_URL,
                             journal_name,
                             current_issue.split('/')[1],
                             current_issue.split('/')[0],
                             ln))
    try:
        index_page_template = get_journal_template('index',
                                                   journal_name,
                                                   ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box()
def render_self_citations(d_recids, d_total_recs, ln):
    try:
        tags = get_authors_tags()
    except IndexError, e:
        register_exception(prefix="attribute " + \
            str(e) + " missing in config", alert_admin=True)
        return ""
Example #30
def perform_request_create_group(uid,
                                 group_name,
                                 group_description,
                                 join_policy,
                                 ln=CFG_SITE_LANG):
    """Create new group.
    @param group_name: name of the group entered
    @param group_description: description of the group entered
    @param join_policy: join policy of the group entered
    @param ln: language
    @return: body with warnings;
    warnings != [] if group_name or join_policy is not valid,
    or if the name already exists in the database;
    body="1" on success, so that info can be displayed on the main page
    """
    _ = gettext_set_language(ln)
    body = ""
    warnings = []
    infos = []
    if group_name == "":
        try:
            raise InvenioWebSessionWarning(_('Please enter a group name.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_input_create_group(group_name,
                                                  group_description,
                                                  join_policy,
                                                  warnings=warnings)
Example #31
    def _download_tars(self, check_integrity=True):
        if check_integrity:
            self.ftp.check_pkgs_integrity(self.retrieved_packages, self.logger)

        print("Downloading %i tar packages." % (len(self.retrieved_packages)))
        # Create progress bar
        total_count = len(self.files_list)

        for i, filename in enumerate(self.retrieved_packages.iterkeys(),
                                     start=1):
            self.logger.info("Downloading tar package %s of %s: %s" % (
                i,
                total_count,
                filename,
            ))
            unpack_path = join(CFG_TAR_FILES, filename)
            self.retrieved_packages_unpacked.append(unpack_path)
            try:
                self.ftp.download(filename, CFG_TAR_FILES)
                self.retrieved_packages_unpacked.append(unpack_path)
                self.packages_delivery.append((filename[0:-4], datetime.now()))
            except:
                register_exception(alert_admin=True,
                                   prefix="Elsevier package download failed.")
                self.logger.error("Error downloading tar file %s of %s: %s" % (
                    i,
                    total_count,
                    filename,
                ))
                print(sys.exc_info())

        return self.retrieved_packages_unpacked
Example #32
def perform_request_leave_group(uid, grpID, confirmed=0, ln=CFG_SITE_LANG):
    """Leave group.
    @param uid: user ID
    @param grpID: ID of the group the user wants to leave
    @param confirmed: whether the confirmation page has already been displayed
    @param ln: language
    @return: body with warnings (warnings != [] if no group is selected)
    """
    _ = gettext_set_language(ln)
    body = ""
    warnings = []
    infos = []
    if not grpID == -1:
        if confirmed:
            db.leave_group(grpID, uid)
            infos.append(CFG_WEBSESSION_INFO_MESSAGES["LEAVE_GROUP"])
            body = perform_request_groups_display(uid,
                                                  infos=infos,
                                                  warnings=warnings,
                                                  ln=ln)
        else:
            body = websession_templates.tmpl_confirm_leave(uid, grpID, ln)
    else:
        try:
            raise InvenioWebSessionWarning(_('Please select one group.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_input_leave_group(uid, warnings=warnings, ln=ln)
def _create_icon(file_path, icon_size, format='gif', verbosity=9):
    """
    Creates an icon for the given file.

    Returns the path to the icon. If creation fails, returns None and
    registers the exception.

    Parameters:

       - file_path : *str* full path to the file for which the icon is
                     to be created.

       - icon_size : *int* the scaling information to be used for the
                     creation of the new icon.

       - format : *str* the file format of the icon (e.g. gif).

       - verbosity : *int* the verbosity level under which the program
                     is to run.
    """
    icon_path = None
    try:
        filename = os.path.splitext(os.path.basename(file_path))[0]
        (icon_dir, icon_name) = create_icon(
            {'input-file':file_path,
             'icon-name': "icon-%s" % filename,
             'multipage-icon': False,
             'multipage-icon-delay': 0,
             'icon-scale': icon_size,
             'icon-file-format': format,
             'verbosity': verbosity})
        icon_path = icon_dir + os.sep + icon_name
    except InvenioWebSubmitIconCreatorError, e:
        register_exception(prefix='Icon for file %s could not be created: %s' % \
                           (file_path, str(e)),
                           alert_admin=False)
    return icon_path
Example #34
def perform_request_update_group(uid,
                                 grpID,
                                 group_name,
                                 group_description,
                                 join_policy,
                                 ln=CFG_SITE_LANG):
    """Update group datas in database.
    @param uid: user ID
    @param grpID: ID of the group
    @param group_name: name of the group
    @param group_description: description of the group
    @param join_policy: join policy of the group
    @param ln: language
    @return: body with warnings
    """
    body = ''
    warnings = []
    infos = []
    _ = gettext_set_language(ln)
    group_name_available = db.group_name_exist(group_name)
    if group_name == "":
        try:
            raise InvenioWebSessionWarning(_('Please enter a group name.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_edit_group(uid, grpID, warnings=warnings, ln=ln)
Example #35
    def calculate_RFC2104_HMAC(data, _amazon_secret_access_key):
        """
        Computes a RFC 2104 compliant HMAC Signature and then Base64
        encodes it.

        Module hashlib must be installed if Python < 2.5
        <http://pypi.python.org/pypi/hashlib/20081119>

        @param data: data to sign
        @param _amazon_secret_access_key: your Amazon secret key

        @type data: string
        @type _amazon_secret_access_key: string. Empty if hashlib module not installed
        """
        if not HASHLIB_IMPORTED:
            try:
                raise Exception("Module hashlib not installed. Please install it.")
            except:
                from invenio.errorlib import register_exception
                register_exception(stream='warning', alert_admin=True, subject='Cannot create AWS signature')
                return ""
        else:
            if sys.version_info < (2, 5):
                # compatibility mode for Python < 2.5 and hashlib
                my_digest_algo = _MySHA256(sha256())
            else:
                my_digest_algo = sha256

        return base64.encodestring(hmac.new(_amazon_secret_access_key,
                                            data, my_digest_algo).digest()).strip()
Example #36
def perform_request_delete_group(uid, grpID, confirmed=0, ln=CFG_SITE_LANG):
    """First display confirm message(confirmed=0).
    then(confirmed=1) delete group and all its members
    @param uid: user ID
    @param grpID: ID of the group
    @param confirmed: =1 if confirmed message has been previously displayed
    @param ln: language
    @return: body with warnings
    """
    body = ""
    warnings = []
    infos = []
    _ = gettext_set_language(ln)
    group_infos = db.get_group_infos(grpID)
    user_status = db.get_user_status(uid, grpID)
    if not group_infos:
        try:
            raise InvenioWebSessionWarning(
                _('The group has already been deleted.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_groups_display(uid,
                                              infos=infos,
                                              warnings=warnings,
                                              ln=CFG_SITE_LANG)
Example #37
    def get_publication_information(self, xml):
        jid = get_value_in_tag(xml, "journal-title")
        journal = ""
        if "European Physical Journal" in jid:
            journal = "EPJC"

        try:
            art = xml.getElementsByTagName('article-meta')[0]
        except IndexError as err:
            register_exception()
            print >> sys.stderr, "ERROR: XML corrupted: %s" % err
            pass
        except Exception as err:
            register_exception()
            print >> sys.stderr, "ERROR: Exception captured: %s" % err
            pass

        issn = self.get_issn(art)
        volume = get_value_in_tag(art, "volume")
        issue = get_value_in_tag(art, "issue")
        year = self.get_date(art)
        first_page = get_value_in_tag(art, "fpage")
        last_page = get_value_in_tag(art, "lpage")
        doi = self.get_doi(art)

        return (journal, issn, volume, issue, first_page, last_page, year, doi)
Example #38
def process_alerts(alerts):
    """Process the given alerts and store the records found to the user defined baskets
    and/or notify them by e-mail"""

    # TBD: do not generate the email each time, forge it once and then
    # send it to all appropriate people

    for a in alerts['alerts']:
        if alert_use_basket_p(a):
            add_records_to_basket(alerts['records'], a[2])
        if alert_use_notification_p(a):
            argstr = update_arguments(alerts['argstr'], alerts['date_from'],
                                      alerts['date_until'])
            try:
                email_notify(a, alerts['records'], argstr)
            except Exception:
                # There were troubles sending this alert, so register
                # this exception and continue with other alerts:
                register_exception(alert_admin=True,
                                   prefix="Error when sending alert %s, %s\n." % \
                                   (repr(a), repr(argstr)))
        # Inform the admin when external collections time out
        if len(alerts['records'][1][1]) > 0:
            register_exception(alert_admin=True,
                               prefix="External collections %s timed out when sending alert %s, %s\n." % \
                                      (", ".join(alerts['records'][1][1]), repr(a), repr(argstr)))

        update_date_lastrun(a)
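The shape of the alerts argument implied by the look-ups above, as an illustrative sketch; the field meanings are inferred from the code, not from separate documentation.

# alerts = {
#     'alerts':     [...],   # alert rows; a[2] is passed to add_records_to_basket
#     'records':    (...),   # search results; records[1][1] holds timed-out external collections
#     'argstr':     '...',   # query arguments, re-dated via update_arguments()
#     'date_from':  ...,
#     'date_until': ...,
# }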
Example #39
def record_get_field(record, field):
    values = []
    fa = field[:3]
    ind1 = None
    ind2 = None
    fb = None
    try:
        ind1 = field[3]
        ind2 = field[4]
        fb = field[5]
    except:
        pass
    if ind1 == "_":
        ind1 = " "
    if ind2 == "_":
        ind2 = " "
    fields = record.get(fa)
    if fields:
        for field in fields:
            if fa[:2] != "00":
                try:
                    if (ind1 and ind2 and ind1 == field.ind1
                            and ind2 == field.ind2) or (not ind1 and not ind2):
                        if fb:
                            values.append(field.find_subfields(fb)[0].value)
                        else:
                            values.append(" ".join(x.value
                                                   for x in field.subfields))
                except:
                    register_exception()
            else:
                values.append(field.value)
    return values
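A hypothetical call illustrating how the field argument is decoded above: characters 0-2 are the MARC tag, positions 3 and 4 the indicators ('_' meaning blank), and position 5 the subfield code. The record object is assumed to be already loaded.

# Tag 245, blank indicators, subfield 'a' -> list of title values.
titles = record_get_field(record, '245__a')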
Example #40
    def _crawl_elsevier_and_find_main_xml(self):
        """
        A package contains several subdirectories, each corresponding to an
        article. An article is identified by the presence of both a main.pdf
        and a main.xml file in a given directory.
        """
        self.found_articles = []
        if not self.path and not self.package_name:
            for doc in self.conn.found_articles:
                dirname = doc['xml'].rstrip('/main.xml')
                try:
                    self._normalize_article_dir_with_dtd(dirname)
                    self.found_articles.append(dirname)
                except Exception as err:
                    register_exception()
                    print("ERROR: can't normalize %s: %s" % (dirname, err))
        else:

            def visit(dummy, dirname, names):
                if "main.xml" in names and "main.pdf" in names:
                    try:
                        self._normalize_article_dir_with_dtd(dirname)
                        self.found_articles.append(dirname)
                    except Exception as err:
                        register_exception()
                        print("ERROR: can't normalize %s: %s" % (dirname, err))

            walk(self.path, visit, None)
Example #41
def bst_openaire_altmetric():
    """
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    a = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if ['Altmetric'] in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            json_res = a.doi(doi_val)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec,
                                 '035',
                                 subfields=[('a',
                                             str(json_res['altmetric_id'])),
                                            ('9', 'Altmetric')])
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException, e:
            register_exception(prefix='Altmetric error (status code %s): %s' %
                               (e.status_code, str(e)),
                               alert_admin=False)
Example #42
    def _crawl_elsevier_and_find_issue_xml(self):
        """
        Information about the current volume, issue, etc. is available
        in a file called issue.xml, located in a higher-level directory.
        """
        self._found_issues = []
        if not self.path and not self.package_name:
            for issue in self.conn._get_issues():
                dirname = issue.rstrip('/issue.xml')
                try:
                    self._normalize_issue_dir_with_dtd(dirname)
                    self._found_issues.append(dirname)
                except Exception as err:
                    register_exception()
                    print("ERROR: can't normalize %s: %s" % (dirname, err))
        else:

            def visit(dummy, dirname, names):
                if "issue.xml" in names:
                    try:
                        self._normalize_issue_dir_with_dtd(dirname)
                        self._found_issues.append(dirname)
                    except Exception as err:
                        register_exception()
                        print("ERROR: can't normalize %s: %s" % (dirname, err))

            walk(self.path, visit, None)
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection,
                oai_set_description, oai_set_p1, oai_set_f1,oai_set_m1,
                oai_set_p2, oai_set_f2,oai_set_m2, oai_set_p3,
                oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2):
    """Add a definition into the OAI Repository"""
    try:
        if not oai_set_spec:
            oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        set_definition = 'c=' + oai_set_collection + ';' + \
                         'p1=' + oai_set_p1  + ';' + \
                         'f1=' + oai_set_f1  + ';' + \
                         'm1=' + oai_set_m1  + ';' + \
                         'op1='+ oai_set_op1 + ';' + \
                         'p2=' + oai_set_p2  + ';' + \
                         'f2=' + oai_set_f2  + ';' + \
                         'm2=' + oai_set_m2  + ';' + \
                         'op2='+ oai_set_op2 + ';' + \
                         'p3=' + oai_set_p3  + ';' + \
                         'f3=' + oai_set_f3  + ';' + \
                         'm3=' + oai_set_m3  + ';'

        run_sql("""INSERT INTO oaiREPOSITORY (id, setName, setSpec,
                           setCollection, setDescription, setDefinition,
                           setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3)
                         VALUES (0, %s, %s, %s, %s, %s, NULL, %s, %s, %s,
                           %s, %s, %s, %s, %s, %s)""",
                      (oai_set_name, oai_set_spec, oai_set_collection,
                       oai_set_description, set_definition, oai_set_p1,
                       oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2,
                       oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3))
        return (1, "")
    except StandardError, e:
        register_exception(alert_admin=True)
        return (0, e)
Example #44
def rank_records_obelix(user_info, hitset, rg=10, jrec=0, settings=None):
    """
    Public method
    Ranks a given search result based on recommendations
    Expects the hitset to be sorted by latest last [1,2,3,4,5] (recids)
    """
    hitset = list(hitset)
    hitset.reverse()

    jrec = max(jrec - 1, 0)

    try:
        if not settings:
            settings = ObelixSearchEngineSettings()

        uid = ""

        if CFG_WEBSEARCH_OBELIX_USER_KEY:
            if CFG_WEBSEARCH_OBELIX_USER_KEY in user_info:
                uid = user_info[CFG_WEBSEARCH_OBELIX_USER_KEY]

        if settings.recommendations_impact == 0 or uid == 0 or uid == "" or uid == "0":
            records, scores = hitset, [0] * len(hitset)

        else:
            records, scores = ObelixSearchEngine(uid,
                                                 hitset,
                                                 redis=settings.redis).rank()

        return records[jrec:jrec + rg], scores[jrec:jrec + rg]

    except Exception:
        register_exception(alert_admin=True)
        return hitset[jrec:jrec + rg], [0] * len(hitset[jrec:jrec + rg])
Example #45
def get_oai_set(id=''):
    """Returns a row parameters for a given id"""
    sets = []
    sql = "SELECT id, setSpec, setName, setCollection, setDescription, p1,f1,m1, p2,f2,m2, p3,f3,m3, setDefinition FROM oaiREPOSITORY"
    try:
        # Bind id as a query parameter instead of interpolating it into the SQL.
        if id:
            res = run_sql(sql + " WHERE id=%s ORDER BY setSpec asc", (id, ))
        else:
            res = run_sql(sql + " ORDER BY setSpec asc")
        for row in res:
            set = [''] * 16
            set[0] = row[0]
            set[1] = row[1]
            set[2] = row[2]
            params = parse_set_definition(row[14])
            set[3] = params.get('c', '')
            set[5] = params.get('p1', '')
            set[6] = params.get('f1', '')
            set[7] = params.get('m1', '')
            set[8] = params.get('p2', '')
            set[9] = params.get('f2', '')
            set[10] = params.get('m2', '')
            set[11] = params.get('p3', '')
            set[12] = params.get('f3', '')
            set[13] = params.get('m3', '')
            set[14] = params.get('op1', 'a')
            set[15] = params.get('op2', 'a')
            sets.append(set)
        return sets
    except StandardError, e:
        register_exception(alert_admin=True)
        return str(e)
Example #46
def log_search_result_obelix(user_info, original_result_ordered, record_ids,
                             results_final_colls_scores,
                             cols_in_result_ordered, seconds_to_rank_and_print,
                             jrec, rg, rm, cc):
    """
    Public method
    Used to log search_results
    :param user_info:
    :param original_result_ordered:
    :param record_ids:
    :param results_final_colls_scores:
    :param cols_in_result_ordered:
    :param seconds_to_rank_and_print:
    :param jrec:
    :param rg:
    :param rm:
    :param cc:
    :return:
    """
    try:
        ObelixSearchEngineLogger().search_result(
            user_info, original_result_ordered, record_ids,
            results_final_colls_scores, cols_in_result_ordered,
            seconds_to_rank_and_print, jrec, rg, rm, cc)
    except Exception:
        register_exception(alert_admin=True)
    def search(self, req, form):
        """
        Display search interface
        """
        argd = wash_urlargd(
            form,
            {
                "name": (str, ""),
                "issue": (str, ""),
                "archive_year": (str, ""),
                "archive_issue": (str, ""),
                "archive_select": (str, "False"),
                "archive_date": (str, ""),
                "archive_search": (str, "False"),
                "ln": (str, CFG_SITE_LANG),
                "verbose": (int, 0),
            },
        )
        try:
            # FIXME: if journal_name is empty, redirect
            ln = wash_journal_language(argd["ln"])
            washed_journal_name = wash_journal_name(ln, argd["name"])
            archive_issue = wash_issue_number(ln, washed_journal_name, argd["archive_issue"])
            archive_date = wash_archive_date(ln, washed_journal_name, argd["archive_date"])
            archive_select = argd["archive_select"]
            archive_search = argd["archive_search"]
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
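
For context, wash_urlargd coerces each request field to the declared type and falls back to the declared default when the field is missing or malformed. A toy stand-in for illustration only (an assumption, not Invenio's implementation):

def wash_urlargd_sketch(form, defaults):
    """Coerce each declared field of form according to the
    {'name': (type, default), ...} mapping used above."""
    washed = {}
    for key, (expected_type, default) in defaults.items():
        try:
            washed[key] = expected_type(form[key])
        except (KeyError, ValueError, TypeError):
            washed[key] = default
    return washed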
def insert_cit_ref_list_intodb(citation_dic, reference_dic, selfcbdic,
                               selfdic, authorcitdic):
    """Insert the reference and citation list into the database"""
    insert_into_cit_db(reference_dic, "reversedict")
    insert_into_cit_db(citation_dic, "citationdict")
    insert_into_cit_db(selfcbdic, "selfcitedbydict")
    insert_into_cit_db(selfdic, "selfcitdict")

    for a in authorcitdic.keys():
        lserarr = serialize_via_marshal(authorcitdic[a])
        # author name: replace double quotes with single quotes (str.replace
        # returns a new string, so the result must be reassigned) and decode
        # the name to unicode before using it as a bind parameter
        a = a.replace('"', "'")
        a = unicode(a, 'utf-8')
        try:
            ablob = run_sql("select hitlist from rnkAUTHORDATA where aterm = %s", (a,))
            if not ablob:
                #print "insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)" , (a,lserarr)
                run_sql("insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)",
                         (a,lserarr))
            else:
                #print "UPDATE rnkAUTHORDATA SET hitlist  = %s where aterm=%s""" , (lserarr,a)
                run_sql("UPDATE rnkAUTHORDATA SET hitlist  = %s where aterm=%s",
                        (lserarr,a))
        except:
            register_exception(prefix="could not read/write rnkAUTHORDATA aterm="+a+" hitlist="+str(lserarr), alert_admin=True)
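
serialize_via_marshal is used above to turn each author's citation data into a blob that fits in a database column. A minimal sketch of such a helper, assuming marshal plus zlib compression (the real helper may differ):

import marshal
import zlib

def serialize_via_marshal_sketch(obj):
    # Marshal the object and compress it so it stores compactly in a BLOB column.
    return zlib.compress(marshal.dumps(obj))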
    def _crawl_elsevier_and_find_issue_xml(self):
        """
        Information about the current volume, issue, etc. is available
        in a file called issue.xml that is available in a higher directory.
        """
        self._found_issues = []
        if not self.path and not self.package_name:
            for issue in self.conn._get_issues():
                # issue points at ".../issue.xml"; keep only the directory part
                # (note: str.rstrip() strips a set of characters, not a suffix,
                # so it cannot be used safely here).
                dirname = issue[:-len('/issue.xml')] if issue.endswith('/issue.xml') else issue
                try:
                    self._normalize_issue_dir_with_dtd(dirname)
                    self._found_issues.append(dirname)
                except Exception as err:
                    register_exception()
                    print("ERROR: can't normalize %s: %s" % (dirname, err))
        else:
            def visit(dummy, dirname, names):
                if "issue.xml" in names:
                    try:
                        self._normalize_issue_dir_with_dtd(dirname)
                        self._found_issues.append(dirname)
                    except Exception as err:
                        register_exception()
                        print("ERROR: can't normalize %s: %s"
                              % (dirname, err))
            walk(self.path, visit, None)
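
The visit callback above relies on the Python 2-only os.path.walk. An equivalent traversal with os.walk, shown here only as an illustration of the same idea:

import os

def find_issue_dirs(root):
    """Collect every directory under root that contains an issue.xml file."""
    found = []
    for dirpath, dummy_dirnames, filenames in os.walk(root):
        if "issue.xml" in filenames:
            found.append(dirpath)
    return found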
    def sub(self, req, form):
        """DEPRECATED: /submit/sub is deprecated now, so raise email to the admin (but allow submission to continue anyway)"""
        args = wash_urlargd(form, {'password': (str, '')})
        uid = getUid(req)
        if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return page_not_authorized(req, "../sub/", navmenuid='submit')
        try:
            raise DeprecationWarning, 'submit/sub handler has been used. Please use submit/direct. e.g. "submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"'
        except DeprecationWarning:
            register_exception(req=req, alert_admin=True)

        ln = args['ln']
        _ = gettext_set_language(ln)
        #DEMOBOO_RN=DEMO-BOOK-2008-001&ln=en&password=1223993532.26572%40APPDEMOBOO
        params = dict(form)
        password = args['password']
        if password:
            del params['password']
            if "@" in password:
                params['access'], params['sub'] = password.split('@', 1)
            else:
                params['sub'] = password
        else:
            args = str(req.args).split('@')
            if len(args) > 1:
                params = {'sub': args[-1]}
                args = '@'.join(args[:-1])
                params.update(cgi.parse_qs(args))
            else:
                return warning_page(_("Sorry, invalid URL..."), req, ln=ln)
        url = "%s/submit/direct?%s" % (CFG_SITE_SECURE_URL,
                                       urlencode(params, doseq=True))
        redirect_to_url(req, url)
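
To see what the fallback branch does with a deprecated-style URL, take the raw args "RN=123@SBIFOO" from the deprecation message above: the part after the last '@' becomes the submission name and the rest is parsed as an ordinary query string. A small standalone illustration:

import cgi

raw_args = "RN=123@SBIFOO"              # deprecated /submit/sub style
parts = raw_args.split('@')
params = {'sub': parts[-1]}             # {'sub': 'SBIFOO'}
params.update(cgi.parse_qs('@'.join(parts[:-1])))
# params is now {'sub': 'SBIFOO', 'RN': ['123']}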
def process_alerts(alerts):
    """Process the given alerts and store the records found to the user defined baskets
    and/or notify them by e-mail"""

    # TBD: do not generate the email each time, forge it once and then
    # send it to all appropriate people

    for a in alerts['alerts']:
        if alert_use_basket_p(a):
            add_records_to_basket(alerts['records'], a[2])
        if alert_use_notification_p(a):
            argstr = update_arguments(alerts['argstr'], alerts['date_from'], alerts['date_until'])
            try:
                email_notify(a, alerts['records'], argstr)
            except Exception:
                # There were troubles sending this alert, so register
                # this exception and continue with other alerts:
                register_exception(alert_admin=True,
                                   prefix="Error when sending alert %s, %s\n." % \
                                   (repr(a), repr(argstr)))
        # Inform the admin when external collections time out.  Use
        # alerts['argstr'] in the message: the date-augmented argstr defined
        # above only exists when e-mail notification is enabled for the alert.
        if len(alerts['records'][1][1]) > 0:
            register_exception(alert_admin=True,
                               prefix="External collections %s timed out when sending alert %s, %s\n." % \
                                      (", ".join(alerts['records'][1][1]), repr(a), repr(alerts['argstr'])))

        update_date_lastrun(a)
Example #52
def perform_request_index(req, journal_name, issue_number, ln, category, editor=False, verbose=0):
    """
    Central logic function for index pages.
    Brings together the format templates and MARC rules from the config
    with the requested index page, as given by the URL parameters.
    From config:
        - page template for index pages -> formatting
        - MARC rule list -> Category Navigation
        - MARC tag used for issue numbers -> search (later in the format
          elements)
    Uses BibFormatObject and format_with_format_template to produce the
    required HTML.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and (
            unreleased_issues_mode == "all"
            or (unreleased_issues_mode == "future" and issue_is_later_than(issue_number, current_issue))
        ):
            redirect_to_url(
                req,
                "%s/journal/%s/%s/%s?ln=%s"
                % (CFG_SITE_URL, journal_name, current_issue.split("/")[1], current_issue.split("/")[0], ln),
            )
    try:
        index_page_template = get_journal_template("index", journal_name, ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box(req)
def insert_into_cit_db(dic, name):
    """an aux thing to avoid repeating code"""
    ndate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    try:
        s = serialize_via_marshal(dic)
        write_message("size of " + name + " " + str(len(s)))
        # check whether a row for this object_name already exists
        testres = run_sql(
            "select object_name from rnkCITATIONDATA where object_name = %s",
            (name, ))
        if testres:
            run_sql(
                "UPDATE rnkCITATIONDATA SET object_value = %s where object_name = %s",
                (s, name))
        else:
            # there was no entry for this name yet, so insert one
            run_sql(
                "INSERT INTO rnkCITATIONDATA(object_name,object_value) values (%s,%s)",
                (name, s))
        run_sql(
            "UPDATE rnkCITATIONDATA SET last_updated = %s where object_name = %s",
            (ndate, name))
    except:
        register_exception(prefix="could not write " + name + " into db",
                           alert_admin=True)
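
A hypothetical read-back counterpart, using the same run_sql and the table/column names taken from the queries above; the unmarshalling mirrors serialize_via_marshal and is an assumption:

import marshal
import zlib

def read_cit_dict_sketch(name):
    # Fetch the stored blob for the given object_name and unmarshal it back
    # into a dict.  Assumes the value was written as a zlib-compressed
    # marshal dump.
    rows = run_sql("SELECT object_value FROM rnkCITATIONDATA WHERE object_name = %s",
                   (name, ))
    if rows and rows[0][0]:
        return marshal.loads(zlib.decompress(rows[0][0]))
    return {}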
Example #54
def perform_request_article(req, journal_name, issue_number, ln, category, recid, editor=False, verbose=0):
    """
    Central logic function for article pages.
    Loads the format template for article display and displays the requested
    article using BibFormat.
    'Editor' mode generates edit links on the article view page and disables
    caching.
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and (
            unreleased_issues_mode == "all"
            or (unreleased_issues_mode == "future" and issue_is_later_than(issue_number, current_issue))
        ):
            redirect_to_url(
                req,
                "%s/journal/%s/%s/%s?ln=%s"
                % (CFG_SITE_URL, journal_name, current_issue.split("/")[1], current_issue.split("/")[0], ln),
            )

    try:
        index_page_template = get_journal_template("detailed", journal_name, ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box(req)
    def sub(self, req, form):
        """DEPRECATED: /submit/sub is deprecated now, so raise email to the admin (but allow submission to continue anyway)"""
        args = wash_urlargd(form, {"password": (str, "")})
        uid = getUid(req)
        if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return page_not_authorized(req, "../sub/", navmenuid="submit")
        try:
            raise DeprecationWarning, 'submit/sub handler has been used. Please use submit/direct. e.g. "submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"'
        except DeprecationWarning:
            register_exception(req=req, alert_admin=True)

        ln = args["ln"]
        _ = gettext_set_language(ln)
        # DEMOBOO_RN=DEMO-BOOK-2008-001&ln=en&password=1223993532.26572%40APPDEMOBOO
        params = dict(form)
        password = args["password"]
        if password:
            del params["password"]
            if "@" in password:
                params["access"], params["sub"] = password.split("@", 1)
            else:
                params["sub"] = password
        else:
            args = str(req.args).split("@")
            if len(args) > 1:
                params = {"sub": args[-1]}
                args = "@".join(args[:-1])
                params.update(cgi.parse_qs(args))
            else:
                return warningMsg(_("Sorry, invalid URL..."), req, ln=ln)
        url = "%s/submit/direct?%s" % (CFG_SITE_URL, urlencode(params, doseq=True))
        redirect_to_url(req, url)
Example #56
    def _get_metadata_and_fulltex_dir(self):
        """Walk the unpacked packages, parse each dataset.xml and collect the
        XML metadata and PDF full-text paths of every journal item."""
        # Announce the retrieval step on stdout.
        print >> sys.stdout, "\nRetrieving journal items directories."
        # Create a progress bar sized to the number of packages to process.
        p_bar = progress_bar(len(self.files_list))
        # Draw the initial (empty) progress bar.
        sys.stdout.write(p_bar.next())
        sys.stdout.flush()

        # Debug output: show where the packages were unpacked and which files were found.
        print self.path_unpacked
        print self.files_list
        for name in self.files_list:
            dataset_link = join(self.path_unpacked, name.split('.')[0], 'dataset.xml')

            try:
                dataset_xml = parse(dataset_link)
            except Exception, err:
                register_exception(alert_admin=True, prefix="Elsevier error reading dataset.xml file.")
                self.logger.error("Error reading dataset.xml file: %s" % (dataset_link,))
                print >> sys.stdout, "\nError reading dataset.xml file: %s" % (dataset_link,)
                continue

            # created = get_value_in_tag(dataset_xml.getElementsByTagName('dataset-unique-ids')[0], 'timestamp')
            journal_items = dataset_xml.getElementsByTagName('journal-item')
            self.logger.info("Getting metadata and fulltex directories for %i journal items." % (len(journal_items),))
            for journal_item in journal_items:
                xml_pathname = join(self.path_unpacked, name.split('.')[0], xml_to_text(journal_item.getElementsByTagName('ml')[0].getElementsByTagName('pathname')[0]))
                pdf_pathname = join(self.path_unpacked, name.split('.')[0], xml_to_text(journal_item.getElementsByTagName('web-pdf')[0].getElementsByTagName('pathname')[0]))
                self.found_articles.append(dict(xml=xml_pathname, pdf=pdf_pathname))
            self.logger.info("Got metadata and fulltex directories of %i journals." % (len(self.found_articles),))
            # Print stuff
            sys.stdout.write(p_bar.next())
            sys.stdout.flush()
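
xml_to_text is used above to pull the text content out of the <pathname> elements. A minimal sketch of such a helper for xml.dom.minidom nodes (an assumption, not necessarily the real implementation):

def xml_to_text_sketch(node):
    """Concatenate the text content of a minidom node and all its descendants."""
    if node.nodeType == node.TEXT_NODE:
        return node.data
    return ''.join(xml_to_text_sketch(child) for child in node.childNodes)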