def setUp(self):
    """Prepare the global and per-test state used by the test case.

    The method:

    * saves ``config.CFG_TMPDIR`` in ``self.original_tmpdir`` and then
      points the global at ``tempfile.gettempdir()``;
    * keeps references to the current ``sys.stdout``/``sys.stderr`` and
      resets ``self.stdout``/``self.stderr`` to ``None``;
    * records ``bconfig.logging_level`` in ``self.log_level`` and calls
      ``bconfig.set_global_level`` with ``logging.CRITICAL``;
    * stores the result of ``taxonomies_registry()`` under
      ``'classifierext.taxonomies'`` in the application's registry
      extension.
    """
    from invenio import config as invenio_config

    # Save the configured temp dir before overriding the global value.
    self.original_tmpdir = invenio_config.CFG_TMPDIR
    invenio_config.CFG_TMPDIR = tempfile.gettempdir()

    # Keep handles on the current streams; nothing is captured yet.
    self.oldstdout, self.oldstderr = sys.stdout, sys.stderr
    self.stdout = self.stderr = None

    self.taxonomy_name = "test"

    # Imported only after CFG_TMPDIR has been overridden, as before.
    from invenio.legacy.bibclassify import config as bconfig

    test_logger = bconfig.get_logger("bibclassify.tests")
    self.log = test_logger
    self.log_level = bconfig.logging_level
    bconfig.set_global_level(bconfig.logging.CRITICAL)

    # Build the registry first, then look up where to store it.
    fresh_registry = taxonomies_registry()
    registry_ext = self.app.extensions['registry']
    registry_ext['classifierext.taxonomies'] = fresh_registry
def setUp(self):
    """Set up temp dir, stream handles, logging and the taxonomy registry.

    ``config.CFG_TMPDIR`` is replaced by the system temporary directory
    after its previous value has been stored in ``self.original_tmpdir``.
    The current ``sys.stdout`` and ``sys.stderr`` objects are remembered,
    ``bconfig.logging_level`` is recorded in ``self.log_level`` before
    ``bconfig.set_global_level`` is called with ``logging.CRITICAL``, and
    the value returned by ``taxonomies_registry()`` is placed in the
    application's ``'registry'`` extension.
    """
    from invenio import config as global_config

    previous_tmpdir = global_config.CFG_TMPDIR
    self.original_tmpdir = previous_tmpdir
    global_config.CFG_TMPDIR = tempfile.gettempdir()

    # References to the streams as they are when the test starts.
    self.oldstdout = sys.stdout
    self.oldstderr = sys.stderr
    self.stdout = self.stderr = None
    self.taxonomy_name = "test"

    # This import deliberately stays after the CFG_TMPDIR override.
    from invenio.legacy.bibclassify import config as bconfig

    self.log = bconfig.get_logger("bibclassify.tests")
    self.log_level = bconfig.logging_level
    critical_level = bconfig.logging.CRITICAL
    bconfig.set_global_level(critical_level)

    # The registry is created before the extension mapping is looked up.
    new_taxonomies = taxonomies_registry()
    extension_registry = self.app.extensions['registry']
    extension_registry['classifierext.taxonomies'] = new_taxonomies
def setUp(self):
    """Set up the temp dir, stream handles, logging and taxonomy registry.

    Saves ``config.CFG_TMPDIR`` in the private ``self.__CFG_TMPDIR``
    attribute and then points the global at ``tempfile.gettempdir()``.
    Also remembers the current ``sys.stdout``/``sys.stderr``, records
    ``bconfig.logging_level`` in ``self.log_level`` before calling
    ``bconfig.set_global_level`` with ``logging.CRITICAL``, and stores the
    result of ``taxonomies_registry()`` in the application's
    ``'registry'`` extension.
    """
    # NOTE: whenever a global variable is changed here, make sure it is
    # set back to its initial value in tearDown.
    from invenio import config as invenio_config

    self.__CFG_TMPDIR = invenio_config.CFG_TMPDIR
    invenio_config.CFG_TMPDIR = tempfile.gettempdir()

    # Handles on the streams as they are when the test starts.
    self.oldstdout, self.oldstderr = sys.stdout, sys.stderr
    self.stdout = None
    self.stderr = None

    self.taxonomy_name = "test"

    # This import stays after the CFG_TMPDIR override, as in the original.
    from invenio.legacy.bibclassify import config as bconfig

    tests_logger = bconfig.get_logger("bibclassify.tests")
    self.log = tests_logger
    self.log_level = bconfig.logging_level
    bconfig.set_global_level(bconfig.logging.CRITICAL)

    # Create the registry first, then resolve the place to store it.
    taxonomy_registry = taxonomies_registry()
    app_registry = self.app.extensions['registry']
    app_registry['classifierext.taxonomies'] = taxonomy_registry
This module is STANDALONE safe """ import os import re from invenio.legacy.bibclassify import config as bconfig if bconfig.STANDALONE: from urllib2 import urlopen else: from invenio.utils.url import make_invenio_opener urlopen = make_invenio_opener('BibClassify').open log = bconfig.get_logger("bibclassify.text_extractor") _ONE_WORD = re.compile("[A-Za-z]{2,}") def is_pdf(document): """Check if a document is a PDF file and return True if is is.""" if not executable_exists('pdftotext'): log.warning("GNU file was not found on the system. " "Switching to a weak file extension test.") if document.lower().endswith(".pdf"): return True return False # Tested with file version >= 4.10. First test is secure and works # with file version 4.25. Second condition is tested for file # version 4.10.
import six from cgi import escape from invenio.base.i18n import gettext_set_language from invenio.legacy.bibdocfile.api import BibRecDocs from invenio.legacy.search_engine import get_record from invenio.legacy.template import load from invenio.ext.legacy.handler import wash_urlargd import invenio.modules.access.engine as acce from invenio.legacy.bibsched import bibtask from invenio.legacy.bibupload.engine import open_marc_file, xml_marc_to_records from invenio.legacy import bibrecord, dbquery from invenio.legacy.bibclassify.engine import get_tmp_file, build_marc, _parse_marc_code from invenio.legacy.bibclassify import (config as bconfig, ontology_reader as bor) log = bconfig.get_logger("bibclassify.webinterface") template = load('bibclassify') def main_page(req, recid, tabs, ln, template): """Generate the main page for the keyword tab Url style : http://url/record/[recid]/keywords :param req: request object :param recid: int docid :param tabs: list of tab links :param ln: language id :param template: template object :return: nothing, writes using req object """
import os import re import sys import tempfile import time import urllib2 import traceback import xml.sax import thread import rdflib from invenio.legacy.bibclassify import config as bconfig from invenio.modules.classifier.errors import TaxonomyError log = bconfig.get_logger("bibclassify.ontology_reader") from invenio import config from invenio.modules.classifier.registry import taxonomies # only if not running in a standalone mode if bconfig.STANDALONE: dbquery = None from urllib2 import urlopen else: from invenio.legacy import dbquery from invenio.utils.url import make_invenio_opener urlopen = make_invenio_opener('BibClassify').open _contains_digit = re.compile("\d")
BibClassify command-line interface. This module provides a CLI for BibClassify. It reads the options and calls the method output_keywords_for_sources from bibclassify_engine. This module is STANDALONE safe. """ from __future__ import print_function import getopt import sys from invenio.legacy.bibclassify import config as bconfig log = bconfig.get_logger("bibclassify.cli") from invenio.legacy.bibclassify import engine from invenio.legacy.bibclassify import ontology_reader as reader daemon = None def get_recids_list(recids_string): """Return a list of recIDs.""" recids = {} elements = recids_string.split(",") for element in elements: bounds = element.split("-") bounds_nb = len(bounds) if bounds_nb == 1:
import os import re import sys import tempfile import time import urllib2 import traceback import xml.sax import thread import rdflib from invenio.legacy.bibclassify import config as bconfig from invenio.modules.classifier.errors import TaxonomyError log = bconfig.get_logger("bibclassify.ontology_reader") from invenio import config from invenio.modules.classifier.registry import taxonomies # only if not running in a standalone mode if bconfig.STANDALONE: dbquery = None from urllib2 import urlopen else: from invenio.legacy import dbquery from invenio.utils.url import make_invenio_opener urlopen = make_invenio_opener('BibClassify').open _contains_digit = re.compile("\d")
import os import six from cgi import escape from invenio.base.i18n import gettext_set_language from invenio.legacy.bibdocfile.api import BibRecDocs from invenio.legacy.search_engine import get_record from invenio.legacy.template import load from invenio.ext.legacy.handler import wash_urlargd import invenio.modules.access.engine as acce from invenio.legacy.bibsched import bibtask from invenio.legacy.bibupload.engine import open_marc_file, xml_marc_to_records from invenio.legacy import bibrecord, dbquery from invenio.legacy.bibclassify.engine import get_tmp_file, build_marc, _parse_marc_code from invenio.legacy.bibclassify import config as bconfig, ontology_reader as bor log = bconfig.get_logger("bibclassify.webinterface") template = load("bibclassify") def main_page(req, recid, tabs, ln, template): """Generate the main page for the keyword tab Url style : http://url/record/[recid]/keywords :param req: request object :param recid: int docid :param tabs: list of tab links :param ln: language id :param template: template object :return: nothing, writes using req object """
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. from __future__ import print_function """Module for running microtests on how well the extraction works - this module is STANDALONE safe""" import ConfigParser import glob import traceback import codecs from invenio.legacy.bibclassify import config as bconfig from invenio.legacy.bibclassify import engine as engine log = bconfig.get_logger("bibclassify.microtest") def run(glob_patterns, verbose=20, plevel=1 ): """Execute microtests""" if verbose is not None: log.setLevel(int(verbose)) results = {} for pattern in glob_patterns: log.info("Looking for microtests: %s" % pattern) for cfgfile in glob.glob(pattern):
""" import os import re import tempfile import urllib2 from invenio.legacy.bibclassify import config as bconfig if bconfig.STANDALONE: from urllib2 import urlopen else: from invenio.utils.url import make_invenio_opener urlopen = make_invenio_opener('BibClassify').open log = bconfig.get_logger("bibclassify.text_extractor") _ONE_WORD = re.compile("[A-Za-z]{2,}") def is_pdf(document): """Checks if a document is a PDF file. Returns True if is is.""" if not executable_exists('pdftotext'): log.warning("GNU file was not found on the system. " "Switching to a weak file extension test.") if document.lower().endswith(".pdf"): return True return False # Tested with file version >= 4.10. First test is secure and works # with file version 4.25. Second condition is tested for file # version 4.10.