def __init__(self, package_name=None, path=None,
             run_localy=False, CONSYN=False):
    """
    Set up logging and, unless in CONSYN mode, obtain and index a package.

    @param package_name: archive handed to C{_extract_package} when no
        C{path} is given.
    @param path: location of an already available package; when set,
        nothing is extracted or harvested.
    @param run_localy: when true, a C{ContrastOutConnector} is created
        and its C{run} method is called with this value.
    @param CONSYN: when true, only the journal mappings are built and
        none of the package attributes are initialised.
    """
    self.CONSYN = CONSYN
    try:
        self.logger = create_logger("Elsevier")
    except IOError:
        # The log file is not accessible: the instance stands in for the
        # logger and every log level is sent to standard output.
        self.logger = self
        self.info = self.warning = self.error = self.debug = print

    if self.CONSYN:
        self._build_journal_mappings()
        return

    self.package_name = package_name
    self.path = path
    self.found_articles = []
    self._found_issues = []

    if run_localy or not (path or package_name):
        # Both the local run and the "nothing supplied" case go through
        # the connector; only the argument passed to run() differs.
        from invenio.contrast_out import ContrastOutConnector
        self.conn = ContrastOutConnector(self.logger)
        if run_localy:
            self.conn.run(run_localy)
        else:
            self.conn.run()
    elif not path:
        # Only an archive name was supplied: unpack it first.
        self.logger.info("Got package: %s" % (package_name,))
        self._extract_package()

    self._crawl_elsevier_and_find_main_xml()
    self._crawl_elsevier_and_find_issue_xml()
    self._build_doi_mapping()
 def __init__(self, package_name=None, path=None,
              run_localy=False, CONSYN=False):
     """
     Initialise the instance, obtain a package and index its content.

     @param package_name: archive handed to C{_extract_package} when no
         C{path} is given.
     @param path: location of an already available package; when set,
         nothing is extracted or harvested.
     @param run_localy: when true, a C{ContrastOutConnector} is created
         and its C{run} method is called with this value.
     @param CONSYN: when true, the journal mappings are built instead
         of crawling the package for main/issue XML files.
     """
     self.CONSYN = CONSYN
     self.package_name, self.path = package_name, path
     self.found_articles, self._found_issues = [], []
     self.logger = create_logger("Elsevier")

     if run_localy:
         from invenio.contrast_out import ContrastOutConnector
         self.conn = ContrastOutConnector(self.logger)
         self.conn.run(run_localy)
     elif not path:
         if package_name:
             # An archive was supplied: unpack it.
             self.logger.info("Got package: %s" % (package_name,))
             self._extract_package()
         else:
             # Neither an archive nor a path: let the connector run.
             from invenio.contrast_out import ContrastOutConnector
             self.conn = ContrastOutConnector(self.logger)
             self.conn.run()

     if not CONSYN:
         self._crawl_elsevier_and_find_main_xml()
         self._crawl_elsevier_and_find_issue_xml()
         self._build_doi_mapping()
     else:
         self._build_journal_mappings()
class ElsevierPackage(object):
    """
    This class is specialized in parsing an Elsevier package
    and creating a SCOAP3-compatible bibupload containing the original
    PDF, XML, and every possible metadata filled in.

    @param package_name: the path to a TAR or ZIP archive to expand and
        parse
    @param path: the actual path of an already expanded package.
    @param run_localy: when true, a C{ContrastOutConnector} is created and
        its C{run} method is called with this value.

    @note: neither C{package_name} nor C{path} has to be passed to the
    constructor; in that case the Elsevier server will be harvested.
    """
    def __init__(self, package_name=None, path=None, run_localy=False):
        self.package_name = package_name
        self.path = path
        self.found_articles = []
        self._found_issues = []
        self.logger = create_logger("Elsevier")

        if run_localy:
            from invenio.contrast_out import ContrastOutConnector
            self.conn = ContrastOutConnector(self.logger)
            self.conn.run(run_localy)
        else:
            if not path and package_name:
                self.logger.info("Got package: %s" % (package_name,))
                self._extract_package()
            elif not path and not package_name:
                # Call form so the line parses both as a Python 2 print
                # statement and as a Python 3 print function call.
                print("Starting harves")
                from invenio.contrast_out import ContrastOutConnector
                self.conn = ContrastOutConnector(self.logger)
                self.conn.run()
        self._crawl_elsevier_and_find_main_xml()
        self._crawl_elsevier_and_find_issue_xml()
        self._build_doi_mapping()

    def _extract_package(self):
        """
        Extract a package in a new temporary directory.

        C{self.path} is replaced by the freshly created directory. Any
        error raised while opening or extracting the archive is registered,
        logged and printed, but not re-raised.
        """
        self.path = mkdtemp(prefix="scoap3_package_", dir=CFG_TMPSHAREDDIR)
        self.logger.debug("Extracting package: %s" % (self.package_name,))
        try:
            if ".tar" in self.package_name:
                archive = TarFile.open(self.package_name)
            elif ".zip" in self.package_name:
                archive = ZipFile(self.package_name)
            else:
                raise FileTypeError("It's not a TAR or ZIP archive.")
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive - confirm packages come from a trusted source.
            try:
                archive.extractall(self.path)
            finally:
                # Release the file handle even when extraction fails.
                archive.close()
        except Exception as err:
            register_exception(alert_admin=True, prefix="Elsevier error extracting package.")
            self.logger.error("Error extraction package file: %s %s" % (self.path, err))
            print("\nError extracting package file: %s %s" % (self.path, err))
Example #4
0
    def __init__(self, package_name=None, path=None):
        """
        Obtain a package and index the articles and issues it contains.

        @param package_name: archive handed to C{_extract_package} when no
            C{path} is given.
        @param path: location of an already available package; when set,
            nothing is extracted or harvested.

        When neither argument is given, a C{ContrastOutConnector} is
        created and run before the crawl.
        """
        self.package_name = package_name
        self.path = path
        self.found_articles = []
        self._found_issues = []
        self.logger = create_logger("Elsevier")

        if not path and package_name:
            self.logger.info("Got package: %s" % (package_name,))
            self._extract_package()
        elif not path and not package_name:
            # Call form so the line parses both as a Python 2 print
            # statement and as a Python 3 print function call.
            print("Starting harves")
            from invenio.contrast_out import ContrastOutConnector
            self.conn = ContrastOutConnector(self.logger)
            self.conn.run()
        self._crawl_elsevier_and_find_main_xml()
        self._crawl_elsevier_and_find_issue_xml()
        self._build_doi_mapping()
class ElsevierPackage(object):

    """
    This class is specialized in parsing an Elsevier package
    and creating a SCOAP3-compatible bibupload containing the original
    PDF, XML, and every possible metadata filled in.

    @param package_name: the path to a TAR or ZIP archive to expand and
        parse
    @param path: the actual path of an already expanded package.
    @param run_localy: when true, a C{ContrastOutConnector} is created and
        its C{run} method is called with this value.
    @param CONSYN: when true, the journal mappings are built instead of
        crawling the package for main/issue XML files.

    @note: neither C{package_name} nor C{path} has to be passed to the
    constructor; in that case the Elsevier server will be harvested.
    """

    def __init__(self, package_name=None, path=None,
                 run_localy=False, CONSYN=False):
        self.CONSYN = CONSYN
        self.package_name = package_name
        self.path = path
        self.found_articles = []
        self._found_issues = []
        self.logger = create_logger("Elsevier")
        if run_localy:
            from invenio.contrast_out import ContrastOutConnector
            self.conn = ContrastOutConnector(self.logger)
            self.conn.run(run_localy)
        else:
            if not path and package_name:
                self.logger.info("Got package: %s" % (package_name,))
                self._extract_package()
            elif not path and not package_name:
                from invenio.contrast_out import ContrastOutConnector
                self.conn = ContrastOutConnector(self.logger)
                self.conn.run()
        if CONSYN:
            self._build_journal_mappings()
        else:
            self._crawl_elsevier_and_find_main_xml()
            self._crawl_elsevier_and_find_issue_xml()
            self._build_doi_mapping()

    def _fix_journal_name(self, journal):
        """
        Converts journal name to Inspire's short form.

        A trailing single capital letter that follows a dot or a space is
        split off as the volume letter; only in that case the remaining
        name is looked up in C{self.journal_mappings} (upper-cased first,
        then as-is). Finally every C{'. '} is collapsed to C{'.'}.

        @param journal: journal name, may be empty or C{None}.
        @return: tuple C{(journal, volume)}; C{('', '')} for empty input.

        @note: C{self.journal_mappings} must have been set (see
        C{_build_journal_mappings}) before a name with a volume letter is
        processed.
        """
        if not journal:
            return '', ''
        volume = ''
        # The length guard keeps a one-character name such as "A" from
        # raising IndexError on journal[-2].
        if len(journal) > 1 and 'A' <= journal[-1] <= 'Z' \
                and journal[-2] in ('.', ' '):
            volume += journal[-1]
            journal = journal[:-1]
            journal = journal.strip()
            try:
                journal = self.journal_mappings[journal.upper()].strip()
            except KeyError:
                try:
                    journal = self.journal_mappings[journal].strip()
                except KeyError:
                    # Unknown journal: keep the name as given.
                    pass
        journal = journal.replace('. ', '.')
        return journal, volume

    def _build_journal_mappings(self):
        """
        Load the journal knowledge base into C{self.journal_mappings}.

        Falls back to an empty mapping when C{get_kbs()} has no
        C{'journals'} entry.
        """
        try:
            self.journal_mappings = get_kbs()['journals'][1]
        except KeyError:
            self.journal_mappings = {}

    def _extract_package(self):
        """
        Extract a package in a new temporary directory.

        C{self.path} is replaced by the freshly created directory. Any
        error raised while opening or extracting the archive is registered,
        logged and printed, but not re-raised.
        """
        self.path = mkdtemp(prefix="scoap3_package_", dir=CFG_TMPSHAREDDIR)
        self.logger.debug("Extracting package: %s" % (self.package_name,))
        try:
            if ".tar" in self.package_name:
                archive = TarFile.open(self.package_name)
            elif ".zip" in self.package_name:
                archive = ZipFile(self.package_name)
            else:
                raise FileTypeError("It's not a TAR or ZIP archive.")
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive - confirm packages come from a trusted source.
            try:
                archive.extractall(self.path)
            finally:
                # Release the file handle even when extraction fails.
                archive.close()
        except Exception as err:
            register_exception(alert_admin=True,
                               prefix="Elsevier error extracting package.")
            self.logger.error("Error extraction package file: %s %s"
                              % (self.path, err))
            print("\nError extracting package file: %s %s" % (self.path, err))