def __init__(self, package_name=None, path=None, run_localy=False, CONSYN=False):
    """
    Set up logging, then either build the journal mappings (CONSYN mode)
    or obtain a package and index its contents.

    @param package_name: archive passed on to C{_extract_package} when
        C{path} is empty and C{run_localy} is false.
    @param path: stored as C{self.path}; when set (and C{run_localy} is
        false) nothing is extracted or harvested here.
    @param run_localy: when true, a C{ContrastOutConnector} is created and
        run with this value.
    @param CONSYN: when true, only C{_build_journal_mappings} is called
        and the package-related attributes are not initialised.
    """
    self.CONSYN = CONSYN

    try:
        self.logger = create_logger("Elsevier")
    except IOError:
        # The log file could not be accessed: the instance acts as its
        # own logger and every level is sent to standard output.
        self.logger = self
        for level in ("info", "warning", "error", "debug"):
            setattr(self, level, print)

    if self.CONSYN:
        self._build_journal_mappings()
        return

    self.package_name = package_name
    self.path = path
    self.found_articles = []
    self._found_issues = []

    if run_localy:
        from invenio.contrast_out import ContrastOutConnector
        self.conn = ContrastOutConnector(self.logger)
        self.conn.run(run_localy)
    elif not path:
        if package_name:
            self.logger.info("Got package: %s" % (package_name,))
            self._extract_package()
        else:
            # Neither a path nor a package was supplied: harvest.
            from invenio.contrast_out import ContrastOutConnector
            self.conn = ContrastOutConnector(self.logger)
            self.conn.run()

    self._crawl_elsevier_and_find_main_xml()
    self._crawl_elsevier_and_find_issue_xml()
    self._build_doi_mapping()
def __init__(self, package_name=None, path=None, run_localy=False, CONSYN=False):
    """
    Initialise the instance, obtain a package when needed, then either
    build the journal mappings (CONSYN mode) or index the package.

    @param package_name: archive passed on to C{_extract_package} when
        C{path} is empty and C{run_localy} is false.
    @param path: stored as C{self.path}; when set (and C{run_localy} is
        false) nothing is extracted or harvested here.
    @param run_localy: when true, a C{ContrastOutConnector} is created and
        run with this value.
    @param CONSYN: when true, C{_build_journal_mappings} is called instead
        of the crawl / DOI-mapping steps.
    """
    self.CONSYN = CONSYN
    self.package_name = package_name
    self.path = path
    self.found_articles = []
    self._found_issues = []
    self.logger = create_logger("Elsevier")

    if run_localy:
        from invenio.contrast_out import ContrastOutConnector
        self.conn = ContrastOutConnector(self.logger)
        self.conn.run(run_localy)
    elif not path:
        if package_name:
            self.logger.info("Got package: %s" % (package_name,))
            self._extract_package()
        else:
            # Neither a path nor a package was supplied: harvest.
            from invenio.contrast_out import ContrastOutConnector
            self.conn = ContrastOutConnector(self.logger)
            self.conn.run()

    if CONSYN:
        self._build_journal_mappings()
        return

    self._crawl_elsevier_and_find_main_xml()
    self._crawl_elsevier_and_find_issue_xml()
    self._build_doi_mapping()
class ElsevierPackage(object):
    """
    This class is specialized in parsing an Elsevier package
    and creating a SCOAP3-compatible bibupload containing the original
    PDF, XML, and every possible metadata filled in.

    @param package_name: the path to a tar or zip archive to expand and
        parse
    @param path: the actual path of an already expanded package.
    @param run_localy: when true, a C{ContrastOutConnector} is created and
        run with this value instead of looking at C{package_name}/C{path}.

    @note: neither C{package_name} nor C{path} has to be passed to the
        constructor; in that case the Elsevier server will be harvested.
    """

    def __init__(self, package_name=None, path=None, run_localy=False):
        """
        Obtain a package (extract it, or harvest one) and index it.
        """
        self.package_name = package_name
        self.path = path
        self.found_articles = []
        self._found_issues = []
        self.logger = create_logger("Elsevier")
        if run_localy:
            from invenio.contrast_out import ContrastOutConnector
            self.conn = ContrastOutConnector(self.logger)
            self.conn.run(run_localy)
        else:
            if not path and package_name:
                self.logger.info("Got package: %s" % (package_name,))
                self._extract_package()
            elif not path and not package_name:
                # Call form of print: identical output on Python 2 for a
                # single argument, and valid syntax on Python 3.
                print("Starting harves")
                from invenio.contrast_out import ContrastOutConnector
                self.conn = ContrastOutConnector(self.logger)
                self.conn.run()
        self._crawl_elsevier_and_find_main_xml()
        self._crawl_elsevier_and_find_issue_xml()
        self._build_doi_mapping()

    def _extract_package(self):
        """
        Extract a package in a new temporary directory.

        The directory is created under C{CFG_TMPSHAREDDIR} and stored in
        C{self.path}. Any error raised while opening or extracting the
        archive is reported (C{register_exception}, logger, stdout) and
        not re-raised.
        """
        self.path = mkdtemp(prefix="scoap3_package_", dir=CFG_TMPSHAREDDIR)
        self.logger.debug("Extracting package: %s" % (self.package_name,))
        try:
            if ".tar" in self.package_name:
                archive = TarFile.open(self.package_name)
            elif ".zip" in self.package_name:
                archive = ZipFile(self.package_name)
            else:
                raise FileTypeError("It's not a TAR or ZIP archive.")
            # NOTE(review): extractall() does not sanitise member paths;
            # this assumes the archive comes from a trusted source - confirm.
            try:
                archive.extractall(self.path)
            finally:
                # Release the file handle even when extraction fails.
                archive.close()
        except Exception as err:
            register_exception(alert_admin=True,
                               prefix="Elsevier error extracting package.")
            self.logger.error("Error extraction package file: %s %s"
                              % (self.path, err))
            print("\nError extracting package file: %s %s"
                  % (self.path, err))
def __init__(self, package_name=None, path=None):
    """
    Obtain a package (extract it, or harvest one) and index it.

    @param package_name: archive passed on to C{_extract_package} when
        C{path} is empty.
    @param path: stored as C{self.path}; when set, nothing is extracted
        or harvested here.
    """
    self.package_name = package_name
    self.path = path
    self.found_articles = []
    self._found_issues = []
    self.logger = create_logger("Elsevier")
    if not path and package_name:
        self.logger.info("Got package: %s" % (package_name,))
        self._extract_package()
    elif not path and not package_name:
        # Call form of print: identical output on Python 2 for a single
        # argument, and valid syntax on Python 3 (the statement form is
        # a SyntaxError there).
        print("Starting harves")
        from invenio.contrast_out import ContrastOutConnector
        self.conn = ContrastOutConnector(self.logger)
        self.conn.run()
    self._crawl_elsevier_and_find_main_xml()
    self._crawl_elsevier_and_find_issue_xml()
    self._build_doi_mapping()
class ElsevierPackage(object):
    """
    This class is specialized in parsing an Elsevier package
    and creating a SCOAP3-compatible bibupload containing the original
    PDF, XML, and every possible metadata filled in.

    @param package_name: the path to a tar or zip archive to expand and
        parse
    @param path: the actual path of an already expanded package.
    @param run_localy: when true, a C{ContrastOutConnector} is created and
        run with this value instead of looking at C{package_name}/C{path}.
    @param CONSYN: when true, the journal mappings are built instead of
        crawling the package.

    @note: neither C{package_name} nor C{path} has to be passed to the
        constructor; in that case the Elsevier server will be harvested.
    """

    def __init__(self, package_name=None, path=None, run_localy=False, CONSYN=False):
        """
        Obtain a package when needed, then either build the journal
        mappings (CONSYN mode) or index the package contents.
        """
        self.CONSYN = CONSYN
        self.package_name = package_name
        self.path = path
        self.found_articles = []
        self._found_issues = []
        self.logger = create_logger("Elsevier")
        if run_localy:
            from invenio.contrast_out import ContrastOutConnector
            self.conn = ContrastOutConnector(self.logger)
            self.conn.run(run_localy)
        else:
            if not path and package_name:
                self.logger.info("Got package: %s" % (package_name,))
                self._extract_package()
            elif not path and not package_name:
                from invenio.contrast_out import ContrastOutConnector
                self.conn = ContrastOutConnector(self.logger)
                self.conn.run()
        if CONSYN:
            self._build_journal_mappings()
        else:
            self._crawl_elsevier_and_find_main_xml()
            self._crawl_elsevier_and_find_issue_xml()
            self._build_doi_mapping()

    def _fix_journal_name(self, journal):
        """
        Converts journal name to Inspire's short form

        @param journal: journal name, optionally ending in a volume
            letter (a capital A-Z preceded by '.' or ' ').
        @return: tuple C{(journal, volume)}; C{('', '')} for empty input.
            C{volume} is the split-off letter or ''.
        """
        if not journal:
            return '', ''
        volume = ''
        # The length guard keeps a one-character name from raising
        # IndexError on journal[-2].
        if (len(journal) > 1
                and 'A' <= journal[-1] <= 'Z'
                and journal[-2] in ('.', ' ')):
            volume += journal[-1]
            journal = journal[:-1]
        journal = journal.strip()
        # The mappings only exist once _build_journal_mappings() has run;
        # without them the name is kept as given.
        mappings = getattr(self, 'journal_mappings', None) or {}
        # Upper-cased key first, then the name as written.
        for key in (journal.upper(), journal):
            try:
                journal = mappings[key].strip()
            except KeyError:
                continue
            break
        journal = journal.replace('. ', '.')
        return journal, volume

    def _build_journal_mappings(self):
        """
        Load C{self.journal_mappings} from C{get_kbs()['journals'][1]},
        falling back to an empty dict when a key is missing.
        """
        try:
            self.journal_mappings = get_kbs()['journals'][1]
        except KeyError:
            self.journal_mappings = {}

    def _extract_package(self):
        """
        Extract a package in a new temporary directory.

        The directory is created under C{CFG_TMPSHAREDDIR} and stored in
        C{self.path}. Any error raised while opening or extracting the
        archive is reported (C{register_exception}, logger, stdout) and
        not re-raised.
        """
        self.path = mkdtemp(prefix="scoap3_package_", dir=CFG_TMPSHAREDDIR)
        self.logger.debug("Extracting package: %s" % (self.package_name,))
        try:
            if ".tar" in self.package_name:
                archive = TarFile.open(self.package_name)
            elif ".zip" in self.package_name:
                archive = ZipFile(self.package_name)
            else:
                raise FileTypeError("It's not a TAR or ZIP archive.")
            # NOTE(review): extractall() does not sanitise member paths;
            # this assumes the archive comes from a trusted source - confirm.
            try:
                archive.extractall(self.path)
            finally:
                # Release the file handle even when extraction fails.
                archive.close()
        except Exception as err:
            register_exception(alert_admin=True,
                               prefix="Elsevier error extracting package.")
            self.logger.error("Error extraction package file: %s %s"
                              % (self.path, err))
            print("\nError extracting package file: %s %s"
                  % (self.path, err))