Python harvest_single Examples

Programming Language: Python

Namespace/Package Name: invenio.plotextractor_getter

Method/Function: harvest_single

Examples at hotexamples.com: 4

Python harvest_single - 4 examples found. These are the top-rated real-world Python examples of invenio.plotextractor_getter.harvest_single, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: oai_harvest_daemon.py Project: pombredanne/invenio

def plotextractor_harvest(identifier, active_file):
    """
    Function that calls plotextractor library to download and extract tarball
    and fulltext pdf for each record.

    The directory and file name of active_file are passed to
    make_single_directory (file name suffixed with "_plotextraction"); its
    result is used as the extraction path handed to harvest_single.

    @param identifier: OAI identifier of the record to harvest
    @param active_file: path to the currently processed file

    @return: tuple (exitcode, err_msg, fulltext_xml, plotextracted_xml) where
             exitcode is 0 when every step succeeded and 1 otherwise,
             err_msg holds one line per failed step (empty on success),
             fulltext_xml is an FFT datafield string pointing at the
             harvested pdf (None if no pdf was harvested) and
             plotextracted_xml is the content of the file reported by
             process_single (None if harvesting or extraction failed)
    """
    err_msg = ""
    exitcode = 0
    plotextracted_xml = None
    fulltext_xml = None
    active_dir, active_name = os.path.split(active_file)
    extract_path = make_single_directory(active_dir,
                                         active_name + "_plotextraction")
    tarball, pdf = harvest_single(identifier, extract_path)

    if tarball is not None:
        plotextracted_xml_path = process_single(tarball, clean=True)
        if plotextracted_xml_path is not None:
            # Context manager guarantees the descriptor is released even if
            # read() raises.
            with open(plotextracted_xml_path, 'r') as plotsxml_fd:
                plotextracted_xml = plotsxml_fd.read()
        else:
            err_msg += "Error extracting plots from id: %s %s\n" % \
                       (identifier, tarball)
            exitcode = 1
    else:
        err_msg += "Error harvesting plots from id: %s %s\n" % \
                   (identifier, extract_path)
        exitcode = 1

    if pdf is not None:
        # NOTE(review): pdf is inserted verbatim into the XML; presumably the
        # path never contains XML special characters -- confirm.
        fulltext_xml = ('<datafield tag="FFT" ind1=" " ind2=" ">'
                        '<subfield code="a">' + pdf + '</subfield>'
                        '<subfield code="t"></subfield>'
                        '</datafield>')
    else:
        err_msg += "Error harvesting fulltext from id: %s %s\n" % \
                   (identifier, extract_path)
        exitcode = 1
    return exitcode, err_msg, fulltext_xml, plotextracted_xml

Example #2

Show file

File: oai_harvest_daemon.py Project: epfl-si/invenio-infoscience

def plotextractor_harvest(identifier, active_file):
    """
    Function that calls plotextractor library to download and extract tarball
    and fulltext pdf for each record.

    The directory and file name of active_file are passed to
    make_single_directory (file name suffixed with "_plotextraction"); its
    result is used as the extraction path handed to harvest_single.

    @param identifier: OAI identifier of the record to harvest
    @param active_file: path to the currently processed file

    @return: tuple (exitcode, err_msg, fulltext_xml, plotextracted_xml) where
             exitcode is 0 when every step succeeded and 1 otherwise,
             err_msg holds one line per failed step (empty on success),
             fulltext_xml is an FFT datafield string pointing at the
             harvested pdf (None if no pdf was harvested) and
             plotextracted_xml is the content of the file reported by
             process_single (None if harvesting or extraction failed)
    """
    err_msg = ""
    exitcode = 0
    plotextracted_xml = None
    fulltext_xml = None
    active_dir, active_name = os.path.split(active_file)
    extract_path = make_single_directory(active_dir,
                                         active_name + "_plotextraction")
    tarball, pdf = harvest_single(identifier, extract_path)

    if tarball is not None:
        plotextracted_xml_path = process_single(tarball, clean=True)
        if plotextracted_xml_path is not None:
            # Context manager guarantees the descriptor is released even if
            # read() raises.
            with open(plotextracted_xml_path, 'r') as plotsxml_fd:
                plotextracted_xml = plotsxml_fd.read()
        else:
            err_msg += "Error extracting plots from id: %s %s\n" % \
                       (identifier, tarball)
            exitcode = 1
    else:
        err_msg += "Error harvesting plots from id: %s %s\n" % \
                   (identifier, extract_path)
        exitcode = 1

    if pdf is not None:
        # NOTE(review): pdf is inserted verbatim into the XML; presumably the
        # path never contains XML special characters -- confirm.
        fulltext_xml = ('<datafield tag="FFT" ind1=" " ind2=" ">'
                        '<subfield code="a">' + pdf + '</subfield>'
                        '<subfield code="t"></subfield>'
                        '</datafield>')
    else:
        err_msg += "Error harvesting fulltext from id: %s %s\n" % \
                   (identifier, extract_path)
        exitcode = 1
    return exitcode, err_msg, fulltext_xml, plotextracted_xml

Example #3

Show file

File: plotextractor_regression_tests.py Project: aw-bib/tind-invenio

 def test_harvest_single(self):
     """plotextractor - check harvest_single

     Requests both the pdf and the tarball of one arXiv record into /tmp
     and checks that neither result is None.
     """
     requested = ('pdf', 'tarball')
     harvested = harvest_single('arXiv:1204.6260', '/tmp', requested)
     tarball, pdf = harvested
     # The pdf is checked first, then the tarball.
     pdf_found = pdf is not None
     self.assertTrue(pdf_found, "PDF is of unknown type")
     tarball_found = tarball is not None
     self.assertTrue(tarball_found, "Tarball is of unknown type")

Example #4

Show file

 def test_harvest_single(self):
     """plotextractor - check harvest_single

     Harvests the pdf and the tarball of a single arXiv record into /tmp
     and verifies that both came back as something other than None.
     """
     record_id = 'arXiv:1204.6260'
     target_dir = '/tmp'
     selection = ('pdf', 'tarball')
     tarball, pdf = harvest_single(record_id, target_dir, selection)
     # Same assertion order as before: pdf, then tarball.
     for result, message in ((pdf, "PDF is of unknown type"),
                             (tarball, "Tarball is of unknown type")):
         self.assertTrue(result is not None, message)