Ejemplo n.º 1
0
def task_extract(message):
    """
    Extracts the full text from the given location, writes each result to
    the local filesystem and sends it to master.

    :param message: a single extraction request or a list of them; a single
        request is wrapped in a list before extraction.
    :return: no return
    """
    logger.debug('Extract content: %s', message)
    if not isinstance(message, list):
        message = [message]

    results = extraction.extract_content(message, extract_pdf_script=app.conf['EXTRACT_PDF_SCRIPT'])
    logger.debug('Results: %s', results)
    for r in results:
        logger.debug("Calling 'write_content' with '%s'", str(r))
        # Write locally to filesystem
        writer.write_content(r)

        # Build the payload for master; optional sections are only added
        # when present and non-empty.
        msg = {
            'bibcode': r['bibcode'],
            'body': r['fulltext'],
        }
        for x in ('acknowledgements', 'dataset'):
            if x in r and r[x]:
                msg[x] = r[x]

        # Dispatch exactly once per result. Results with an empty fulltext
        # are still sent, so any acknowledgements/dataset sections they
        # carry are not dropped.
        logger.debug("Calling 'task_output_results' with '%s'", msg)
        task_output_results.delay(msg)
Ejemplo n.º 2
0
def task_extract(message):
    """
    Extracts the full text from the given location, writes each result to
    the local filesystem and sends it to master.

    :param message: a single extraction request or a list of them; a single
        request is wrapped in a list before extraction.
    :return: no return
    """
    logger.debug('Extract content: %s', message)
    if not isinstance(message, list):
        message = [message]

    results = extraction.extract_content(
        message, extract_pdf_script=app.conf['EXTRACT_PDF_SCRIPT'])
    logger.debug('Results: %s', results)
    for r in results:
        logger.debug("Calling 'write_content' with '%s'", str(r))
        # Write locally to filesystem
        writer.write_content(r)

        # Build the payload for master; optional sections are only added
        # when present and non-empty.
        msg = {
            'bibcode': r['bibcode'],
            'body': r['fulltext'],
        }
        for x in ('acknowledgements', 'dataset'):
            if x in r and r[x]:
                msg[x] = r[x]

        # Dispatch exactly once per result. Results with an empty fulltext
        # are still sent, so any acknowledgements/dataset sections they
        # carry are not dropped.
        logger.debug("Calling 'task_output_results' with '%s'", msg)
        task_output_results.delay(msg)
Ejemplo n.º 3
0
def task_extract(message):
    """
    Run content extraction for the given request(s), persist every result
    locally and forward it to master.

    :param message: one extraction request or a list of requests; a single
        request is wrapped in a list first.
    :return: no return
    """
    logger.debug('Extract content: %s', message)
    message = message if isinstance(message, list) else [message]

    pdf_script = app.conf['EXTRACT_PDF_SCRIPT']
    extracted = extraction.extract_content(message, extract_pdf_script=pdf_script)
    logger.debug('Results: %s', extracted)

    optional_fields = ('acknowledgements', 'dataset', 'facility')
    for result in extracted:
        # Persist the result on the local filesystem
        logger.debug("Calling 'write_content' with '%s'", str(result))
        writer.write_content(result)

        # Payload for master: mandatory keys first, then every optional
        # section that is present and non-empty.
        payload = {'bibcode': result['bibcode'], 'body': result['fulltext']}
        payload.update({
            field: result[field]
            for field in optional_fields
            if field in result and result[field]
        })

        # Sent regardless of whether the fulltext is empty, so the other
        # components (acknowledgements, etc.) still reach master.
        logger.debug("Calling 'task_output_results' with '%s'", payload)
        logger.info("Calling task_output_results...")
        task_output_results.delay(payload)

    # Optionally follow up with named-entity recognition on the same batch
    if app.conf['RUN_NER_FACILITIES_AFTER_EXTRACTION']:
        task_identify_facilities.delay(message)
Ejemplo n.º 4
0
    def task_extract_grobid(message):
        """
        Run the Grobid-based structured full text extraction for the given
        request(s) and write every result to the local filesystem.

        Note that each request's 'file_format' entry is modified in place.

        :param message: one extraction request or a list of requests; a
            single request is wrapped in a list first.
        :return: no return
        """
        logger.debug('Extract grobid content: %s', message)
        requests = message if isinstance(message, list) else [message]

        # Modify the file format to force the use of GrobidPDFExtractor
        for request in requests:
            request['file_format'] += "-grobid"

        grobid_url = app.conf['GROBID_SERVICE']
        extracted = extraction.extract_content(requests, grobid_service=grobid_url)
        logger.debug('Grobid results: %s', extracted)

        for result in extracted:
            # Persist the result on the local filesystem
            logger.debug("Calling 'write_content' with '%s'", str(result))
            writer.write_content(result)
Ejemplo n.º 5
0
    def task_extract_grobid(message):
        """
        Extract structured full text through Grobid for the given request(s)
        and store each result on the local filesystem.

        The 'file_format' value of every request is updated in place.

        :param message: a single extraction request or a list of them; a
            single request is wrapped in a list before processing.
        :return: no return
        """
        logger.debug('Extract grobid content: %s', message)
        batch = message if isinstance(message, list) else [message]

        # Modify the file format to force the use of GrobidPDFExtractor
        for item in batch:
            item['file_format'] += "-grobid"

        grobid_results = extraction.extract_content(
            batch,
            grobid_service=app.conf['GROBID_SERVICE'],
        )
        logger.debug('Grobid results: %s', grobid_results)

        for entry in grobid_results:
            logger.debug("Calling 'write_content' with '%s'", str(entry))
            # Write locally to filesystem
            writer.write_content(entry)
Ejemplo n.º 6
0
    def test_loads_the_content_correctly_and_makes_full_text_file(self):
        """
        Tests the write_content method. Checks that the full text file is
        created and saved to disk.

        :return: no return
        """

        # Only the on-disk side effect is checked, so the return value of
        # write_content is deliberately not kept.
        writer.write_content(self.dict_item)

        # Report the missing path on failure instead of a bare boolean
        self.assertTrue(
            os.path.exists(self.full_text_file),
            msg='Full text file was not created: {0}'.format(
                self.full_text_file))
Ejemplo n.º 7
0
    def test_loads_the_content_correctly_and_makes_folders(self):
        """
        Tests the write_content method. Checks that the folder to contain the
        full text and meta data is created.

        :return: no return
        """

        # Only the on-disk side effect is checked, so the return value of
        # write_content is deliberately not kept.
        writer.write_content(self.dict_item)

        # Report the missing path on failure instead of a bare boolean
        self.assertTrue(
            os.path.exists(self.bibcode_pair_tree),
            msg='Bibcode pair tree folder was not created: {0}'.format(
                self.bibcode_pair_tree))
Ejemplo n.º 8
0
    def test_loads_the_content_correctly_and_makes_full_text_file(self):
        """
        Tests the write_content method. Checks that the full text file is
        created and saved to disk.

        :return: no return
        """

        # The return value of write_content is not needed; the assertion
        # below inspects the filesystem only.
        writer.write_content(self.dict_item)

        # A descriptive message makes a failure point at the missing path
        self.assertTrue(
            os.path.exists(self.full_text_file),
            msg='Full text file was not created: {0}'.format(
                self.full_text_file))
Ejemplo n.º 9
0
    def test_loads_the_content_correctly_and_makes_folders(self):
        """
        Tests the write_content method. Checks that the folder to contain the
        full text and meta data is created.

        :return: no return
        """

        # The return value of write_content is not needed; the assertion
        # below inspects the filesystem only.
        writer.write_content(self.dict_item)

        # A descriptive message makes a failure point at the missing path
        self.assertTrue(
            os.path.exists(self.bibcode_pair_tree),
            msg='Bibcode pair tree folder was not created: {0}'.format(
                self.bibcode_pair_tree))