Esempio n. 1
0
def generatearxiv(skip_dependencies=True):
    """Generate the arxiv dataset for deepfigures.

    Runs the arxiv data-generation pipeline inside the CPU docker image.
    The pipeline pulls its data from S3 (which the requestor has to pay
    for).

    Args:
        skip_dependencies: When False, the docker images are (re)built via
            the ``build`` command before the pipeline is started.
    """
    if not skip_dependencies:
        build.build.callback()

    image = settings.DEEPFIGURES_IMAGES['cpu']
    image_tag = image['tag']
    image_version = image['version_prefix'] + settings.VERSION

    # Each data directory is mounted at the same path inside the container
    # as on the host.
    command_template = (
        'docker run'
        ' --rm'
        ' --env-file deepfigures-local.env'
        ' --volume {ARXIV_DATA_TMP_DIR}:{ARXIV_DATA_TMP_DIR}'
        ' --volume {ARXIV_DATA_OUTPUT_DIR}:{ARXIV_DATA_OUTPUT_DIR}'
        ' {tag}:{version}'
        ' python3'
        ' /work/deepfigures/data_generation/arxiv_pipeline.py'
    )
    command = command_template.format(
        tag=image_tag,
        version=image_version,
        ARXIV_DATA_TMP_DIR=settings.ARXIV_DATA_TMP_DIR,
        ARXIV_DATA_OUTPUT_DIR=settings.ARXIV_DATA_OUTPUT_DIR)

    execute(command, logger, raise_error=True)
Esempio n. 2
0
def generatepubmed(skip_dependencies=True):
    """Generate the pubmed dataset for deepfigures.

    Runs the pubmed data-generation pipeline inside the CPU docker image.
    The pipeline can involve pulling data from S3 (which the requestor has
    to pay for).

    Args:
        skip_dependencies: When False, the docker images are (re)built via
            the ``build`` command before the pipeline is started.
    """
    if not skip_dependencies:
        build.build.callback()

    image = settings.DEEPFIGURES_IMAGES['cpu']
    pubmed_data_dir = settings.LOCAL_PUBMED_DISTANT_DATA_DIR

    # The data directory is mounted at the same path inside the container
    # as on the host.
    command_template = (
        'docker run'
        ' --rm'
        ' --env-file deepfigures-local.env'
        ' --volume {LOCAL_PUBMED_DISTANT_DATA_DIR}:{LOCAL_PUBMED_DISTANT_DATA_DIR}'
        ' {tag}:{version}'
        ' python3'
        ' /work/deepfigures/data_generation/pubmed_pipeline.py'
    )
    command = command_template.format(
        tag=image['tag'],
        version=settings.VERSION,
        LOCAL_PUBMED_DISTANT_DATA_DIR=pubmed_data_dir)

    execute(command, logger, raise_error=True)
Esempio n. 3
0
def runtests():
    """Run the deepfigures test suite with pytest."""
    # Make INFO-level messages from this script visible on the console.
    logger.setLevel(logging.INFO)
    logging.basicConfig()

    logger.info('Running tests for deepfigures.')

    pytest_command = 'pytest -n auto /work/deepfigures'
    execute(pytest_command, logger)
Esempio n. 4
0
def build():
    """Build every docker image listed in ``settings.DEEPFIGURES_IMAGES``.

    Each image is tagged as ``<tag>:<settings.VERSION>`` and built from its
    configured dockerfile, using the current directory as build context.
    """
    build_template = (
        'docker build'
        ' --tag {tag}:{version}'
        ' --file {dockerfile_path} .'
    )

    # Only the image descriptions are needed; the mapping keys are unused.
    for image in settings.DEEPFIGURES_IMAGES.values():
        image_tag = image['tag']
        image_dockerfile = image['dockerfile_path']

        command = build_template.format(
            tag=image_tag,
            version=settings.VERSION,
            dockerfile_path=image_dockerfile)
        execute(command, logger)
Esempio n. 5
0
def testunits(skip_dependencies=False):
    """Run the deepfigures unit tests inside the CPU docker image.

    Args:
        skip_dependencies: When False (the default), the docker images are
            (re)built via the ``build`` command before the tests run.
    """
    if not skip_dependencies:
        build.build.callback()

    image = settings.DEEPFIGURES_IMAGES['cpu']

    command_template = (
        'docker run'
        ' --rm'
        ' --env-file deepfigures-local.env'
        ' {tag}:{version}'
        ' python3 /work/scripts/runtests.py'
    )
    command = command_template.format(
        tag=image['tag'],
        version=settings.VERSION)

    execute(command, logger, raise_error=True)
Esempio n. 6
0
def build():
    """Build every docker image listed in ``settings.DEEPFIGURES_IMAGES``.

    Each image is tagged as ``<tag>:<version_prefix + settings.VERSION>``.
    The same tag is passed to ``--cache-from`` and the inline-cache build
    argument is set.
    """
    build_template = (
        'docker build'
        ' --tag {tag}:{version}'
        ' --cache-from {tag}:{version}'
        ' --build-arg BUILDKIT_INLINE_CACHE=1'
        ' --file {dockerfile_path} .'
    )

    # Only the image descriptions are needed; the mapping keys are unused.
    for image in settings.DEEPFIGURES_IMAGES.values():
        image_tag = image['tag']
        image_dockerfile = image['dockerfile_path']
        image_version = image['version_prefix'] + settings.VERSION

        command = build_template.format(
            tag=image_tag,
            version=image_version,
            dockerfile_path=image_dockerfile)
        execute(command, logger)
Esempio n. 7
0
def detectfigures(
        output_directory,
        pdf_path,
        skip_dependencies=False):
    """Run figure extraction on the PDF at PDF_PATH.

    Run figure extraction on the PDF at PDF_PATH and write the results
    to OUTPUT_DIRECTORY. The detection script runs inside the CPU docker
    image, with the PDF's directory and the output directory bind-mounted
    into the container.

    Args:
        output_directory: Host directory that receives the results. May be
            relative; it is resolved against the current working directory.
        pdf_path: Host path of the PDF to process. May be relative or a
            bare file name; it is resolved against the current working
            directory.
        skip_dependencies: When False (the default), the docker images are
            (re)built via the ``build`` command before detection runs.
    """
    if not skip_dependencies:
        build.build.callback()

    cpu_docker_img = settings.DEEPFIGURES_IMAGES['cpu']

    # Bind mounts need absolute host paths. Without this, a bare file name
    # such as 'paper.pdf' splits into an empty directory and produces an
    # invalid '--volume :/work/host-input' argument.
    output_directory = os.path.abspath(output_directory)
    pdf_directory, pdf_name = os.path.split(os.path.abspath(pdf_path))

    # Fixed mount points inside the container.
    internal_output_directory = '/work/host-output'
    internal_pdf_directory = '/work/host-input'

    internal_pdf_path = os.path.join(
        internal_pdf_directory, pdf_name)

    execute(
        'docker run'
        ' --rm'
        ' --env-file deepfigures-local.env'
        ' --volume {output_directory}:{internal_output_directory}'
        ' --volume {pdf_directory}:{internal_pdf_directory}'
        ' {tag}:{version}'
        ' python3 /work/scripts/rundetection.py'
        '   {internal_output_directory}'
        '   {internal_pdf_path}'.format(
            tag=cpu_docker_img['tag'],
            version=settings.VERSION,
            output_directory=output_directory,
            internal_output_directory=internal_output_directory,
            pdf_directory=pdf_directory,
            internal_pdf_directory=internal_pdf_directory,
            internal_pdf_path=internal_pdf_path),
        logger,
        raise_error=True)
Esempio n. 8
0
def train(output_directory,
          input_directory,
          hypes='/work/weights/hypes.json',
          skip_dependencies=False):
    """Train the deepfigures model inside the GPU docker image.

    :param output_directory: The directory on the host where the output
        (weights, etc.) is stored; mounted at /work/host-output.
    :param input_directory: The directory on the host which contains the
        data for training, hypes, etc.; mounted at /work/host-input.
    :param hypes: The JSON file which contains the hyper-parameters for
        training, passed to ``train.py`` via ``--hypes``.
    :param skip_dependencies: Set this to True to skip (re)building the
        docker images before training starts.
    :return: Nothing.
    """
    if not skip_dependencies:
        build.build.callback()

    gpu_image = settings.DEEPFIGURES_IMAGES['gpu']

    # Fixed mount points inside the container.
    container_output_dir = '/work/host-output'
    container_input_dir = '/work/host-input'

    command_template = (
        'docker run'
        ' --gpus all'
        ' --rm'
        ' --env-file deepfigures-local.env'
        ' --volume {host_input_path}:{docker_input_path}'
        ' --volume {host_output_path}:{docker_output_path}'
        ' {tag}:{version}'
        ' python train.py'
        ' --hypes {hypes_path}'
        ' --gpu 0'
        ' --logdir {docker_output_path}'
    )
    command = command_template.format(
        host_input_path=input_directory,
        docker_input_path=container_input_dir,
        host_output_path=output_directory,
        docker_output_path=container_output_dir,
        tag=gpu_image['tag'],
        version=gpu_image['version_prefix'] + settings.VERSION,
        hypes_path=hypes)

    execute(command, logger, raise_error=True)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# @file: bootstrap.py
# Copyright (c) 2013 Korepwx. All rights reserved.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Contributors:
#   Korepwx  <*****@*****.**>  2013-08-22
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Use of this source code is governed by BSD license that can be found in the
# LICENSE file.

import sys
import os
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

import core
import scripts

# Run the single script named by the first command-line argument, or every
# script when no argument is given.
cli_arguments = sys.argv[1:]
if cli_arguments:
    scripts.execute(cli_arguments[0])
else:
    scripts.execute_all()