def generatearxiv(skip_dependencies=True):
    """Generate arxiv data for deepfigures.

    Launches the arxiv data generation pipeline inside the CPU docker
    image. The pipeline pulls its data from S3, and the requestor is the
    one who pays for that transfer.

    When SKIP_DEPENDENCIES is false, ``build.build.callback()`` is invoked
    before the container is started.
    """
    if not skip_dependencies:
        build.build.callback()

    image = settings.DEEPFIGURES_IMAGES['cpu']

    # Both data directories are mounted at the same path inside the
    # container as on the host.
    command_template = ' '.join([
        'docker run',
        '--rm',
        '--env-file deepfigures-local.env',
        '--volume {tmp_dir}:{tmp_dir}',
        '--volume {output_dir}:{output_dir}',
        '{tag}:{version}',
        'python3',
        '/work/deepfigures/data_generation/arxiv_pipeline.py',
    ])
    command = command_template.format(
        tag=image['tag'],
        version=image['version_prefix'] + settings.VERSION,
        tmp_dir=settings.ARXIV_DATA_TMP_DIR,
        output_dir=settings.ARXIV_DATA_OUTPUT_DIR)

    execute(command, logger, raise_error=True)
def generatepubmed(skip_dependencies=True):
    """Generate pubmed data for deepfigures.

    Launches the pubmed data generation pipeline inside the CPU docker
    image. This can involve pulling data from S3, which the requestor
    has to pay for.

    When SKIP_DEPENDENCIES is false, ``build.build.callback()`` is invoked
    before the container is started.
    """
    if not skip_dependencies:
        build.build.callback()

    image = settings.DEEPFIGURES_IMAGES['cpu']
    image_reference = '{tag}:{version}'.format(
        tag=image['tag'],
        version=settings.VERSION)

    # The pubmed data directory is mounted at the same path inside the
    # container as on the host.
    data_dir = settings.LOCAL_PUBMED_DISTANT_DATA_DIR
    volume_flag = '--volume {data_dir}:{data_dir}'.format(data_dir=data_dir)

    command = ' '.join([
        'docker run',
        '--rm',
        '--env-file deepfigures-local.env',
        volume_flag,
        image_reference,
        'python3',
        '/work/deepfigures/data_generation/pubmed_pipeline.py',
    ])

    execute(command, logger, raise_error=True)
def runtests():
    """Run tests for deepfigures.

    Sets the module logger to INFO, installs the default logging
    configuration, and then runs pytest over ``/work/deepfigures``.
    """
    # Logging has to be set up here before anything is reported.
    logger.setLevel(logging.INFO)
    logging.basicConfig()

    logger.info('Running tests for deepfigures.')

    pytest_command = 'pytest -n auto /work/deepfigures'
    execute(pytest_command, logger)
def build():
    """Build docker images for deepfigures.

    Runs one ``docker build`` per entry in ``settings.DEEPFIGURES_IMAGES``,
    tagging each image as ``<tag>:<settings.VERSION>`` and using the
    current directory as the build context.
    """
    command_template = ' '.join([
        'docker build',
        '--tag {tag}:{version}',
        '--file {dockerfile_path} .',
    ])

    # Only the image descriptions are needed; the dictionary keys are not.
    for image in settings.DEEPFIGURES_IMAGES.values():
        command = command_template.format(
            tag=image['tag'],
            version=settings.VERSION,
            dockerfile_path=image['dockerfile_path'])
        execute(command, logger)
def testunits(skip_dependencies=False):
    """Run unit tests for deepfigures.

    Starts the CPU docker image and runs ``/work/scripts/runtests.py``
    inside it. Unless SKIP_DEPENDENCIES is true,
    ``build.build.callback()`` is invoked first.
    """
    if not skip_dependencies:
        build.build.callback()

    image = settings.DEEPFIGURES_IMAGES['cpu']
    image_reference = '{tag}:{version}'.format(
        tag=image['tag'],
        version=settings.VERSION)

    command = ' '.join([
        'docker run',
        '--rm',
        '--env-file deepfigures-local.env',
        image_reference,
        'python3 /work/scripts/runtests.py',
    ])

    execute(command, logger, raise_error=True)
def build():
    """Build docker images for deepfigures.

    Runs one ``docker build`` per entry in ``settings.DEEPFIGURES_IMAGES``.
    Each image is tagged ``<tag>:<version_prefix><settings.VERSION>``; the
    same reference is passed to ``--cache-from`` and the inline cache
    build argument is set.
    """
    command_template = ' '.join([
        'docker build',
        '--tag {image_reference}',
        '--cache-from {image_reference}',
        '--build-arg BUILDKIT_INLINE_CACHE=1',
        '--file {dockerfile_path} .',
    ])

    # Only the image descriptions are needed; the dictionary keys are not.
    for image in settings.DEEPFIGURES_IMAGES.values():
        tag = image['tag']
        dockerfile_path = image['dockerfile_path']
        image_reference = '{tag}:{version}'.format(
            tag=tag,
            version=image['version_prefix'] + settings.VERSION)

        command = command_template.format(
            image_reference=image_reference,
            dockerfile_path=dockerfile_path)
        execute(command, logger)
def detectfigures(
        output_directory,
        pdf_path,
        skip_dependencies=False):
    """Run figure extraction on the PDF at PDF_PATH.

    Run figure extraction on the PDF at PDF_PATH and write the results
    to OUTPUT_DIRECTORY.

    OUTPUT_DIRECTORY and the directory containing PDF_PATH are mounted
    into the CPU docker image at ``/work/host-output`` and
    ``/work/host-input`` respectively, and
    ``/work/scripts/rundetection.py`` is run against the mounted copies.
    Both host paths may be relative; they are resolved against the
    current working directory before being handed to docker.

    Unless SKIP_DEPENDENCIES is true, ``build.build.callback()`` is
    invoked first.
    """
    if not skip_dependencies:
        build.build.callback()

    cpu_docker_img = settings.DEEPFIGURES_IMAGES['cpu']

    # The host side of a docker --volume flag has to be an absolute path
    # to be treated as a bind mount. Resolving here also guarantees that
    # a bare file name such as "paper.pdf" yields a non-empty directory.
    # Paths that are already absolute pass through unchanged.
    output_directory = os.path.abspath(output_directory)
    pdf_directory, pdf_name = os.path.split(os.path.abspath(pdf_path))

    # Fixed mount points inside the container.
    internal_output_directory = '/work/host-output'
    internal_pdf_directory = '/work/host-input'
    internal_pdf_path = os.path.join(
        internal_pdf_directory, pdf_name)

    execute(
        'docker run'
        ' --rm'
        ' --env-file deepfigures-local.env'
        ' --volume {output_directory}:{internal_output_directory}'
        ' --volume {pdf_directory}:{internal_pdf_directory}'
        ' {tag}:{version}'
        ' python3 /work/scripts/rundetection.py'
        ' {internal_output_directory}'
        ' {internal_pdf_path}'.format(
            tag=cpu_docker_img['tag'],
            version=settings.VERSION,
            output_directory=output_directory,
            internal_output_directory=internal_output_directory,
            pdf_directory=pdf_directory,
            internal_pdf_directory=internal_pdf_directory,
            internal_pdf_path=internal_pdf_path),
        logger,
        raise_error=True)
def train(output_directory,
          input_directory,
          hypes='/work/weights/hypes.json',
          skip_dependencies=False):
    """
    Train the deepfigures model.

    :param output_directory: The directory on your host where the output
        (weights, etc.) should be stored. It is mounted into the container
        at ``/work/host-output`` and used as the training log directory.
    :param input_directory: The host directory which contains the data for
        training, hypes, etc. It is mounted into the container at
        ``/work/host-input``.
    :param hypes: The JSON file which contains the hyper-parameters for
        training. The value is passed unchanged to ``train.py --hypes``.
    :param skip_dependencies: Set this to True to skip the call to
        ``build.build.callback()`` that otherwise runs before training.
    :return: Nothing.
    """
    if not skip_dependencies:
        build.build.callback()

    gpu_image = settings.DEEPFIGURES_IMAGES['gpu']
    image_reference = '{tag}:{version}'.format(
        tag=gpu_image['tag'],
        version=gpu_image['version_prefix'] + settings.VERSION)

    # Fixed mount points inside the container.
    container_output = '/work/host-output'
    container_input = '/work/host-input'

    command_template = ' '.join([
        'docker run',
        '--gpus all',
        '--rm',
        '--env-file deepfigures-local.env',
        '--volume {host_input}:{container_input}',
        '--volume {host_output}:{container_output}',
        '{image_reference}',
        'python train.py',
        '--hypes {hypes_path}',
        '--gpu 0',
        '--logdir {container_output}',
    ])
    command = command_template.format(
        host_input=input_directory,
        container_input=container_input,
        host_output=output_directory,
        container_output=container_output,
        image_reference=image_reference,
        hypes_path=hypes)

    execute(command, logger, raise_error=True)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# @file: bootstrap.py
# Copyright (c) 2013 Korepwx. All rights reserved.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Contributors:
# Korepwx <*****@*****.**> 2013-08-22
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Use of this source code is governed by BSD license that can be found in the
# LICENSE file.

import sys
import os

# Put this script's own directory at the front of the module search path.
# This must happen before the two imports below.
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

import core
import scripts

# Without a command-line argument everything is executed; otherwise only
# the entry named by the first argument is.
if len(sys.argv) <= 1:
    scripts.execute_all()
else:
    scripts.execute(sys.argv[1])