コード例 #1
0
from pbcommand.models import FileTypes
from pbcommand.cli import registry_builder, registry_runner

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# hello_world_quick.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder(NAMESPACE, "hello_world.py ")


def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # write mock output files, otherwise the End-to-End test will fail when
コード例 #2
0
ファイル: converters.py プロジェクト: natechols/pbcoretools
import sys

from pbcore.io import (SubreadSet, HdfSubreadSet, FastaReader, FastaWriter,
                       FastqReader, FastqWriter, BarcodeSet, ExternalResource,
                       ExternalResources, openDataSet, ContigSet, ReferenceSet,
                       GmapReferenceSet)
from pbcommand.engine import run_cmd
from pbcommand.cli import registry_builder, registry_runner, QuickOpt
from pbcommand.models import FileTypes, SymbolTypes, OutputFileType

log = logging.getLogger(__name__)

TOOL_NAMESPACE = 'pbcoretools'
DRIVER_BASE = "python -m pbcoretools.tasks.converters "

registry = registry_builder(TOOL_NAMESPACE, DRIVER_BASE)

def _run_bax_to_bam(input_file_name, output_file_name):
    base_name = ".".join(output_file_name.split(".")[:-2])
    input_file_name_tmp = input_file_name
    # XXX bax2bam won't write an hdfsubreadset unless the input is XML too
    if input_file_name.endswith(".bax.h5"):
        input_file_name_tmp = tempfile.NamedTemporaryFile(
            suffix=".hdfsubreadset.xml").name
        ds_tmp = HdfSubreadSet(input_file_name)
        ds_tmp.write(input_file_name_tmp)
    args =[
        "bax2bam",
        "--subread",
        "-o", base_name,
        "--output-xml", output_file_name,
コード例 #3
0
import logging
import sys
import re

from pbcoretools.DataSetEntryPoints import parse_filter_list
from pbcore.io import openDataSet
from pbcommand.cli import registry_builder, registry_runner, QuickOpt
from pbcommand.models import FileTypes, OutputFileType

log = logging.getLogger(__name__)

TOOL_NAMESPACE = 'pbcoretools'
DRIVER_BASE = "python -m pbcoretools.tasks.filters "

registry = registry_builder(TOOL_NAMESPACE, DRIVER_BASE)

# Task option for the minimum subread length. The leading 0 is presumably the
# default value -- TODO confirm against QuickOpt's signature.
rl_opt = QuickOpt(0, "Minimum subread length", "Minimum length of subreads")

# Task option holding a comma separated list of extra filters (per its
# description string). The leading "" is presumably the default value.
filters_opt = QuickOpt(
    "", "Filters to add to the DataSet",
    "A comma separated list of other filters to add to the DataSet")

# Output file descriptor built from the SubreadSet file type id.
subreads_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                    "SubreadSet", "Filtered SubreadSet XML",
                                    "Filtered SubreadSet XML", "filtered")


def sanitize_read_length(read_length):
    if read_length:
        if not re.search('^-?\d*(\.\d*)?$', str(read_length).strip()):
コード例 #4
0
from pbcommand.models import FileTypes
from pbcommand.cli import registry_builder, registry_runner

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# hello_world_quick.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder(NAMESPACE, "hello_world.py ")


def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # write mock output files, otherwise the End-to-End test will fail when
コード例 #5
0
import sys
import logging

from pbcommand.models import FileTypes
from pbcommand.cli import registry_builder, registry_runner

log = logging.getLogger(__name__)

registry = registry_builder("pbcommand", "python -m pbcommand.cli.examples.dev_quick_hello_world ")


def _example_main(input_files, output_files, **kwargs):
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_files,
                                                               o=output_files, k=kwargs))
    # write mock output files, otherwise the End-to-End test will fail
    xs = output_files if isinstance(output_files, (list, tuple)) else [output_files]
    for x in xs:
        with open(x, 'w') as writer:
            writer.write("Mock data\n")
    return 0


# Registered "dev_qhello_world" task (FASTA in, FASTA out): passes the first
# input path, the first output path and nproc from the resolved tool contract
# on to _example_main and returns its exit code.
@registry("dev_qhello_world", "0.2.1",
          FileTypes.FASTA, FileTypes.FASTA,
          nproc=1, options=dict(alpha=1234))
def run_rtc(rtc):
    task = rtc.task
    first_input = task.input_files[0]
    first_output = task.output_files[0]
    return _example_main(first_input, first_output, nproc=task.nproc)


# Registered "dev_fastq2fasta" task (FASTQ in, FASTA out): passes the first
# input path and the first output path from the resolved tool contract on to
# _example_main and returns its exit code.
@registry("dev_fastq2fasta", "0.1.0",
          FileTypes.FASTQ, FileTypes.FASTA)
def run_rtc(rtc):
    task = rtc.task
    first_input = task.input_files[0]
    first_output = task.output_files[0]
    return _example_main(first_input, first_output)
コード例 #6
0
from pbcommand.cli import registry_builder, registry_runner
from pbcore.io import openDataSet

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# comparative_plots.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder(NAMESPACE, "interact_with_sset.py")


def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Try to open SubreadSet with pbcore
コード例 #7
0
    executable_path=
    '/home/knyquist/local/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# comparative_plots.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder(NAMESPACE, "mh_toy.py")


def _get_dset_paths(file):
    log.info("Attempting to open condition JSON")
    json = load_reseq_conditions_from(file)
    dset_paths = {}
    for condition in json.conditions:
        if condition.cond_id not in dset_paths.keys():
            dset_paths[condition.cond_id] = {
                'aset': [],
                'sset': [],
                'rset': []
            }
        dset_paths[condition.cond_id]['aset'] = condition.alignmentset
        dset_paths[condition.cond_id]['sset'] = condition.subreadset
コード例 #8
0
    executable_path=
    '/home/knyquist/local/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# comparative_plots.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder(NAMESPACE, "plot_multiple_mapped_ssets.py")


def _get_dset_paths(input_file):
    dset_paths = []
    log.info("Attempting to open input CSV")
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        for mapped_sset in reader:
            # check for a commented line (like a header)
            if mapped_sset[0][0] is not '#':
                absolute_filename = mapped_sset[0]
                dset_paths.append(absolute_filename)
    return dset_paths

コード例 #9
0
ファイル: mh_toy.py プロジェクト: mdsmith/pbinternal2
    for path in PATHS:
        sys.path.append(raise_if_not_exist(path))
    import plotly
    import selenium


from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, plot
from selenium import webdriver

import accuracy_plots

NAMESPACE = "pbinternal2"


registry = registry_builder(NAMESPACE, "mh_toy.py")


class PhantomDriver(object):

    def __init__(self, exe=PHANTOM_EXE):
        self.exe = raise_if_not_exist(exe)
        self.phantomjs_driver = None

    def __enter__(self):
        self.phantomjs_driver = webdriver.PhantomJS(executable_path=self.exe)
        return self.phantomjs_driver

    def __exit__(self, exc_type, exc_val, exc_tb):
        log.info("Shutting down phantomjs")
        if self.phantomjs_driver is not None:
コード例 #10
0
ファイル: loading.py プロジェクト: mdsmith/pbinternal2
log.addHandler(logging.NullHandler()) # To avoid warning messages


class Constants(object):
    """Constants used when registering the loading task in this module."""

    # Driver command prefix handed to registry_builder.
    DRIVER_BASE = "python -m pbinternal2.tasks.loading "

    # Option id and the value registered for it on the
    # loading_vs_poisson_report task (presumably its default -- confirm).
    LOADING_DIST_ID = "loading_dist"
    LOADING_DIST = 20


def _get_id(base_opt):
    """Return the namespaced task-option id for *base_opt*.

    The result has the form ``<TOOL_NAMESPACE>.task_options.<base_opt>``.
    """
    template = "{0}.task_options.{1}"
    return template.format(TOOL_NAMESPACE, base_opt)

registry = registry_builder(TOOL_NAMESPACE, Constants.DRIVER_BASE)


# Registered "loading_vs_poisson_report" task (STS_H5 in, REPORT out): unpacks
# the resolved tool contract and forwards the first input path, the first
# output path, nproc and the loading-dist option to loading_vs_poisson.
@registry("loading_vs_poisson_report", "0.1.0",
          FileTypes.STS_H5, FileTypes.REPORT,
          is_distributed=True, nproc=1,
          options={Constants.LOADING_DIST_ID: Constants.LOADING_DIST})
def task_loading_vs_poisson_report(rtc):
    task = rtc.task
    sts_h5_path = task.input_files[0]
    report_path = task.output_files[0]
    nproc = task.nproc
    # The option is looked up under its namespaced id, as built by _get_id.
    loading_dist = task.options[_get_id(Constants.LOADING_DIST_ID)]
    return loading_vs_poisson(sts_h5_path, report_path, nproc, loading_dist)
コード例 #11
0
ファイル: eol_qc.py プロジェクト: mdsmith/pbinternal2
#!/usr/bin/env python

import sys
import logging
from pbcommand.models import FileTypes, OutputFileType
from pbcommand.cli import registry_builder, registry_runner
from pbinternal2.report.eol_qc_stats import eol_qc_stats
from pbinternal2 import TOOL_NAMESPACE

__version__ = "0.1.1"
__author__ = "Martin Smith"

log = logging.getLogger(__name__)

registry = registry_builder(TOOL_NAMESPACE, "python -m pbinternal2.tasks.eol_qc")


def _eol_qc_outputs():
    """Return the two CSV output file types of the EOL-QC task.

    :return: 2-tuple of OutputFileType: per-ZMW stats, then per-movie stats
    """
    type_id = FileTypes.CSV.file_type_id
    # One entry per output; the values are passed to OutputFileType after the
    # file type id, in this order.
    specs = (
        ("csv_0", "Per Zmw Stats ", "Per Zmw Statistics", "file_per_zmw"),
        ("csv_1", "Per Movie Stats", "Per Movie Statistics", "file_per_movie"),
    )
    return tuple(OutputFileType(type_id, *spec) for spec in specs)


@registry("eol_qc", __version__,
          (FileTypes.DS_SUBREADS, FileTypes.DS_ALIGN),
          _eol_qc_outputs(),
          nproc=1, is_distributed=True, options=dict(nreads=32768))
def run_rtc(rtc):
    """
    Run an EOL-QC analysis on an subreadset and alignmentset.
from pbcommand.cli import registry_builder, registry_runner
from pbcore.io import openDataSet

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# comparative_plots.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder(
    NAMESPACE, "plot_multiple_mapped_ssets.py")


def _get_dset_paths(input_file):
    dset_paths = []
    log.info("Attempting to open input CSV")
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        for mapped_sset in reader:
            # check for a commented line (like a header)
            if mapped_sset[0][0] is not '#':
                absolute_filename = mapped_sset[0]
                dset_paths.append(absolute_filename)
    return dset_paths

コード例 #13
0
    executable_path=
    '/home/knyquist/local/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# comparative_plots.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder(NAMESPACE,
                            "interact_with_multiple_mapped_ssets.py")


def _get_dset_paths(input_file):
    dset_paths = []
    log.info("Attempting to open input CSV")
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        for mapped_sset in reader:
            # check for a commented line (like a header)
            if mapped_sset[0][0] is not '#':
                absolute_filename = mapped_sset[0]
                dset_paths.append(absolute_filename)
    return dset_paths

コード例 #14
0
from pbcommand.cli import registry_builder, registry_runner
from pbcore.io import openDataSet

log = logging.getLogger(__name__)

NAMESPACE = "pbsmrtpipe_examples"

# the 'Driver' exe needs to be in your path. The first arg will be the path
# to the resolved tool contract.
#
# Note, When the tool contract is emitted, the 'run-rtc'
# will automatically be added to the driver.
#
# When this commandline tool is invoked, it will be of the form:
# comparative_plots.py run-rtc /path/to/resolved-tool-contract.py
registry = registry_builder( NAMESPACE, "interact_with_sset.py" )


def _example_main( input_file, output_file, **kwargs ):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Try to open SubreadSet with pbcore