Ejemplo n.º 1
0
def parse_args():
    '''Build the command line parser and return the parsed arguments

    The subparsers are registered through add_subparsers() before the
    shared parser from `base` is used to parse the command line.

    Returns:
        The object returned by parser.parse_args(). Its 'summary',
        'root_dir' and 'data_dir' attributes are replaced by the result
        of path.protect, unless path.protect raises TypeError for them.
    '''

    # setup the parser
    add_subparsers()
    parser = base.get_parser()
    subpars = base.get_subparser()
    # NOTE(review): presumably makes the subcommand mandatory -- confirm
    # that get_subparser() returns the argparse subparsers action.
    subpars.required = True

    args = parser.parse_args()

    # parse directory file list
    # (requires input_directory_parser to be included)
    # The hook is looked up explicitly instead of wrapping the call in
    # `except AttributeError`, which would also hide an AttributeError
    # raised inside find_files itself.
    find_files = getattr(args, 'find_files', None)
    if find_files is not None:
        find_files(args)

    # protect file names
    for name in ('summary', 'root_dir', 'data_dir'):
        try:
            setattr(args, name, path.protect(getattr(args, name)))
        except TypeError:
            pass  # arg might not be set (is None)

    return args
Ejemplo n.º 2
0
    def test_write_script_sets_pbs_file_directory(self):
        # write_script(directory=...) should leave pbs_file pointing at
        # <protected directory>/<job_name>_<timestamp>.pbs
        pipe = BasePipe(job_name='foo')
        pipe.write_script(directory='~')

        expected_name = 'foo_{}.pbs'.format(pipe.timestamp)
        expected_path = os.path.join(path.protect('~'), expected_name)

        self.assertEqual(pipe.pbs_file, expected_path)
Ejemplo n.º 3
0
    def _create_pbs_file_name(self, dir_str):
        '''Set self.pbs_file to the pbs file path inside dir_str

        The file name is '<job_name>_<timestamp>.pbs' and the directory
        is dir_str after it has been passed through path.protect.

        Arguments:
            dir_str: The directory that should contain the pbs file.
        Raises:
            AssertionError: If building the path raises a TypeError or
                an AssertionError. The message blames an undefined
                "job_name" attribute.
        '''

        try:
            directory = path.protect(dir_str)
            file_name = '_'.join([self.job_name, self.timestamp]) + '.pbs'
            self.pbs_file = os.path.join(directory, file_name)
        except (TypeError, AssertionError):
            raise AssertionError('Pipe attribute "job_name" is undefined')
Ejemplo n.º 4
0
    def _get_files_from_summary(self, fh, data_dir=None):
        '''Reads a summary file and returns a list of names and files

        Expected format:
                <name>  <file>  [<file> ...]
            The first two columns should be the name of the sample
            and the file. Paired-end should be listed on the same line.
        '''

        rows = [line.rstrip().split() for line in fh]
        names = [cols[0] for cols in rows]
        if not data_dir:
            data_dir = fh.name

        try:
            r1 = [path.protect(cols[1]) for cols in rows]
            r2 = [path.protect(cols[2]) for cols in rows]

            # HACK
            r1 = [os.path.join(os.path.dirname(data_dir), r) for r in r1]
            r2 = [os.path.join(os.path.dirname(data_dir), r) for r in r2]
        except IndexError:
            r1 = [path.protect(cols[1]) for cols in rows]

            # HACK
            r1 = [os.path.join(os.path.dirname(data_dir), r) for r in r1]

        # check for valid paired end in third column
        # try:
        #     r2_0 = os.path.join(os.path.dirname(fh.name), r2[0])
        #     if not os.path.isfile(r2[0]) and not os.path.isfile(r2_0):
        #         raise IndexError()
        # except IndexError:
        #     pass

        try:
            return list(zip(names, r1, r2))
        except UnboundLocalError:
            return list(zip(names, r1))
Ejemplo n.º 5
0
def main():
    '''Parse the command line, read the summary and run the pipe

    A root directory given on the command line replaces the module
    level ROOT_DIR. The project directory handed to run_pipe is
    <ROOT_DIR>/<project>/samples, with 'new_project' as the project
    name when none is given.
    '''
    global ROOT_DIR

    args = parse_args()
    summary = read_summary(args)
    overwrite = args.force

    # command line root directory takes precedence over the default
    if args.root_dir:
        ROOT_DIR = path.protect(args.root_dir)

    genome = args.genome
    project_name = args.project or 'new_project'
    project_dir = os.path.join(ROOT_DIR, project_name, 'samples')

    run_pipe(summary, genome, project_dir, force=overwrite)
Ejemplo n.º 6
0
def read_summary(args):
    '''Read a summary file and return its rows with resolved file paths

    Expected format:
            <name>  <file>  [<file> ...]
        The first two columns should be the name of the sample
        and the file. Paired-end should be listed on the same line.

    Blank lines are skipped, so every returned row starts with a name.

    Arguments:
        args: An object with a `summary` attribute (the summary file
            to open) and a `data_dir` attribute. When `data_dir` is
            falsy, the directory of the summary file is used instead.
    Returns:
        A list of rows, one per non-blank line. Each row is a list
        whose first item is the name; every following column has been
        joined to the data directory and passed through path.protect.
    '''

    with open(args.summary, 'r') as fh:
        # skip blank lines (e.g., trailing newlines) so that no empty,
        # nameless row is returned
        rows = [cols for cols in (line.split() for line in fh) if cols]
        data_dir = args.data_dir or os.path.dirname(fh.name)

    # add and protect path to second (and third) columns in rows
    for row in rows:
        row[1:] = [
            path.protect(os.path.join(data_dir, col))
            for col in row[1:]
        ]

    return rows
Ejemplo n.º 7
0
import subprocess
import sys
import time
sys.path.append("../remsci/")

import remsci.scripted.base as base
from remsci.lib.utility import path
from libpipe.pipes.genomics import NestedGenomicsPipe
from libpipe.parsers.fastq import FastqScripted
# from libpipe.cmds import (
#     SkewerCmd, HisatCmd, Bowtie2Cmd, FastqcCmd,
#     SamtoolsSortCmd, SamtoolsIndexCmd, BedtoolsMulticovCmd,
# )


# Default root directory, passed through path.protect at import time.
# NOTE(review): presumably the base under which project directories are
# created -- confirm against the code that reads ROOT_DIR.
ROOT_DIR = path.protect('~/work/projects')
# NOTE(review): empty and not referenced in the visible code; presumably a
# placeholder for a PBS script template -- confirm or remove.
PIPE_PBS_TEMPLATE = ''


def add_subparsers():
    '''Register the subparsers provided by other modules

    Extend this function to hook additional module parsers onto the
    shared subparser obtained from `base`.
    '''
    # currently only the fastq parser is attached
    FastqScripted(base.get_subparser()).setup()


def setup_logger():
    '''Configure logging by calling remsci's customLogging.config()'''
    # both imports are kept local to this function
    import logging
    from remsci.lib.utility import customLogging as log_config

    log_config.config()
Ejemplo n.º 8
0
import subprocess
import sys
import time
sys.path.append("../remsci/")

import remsci.scripted.base as base
from remsci.lib.utility import path
from libpipe.pipes.base import BasePipe
from libpipe.parsers.fastq import FastqScripted
from libpipe.cmds import (
    SkewerCmd, HisatCmd, Bowtie2Cmd, FastqcCmd,
    SamtoolsSortCmd, SamtoolsIndexCmd, BedtoolsMulticovCmd,
)


# Default root directory, passed through path.protect at import time.
# NOTE(review): presumably the base under which project directories are
# created -- confirm against the code that reads ROOT_DIR.
ROOT_DIR = path.protect('~/data/rempipe')
# NOTE(review): empty and not referenced in the visible code; presumably a
# placeholder for a PBS script template -- confirm or remove.
PIPE_PBS_TEMPLATE = ''
# Predicate that is true when _has_output(x) is falsy.
# NOTE(review): _has_output is not defined in the visible code; presumably
# this decides whether a step still needs to run -- confirm.
DO_RUN = lambda x: not _has_output(x)


def add_subparsers():
    '''Register the subparsers provided by other modules

    Extend this function to hook additional module parsers onto the
    shared subparser obtained from `base`.
    '''
    # currently only the fastq parser is attached
    FastqScripted(base.get_subparser()).setup()


def pipe(file_list, genome, project_dir, force=False):

    timestamp = time.strftime("%y%m%d-%H%M%S")