def parse_args():
    '''Build the argument parser, parse the command line and protect paths.

    Registers the subparsers through ``add_subparsers()``, marks the
    subparser as required and parses the arguments.  If the parsed namespace
    has a ``find_files`` attribute it is called with the namespace itself.
    Finally the ``summary``, ``root_dir`` and ``data_dir`` attributes are
    replaced by the result of ``path.protect``.

    Returns:
        The parsed argument namespace.
    '''
    # Subparsers have to be registered before the parser is used.
    add_subparsers()
    parser = base.get_parser()
    base.get_subparser().required = True
    namespace = parser.parse_args()

    # Collect the directory file list; the hook is only available when the
    # input_directory_parser is included, so a missing attribute is ignored.
    try:
        namespace.find_files(namespace)
    except AttributeError:
        pass

    # Protect the file-name arguments.
    for attr in ('summary', 'root_dir', 'data_dir'):
        try:
            protected = path.protect(getattr(namespace, attr))
            setattr(namespace, attr, protected)
        except TypeError:
            # The argument might not be set (is None); leave it untouched.
            pass

    return namespace
def test_write_script_sets_pbs_file_directory(self):
    # After write_script(directory='~') the pbs_file attribute must be
    # '<job_name>_<timestamp>.pbs' inside the protected form of '~'.
    pipe = BasePipe(job_name='foo')
    pipe.write_script(directory='~')

    actual = pipe.pbs_file
    expected_dir = path.protect('~')
    expected_name = 'foo_{}.pbs'.format(pipe.timestamp)

    self.assertEqual(actual, os.path.join(expected_dir, expected_name))
def _create_pbs_file_name(self, dir_str):
    '''Set ``self.pbs_file`` to the PBS script path inside ``dir_str``.

    The file name is ``<job_name>_<timestamp>.pbs`` and the directory is
    ``dir_str`` passed through ``path.protect``.

    Arguments:
        dir_str: Directory the PBS file should be placed in.
    Raises:
        AssertionError: If building the path raises ``TypeError`` or
            ``AssertionError``.
    '''
    try:
        directory = path.protect(dir_str)
        file_name = '_'.join([self.job_name, self.timestamp]) + '.pbs'
        self.pbs_file = os.path.join(directory, file_name)
    except (TypeError, AssertionError):
        raise AssertionError('Pipe attribute "job_name" is undefined')
def _get_files_from_summary(self, fh, data_dir=None):
    '''Read a summary file and return a list of names and files.

    Expected format:
        <name> <file> [<file> ...]

    The first two columns should be the name of the sample and the file.
    Paired-end should be listed on the same line.  Blank lines are skipped.

    Arguments:
        fh: Open summary file (an iterable of lines).  Its ``name``
            attribute is used when ``data_dir`` is not given.
        data_dir: Optional path; the listed files are joined onto its
            parent directory (``os.path.dirname(data_dir)``).
    Returns:
        A list of ``(name, file1, file2)`` tuples when every row has a third
        column, otherwise a list of ``(name, file1)`` tuples.
    Raises:
        IndexError: If a non-blank row has no file column.
    '''
    # A blank line splits into an empty list that has no name column, so it
    # is dropped here instead of failing on cols[0] below.
    rows = [cols for cols in (line.split() for line in fh) if cols]
    names = [cols[0] for cols in rows]

    if not data_dir:
        data_dir = fh.name
    # HACK: files are joined onto the parent directory of data_dir.
    base_dir = os.path.dirname(data_dir)

    def _resolve(column):
        # Protect one column of every row and anchor it in base_dir.
        return [
            os.path.join(base_dir, path.protect(cols[column]))
            for cols in rows
        ]

    first = _resolve(1)

    # Paired-end only when *every* row lists a second file; a single short
    # row makes the whole summary single-end.
    if all(len(cols) > 2 for cols in rows):
        return list(zip(names, first, _resolve(2)))
    return list(zip(names, first))
def main():
    '''Parse the arguments, read the summary and run the pipe.

    A ``root_dir`` argument, when given, replaces the module-level
    ``ROOT_DIR``.  The project directory handed to ``run_pipe`` is
    ``<ROOT_DIR>/<project>/samples``, where the project name falls back to
    ``'new_project'``.
    '''
    global ROOT_DIR

    args = parse_args()
    summary = read_summary(args)
    force = args.force

    # A root directory from the command line overrides the module default.
    if args.root_dir:
        ROOT_DIR = path.protect(args.root_dir)

    genome = args.genome
    project_name = args.project or 'new_project'
    project_dir = os.path.join(ROOT_DIR, project_name, 'samples')

    run_pipe(summary, genome, project_dir, force=force)
def read_summary(args):
    '''Read a summary file and return its rows with the file paths resolved.

    Expected format:
        <name> <file> [<file> ...]

    The first two columns should be the name of the sample and the file.
    Paired-end should be listed on the same line.  Blank lines are ignored.

    Arguments:
        args: Parsed arguments providing ``summary`` (path of the summary
            file) and ``data_dir`` (directory the listed files are joined
            onto; when falsy, the directory of the summary file is used).
    Returns:
        A list with one list per non-blank line: the sample name followed by
        each file column joined onto the data directory and passed through
        ``path.protect``.
    '''
    with open(args.summary, 'r') as fh:
        # A blank line splits into an empty list, which carries no sample
        # name; drop it instead of returning an empty row.
        rows = [cols for cols in (line.split() for line in fh) if cols]
        data_dir = args.data_dir or os.path.dirname(fh.name)

    # add and protect path to second (and third) columns in rows
    return [
        cols[:1] + [
            path.protect(os.path.join(data_dir, col)) for col in cols[1:]
        ]
        for cols in rows
    ]
import subprocess
import sys
import time

# Extend the module search path before the remsci imports below.  The entry
# is relative, so what it points at depends on the current working directory.
sys.path.append("../remsci/")
import remsci.scripted.base as base
from remsci.lib.utility import path

from libpipe.pipes.genomics import NestedGenomicsPipe
from libpipe.parsers.fastq import FastqScripted
# from libpipe.cmds import (
#     SkewerCmd, HisatCmd, Bowtie2Cmd, FastqcCmd,
#     SamtoolsSortCmd, SamtoolsIndexCmd, BedtoolsMulticovCmd,
# )

# Module-level root directory, passed through path.protect at import time.
ROOT_DIR = path.protect('~/work/projects')
PIPE_PBS_TEMPLATE = ''


def add_subparsers():
    '''Use this function to add subparsers from modules.

    Fetches the shared subparser from ``base`` and lets ``FastqScripted``
    set itself up on it.
    '''
    subparser = base.get_subparser()
    fastq_parser = FastqScripted(subparser)
    fastq_parser.setup()


def setup_logger():
    '''Configure logging by calling ``customLogging.config()``.'''
    # setup logger
    # Imports are local to this function, so they only run when it is called.
    import logging
    from remsci.lib.utility import customLogging
    customLogging.config()
import subprocess
import sys
import time

# Extend the module search path before the remsci imports below.  The entry
# is relative, so what it points at depends on the current working directory.
sys.path.append("../remsci/")
import remsci.scripted.base as base
from remsci.lib.utility import path

from libpipe.pipes.base import BasePipe
from libpipe.parsers.fastq import FastqScripted
from libpipe.cmds import (
    SkewerCmd, HisatCmd, Bowtie2Cmd, FastqcCmd,
    SamtoolsSortCmd, SamtoolsIndexCmd, BedtoolsMulticovCmd,
)

# Module-level root directory, passed through path.protect at import time.
ROOT_DIR = path.protect('~/data/rempipe')
PIPE_PBS_TEMPLATE = ''

# Predicate that is true when _has_output(x) is falsy.
DO_RUN = lambda x: not _has_output(x)


def add_subparsers():
    '''Use this function to add subparsers from modules.

    Fetches the shared subparser from ``base`` and lets ``FastqScripted``
    set itself up on it.
    '''
    subparser = base.get_subparser()
    fastq_parser = FastqScripted(subparser)
    fastq_parser.setup()


def pipe(file_list, genome, project_dir, force=False):
    # Local time formatted as yymmdd-HHMMSS.
    timestamp = time.strftime("%y%m%d-%H%M%S")