# --- CLI setup, config load, and pipeline dispatch -----------------------
# parser.add_argument('--pipeline', "-p",
#                     type=str,
#                     choices=['sim_pure', 'sim_contam', 'real_pure', 'test_pipe'],
#                     help="Defining which pipeline to run")
parser.add_argument('--config_file', "-cf", type=str,  # metavar="config_file",
                    help="yaml file with pipeline parameters")
options = parser.parse_args()

## standard python logger which can be synchronised across concurrent Ruffus tasks
## define logging output with --log_file log_file_name
logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file, options.verbose)

# Only parse the config and build the pipeline when we are actually going to
# run it (not when merely printing, drawing the flowchart, or touching files).
if not options.just_print and \
        not options.flowchart and \
        not options.touch_files_only:
    # FIX: the Python-2-only `file()` builtin leaked the handle, and
    # yaml.load() without an explicit loader can construct arbitrary Python
    # objects from the config; use a context manager plus safe_load.
    with open(options.config_file, 'r') as config_file:
        config = yaml.safe_load(config_file)
    pipeline1a = make_sipp(org_list=config['org_list'], config=config)

# Hand control to Ruffus (runs / prints / charts per the standard options).
cmdline.run(options, logger=logger)
sys.exit()
# --- command-line setup: annotation inputs -------------------------------
annotations = parser.add_argument_group('annotations')
annotations.add_argument('--genome_fasta', type=str, help='genome fasta')
annotations.add_argument('--gtf', type=str, help='gtf')
annotations.add_argument("--suppl_annot", type=str, nargs="+",
                         help="supplementary annotation file(s) for checking novel splice events")
args = parser.parse_args()

# Merge CLI arguments with the parameter file, then validate the combination.
params = get_params(args, args.params)
check_params(args, params)

# Per-run, timestamp-named log file kept under <outdir>/logs.
logs_dir = args.outdir + '/logs'
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)
log_file = '%s/log.%s.txt' % (logs_dir, datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
logger, logging_mutex = cmdline.setup_logging(__name__, log_file, args.verbose)
print 'log_file:', log_file

# Hand control to Ruffus (runs / prints / charts the pipeline per `args`).
# NOTE(review): cmdline.run() is invoked before read_pairs and the output
# directories below are assigned — presumably the task definitions elsewhere
# capture these names lazily; confirm the intended ordering.
cmdline.run(args)

# Input read pairs come either from explicit FASTQ paths (--fq) or from a
# list file (--fq_list); stays empty when neither option is given.
read_pairs = []
if args.fq:
    read_pairs = format_read_pairs(fqs=args.fq)
elif args.fq_list:
    read_pairs = format_read_pairs(list_file=args.fq_list)

# Output locations: Ruffus job-history DB plus tool-versioned subdirectories.
history_file = '%s/.ruffus_history.sqlite' % args.outdir
bbt_outdir = '%s/bbt_%s' % (args.outdir, get_version('bbt'))
assembly_outdir = '%s/rnabloom_%s' % (args.outdir, get_version('rnabloom'))
pv_outdir = '%s/pv_%s' % (args.outdir, get_version('pv'))
### start of pipeline # setting the main working directory home_dir = os.getcwd() root_dir = "%s/%s" % (home_dir, args.name) try: os.mkdir(root_dir) except OSError: pass ### setting up logging if not args.log_file: logfile = "%s/%s" % (root_dir, "log_file.txt") else: logfile = "%s/%s" % (root_dir, args.log_file) logger, logger_mutex = cmdline.setup_logging("CRAC_Pipeline_SE", logfile, 10) ### setting the starting files startingfiles = [os.path.abspath(i) for i in args.forwardreads] logger.info("analysing the following files:\n%s\n" % "\n".join(startingfiles)) ### start of pipeline definition pipeline = Pipeline( name="Single-end data CRAC pipeline for multiplexed libraries") logger.info("Trimming the reads") pipeline.transform( task_func=runFlexBar,
# Reference locations for the GRCm38 mouse genome build.
# NOTE(review): these strings contain a literal "$HOME" — presumably expanded
# downstream (os.path.expandvars?); confirm before relying on them directly.
hisat_genome_index="$HOME/Scratch/reference/GRcm38/hisat2/grcm38_snp_tran"
genome="$HOME/Scratch/reference/GRcm38/GRCm38/Mus_musculus.GRCm38"
genome_name="Mus_musculus.GRCm38.dna.primary_assembly.fa"
mask="$HOME/Scratch/reference/GRcm38/gtf/Mus_musculus.GRCm38.84.ribo.mito.mask.gtf"

# Echo which aligner/quantifier checks passed (Python 2 print statements).
if hisat_check:
    print "hisat check: " + str(hisat_check)
    print(str(hisat_genome_index))
if stringtie_check:
    print "stringtie check: " + str(stringtie_check)
print "STAR check: " + str(star_check)
print gtf
print "Basedir: " + basedir

# standard python logger which can be synchronised across concurrent Ruffus tasks
logger, logger_mutex = cmdline.setup_logging ("Chela", options.log_file, options.verbose)

# An aligner choice is mandatory.
if not aligner:
    raise Exception ("Aligner not selected with --aligner")

# --input names a file that lists one sample (sub)directory per line.
files_list=options.input
if not files_list:
    raise Exception ("No matching samples specified with --input.")
input_files=[]
with open(files_list, 'r') as f:
    # NOTE(review): .decode() on lines from a text-mode file implies Python 2
    # byte strings; under Python 3 this would raise AttributeError.
    content = [line.decode('utf-8').rstrip('\n') for line in f]
# NOTE(review): loop body is truncated in the visible chunk — only the
# rstrip and commented-out debug prints are visible here.
for line in content:
    #print(line)
    line = line.rstrip()
    #print(basedir + line)
    # print "input " + str(glob.glob(basedir + line + "/replicate*/fastq_raw/*gz"))
# --- imports, CLI parsing, and mandatory YAML config load ----------------
import ruffus as rf
import ruffus.cmdline as cmdline
import pandas as pd
import numpy as np
import os, errno
import sys  # FIX: sys.exit() is used below but sys was never imported
import yaml
# import logging
from trio import triodb
# import warnings
# warnings.filterwarnings("ignore", message=".*deprecation.*")

# Configuration and command line options
parser = cmdline.get_argparse(description='Pipeline using the TPC-H example.')
parser.add_argument("--config")
options = parser.parse_args()

lg, lm = cmdline.setup_logging(__name__, options.log_file, options.verbose)
# lg.setLevel(logging.INFO)

# A config file is mandatory: print usage and bail out when it is missing.
# (is None instead of == None; attribute access instead of vars() lookup.)
if options.config is None:
    print("No config supplied.")
    parser.print_help()
    sys.exit()

# FIX: yaml.load() without an explicit loader can construct arbitrary Python
# objects from the config file; safe_load restricts it to plain YAML types.
with open(options.config, 'r') as f:
    cfg = yaml.safe_load(f)
lg.info('pipeline:: ::config %s' % str(cfg))

# Cap on how much SQL text gets echoed to the log.
SQL_PRINT_MAX = 1000

# =============================================================
# functions
# =============================================================
pipeline2.set_input(input=[pipeline1a, pipeline1b, pipeline1c]) import ruffus.cmdline as cmdline parser = cmdline.get_argparse( description='Demonstrates the new Ruffus syntax in version 2.6') parser.add_argument('--cleanup', "-C", action="store_true", help="Cleanup before and after.") options = parser.parse_args() # standard python logger which can be synchronised across concurrent Ruffus tasks logger, logger_mutex = cmdline.setup_logging(__file__, options.log_file, options.verbose) logger.debug("\tRuffus Version = " + ruffus.__version__) if options.cleanup: try: shutil.rmtree(tempdir) except: pass correct = False # if we are not printing but running if not options.just_print and \ not options.flowchart and \ not options.touch_files_only:
### start of pipeline # setting the main working directory home_dir = os.getcwd() root_dir = "%s/%s" % (home_dir, args.name) try: os.mkdir(root_dir) except OSError: pass ### setting up logging if not args.log_file: logfile = "%s/%s" % (root_dir, "log_file.txt") else: logfile = "%s/%s" % (root_dir, args.log_file) logger, logger_mutex = cmdline.setup_logging("ChemModSeqPipeline", logfile, 10) ### starting the pipeline commands pipeline = Pipeline(name="ChemModSeqPipeline") args.forwardreads = [os.path.abspath(i) for i in args.forwardreads] args.reversereads = [os.path.abspath(i) for i in args.reversereads] startingfiles = zip(args.forwardreads, args.reversereads) #print startingfiles if args.adapter: pipeline.transform( task_func=runFlexBar, input=startingfiles, filter=formatter("^.+/([^/]+)_1.(san)?fastq$",
def __init__(self, prog_name, log_file, verbosity):
    """Set up a shared Ruffus logger and keep its proxy/mutex pair.

    NOTE(review): `prog_name` is accepted but never used — the logger is
    registered under `__name__` instead; confirm whether that is intended.
    """
    log_proxy, log_mutex = cmdline.setup_logging(__name__, log_file, verbosity)
    self.proxy = log_proxy
    self.mutex = log_mutex