# Announce and create the output folder for this run.
# The single-argument print(...) form gives the same output under the
# Python 2 print statement and the Python 3 print function.
print("PLEASE NOTE! The output folder, containing your data, will be named - " + analysis_dir)
os.makedirs(analysis_dir)
analysis_directory = analysis_dir + "/"

# setup directory containing fastq's and get them
indir = sys.argv[2]
read1_suffix = ".1.fq.gz"

for sample in os.listdir(indir):
    # Only files ending in the read-1 suffix start a sample; every other
    # directory entry is skipped.
    if not sample.endswith(read1_suffix):
        continue

    # Strip the literal suffix. The earlier regex r"(.*).1.fq.gz" used
    # unescaped dots (each matching any character) and only gave the right
    # answer because of the endswith() check above.
    sample_name = sample[:-len(read1_suffix)]
    # Not used in the visible part of the script; kept for code further down.
    sample_name_uc = sample_name + "_unclipped"

    # Preparation of bamfile + unclipped bamfile
    # NOTE(review): the input fastq names are not prefixed with indir, so
    # this presumably only works when indir is the current directory or
    # cutadapt() resolves the path itself - confirm.
    pipeliners.system_call(
        'Cut Adapters',
        cutadapt(sample_name + ".1.fq.gz",
                 analysis_directory + sample_name + "_ar.1.fq.gz"))
    pipeliners.system_call(
        'Cut Adapters',
        cutadapt(sample_name + ".2.fq.gz",
                 analysis_directory + sample_name + "_ar.2.fq.gz"))

    # NOTE(review): this exit ends the script right after adapter trimming
    # of the first matching sample, so the steps below never run. It looks
    # like a debugging leftover - confirm before removing it.
    sys.exit()

    pipeliners.system_call(
        'SMALT alignment',
        smalt(analysis_directory,
              sample_name + "_ar.1.fq.gz",
              sample_name + "_ar.2.fq.gz",
              sample_name + ".sam"))
    pipeliners.system_call(
        'Convert samfile to bamfile',
        sam_to_bam(analysis_directory,
                   sample_name + ".sam",
                   sample_name + ".bam"))
#
#
# script that runs a function with a set of infiles in serial
#
#
# Kim Brugger (29 Apr 2015), contact: [email protected]

import sys
#import pprint
#pp = pprint.PrettyPrinter(indent=4)

# The hard-coded directory is appended to the module search path before
# pipeliners is imported.
sys.path.append("/data/VMshare/github/pipeliners/modules")
import pipeliners

# Called without any arguments: show the usage text and stop.
if len(sys.argv) < 2:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("USAGE: serial.py [program to run] [file(s) to run on]")
    sys.exit()

program = sys.argv[1]

pipeliners.set_verbose_level('INFO')

# Every argument after the program name is treated as one input file; the
# program is run on them one at a time, in the order given.
for infile in sys.argv[2:]:
    # NOTE(review): the command is assembled by plain concatenation, so a
    # file name containing spaces is not quoted - confirm how
    # pipeliners.system_call interprets the string.
    cmd = program + " " + infile
    print(cmd)
    pipeliners.system_call('serial_call', cmd)
# The output folder name is the first command-line argument plus the time
# stamp computed earlier in the script.
analysis_dir = sys.argv[1] + "_" + time_stamp
# The single-argument print(...) form gives the same output under the
# Python 2 print statement and the Python 3 print function.
print("PLEASE NOTE! The output folder, containing your data, will be named - " + analysis_dir)
os.makedirs(analysis_dir)
analysis_directory = analysis_dir + "/"

# setup directory containing fastq's and get them
indir = sys.argv[2]
read1_suffix = ".1.fq.gz"

for sample in os.listdir(indir):
    # Only files ending in the read-1 suffix start a sample; every other
    # directory entry is skipped.
    if not sample.endswith(read1_suffix):
        continue

    # Strip the literal suffix. The earlier regex r"(.*).1.fq.gz" used
    # unescaped dots (each matching any character) and only gave the right
    # answer because of the endswith() check above.
    sample_name = sample[:-len(read1_suffix)]
    # Not used in the visible part of the script.
    sample_name_uc = sample_name + "_unclipped"

    # Preparation of bamfile + unclipped bamfile
    # NOTE(review): the input fastq names are not prefixed with indir, so
    # this presumably only works when indir is the current directory or
    # cutadapt() resolves the path itself - confirm.
    pipeliners.system_call(
        'Cut Adapters',
        cutadapt(sample_name + ".1.fq.gz",
                 analysis_directory + sample_name + "_ar.1.fq.gz"))
    pipeliners.system_call(
        'Cut Adapters',
        cutadapt(sample_name + ".2.fq.gz",
                 analysis_directory + sample_name + "_ar.2.fq.gz"))

    # NOTE(review): this exit ends the script right after adapter trimming
    # of the first matching sample, so none of the steps below ever run. It
    # looks like a debugging leftover - confirm before removing it.
    sys.exit()

    pipeliners.system_call(
        'SMALT alignment',
        smalt(analysis_directory,
              sample_name + "_ar.1.fq.gz",
              sample_name + "_ar.2.fq.gz",
              sample_name + ".sam"))
    pipeliners.system_call(
        'Convert samfile to bamfile',
        sam_to_bam(analysis_directory,
                   sample_name + ".sam",
                   sample_name + ".bam"))
    pipeliners.system_call(
        'Unclipping bamfile',
        unclip_bamfile(analysis_directory, sample_name + ".bam"))

    # Bamfile processing pipeline
    pipeliners.system_call(
        'Sort the bamfile',
        sort_bamfile(analysis_directory,
                     sample_name + ".bam",
                     sample_name + "_sorted"))
    #pipeliners.system_call('Deduplicate bamfile', deduplicate_bamfile(sample_name + "_sorted.bam", sample_name + "_rmdups.bam", sample_name + "_rmdup.csv"))
    pipeliners.system_call(
        'Index bamfile',
        index_bamfile(analysis_directory, sample_name + "_sorted.bam"))
    pipeliners.system_call(
        'Fix HIV alignment',
        HIV_alignment_fix(analysis_directory,
                          sample_name + "_sorted.bam",
                          sample_name + "_fixed.bam"))
    pipeliners.system_call(
        'Index bamfile',
        index_bamfile(analysis_directory, sample_name + "_fixed.bam"))
#
#
# script that runs a function with a set of infiles in serial
#
#
# Kim Brugger (29 Apr 2015), contact: [email protected]

import sys
#import pprint
#pp = pprint.PrettyPrinter(indent=4)

# pipeliners is imported only after its hard-coded directory has been added
# to the module search path.
sys.path.append("/data/VMshare/github/pipeliners/modules")
import pipeliners

# With no arguments there is nothing to run: print the usage line and quit.
if len(sys.argv) < 2:
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("USAGE: serial.py [program to run] [file(s) to run on]")
    sys.exit()

program = sys.argv[1]

pipeliners.set_verbose_level('INFO')

# Run the program once for each remaining argument, in order.
for infile in sys.argv[2:]:
    # NOTE(review): program and file name are joined with a single space and
    # no quoting; file names with spaces may be misread depending on how
    # pipeliners.system_call handles the string - confirm.
    cmd = program + " " + infile
    print(cmd)
    pipeliners.system_call('serial_call', cmd)