# Gather every gzipped FASTQ file that sits directly inside one of the
# configured input directories.
original_fastq_files = []
for fastq_dir in working_files['fastq_dirs']:
    # Glob patterns tried previously (kept disabled for reference):
    #original_fastq_files += glob(os.path.join(fastq_dir, '*.fastq.gz'))
    #original_fastq_files += glob(os.path.join(fastq_dir, '*_sequence.txt.gz'))
    original_fastq_files.extend(glob(os.path.join(fastq_dir, '*.fastq.gz')))

# Without any input there is nothing to run: list the directories that were
# searched and stop with exit status 1.
if not original_fastq_files:
    print("No input files found. Do the filenames follow the naming convention?")
    print("Directories searched:")
    print("\n".join(working_files['fastq_dirs']))
    sys.exit(1)

# Parse metadata out of the input file names and construct symlinks.
# The metadata goes into a dict (for the rest of ruffus) and part of it also
# goes into the symlink names (for filename uniqueness).
# Parsing currently assumes the AGRF naming structure and paired-end reads.
# NOTE(review): mkDir is defined elsewhere; presumably it creates the symlink
# directory if it is missing -- confirm.
mkDir(working_files['fastq_symlink_dir'])
all_fastq_files = []
for file in original_fastq_files:
    name = os.path.basename(file)
    # Earlier file-name parsing scheme, kept disabled for reference:
    #print name
    #pu,remaining = name.split("_")
    #barcode = pu.split(".")[0]
    #lane = pu.split(".")[1]
    #id = pu[:5] + "." + lane
    #sm = os.path.basename(os.path.dirname(file))
    #sm = string.replace(sm,"_","-")
    #print "sm = " + sm + "\npu = " + pu + "\nlane = " + lane + "\nid = " + id + "\nbarcode = " + barcode
    ##file = sm + "_" + name
    #file2 = sm + "_" + barcode + ".L" + lane + "_" + remaining
sys.exit(1) trimmed_fastq_files = [] for fastq_dirs in working_files['fastq_dirs']: trimmed_fastq_files += glob(os.path.join(fastq_dir, '*trimmed-paired.fastq.gz')) if len(trimmed_fastq_files)==0: print "No trimmed fastq files found. Do the filenames follow the naming convention?" print "Directories searched:" print "\n".join(working_files['fastq_dirs']) sys.exit(1) # Parse metadata out of input file names and construct symlinks # Metadata is put into a dict (for the rest of ruffus) and some of it also into symlinks (for filename uniqueness) # currently parsing by assuming AGRF naming structure and paired-end reads mkDir(working_files['fastq_symlink_dir']) all_fastq_files = [] for file in original_fastq_files: symlink = parse_and_link(file, working_files['fastq_symlink_dir'], fastq_metadata) all_fastq_files.append(symlink) all_trimmed_fastq_files = [] for file in trimmed_fastq_files: symlink = parse_and_link(file, working_files['fastq_symlink_dir'], fastq_metadata) all_trimmed_fastq_files.append(symlink) # Make a list of files we will actually use if pipeline_options.pipeline['restrict_samples']: # if pipeline_options.pipeline['allowed_samples']: # allowed_samples = set(pipeline_options.pipeline['allowed_samples'])
fastq_metadata = defaultdict(dict) original_fastq_files = [] for fastq_dir in working_files['fastq_dirs']: original_fastq_files += glob(os.path.join(fastq_dir, '*.fastq.gz')) if len(original_fastq_files) == 0: print "No input files found. Do the filenames follow the naming convention?" print "Directories searched:" print "\n".join(working_files['fastq_dirs']) sys.exit(1) # Parse metadata out of input file names and construct symlinks # Metadata is put into a dict (for the rest of ruffus) and some of it also into symlinks (for filename uniqueness) # currently parsing by assuming AGRF naming structure and paired-end reads mkDir(working_files['fastq_symlink_dir']) all_fastq_files = [] for file in original_fastq_files: symlink = parse_and_link(file, working_files['fastq_symlink_dir'], fastq_metadata) all_fastq_files.append(symlink) # Make a list of files we will actually use if pipeline_options.pipeline['restrict_samples']: allowed_samples = set(pipeline_options.pipeline['allowed_samples']) fastq_files = [ file for file in sorted(all_fastq_files) if (fastq_metadata[os.path.basename(file)]['sample'] in allowed_samples ) ] else: