Esempio n. 1
0
# Gather every '*.fastq.gz' file sitting directly inside each configured
# input directory. (An alternative '*_sequence.txt.gz' pattern is currently
# disabled.)
original_fastq_files = []
for fastq_dir in working_files['fastq_dirs']:
    original_fastq_files.extend(glob(os.path.join(fastq_dir, '*.fastq.gz')))

# With no inputs there is nothing to run: report the directories that were
# searched and terminate with a non-zero exit status.
if not original_fastq_files:
    print("No input files found. Do the filenames follow the naming convention?")
    print("Directories searched:")
    print("\n".join(working_files['fastq_dirs']))
    sys.exit(1)

# Parse metadata out of the input file names and construct symlinks.
# Metadata is put into a dict (for the rest of ruffus) and some of it also
# into the symlink names (for filename uniqueness).
# Parsing currently assumes the AGRF naming structure and paired-end reads.
mkDir(working_files['fastq_symlink_dir'])
all_fastq_files = []
for file in original_fastq_files:
    # Drop the directory component, keeping only the file's base name.
    name = os.path.basename(file)
    # NOTE(review): every parsing step below is commented out, and this
    # excerpt ends before showing how `name` is used or how all_fastq_files
    # is populated -- confirm against the full script.
    #print name
    #pu,remaining = name.split("_")
    #barcode = pu.split(".")[0]
    #lane = pu.split(".")[1]
    #id = pu[:5] + "." + lane
    #sm = os.path.basename(os.path.dirname(file))
    #sm = string.replace(sm,"_","-")

    #print "sm = " + sm + "\npu = " + pu + "\nlane = " + lane + "\nid = " + id + "\nbarcode = " + barcode

    ##file = sm + "_" + name
    #file2 = sm + "_" + barcode + ".L" + lane + "_" + remaining
Esempio n. 2
0
    sys.exit(1)

# Gather the trimmed, paired FASTQ files ('*trimmed-paired.fastq.gz') from
# each configured input directory.
trimmed_fastq_files = []
for fastq_dir in working_files['fastq_dirs']:
    # The glob pattern must be built from the directory of the current
    # iteration; a mismatched loop-variable name would either raise NameError
    # or silently re-scan a stale directory left bound by an earlier loop.
    trimmed_fastq_files += glob(os.path.join(fastq_dir, '*trimmed-paired.fastq.gz'))

# With no trimmed inputs there is nothing to run: report the directories that
# were searched and terminate with a non-zero exit status.
if not trimmed_fastq_files:
    print("No trimmed fastq files found. Do the filenames follow the naming convention?")
    print("Directories searched:")
    print("\n".join(working_files['fastq_dirs']))
    sys.exit(1)
   
# Derive metadata from the input file names and create symlinks for them.
# The metadata lands in the fastq_metadata dict (used by the rest of ruffus);
# part of it is also encoded in the symlink names to keep them unique.
# Parsing currently assumes the AGRF naming structure and paired-end reads.
mkDir(working_files['fastq_symlink_dir'])

# Untrimmed inputs are linked first, in their original order.
all_fastq_files = [
    parse_and_link(fastq_path, working_files['fastq_symlink_dir'], fastq_metadata)
    for fastq_path in original_fastq_files
]

# Trimmed inputs go through the same routine and into the same directory.
all_trimmed_fastq_files = [
    parse_and_link(trimmed_path, working_files['fastq_symlink_dir'], fastq_metadata)
    for trimmed_path in trimmed_fastq_files
]


# Make a list of files we will actually use
if pipeline_options.pipeline['restrict_samples']:
#    if pipeline_options.pipeline['allowed_samples']:
#        allowed_samples = set(pipeline_options.pipeline['allowed_samples'])
Esempio n. 3
0
# Per-file metadata store; unknown keys start out as an empty dict.
fastq_metadata = defaultdict(dict)

# Gather every '*.fastq.gz' file sitting directly inside each configured
# input directory.
original_fastq_files = []
for fastq_dir in working_files['fastq_dirs']:
    original_fastq_files.extend(glob(os.path.join(fastq_dir, '*.fastq.gz')))

# With no inputs there is nothing to run: report the directories that were
# searched and terminate with a non-zero exit status.
if not original_fastq_files:
    print("No input files found. Do the filenames follow the naming convention?")
    print("Directories searched:")
    print("\n".join(working_files['fastq_dirs']))
    sys.exit(1)

# Derive metadata from the input file names and create symlinks for them.
# The metadata lands in the fastq_metadata dict (used by the rest of ruffus);
# part of it is also encoded in the symlink names to keep them unique.
# Parsing currently assumes the AGRF naming structure and paired-end reads.
mkDir(working_files['fastq_symlink_dir'])

# One symlink per input file, produced in the original input order.
all_fastq_files = [
    parse_and_link(fastq_path, working_files['fastq_symlink_dir'], fastq_metadata)
    for fastq_path in original_fastq_files
]

# Make a list of files we will actually use
if pipeline_options.pipeline['restrict_samples']:
    allowed_samples = set(pipeline_options.pipeline['allowed_samples'])
    fastq_files = [
        file for file in sorted(all_fastq_files)
        if (fastq_metadata[os.path.basename(file)]['sample'] in allowed_samples
            )
    ]
else: