def test_group_fastqs_by_name_with_R3_and_index_read(self):
    """
    group_fastqs_by_name: R1, R2 and R3 fastqs with index reads

    Two samples, each contributing R1, R2 and R3 reads plus an I1
    index read, are passed in as a single flat tuple. The expected
    result is one list per sample, with the files in the same order
    as they were supplied.
    """
    pjb1_fastqs = [
        "/data/PJB1_S1_R1_001.fastq.gz",
        "/data/PJB1_S1_R2_001.fastq.gz",
        "/data/PJB1_S1_R3_001.fastq.gz",
        "/data/PJB1_S1_I1_001.fastq.gz",
    ]
    pjb2_fastqs = [
        "/data/PJB2_S2_R1_001.fastq.gz",
        "/data/PJB2_S2_R2_001.fastq.gz",
        "/data/PJB2_S2_R3_001.fastq.gz",
        "/data/PJB2_S2_I1_001.fastq.gz",
    ]
    # Flat input: all of PJB1's files followed by all of PJB2's
    fastqs = tuple(pjb1_fastqs + pjb2_fastqs)
    expected = [pjb1_fastqs, pjb2_fastqs]
    self.assertEqual(group_fastqs_by_name(fastqs), expected)
def test_group_fastqs_by_name_PE_with_index_read_pair(self):
    """
    group_fastqs_by_name: paired-end fastqs with index read pair

    Each of the two samples supplies an R1/R2 pair together with an
    I1/I2 index read pair. All eight files go in as one flat tuple
    and one four-file list per sample is expected back.
    """
    pjb1_fastqs = [
        "/data/PJB1_S1_R1_001.fastq.gz",
        "/data/PJB1_S1_R2_001.fastq.gz",
        "/data/PJB1_S1_I1_001.fastq.gz",
        "/data/PJB1_S1_I2_001.fastq.gz",
    ]
    pjb2_fastqs = [
        "/data/PJB2_S2_R1_001.fastq.gz",
        "/data/PJB2_S2_R2_001.fastq.gz",
        "/data/PJB2_S2_I1_001.fastq.gz",
        "/data/PJB2_S2_I2_001.fastq.gz",
    ]
    # Flat input: all of PJB1's files followed by all of PJB2's
    fastqs = tuple(pjb1_fastqs + pjb2_fastqs)
    expected = [pjb1_fastqs, pjb2_fastqs]
    self.assertEqual(group_fastqs_by_name(fastqs), expected)
def test_group_fastqs_by_name_SE(self):
    """
    group_fastqs_by_name: single-end fastqs

    With only an R1 file per sample, every input file is expected to
    come back in a single-element list of its own.
    """
    fastqs = (
        "/data/PJB1_S1_R1_001.fastq.gz",
        "/data/PJB2_S2_R1_001.fastq.gz",
    )
    # One single-element list per input file, in input order
    expected = [[fq] for fq in fastqs]
    self.assertEqual(group_fastqs_by_name(fastqs), expected)
def test_group_fastqs_by_name_unpaired_R2(self):
    """
    group_fastqs_by_name: handle unpaired R2 fastqs

    PJB1 has a complete R1/R2 pair whereas PJB2 only has an R2 file
    with no matching R1. The lone R2 is expected to be returned as
    a single-element list rather than being dropped or merged.
    """
    pjb1_fastqs = [
        "/data/PJB1_S1_R1_001.fastq.gz",
        "/data/PJB1_S1_R2_001.fastq.gz",
    ]
    # R2 without a partner R1
    pjb2_fastqs = ["/data/PJB2_S2_R2_001.fastq.gz"]
    fastqs = tuple(pjb1_fastqs + pjb2_fastqs)
    expected = [pjb1_fastqs, pjb2_fastqs]
    self.assertEqual(group_fastqs_by_name(fastqs), expected)
def test_group_fastqs_by_name_mixed_SE_and_PE(self):
    """
    group_fastqs_by_name: mixture of single- and paired-end fastqs

    PJB1 supplies an R1/R2 pair while PJB2 supplies only an R1 file.
    The expected grouping is a two-file list for PJB1 followed by a
    single-element list for PJB2.
    """
    paired_fastqs = [
        "/data/PJB1_S1_R1_001.fastq.gz",
        "/data/PJB1_S1_R2_001.fastq.gz",
    ]
    single_fastqs = ["/data/PJB2_S2_R1_001.fastq.gz"]
    # Flat input: the paired-end sample first, then the single-end one
    fastqs = tuple(paired_fastqs + single_fastqs)
    expected = [paired_fastqs, single_fastqs]
    self.assertEqual(group_fastqs_by_name(fastqs), expected)
def test_group_fastqs_by_name_PE(self):
    """
    group_fastqs_by_name: paired-end fastqs

    Two samples each supply an R1/R2 pair; the four files are passed
    in as one flat tuple and one R1/R2 list per sample is expected.
    """
    pjb1_fastqs = [
        "/data/PJB1_S1_R1_001.fastq.gz",
        "/data/PJB1_S1_R2_001.fastq.gz",
    ]
    pjb2_fastqs = [
        "/data/PJB2_S2_R1_001.fastq.gz",
        "/data/PJB2_S2_R2_001.fastq.gz",
    ]
    # Flat input: all of PJB1's files followed by all of PJB2's
    fastqs = tuple(pjb1_fastqs + pjb2_fastqs)
    expected = [pjb1_fastqs, pjb2_fastqs]
    self.assertEqual(group_fastqs_by_name(fastqs), expected)
extra_files.add(ff) else: # Look for a metadata file based on Fastqs for fq in inputs: info_file = locate_project_info_file(os.path.dirname(fq)) if info_file: break # Filter out index reads inputs = [fq for fq in inputs if not fastq_attrs(fq).is_index_read] if not inputs: logger.fatal("No Fastqs found") sys.exit(1) # Report what was found for fqs in group_fastqs_by_name(inputs,fastq_attrs=fastq_attrs): print("%s:" % fastq_attrs(fqs[0]).sample_name) for fq in fqs: print(" %s" % fq) print("Located %s Fastq%s" % (len(inputs), 's' if len(inputs) != 1 else '')) if info_file: print("Located project metadata in %s%s" % (info_file, " (will be ignored)" if args.ignore_metadata else '')) else: print("Unable to locate project metadata") # Set up environment envmodules = dict()