def _get_samples_to_process(fn, out_dir, config, force_single):
    """Parse a CSV file with one input per line and group inputs by sample.

    Every non-blank line must hold at least two comma-separated values: the
    input (an existing file, or an identifier accepted by ``is_gsm``) and the
    sample name. Any further columns of the first line seen for a sample are
    kept as its annotations. All inputs that share a sample name are merged
    into a single sample entry.

    Args:
        fn: Path of the CSV file to read.
        out_dir: Output directory; it is converted to an absolute path.
        config: Stored unchanged under the ``'config'`` key of each entry.
        force_single: Forwarded to ``_check_paired``.

    Returns:
        A list with one element per sample, in order of first appearance in
        the CSV. Each element is a one-item list holding a dict with the keys
        ``files``, ``out_file``, ``fn``, ``anno``, ``config``, ``name`` and
        ``out_dir``.

    Raises:
        ValueError: If a non-blank line has fewer than two columns, or if the
            first input of a sample is not recognised as fastq, BAM or GSM.
    """
    out_dir = os.path.abspath(out_dir)
    samples = defaultdict(list)
    with open(fn) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # "".split(",") yields [''], so blank lines (for example a
                # trailing newline) must be skipped before splitting.
                continue
            cols = line.split(",")
            if len(cols) < 2:
                raise ValueError("Line needs 2 values: file and name.")
            if not (utils.file_exists(cols[0]) or is_gsm(cols[0])):
                logger.info("skipping %s, File doesn't exist." % cols[0])
                continue
            if " " in cols[0]:
                # Work on a symlink without spaces instead of the original.
                new_name = os.path.abspath(cols[0].replace(" ", "_"))
                logger.warning("Space finds in %s. Linked to %s." % (cols[0], new_name))
                logger.warning("Please, avoid names with spaces in the future.")
                utils.symlink_plus(os.path.abspath(cols[0]), new_name)
                cols[0] = new_name
            samples[cols[1]].append(cols)

    prepared = []
    for sample, items in samples.items():
        # The first input of a sample decides how the whole sample is merged.
        first_input = items[0][0]
        if is_fastq(first_input, True):
            merge_fn, ext = "fq_merge", ".fastq.gz"
        elif is_bam(first_input):
            merge_fn, ext = "bam_merge", ".bam"
        elif is_gsm(first_input):
            merge_fn, ext = "query_gsm", ".fastq.gz"
        else:
            # Without this branch the merge function and extension would be
            # unbound, or silently reused from the previous sample.
            raise ValueError(
                "Unsupported input for sample %s: %s is not fastq, bam or GSM."
                % (sample, first_input))
        # GSM identifiers are not paths, so they are left untouched.
        files = [item[0] if is_gsm(item[0]) else os.path.abspath(item[0])
                 for item in items]
        prepared.append([{'files': _check_paired(files, force_single),
                          'out_file': os.path.join(out_dir, sample + ext),
                          'fn': merge_fn,
                          'anno': items[0][2:],
                          'config': config,
                          'name': sample,
                          'out_dir': out_dir}])
    return prepared
def _check_paired(files, force_single):
    """Return BAM or GSM inputs as they are; pair everything else.

    Only the first entry of ``files`` is inspected. When it ends with
    ``.bam`` or is accepted by ``is_gsm``, the list is returned unchanged.
    Otherwise the result of ``combine_pairs(files, force_single)`` is
    returned.
    """
    # NOTE(review): `_check_paired` is defined more than once in the visible
    # part of this file; the last definition replaces the earlier ones.
    first = files[0]
    if first.endswith(".bam") or is_gsm(first):
        return files
    return combine_pairs(files, force_single)
def _check_paired(files):
    """Pair the input files unless they are BAM or GSM inputs.

    The decision is taken from the first entry of ``files`` alone: a name
    ending in ``.bam`` or one accepted by ``is_gsm`` means the list is handed
    back untouched; anything else is passed to ``combine_pairs(files)``.
    """
    # NOTE(review): `_check_paired` is defined more than once in the visible
    # part of this file; the last definition replaces the earlier ones.
    lead = files[0]
    needs_pairing = not (lead.endswith(".bam") or is_gsm(lead))
    return combine_pairs(files) if needs_pairing else files
def _check_paired(files, force_single, separators):
    """Pair the input files unless the first one is a BAM or GSM input.

    ``_check_stems(files)`` is always evaluated first and its result is
    forwarded to ``combine_pairs`` as the third argument. If the first entry
    of ``files`` ends with ``.bam`` or is accepted by ``is_gsm``, the list is
    returned unchanged; otherwise the result of
    ``combine_pairs(files, force_single, full_name, separators)`` is returned.
    """
    # NOTE(review): `_check_paired` is defined more than once in the visible
    # part of this file; the last definition replaces the earlier ones.
    # Kept ahead of the early returns so the call order matches the original.
    full_name = _check_stems(files)
    head = files[0]
    if not head.endswith(".bam") and not is_gsm(head):
        return combine_pairs(files, force_single, full_name, separators)
    return files