Example #1
0
def _get_samples_to_process(fn, out_dir, config, force_single):
    """Parse a csv file with one line per input file and group them by sample.

    Every non-blank line needs at least two comma-separated values: the
    input (an existing file or anything accepted by ``is_gsm``) and the
    sample name. Extra columns are kept as annotation. All inputs that
    share the same sample name end up in a single entry.

    :param fn: path to the csv file to read.
    :param out_dir: output directory; converted to an absolute path.
    :param config: stored as-is under the ``'config'`` key of every entry.
    :param force_single: forwarded to ``_check_paired``.
    :returns: list with one single-element list per sample. The element is
        a dict with the keys ``'files'``, ``'out_file'``, ``'fn'``,
        ``'anno'``, ``'config'``, ``'name'`` and ``'out_dir'``.
    :raises ValueError: if a non-blank line has fewer than two values, or
        if the first input of a sample is not recognised as fastq, BAM or
        GSM.
    """
    out_dir = os.path.abspath(out_dir)
    samples = defaultdict(list)
    with open(fn) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # "".split(",") returns [""], so blank lines must be skipped
                # here or they would be rejected as malformed rows below.
                continue
            cols = line.split(",")
            if len(cols) < 2:
                raise ValueError("Line needs 2 values: file and name.")
            if not (utils.file_exists(cols[0]) or is_gsm(cols[0])):
                logger.info("skipping %s, File doesn't exist." % cols[0])
                continue
            if " " in cols[0]:
                # Work on a symlink without spaces instead of the original path.
                new_name = os.path.abspath(cols[0].replace(" ", "_"))
                logger.warning("Space finds in %s. Linked to %s." % (cols[0], new_name))
                logger.warning("Please, avoid names with spaces in the future.")
                utils.symlink_plus(os.path.abspath(cols[0]), new_name)
                cols[0] = new_name
            samples[cols[1]].append(cols)

    to_process = []
    for sample, items in samples.items():
        # The first input of a sample decides how the whole sample is handled.
        first_input = items[0][0]
        if is_fastq(first_input, True):
            merge_fn, ext = "fq_merge", ".fastq.gz"
        elif is_bam(first_input):
            merge_fn, ext = "bam_merge", ".bam"
        elif is_gsm(first_input):
            merge_fn, ext = "query_gsm", ".fastq.gz"
        else:
            # Fail loudly rather than reuse the settings of the previous
            # sample (or hit an unbound local on the first one).
            raise ValueError("Cannot process %s (sample %s): not fastq, BAM or GSM."
                             % (first_input, sample))
        # GSM entries are kept verbatim; everything else becomes an absolute path.
        files = [row[0] if is_gsm(row[0]) else os.path.abspath(row[0]) for row in items]
        to_process.append([{'files': _check_paired(files, force_single),
                            'out_file': os.path.join(out_dir, sample + ext),
                            'fn': merge_fn,
                            'anno': items[0][2:],
                            'config': config,
                            'name': sample,
                            'out_dir': out_dir}])
    return to_process
Example #2
0
def _check_paired(files, force_single):
    """Return BAM or GSM inputs untouched, otherwise hand them to combine_pairs.

    Only the first entry of ``files`` is inspected to decide which path
    is taken.
    """
    first = files[0]
    if not first.endswith(".bam") and not is_gsm(first):
        return combine_pairs(files, force_single)
    return files
def _check_paired(files):
    """Return BAM or GSM inputs as given; anything else goes through combine_pairs.

    The decision is based on the first entry of ``files`` only.
    """
    lead = files[0]
    passthrough = lead.endswith(".bam") or is_gsm(lead)
    if passthrough:
        return files
    return combine_pairs(files)
def _check_paired(files, force_single, separators):
    """Return BAM or GSM inputs untouched, otherwise hand them to combine_pairs.

    ``_check_stems`` is evaluated for every input, including the ones that
    are returned untouched; its result is passed on to ``combine_pairs``
    together with ``force_single`` and ``separators``.
    """
    full_name = _check_stems(files)
    first = files[0]
    if not first.endswith(".bam") and not is_gsm(first):
        return combine_pairs(files, force_single, full_name, separators)
    return files
def _check_paired(files, force_single, separators):
    """Pass non-BAM, non-GSM inputs to combine_pairs; return the rest as given.

    The stem check runs up front for every call, and only the first entry
    of ``files`` decides whether the list is returned unchanged.
    """
    full_name = _check_stems(files)
    lead = files[0]
    passthrough = lead.endswith(".bam") or is_gsm(lead)
    return files if passthrough else combine_pairs(files, force_single, full_name, separators)