Beispiel #1
0
def get_targets(units,
                assembly_basename,
                outdir,
                extensions=['/quant.sf', '/lib_format_counts.json'],
                se_ext=['se'],
                pe_ext=['pe']):
    """
    Build the salmon output targets for the samples listed in ``units``.

    Every unit of every sample is classified with ``is_se`` and contributes
    the names ``<sample>_<end>_x_<assembly_basename><extension>``, one per
    entry of ``extensions``. ``<end>`` is ``se_ext[0]`` for single-end units
    and ``pe_ext[0]`` otherwise. Duplicate names are dropped.

    units: sample table (per the original note, loaded from a tsv file).
        It must provide 'sample' and 'unit' columns and support
        ``groupby(level=0)``. NOTE(review): the lookup assumes index level 0
        carries the same values as the 'sample' column -- confirm against
        the code that loads the table.
    assembly_basename: name inserted after ``_x_`` in every target.
    outdir: directory that each target is joined onto.
    extensions: suffixes appended to each target stem.
    se_ext / pe_ext: only the first element of each is used.

    Returns a sorted list of unique target paths.
    """
    # Group once; the grouping does not change between samples.
    units_by_index = units.groupby(level=0)

    # 'groupby' on the sample column gave A/AB, B/AB style groups on the
    # initial test data, so the distinct sample names are collected by hand
    # and each one is looked up exactly with get_group.
    sample_names = set(units['sample'].tolist())

    # A set drops the repeats produced when several units of one sample
    # share the same layout.
    salmon_targs = set()
    for s in sample_names:
        for unit in units_by_index.get_group(s)['unit'].tolist():
            layout = se_ext[0] if is_se(units, s, unit) else pe_ext[0]
            stem = '{}_{}_x_{}'.format(s, layout, assembly_basename)
            salmon_targs.update(stem + ext for ext in extensions)

    # Sorting keeps the returned order identical from run to run; plain set
    # iteration order for strings varies with hash randomisation.
    return [path.join(outdir, targ) for targ in sorted(salmon_targs)]
Beispiel #2
0
def get_khmer_trimmed_trinity_input(units, basename, outdir, extensions = ['.gz'], se_ext = ['.se'], pe_ext = ['.paired.1','.paired.2', '.single']):

    """
    Use the sample info provided in the tsv file
    to list the per-unit input files, named
    ``<sample>_<unit>_<end><extension>`` and joined onto ``outdir``.

    ``<end>`` runs over ``se_ext`` when ``is_se`` reports the unit as
    single-end and over ``pe_ext`` otherwise. ``basename`` is accepted
    but not used here.
    """
    file_names = []
    for _, row in units.iterrows():
        sample_name = row['sample']
        unit_name = row['unit']
        if is_se(units, sample_name, unit_name):
            read_ends = se_ext
        else:
            read_ends = pe_ext
        stem = '{}_{}_'.format(sample_name, unit_name)
        file_names.extend(
            stem + read_end + ext
            for read_end in read_ends
            for ext in extensions
        )
    return [join(outdir, name) for name in file_names]
def get_targets(units,
                basename,
                outdir,
                extensions=['.fq.gz'],
                se_ext=['se'],
                pe_ext=['1', '2']):
    """
    Use the sample info provided in the tsv file
    to generate required targets for cat_reads_by_unit

    Each row of ``units`` yields the names ``<sample>_<end><extension>``,
    where ``<end>`` runs over ``se_ext`` for rows that ``is_se`` reports as
    single-end and over ``pe_ext`` otherwise. The unit is used only for that
    check, so a sample with several rows produces repeated entries.
    ``basename`` is accepted but not used here.
    """
    names = []
    for _, row in units.iterrows():
        sample_name = row['sample']
        unit_name = row['unit']
        if is_se(units, sample_name, unit_name):
            read_ends = se_ext
        else:
            read_ends = pe_ext
        stem = '{}_'.format(sample_name)
        names.extend(
            stem + read_end + ext
            for read_end in read_ends
            for ext in extensions
        )

    return [path.join(outdir, name) for name in names]
Beispiel #4
0
def get_targets(units,
                basename,
                outdir,
                extensions=['.trim.fq.gz'],
                se_ext=['se'],
                pe_ext=['1', '2']):
    """
    Use the sample info provided in the tsv file
    to generate required targets for trimmomatic

    Each row of ``units`` yields the names
    ``<sample>_<unit>_<end><extension>``, where ``<end>`` runs over
    ``se_ext`` for rows that ``is_se`` reports as single-end and over
    ``pe_ext`` otherwise. No de-duplication is performed. ``basename`` is
    accepted but not used here.
    """
    names = []
    for _, record in units.iterrows():
        sample_id = record['sample']
        unit_id = record['unit']
        single_end = is_se(units, sample_id, unit_id)
        read_ends = se_ext if single_end else pe_ext
        stem = '{}_{}_'.format(sample_id, unit_id)
        for read_end in read_ends:
            for ext in extensions:
                names.append(stem + read_end + ext)

    return [path.join(outdir, name) for name in names]