def get_targets(units, assembly_basename, outdir,
                extensions=['/quant.sf', '/lib_format_counts.json'],
                se_ext=['se'], pe_ext=['pe']):
    """
    Use the sample info provided in the tsv file to generate required
    targets for salmon.

    For every sample, each of its units is classified with ``is_se`` and
    contributes ``<sample>_<se_ext[0] or pe_ext[0]>_x_<assembly_basename>``
    followed by each entry of ``extensions``. Duplicate targets are dropped
    and the remaining ones are prefixed with ``outdir``.

    Parameters:
        units: table with 'sample' and 'unit' columns whose first index
            level can be grouped by sample name (it is looked up with
            ``groupby(level=0).get_group(sample)``).
        assembly_basename: name placed after ``_x_`` in every target.
        outdir: directory joined in front of every target.
        extensions: suffixes appended to each target stem.
        se_ext: only the first entry is used; tag for units where
            ``is_se`` is true (presumably single-end -- confirm in is_se).
        pe_ext: only the first entry is used; tag for all other units.

    Returns:
        List of unique target paths, in first-seen order.
    """
    # Samples are collected by hand and then looked up one at a time with
    # get_group: grouping directly on the 'sample' column produced merged
    # groups (A/AB, B/AB instead of A, B, AB) on the initial test data.
    # dict.fromkeys de-duplicates while keeping first-seen order on
    # Python 3.7+, so the target list is stable between runs.
    samples = list(dict.fromkeys(units['sample'].tolist()))

    # The grouping does not depend on the sample, so build it only once.
    units_by_sample = units.groupby(level=0)

    salmon_targs = []
    for s in samples:
        unit_list = units_by_sample.get_group(s)['unit'].tolist()
        for unit in unit_list:
            end_tag = se_ext[0] if is_se(units, s, unit) else pe_ext[0]
            stem = '{}_{}_x_{}'.format(s, end_tag, assembly_basename)
            salmon_targs.extend(stem + i for i in extensions)

    # Several units of one sample map to the same target; keep each once.
    salmon_targs = list(dict.fromkeys(salmon_targs))
    return [path.join(outdir, targ) for targ in salmon_targs]
def get_khmer_trimmed_trinity_input(units, basename, outdir,
                                    extensions=['.gz'],
                                    se_ext=['.se'],
                                    pe_ext=['.paired.1', '.paired.2', '.single']):
    """
    Use the sample info provided in the tsv file to build the list of
    per-unit input file paths.

    Every row of ``units`` yields names of the form
    ``<sample>_<unit>_<end><extension>``: ``end`` runs over ``se_ext`` when
    ``is_se(units, sample, unit)`` is true and over ``pe_ext`` otherwise,
    and ``extension`` runs over ``extensions``. Each name is then joined
    onto ``outdir``. ``basename`` is accepted but not used here.
    """
    file_names = []
    for _, row in units.iterrows():
        sample, unit = row['sample'], row['unit']

        # Start from the paired-end suffixes and switch if is_se says so.
        suffixes = pe_ext
        if is_se(units, sample, unit):
            suffixes = se_ext

        stem = '{}_{}_'.format(sample, unit)
        file_names.extend(
            stem + suffix + ext for suffix in suffixes for ext in extensions
        )

    return [join(outdir, name) for name in file_names]
def get_targets(units, basename, outdir, extensions=['.fq.gz'],
                se_ext=['se'], pe_ext=['1', '2']):
    """
    Use the sample info provided in the tsv file to generate required
    targets for cat_reads_by_unit.

    Each row of ``units`` contributes ``<sample>_<end><extension>`` for
    every ``end`` in ``se_ext`` (when ``is_se`` is true for that row's
    sample/unit) or ``pe_ext`` (otherwise) and every entry of
    ``extensions``. The unit is not part of the name, so rows sharing a
    sample produce repeated entries; they are returned as-is. All names are
    joined onto ``outdir``. ``basename`` is accepted but not used here.
    """
    names = []
    for _, row in units.iterrows():
        sample_name = row['sample']
        unit_name = row['unit']

        if is_se(units, sample_name, unit_name):
            read_tags = se_ext
        else:
            read_tags = pe_ext

        prefix = '{}_'.format(sample_name)
        for tag in read_tags:
            for suffix in extensions:
                names.append(prefix + tag + suffix)

    return [path.join(outdir, name) for name in names]
def get_targets(units, basename, outdir, extensions=['.trim.fq.gz'],
                se_ext=['se'], pe_ext=['1', '2']):
    """
    Use the sample info provided in the tsv file to generate required
    targets for trimmomatic.

    Each row of ``units`` contributes ``<sample>_<unit>_<end><extension>``
    for every ``end`` in ``se_ext`` (when ``is_se`` is true for that row's
    sample/unit) or ``pe_ext`` (otherwise) and every entry of
    ``extensions``. The resulting names are joined onto ``outdir`` and
    returned in row order. ``basename`` is accepted but not used here.
    """
    trimmed = []
    for _, record in units.iterrows():
        sample = record['sample']
        unit = record['unit']
        read_ends = se_ext if is_se(units, sample, unit) else pe_ext

        # Names carry both sample and unit, so every unit gets its own files.
        stem = '{}_{}_'.format(sample, unit)
        trimmed += [stem + end + ext for end in read_ends for ext in extensions]

    return [path.join(outdir, name) for name in trimmed]