def cuffNormUQ(infiles, outfile):
    '''Calculate upper quartile (UQ) normalised FPKMs using cuffNorm.

    infiles[0] is passed to cuffNorm as the geneset. The directory
    containing infiles[1] is taken as the root under which each sample
    has a "<sample_id>/abundances.cxb" file.

    If PARAMS["cufflinks_replicate_field"] is set, samples that share
    the same values for every other name field are grouped as
    replicates: their cxb files are joined with "," and the groups are
    joined with " ". Otherwise every sample is passed on its own.

    Raises ValueError if the configured replicate field is not one of
    NAME_FIELD_TITLES.
    '''

    # parse the infiles
    geneset = infiles[0]
    cxb_path = os.path.dirname(infiles[1])
    cxb_name = "abundances.cxb"

    # Group replicate samples
    replicate_field = PARAMS["cufflinks_replicate_field"]

    if replicate_field:

        if replicate_field not in NAME_FIELD_TITLES:
            raise ValueError("cufflinks replicate field not in field titles")

        # NOTE(review): membership above is case-sensitive while this
        # filter compares against T.lower() -- confirm the expected
        # casing of the configured field and of NAME_FIELD_TITLES.
        key = [T for T in NAME_FIELD_TITLES
               if T.lower() != replicate_field]

        agg = SAMPLES.groupby(key)

        labels = []
        cxb_groups = []

        # .items() works on Python 2 and 3 (iteritems() is Python 2 only).
        for group, indices in agg.groups.items():

            # With a single grouping column the key may be a bare
            # scalar; wrap it so join() does not split a string into
            # its characters.
            if not isinstance(group, tuple):
                group = (group,)

            labels.append("_".join(group))

            # .loc replaces the removed DataFrame.ix; the values held
            # in GroupBy.groups are index labels.
            group_sample_ids = SAMPLES.loc[indices]["sample_id"].values

            group_cxb_files = [os.path.join(cxb_path, S, cxb_name)
                               for S in group_sample_ids]

            # replicates of one condition are comma-separated
            cxb_groups.append(",".join(group_cxb_files))

        # conditions are space-separated
        cxb_files = " ".join(cxb_groups)

    else:
        sample_ids = SAMPLES["sample_id"].values
        cxb_files = " ".join(
            [os.path.join(cxb_path, S, cxb_name) for S in sample_ids])
        labels = sample_ids

    # get the output directory and sample labels
    output_dir = os.path.dirname(outfile)
    label_str = ",".join(labels)

    standards = PARAMS["cufflinks_standards"]

    PipelineScRnaseq.runCuffNorm(geneset, cxb_files, label_str,
                                 output_dir, outfile,
                                 library_type=CUFFLINKS_STRAND,
                                 standards_file=standards,
                                 normalisation="quartile",
                                 hits="compatible")
def cuffNormUQ(infiles, outfile):
    '''Calculate upper quartile (UQ) normalised FPKMs using cuffNorm.

    The first infile is handed to cuffNorm as the geneset; the
    directory of the second infile is used as the root that holds one
    "<sample_id>/abundances.cxb" file per sample.

    When PARAMS["cufflinks_replicate_field"] is non-empty, samples are
    grouped on all name fields except the replicate field. Files
    within a group are comma-joined and groups are space-joined; the
    group label is the "_"-joined group key. When it is empty, each
    sample is supplied individually and labelled by its sample_id.

    Raises ValueError if the replicate field is not found in
    NAME_FIELD_TITLES.
    '''

    # parse the infiles
    geneset = infiles[0]
    cxb_path = os.path.dirname(infiles[1])
    cxb_name = "abundances.cxb"

    # Group replicate samples
    replicate_field = PARAMS["cufflinks_replicate_field"]

    if replicate_field:

        if replicate_field not in NAME_FIELD_TITLES:
            raise ValueError("cufflinks replicate field not in field titles")

        # NOTE(review): the check above is case-sensitive but the
        # comparison below lower-cases the title -- verify which
        # casing the configuration is expected to use.
        key = [T for T in NAME_FIELD_TITLES
               if T.lower() != replicate_field]

        agg = SAMPLES.groupby(key)

        labels = []
        cxb_groups = []

        # dict.iteritems() does not exist on Python 3; items() is
        # available on both major versions.
        for group, indices in agg.groups.items():

            # A single grouping column can yield a scalar key; joining
            # a bare string would interleave "_" between its characters.
            group_key = group if isinstance(group, tuple) else (group,)
            labels.append("_".join(group_key))

            # DataFrame.ix has been removed from pandas; the group
            # indices are labels, so .loc is the direct replacement.
            members = SAMPLES.loc[indices]["sample_id"].values

            group_cxb_files = [os.path.join(cxb_path, S, cxb_name)
                               for S in members]

            cxb_groups.append(",".join(group_cxb_files))

        cxb_files = " ".join(cxb_groups)

    else:
        sample_ids = SAMPLES["sample_id"].values
        cxb_files = " ".join([os.path.join(cxb_path, S, cxb_name)
                              for S in sample_ids])
        labels = sample_ids

    # get the output directory and sample labels
    output_dir = os.path.dirname(outfile)
    label_str = ",".join(labels)

    standards = PARAMS["cufflinks_standards"]

    PipelineScRnaseq.runCuffNorm(geneset, cxb_files, label_str,
                                 output_dir, outfile,
                                 library_type=CUFFLINKS_STRAND,
                                 standards_file=standards,
                                 normalisation="quartile",
                                 hits="compatible")
def cuffNormClassic(infiles, outfile):
    '''Calculate classic FPKMs using cuffNorm for copy number estimation.

    infiles[0] is the geneset. Every remaining infile has its last
    len(".log") characters removed: the remainder is used as the
    directory holding "abundances.cxb", and its basename is used as
    the sample label. cxb paths are space-joined and labels are
    comma-joined before being passed to runCuffNorm.
    '''

    suffix_len = len(".log")

    cxb_paths = []
    sample_labels = []
    for log_file in infiles[1:]:
        cxb_paths.append(log_file[:-suffix_len] + "/abundances.cxb")
        sample_labels.append(os.path.basename(log_file)[:-suffix_len])

    cxb_files = " ".join(cxb_paths)
    label_str = ",".join(sample_labels)

    # parse the infiles
    geneset = infiles[0]

    # get the output directory and sample labels
    output_dir = os.path.dirname(outfile)

    PipelineScRnaseq.runCuffNorm(geneset, cxb_files, label_str,
                                 output_dir, outfile,
                                 library_type=CUFFLINKS_STRAND,
                                 normalisation="classic-fpkm",
                                 hits="total")
def cuffNormClassic(infiles, outfile):
    '''Calculate classic FPKMs using cuffNorm for copy number estimation.

    The first infile is the geneset; the others are treated as
    per-sample files whose trailing len(".log") characters are dropped
    to obtain both the sample directory (containing "abundances.cxb")
    and, via its basename, the sample label.
    '''

    log_files = infiles[1:]
    trim = len(".log")

    # one cxb path per sample, space-separated
    sample_dirs = [log_file[:-trim] for log_file in log_files]
    cxb_files = " ".join(
        [sample_dir + "/abundances.cxb" for sample_dir in sample_dirs])

    # one label per sample, comma-separated, in the same order
    label_str = ",".join(
        [os.path.basename(log_file)[:-trim] for log_file in log_files])

    # parse the infiles
    geneset = infiles[0]

    # get the output directory and sample labels
    output_dir = os.path.dirname(outfile)

    PipelineScRnaseq.runCuffNorm(
        geneset, cxb_files, label_str, output_dir, outfile,
        library_type=CUFFLINKS_STRAND,
        normalisation="classic-fpkm",
        hits="total")