예제 #1
0
def cuffNormUQ(infiles, outfile):
    '''Calculate upper quartile (UQ) normalised FPKMs using cuffNorm.

    Builds the cuffNorm inputs and passes them to
    PipelineScRnaseq.runCuffNorm with normalisation="quartile" and
    hits="compatible".

    If PARAMS["cufflinks_replicate_field"] is set, samples are grouped by
    every name field except the replicate field: abundance files of one
    group are joined with "," and groups are joined with " ". Otherwise
    every sample is listed on its own.

    Arguments
    ---------
    infiles : sequence
        infiles[0] is forwarded as the geneset. Only the directory of
        infiles[1] is used; per-sample "abundances.cxb" files are looked
        up in <that directory>/<sample_id>/.
    outfile : str
        Forwarded to runCuffNorm; its directory is used as the output
        directory.

    Raises
    ------
    ValueError
        If the configured replicate field does not match any entry of
        NAME_FIELD_TITLES.
    '''

    # parse the infiles
    geneset = infiles[0]

    cxb_path = os.path.dirname(infiles[1])
    cxb_name = "abundances.cxb"

    # Group replicate samples
    replicate_field = PARAMS["cufflinks_replicate_field"]

    if replicate_field:
        # Compare case-insensitively in BOTH the validation and the
        # exclusion below; previously only the exclusion lower-cased the
        # titles, so a mixed-case replicate field passed validation but
        # was never excluded from the grouping key.
        replicate_key = replicate_field.lower()

        if replicate_key not in [T.lower() for T in NAME_FIELD_TITLES]:
            raise ValueError("cufflinks replicate field not in field titles")

        key = [T for T in NAME_FIELD_TITLES if T.lower() != replicate_key]
        agg = SAMPLES.groupby(key)

        labels = []
        cxb_groups = []
        # .items() is available on both Python 2 and Python 3 mappings
        # (.iteritems() is Python 2 only).
        for group, indices in agg.groups.items():

            # With a single grouping field the group key may be a scalar;
            # joining a bare string would split it into characters.
            if not isinstance(group, tuple):
                group = (group,)

            labels.append("_".join(group))

            # The groups mapping holds index labels, so select rows by
            # label with .loc (.ix has been removed from pandas).
            group_cxb_files = [
                os.path.join(cxb_path, S, cxb_name)
                for S in SAMPLES.loc[indices]["sample_id"].values
            ]

            cxb_groups.append(",".join(group_cxb_files))

        cxb_files = " ".join(cxb_groups)

    else:

        sample_ids = SAMPLES["sample_id"].values

        cxb_files = " ".join(
            [os.path.join(cxb_path, S, cxb_name) for S in sample_ids])

        labels = sample_ids

    # get the output directory and sample labels
    output_dir = os.path.dirname(outfile)
    label_str = ",".join(labels)

    standards = PARAMS["cufflinks_standards"]

    PipelineScRnaseq.runCuffNorm(geneset,
                                 cxb_files,
                                 label_str,
                                 output_dir,
                                 outfile,
                                 library_type=CUFFLINKS_STRAND,
                                 standards_file=standards,
                                 normalisation="quartile",
                                 hits="compatible")
예제 #2
0
def cuffNormUQ(infiles, outfile):
    '''Calculate upper quartile (UQ) normalised FPKMs using cuffNorm.

    Assembles the abundance file list and the sample labels, then calls
    PipelineScRnaseq.runCuffNorm with normalisation="quartile" and
    hits="compatible".

    When PARAMS["cufflinks_replicate_field"] is non-empty, samples that
    agree on all other name fields are treated as one group: their
    abundance files are comma-joined, and the groups are space-joined.
    When it is empty, each sample is passed separately.

    Arguments
    ---------
    infiles : sequence
        infiles[0] is passed on as the geneset. infiles[1] is only used
        for its directory, which is expected to contain one sub-directory
        per sample id holding "abundances.cxb".
    outfile : str
        Passed on to runCuffNorm; its directory becomes the output
        directory.

    Raises
    ------
    ValueError
        If the replicate field is set but matches none of
        NAME_FIELD_TITLES.
    '''

    # parse the infiles
    geneset = infiles[0]

    cxb_path = os.path.dirname(infiles[1])
    cxb_name = "abundances.cxb"

    # Group replicate samples
    replicate_field = PARAMS["cufflinks_replicate_field"]

    if replicate_field:
        # Validation and exclusion must use the same comparison. The
        # exclusion was already case-insensitive on the titles, so the
        # membership test is made case-insensitive as well; otherwise a
        # mixed-case field is accepted but never removed from the key.
        wanted = replicate_field.lower()
        lowered_titles = [T.lower() for T in NAME_FIELD_TITLES]

        if wanted not in lowered_titles:
            raise ValueError("cufflinks replicate field not in field titles")

        key = [T for T in NAME_FIELD_TITLES if T.lower() != wanted]
        agg = SAMPLES.groupby(key)

        labels = []
        cxb_groups = []
        # dict.iteritems() does not exist on Python 3; .items() works on
        # both major versions.
        for group, indices in agg.groups.items():

            # A scalar key (single grouping field) must not be iterated
            # character by character when building the label.
            name_parts = group if isinstance(group, tuple) else (group,)
            labels.append("_".join(name_parts))

            # indices are index labels -> label-based .loc replaces the
            # removed .ix indexer.
            group_cxb_files = [os.path.join(cxb_path, S, cxb_name)
                               for S in
                               SAMPLES.loc[indices]["sample_id"].values]

            cxb_groups.append(",".join(group_cxb_files))

        cxb_files = " ".join(cxb_groups)

    else:

        sample_ids = SAMPLES["sample_id"].values

        cxb_files = " ".join([os.path.join(cxb_path, S, cxb_name)
                              for S in sample_ids])

        labels = sample_ids

    # get the output directory and sample labels
    output_dir = os.path.dirname(outfile)
    label_str = ",".join(labels)

    standards = PARAMS["cufflinks_standards"]

    PipelineScRnaseq.runCuffNorm(geneset, cxb_files, label_str,
                                 output_dir, outfile,
                                 library_type=CUFFLINKS_STRAND,
                                 standards_file=standards,
                                 normalisation="quartile", hits="compatible")
예제 #3
0
def cuffNormClassic(infiles, outfile):
    '''Compute classic FPKMs with cuffNorm for copy number estimation.

    infiles[0] is forwarded as the geneset. For every remaining entry the
    trailing len(".log") characters are dropped (the entries are
    presumably "<sample>.log" files - confirm against the caller); the
    remainder plus "/abundances.cxb" is the abundance file and its
    basename is the sample label. Abundance files are joined with " ",
    labels with ",". The directory of outfile is used as output
    directory.
    '''

    geneset = infiles[0]
    log_files = infiles[1:]

    # number of trailing characters to strip from each log file name
    trim = len(".log")

    abundance_paths = []
    sample_labels = []
    for log_file in log_files:
        abundance_paths.append(log_file[:-trim] + "/abundances.cxb")
        sample_labels.append(os.path.basename(log_file)[:-trim])

    PipelineScRnaseq.runCuffNorm(
        geneset,
        " ".join(abundance_paths),
        ",".join(sample_labels),
        os.path.dirname(outfile),
        outfile,
        library_type=CUFFLINKS_STRAND,
        normalisation="classic-fpkm",
        hits="total")
예제 #4
0
def cuffNormClassic(infiles, outfile):
    '''Run cuffNorm with classic FPKM normalisation (for copy number
    estimation).

    The first infile is handed over as the geneset; all following infiles
    are treated as per-sample files whose last len(".log") characters are
    cut off. The cut name plus "/abundances.cxb" gives the abundance
    file, and the cut basename gives the sample label. runCuffNorm
    receives the space-joined abundance files, the comma-joined labels,
    the directory of outfile and outfile itself.
    '''

    # negative slice bound that removes a ".log"-sized suffix
    stem_end = -len(".log")

    geneset = infiles[0]
    per_sample = infiles[1:]
    output_dir = os.path.dirname(outfile)

    cxb_files = " ".join(
        name[:stem_end] + "/abundances.cxb" for name in per_sample)

    label_str = ",".join(
        os.path.basename(name)[:stem_end] for name in per_sample)

    PipelineScRnaseq.runCuffNorm(geneset, cxb_files, label_str,
                                 output_dir, outfile,
                                 library_type=CUFFLINKS_STRAND,
                                 normalisation="classic-fpkm",
                                 hits="total")