Esempio n. 1
0
def test_count(data, opts):
    """
    Make a test for all genes iteratively.

    For every gene two negative binomial GLMs are fitted to the stacked
    Ribo and RNA counts, using the design matrices returned by
    cm.create_matrix for model='H0' and model='H1' and the log library
    sizes as offset. The p value is the chi-square survival function of
    the deviance difference, with the difference in design matrix columns
    as degrees of freedom. Genes whose adjusted dispersion is NaN are
    skipped and keep a NaN p value.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input argument to the main TE function
    @type opts: Instance
    @return: data, with data.pval set to an array of shape (num genes, 1)
    """

    print('Start the statistical test.')

    num = len(data.geneIDs)
    pval = np.empty((num, 1))
    pval.fill(np.nan)

    explanatory0 = cm.create_matrix(data, model='H0')
    explanatory1 = cm.create_matrix(data, model='H1')
    librarySizes = np.hstack([data.libSizesRibo, data.libSizesRna])

    # Loop invariants: identical for every gene, so compute them once.
    logLibrarySizes = np.log(librarySizes)
    degFreedom = explanatory1.shape[1] - explanatory0.shape[1]

    lenSampleRibo = data.idxRibo.size
    lenSampleRna = data.idxRna.size

    for i in range(num):
        # Progress report; flush after writing so it shows up immediately.
        if i % 50 == 0:
            sys.stdout.write('\r%i genes finished...' % i)
            sys.stdout.flush()
        if i + 1 == num:
            sys.stdout.write('\r%i genes finished.\n' % num)
            sys.stdout.flush()

        if opts.dispDiff:
            # A NaN in either dispersion makes the fit meaningless, so the
            # RNA value is checked as well as the Ribo value.
            if np.isnan(data.dispAdjRibo[i]) or np.isnan(data.dispAdjRna[i]):
                continue
            disp = np.hstack([np.repeat(data.dispAdjRibo[i], lenSampleRibo),
                              np.repeat(data.dispAdjRna[i], lenSampleRna)])
        else:
            if np.isnan(data.dispAdj[i]):
                continue
            disp = data.dispAdj[i]

        response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])

        modNB0 = sm.GLM(response, explanatory0,
                        family=sm.families.NegativeBinomial(alpha=disp),
                        offset=logLibrarySizes)
        modNB1 = sm.GLM(response, explanatory1,
                        family=sm.families.NegativeBinomial(alpha=disp),
                        offset=logLibrarySizes)
        result0 = modNB0.fit()
        result1 = modNB1.fit()

        # sf() is the upper tail 1 - cdf() computed directly; it keeps
        # precision for very small p values where 1 - cdf() rounds to 0.
        pval[i] = chi2.sf(result0.deviance - result1.deviance, degFreedom)

    data.pval = pval

    return data
def estimate_disp(data, opts):
    """ Build the H1 explanatory matrix and run the dispersion steps.

        The steps are, in order: rd.disper_raw (or rd.disper_raw_scalar
        when opts.dispDiff is false), fd.disper_fit, and ad.disper_adj
        (or ad.disper_adj_scalar). After each step the data object is
        pickled to opts.resPath + 'TmpData.pkl', overwriting the previous
        dump.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input arguments to the main TE function
    @type opts: Instance
    @return: the data object returned by the last step
    """

    def _checkpoint(snapshot):
        # Overwrite the temporary pickle with the current state.
        with open(tmpPickle, 'wb') as handle:
            pickle.dump(snapshot, handle, pickle.HIGHEST_PROTOCOL)

    data.matrix = cm.create_matrix(data, model='H1')
    tmpPickle = opts.resPath + 'TmpData.pkl'

    # Step 1: raw dispersion.
    rawStep = rd.disper_raw if opts.dispDiff else rd.disper_raw_scalar
    data = rawStep(data, opts)
    _checkpoint(data)

    print('*' * 25)

    # Step 2: dispersion fit.
    data = fd.disper_fit(data, opts)
    _checkpoint(data)

    print('*' * 25)

    # Step 3: dispersion adjustment.
    adjStep = ad.disper_adj if opts.dispDiff else ad.disper_adj_scalar
    data = adjStep(data, opts)
    _checkpoint(data)

    return data
Esempio n. 3
0
def estimate_disp(data, opts):
    """ Create the explanatory matrix (model 'H1') and estimate dispersion.

        Runs three stages one after another and pickles the data object
        to opts.resPath + 'TmpData.pkl' after each of them. A line of 25
        asterisks is printed between consecutive stages.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input arguments to the main TE function
    @type opts: Instance
    @return: the data object produced by the final stage
    """

    def rawStage(current):
        # Per-condition or scalar raw dispersion, depending on the option.
        if opts.dispDiff:
            return rd.disper_raw(current, opts)
        return rd.disper_raw_scalar(current, opts)

    def fitStage(current):
        return fd.disper_fit(current, opts)

    def adjStage(current):
        # Per-condition or scalar adjustment, depending on the option.
        if opts.dispDiff:
            return ad.disper_adj(current, opts)
        return ad.disper_adj_scalar(current, opts)

    designH1 = cm.create_matrix(data, model='H1')
    data.matrix = designH1

    dumpPath = opts.resPath + 'TmpData.pkl'

    for position, stage in enumerate((rawStage, fitStage, adjStage)):
        if position > 0:
            print('*' * 25)

        data = stage(data)

        # Each stage overwrites the same temporary file.
        with open(dumpPath, 'wb') as sink:
            pickle.dump(data, sink, pickle.HIGHEST_PROTOCOL)

    return data
Esempio n. 4
0
def test_count(data, opts):
    """
    Make a test for all genes iteratively.

    For every gene two negative binomial GLMs are fitted to the stacked
    Ribo and RNA counts, using the design matrices returned by
    cm.create_matrix for model='H0' and model='H1' and the log library
    sizes as offset. The p value is the chi-square survival function of
    the deviance difference. Genes whose adjusted dispersion is NaN are
    skipped, and genes whose fit raises PerfectSeparationError are
    counted; both keep a NaN p value. A summary line with the number of
    failed genes is always written to stdout at the end.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input argument to the main TE function
    @type opts: Instance
    @return: data, with data.pval set to an array of shape (num genes, 1)
    """

    print('Start the statistical test.')

    num = len(data.geneIDs)
    pval = np.empty((num, 1))
    pval.fill(np.nan)

    explanatory0 = cm.create_matrix(data, model='H0')
    explanatory1 = cm.create_matrix(data, model='H1')
    librarySizes = np.hstack([data.libSizesRibo, data.libSizesRna])

    # Loop invariants: identical for every gene, so compute them once.
    logLibrarySizes = np.log(librarySizes)
    degFreedom = explanatory1.shape[1] - explanatory0.shape[1]
    if opts.dispDiff:
        # NOTE(review): the divisor 2.5 is carried over unchanged from the
        # original code; its derivation is not visible here - confirm.
        degFreedom = degFreedom / 2.5

    lenSampleRibo = data.idxRibo.size
    lenSampleRna = data.idxRna.size

    errorCnt = 0

    for i in range(num):
        if opts.dispDiff:
            # A NaN in either dispersion makes the fit meaningless, so the
            # RNA value is checked as well as the Ribo value.
            if np.isnan(data.dispAdjRibo[i]) or np.isnan(data.dispAdjRna[i]):
                continue
            disp = np.hstack([
                np.repeat(data.dispAdjRibo[i], lenSampleRibo),
                np.repeat(data.dispAdjRna[i], lenSampleRna)
            ])
        else:
            if np.isnan(data.dispAdj[i]):
                continue
            disp = data.dispAdj[i]

        response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])

        try:
            modNB0 = sm.GLM(response,
                            explanatory0,
                            family=sm.families.NegativeBinomial(alpha=disp),
                            offset=logLibrarySizes)
            modNB1 = sm.GLM(response,
                            explanatory1,
                            family=sm.families.NegativeBinomial(alpha=disp),
                            offset=logLibrarySizes)
            result0 = modNB0.fit()
            result1 = modNB1.fit()
        except sm.tools.sm_exceptions.PerfectSeparationError:
            # Leave the p value as NaN and only count the failure.
            errorCnt += 1
        else:
            # sf() is the upper tail 1 - cdf() computed directly; it keeps
            # precision for very small p values where 1 - cdf() rounds to 0.
            pval[i] = chi2.sf(result0.deviance - result1.deviance,
                              degFreedom)

    data.pval = pval

    sys.stdout.write(
        'Warning: Failed to do test: %i genes. P value set to \'nan\'.\n' %
        errorCnt)

    return data
Esempio n. 5
0
def test_count(data, opts):
    """
    Make a test for all genes iteratively.

    Each gene gets a likelihood-ratio style comparison of two negative
    binomial GLMs: one on the design matrix from
    cm.create_matrix(data, model='H0') and one on the matrix for
    model='H1'. Both use the stacked Ribo and RNA counts as response and
    the log library sizes as offset. The p value is the chi-square
    survival function of the deviance difference, with the difference in
    design matrix columns as degrees of freedom. Genes whose adjusted
    dispersion is NaN are skipped and keep a NaN p value.

    @args data: Store all input data and results
    @type data: Class object
    @args opts: Input argument to the main TE function
    @type opts: Instance
    @return: data, with data.pval set to an array of shape (num genes, 1)
    """

    print('Start the statistical test.')

    num = len(data.geneIDs)
    pval = np.empty((num, 1))
    pval.fill(np.nan)

    explanatory0 = cm.create_matrix(data, model='H0')
    explanatory1 = cm.create_matrix(data, model='H1')
    librarySizes = np.hstack([data.libSizesRibo, data.libSizesRna])

    # Loop invariants: identical for every gene, so compute them once.
    logLibrarySizes = np.log(librarySizes)
    degFreedom = explanatory1.shape[1] - explanatory0.shape[1]

    lenSampleRibo = data.idxRibo.size
    lenSampleRna = data.idxRna.size

    for i in range(num):
        # Progress report; flush after writing so it shows up immediately.
        if i % 50 == 0:
            sys.stdout.write('\r%i genes finished...' % i)
            sys.stdout.flush()
        if i + 1 == num:
            sys.stdout.write('\r%i genes finished.\n' % num)
            sys.stdout.flush()

        if opts.dispDiff:
            # A NaN in either dispersion makes the fit meaningless, so the
            # RNA value is checked as well as the Ribo value.
            if np.isnan(data.dispAdjRibo[i]) or np.isnan(data.dispAdjRna[i]):
                continue
            disp = np.hstack([
                np.repeat(data.dispAdjRibo[i], lenSampleRibo),
                np.repeat(data.dispAdjRna[i], lenSampleRna)
            ])
        else:
            if np.isnan(data.dispAdj[i]):
                continue
            disp = data.dispAdj[i]

        response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])

        modNB0 = sm.GLM(response,
                        explanatory0,
                        family=sm.families.NegativeBinomial(alpha=disp),
                        offset=logLibrarySizes)
        modNB1 = sm.GLM(response,
                        explanatory1,
                        family=sm.families.NegativeBinomial(alpha=disp),
                        offset=logLibrarySizes)
        result0 = modNB0.fit()
        result1 = modNB1.fit()

        # sf() is the upper tail 1 - cdf() computed directly; it keeps
        # precision for very small p values where 1 - cdf() rounds to 0.
        pval[i] = chi2.sf(result0.deviance - result1.deviance, degFreedom)

    data.pval = pval

    return data