Example #1
0
def timex_test(events, groups):
    """
    Run timex_package.testCliqueAsGroup once per group.

    Args:
        * events: object exposing `.T`; the transpose is converted to an R matrix.
        * groups: iterable of groups, each converted to an R IntVector.

    Returns:
        A list with, for each group, the first element of the 'pvalueLRT'
        entry of the R result.

    """
    events_matrix = robjects.Matrix(events.T)

    pvalues = []
    for members in groups:
        outcome = timex_package.testCliqueAsGroup(
            robjects.IntVector(members), events_matrix)
        pvalues.append(outcome.rx2("pvalueLRT")[0])
    return pvalues
Example #2
0
def pca_impute(data, method='bpca', scale='none', center=True, npcs=5):
    """
    Impute the missing data elements using PCA.

    Requires the 'pcaMethods' Bioconductor package.

    The data is first passed through remove_na_rows, then pre-processed with
    R's 'prep', decomposed with 'pca', and finally completed with
    'completeObs'.

    Args:
        * data: the dataset containing missing elements.
        * method: Which PCA algorithm to use. One of:
            * 'svd'
            * 'ppca'
            * 'bpca'
            * 'svdImpute'
            * 'nipals'
            * 'robustPca'
        * scale: forwarded to R's 'prep' as its 'scale' argument.
        * center: forwarded to R's 'prep' as its 'center' argument.
        * npcs: number of principal components.

    Returns:
        The completed observations, converted by _same_type_ to match the
        type of the (filtered) input data.

    """
    r.importr('pcaMethods')
    data = remove_na_rows(data)
    r_data = robjects.Matrix(data)

    # Centering is delegated to 'prep', so 'pca' itself is always called
    # with center=False.
    prepped = robjects.r['prep'](r_data, scale=scale, center=center)
    result = robjects.r['pca'](prepped, method=method, center=False, nPcs=npcs)

    imputed = numpy.array(robjects.r['completeObs'](result))
    return _same_type_(imputed, data)
Example #3
0
def heatmap(data, bicluster=None, local=False, palette=None, **kwargs):
    """
    Draw the dataset as a heatmap, optionally arranged around a bicluster.

    Args:
        * data: a numpy.ndarray to plot.
        * bicluster: an optional bicluster; when given, its row and column
            indices must lie inside the bounds of `data`.
        * local: if True, only plot the bicluster's submatrix; otherwise plot the whole dataset,
            but with rows and columns shuffled so that the bicluster is in the top-left corner.
        * palette: The color palette to use. Must be an RPy2 object representing a color palette,
            eg: palette=rpy2.r.r("heat.colors(10)")
        * kwargs: any other arguments that the R function 'plot' accepts. Common ones include:
            file, width, height.

    """
    kwargs["local"] = local

    if bicluster is None:
        # hack: reverse the row order so the drawing is not vertically mirrored
        data = data[::-1]
    else:
        row_count, col_count = data.shape
        assert max(bicluster.rows) < row_count
        assert max(bicluster.cols) < col_count
        kwargs["bicResult"] = _get_r_biclust_([bicluster])
        kwargs["number"] = 1

    if palette is not None:
        kwargs.update(beamercolor=True, paleta=palette)

    _rplot_("drawHeatmap", r.Matrix(data), **kwargs)
Example #4
0
def _call_helper_(function_name, data, **kwargs):
    """
    Call a function of the R 'fabia' package on `data` and extract biclusters.

    Args:
        * function_name: name of the R function, looked up after 'fabia' is loaded.
        * data: the dataset; converted to an R matrix and passed as argument 'X'.
        * kwargs: further named arguments forwarded to the R function.

    Returns:
        The result of _extract_biclusters_ applied to the R function's output.

    """
    robjects.r.library('fabia')

    # Pass the explicitly converted matrix (it used to be built and then
    # discarded), matching how the 'biclust' wrapper hands data to R.
    params = dict(kwargs)
    params['X'] = robjects.Matrix(data)

    func = robjects.r[function_name]
    factorization = func(**params)
    return _extract_biclusters_(factorization)
Example #5
0
def _rfunction_(functionname, data, **kwargs):
    """
    Apply a function of the R 'biclust' package to `data`.

    Args:
        * functionname: name of the R function, looked up after 'biclust' is loaded.
        * data: the dataset; converted to an R matrix and passed as first argument.
        * kwargs: named arguments forwarded to the R function.

    Returns:
        The R result as a numpy array, converted by _same_type_ to match `data`.

    """
    # build the R matrix for the data
    matrix = robjects.Matrix(data)

    # load the package and fetch the function by name
    robjects.r.library('biclust')
    r_func = robjects.r[functionname]

    raw = r_func(matrix, **kwargs)
    return _same_type_(numpy.array(raw), data)
def affinity(E, preference_fraction=0.5, simdist_function="pearson_correlation", damping=0.5, max_iter=200, **kwargs):
    """
    Cluster the columns of E with the R 'apcluster' package.

    The similarity matrix comes from simdist(E, simdist_function, **kwargs).
    The preference handed to apcluster is the similarity range (max - min)
    multiplied by preference_fraction.

    Note: `damping` and `max_iter` are accepted but are not forwarded to
    apcluster by this function.

    Returns:
        The modules built by convert_labels2modules from the "enum" labels
        and E.columns.
    """
    sim_values = simdist(E, simdist_function, **kwargs).values

    highest = sim_values.max()
    lowest = sim_values.min()
    preference = (highest - lowest) * preference_fraction

    ro.packages.importr("apcluster")

    clustering = ro.r["apcluster"](s=ro.Matrix(sim_values), p=preference)
    labels = np.array(ro.r["labels"](clustering, "enum"))

    return convert_labels2modules(labels, E.columns)
Example #7
0
def parallel_coordinates(bicluster,
                         plot='cols',
                         compare=True,
                         info=False,
                         ylab="Value",
                         color=1,
                         **kwargs):
    """
    Parallel coordinate plot of a bicluster.

    Args:
        * bicluster: Bicluster to plot; its `data` attribute must not be None.
        * plot: 'rows', 'cols', or 'both'
        * compare: If True, also plots the rest of the rows/columns in a lighter color.
        * info: If True: include an informative title.
        * ylab: y-axis label.
        * color: foreground color; integer.
        * kwargs: any other arguments that the R function 'plot' accepts. Common ones include:
            file, width, height.

    Raises:
        ValueError: if `plot` is not one of 'cols', 'rows', 'both'.

    """
    valid_plots = ('cols', 'rows', 'both')
    if plot not in valid_plots:
        raise ValueError("Error: 'plot' argument must be one of: {0}".format(
            " ".join(valid_plots)))

    # Forward the plotting options explicitly instead of merging locals()
    # and popping the entries that R must not see.
    kwargs.update(compare=compare, info=info, ylab=ylab)
    kwargs['col'] = color  # R's name for the foreground color

    bicResult = _get_r_biclust_([bicluster])
    number = 1

    assert bicluster.data is not None
    data = r.Matrix(bicluster.data)

    # 'cols' needs no extra flag; the other modes are switched on explicitly.
    if plot == 'rows':
        kwargs["plotcol"] = False
    elif plot == 'both':
        kwargs["plotBoth"] = True

    _rplot_("parallelCoordinates", data, bicResult, number, **kwargs)
def clues(E, disMethod="1-corr", n0=300, alpha=0.05, eps=1e-4, itmax=20, strengthMethod="sil", strengthIni=-1, **kwargs):
    """
    Cluster the columns of E with the R 'clues' package.

    The transposed values of standardize(E) are passed to R's clues()
    together with the tuning arguments, always with quiet=False.
    Extra `kwargs` are accepted but not used by this function.

    Returns:
        The modules built by convert_labels2modules from the 'mem' entry of
        the R result and E.columns.
    """
    ro.packages.importr("clues")

    clues_fn = ro.r["clues"]
    standardized = ro.Matrix(standardize(E).T.values)
    options = dict(
        disMethod=disMethod,
        n0=n0,
        alpha=alpha,
        eps=eps,
        itmax=itmax,
        strengthMethod=strengthMethod,
        strengthIni=strengthIni,
        quiet=False,
    )
    rresults = clues_fn(standardized, **options)

    memberships = list(rresults.rx2("mem"))
    return convert_labels2modules(memberships, E.columns)
Example #9
0
def bubbleplot(data,
               biclusters1,
               biclusters2=None,
               biclusters3=None,
               projection='mean',
               show_labels=False,
               **kwargs):
    """
    A bubbleplot comparison of multiple sets of biclusters which attempts to project them
    down to two dimensions.

    Args:
        * data: numpy.ndarray on which all biclusters are defined.
        * biclusters1: a list of biclusters.
        * biclusters2: an optional second list of biclusters.
        * biclusters3: an optional third list of biclusters.
        * projection: projection method; one of 'mean', 'isomds', 'cmdscale'.
        * show_labels: if True, label each bicluster in the plot.
        * kwargs: any other arguments that the R function 'plot' accepts. Common ones include:
            file, width, height.

    Raises:
        ValueError: if `projection` is not one of the supported methods.

    """
    valid_projections = ('mean', 'isomds', 'cmdscale')
    if projection not in valid_projections:
        raise ValueError(
            "Error: 'projection' argument must be one of: {0}".format(
                " ".join(valid_projections)))

    kwargs['projection'] = projection
    kwargs['showLabels'] = show_labels

    # The first set is mandatory; the others are forwarded only when supplied.
    kwargs['bicResult1'] = _get_r_biclust_(biclusters1)
    optional_sets = (('bicResult2', biclusters2), ('bicResult3', biclusters3))
    for r_name, bicluster_set in optional_sets:
        if bicluster_set is not None:
            kwargs[r_name] = _get_r_biclust_(bicluster_set)

    _rplot_("bubbleplot", r.Matrix(data), **kwargs)
Example #10
0
File: Stat.py Project: afcarl/STEN
def calculatingAovR(tableFactor, Data, Formula):
    """Computes and fits an Analysis of Variance Model in R.

    Each factor is published into R's global environment under its own
    name, the data matrix is published as "DataR", and 'aov(<Formula>)' is
    parsed and evaluated in R. The fitted model is stored in R as "Fit".

    Args:
        tableFactor: iterable of entries indexed as (name, type, data).
            Entries whose type is 'Covariate' become R FloatVectors; all
            others become R factors.
        Data: object exposing `.T`; the transpose is converted to an R matrix.
        Formula: text inserted verbatim into 'aov(%s)'; it is evaluated as
            R code, so it must come from a trusted source.

    Returns:
        Tuple (pValue, FValue, terms, df) extracted from R's summary of the
        fit; `terms` are row names with spaces removed and `df` is a list
        of two-integer lists.

    Side effects:
        Activates numpy2ri and writes into R's global environment.
    """
    numpy2ri.activate()
    for t in tableFactor:
        factorName = t[0]
        factorType = t[1]
        factorData = t[2]
        # sending Data to global variable in R (Factor definition for
        # Subject, Within or Between Type and FloatVector for Covariate)
        if factorType == 'Covariate':
            tmp = robjects.FloatVector(factorData)
            robjects.globalenv[factorName] = tmp
        else:
            tmp = robjects.r.factor(factorData)
            robjects.globalenv[factorName] = tmp

    DataR = robjects.Matrix(Data.T)
    robjects.globalenv["DataR"] = DataR
    # Build the R call as text, then parse and evaluate it inside R.
    TextR = 'aov(%s)' % Formula
    express = robjects.r.parse(text=TextR)
    Fit = robjects.r.eval(express)
    robjects.globalenv["Fit"] = Fit
    raw = robjects.r.summary(Fit)
    # Pair every entry of r[0][0] except the last with that last entry.
    # NOTE(review): presumably r[0][0] is the Df column and its last entry
    # the residual degrees of freedom - confirm against the summary layout.
    df = []
    for r in raw:
        for d in r[0][0][:-1]:
            df.append([int(d), int(r[0][0][-1])])
    # NOTE(review): indices 4 and 3 are presumably the p-value and F-value
    # columns; [:-1] drops the last row of each table - TODO confirm.
    pValue = np.hstack([np.array([c[4][:-1] for c in r]) for r in raw])
    FValue = np.hstack([np.array([c[3][:-1] for c in r]) for r in raw])
    # Collect the row names of every table except the last row,
    # stripping the spaces from each name.
    terms = []
    if len(raw) == 1:
        for r in raw[0]:
            for t in r.rownames[0:-1]:
                terms.append(t.replace(' ', ''))
    else:
        for i in raw:
            for r in i:
                for t in r.rownames[0:-1]:
                    terms.append(t.replace(' ', ''))

    return pValue, FValue, terms, df
Example #11
0
    def gwr_mth(self, pt, mth, nnghs=None, stns_rm=None):
        """
        Interpolate values at a point for one month via R's 'gwr_anomaly'.

        Neighboring stations are selected around the point, their
        observations are centered on each station's normal, the R routine is
        run on them, and the point's normal is added back to the resulting
        point anomalies.

        Args:
            pt: record indexable by LAT, LON, ELEV, TDI and by the variable
                names returned by get_lst_varname(mth) and
                get_norm_varname(mth).
            mth: month identifier; passed to the station selector as
                obs_mth and used to derive the variable names.
            nnghs: number of neighbors to use. When None, it is obtained
                from self._GwrTairAnom__get_nnghs(pt, mth, stns_rm).
            stns_rm: forwarded to the neighbor lookup and station selector.

        Returns:
            The flattened 'pt_anom' output of the R routine plus
            pt[get_norm_varname(mth)].
        """
        # Identity test: '== None' is ambiguous when nnghs is an array.
        if nnghs is None:
            # Get the nnghs to use from the optimal values at surrounding stations
            nnghs = self._GwrTairAnom__get_nnghs(pt, mth, stns_rm)

        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   nnghs,
                                   load_obs=True,
                                   stns_rm=stns_rm,
                                   obs_mth=mth)

        ngh_obs = self.stn_slct.ngh_obs
        ngh_stns = self.stn_slct.ngh_stns
        ngh_wgt = self.stn_slct.ngh_wgt
        # Center the neighbor observations on each station's normal.
        ngh_obs_cntr = ngh_obs - ngh_stns[get_norm_varname(mth)]

        a_pt = np.array(
            [pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[get_lst_varname(mth)]])

        rslt = r.gwr_anomaly(
            robjects.FloatVector(ngh_stns[LON]),
            robjects.FloatVector(ngh_stns[LAT]),
            robjects.FloatVector(ngh_stns[ELEV]),
            robjects.FloatVector(ngh_stns[TDI]),
            robjects.FloatVector(ngh_stns[get_lst_varname(mth)]),
            robjects.FloatVector(ngh_wgt), robjects.Matrix(ngh_obs_cntr),
            robjects.FloatVector(a_pt))

        # NOTE(review): the reshaped fitted anomalies are not used below;
        # kept so that a malformed R result still fails here as before.
        fit_anom = np.array(rslt.rx('fit_anom'))
        nrow = np.array(rslt.rx('fit_nrow'))[0]
        ncol = np.array(rslt.rx('fit_ncol'))[0]
        fit_anom = np.reshape(fit_anom, (nrow, ncol), order='F')

        interp_anom = np.array(rslt.rx('pt_anom')).ravel()

        # Add the point's normal back to turn anomalies into values.
        interp_vals = interp_anom + pt[get_norm_varname(mth)]

        return interp_vals
Example #12
0
def make_isa_data(nrows=300,
                  ncols=50,
                  nclusts=3,
                  nclustrows=None,
                  nclustcols=None,
                  noise=0,
                  bicluster_signals=None,
                  bicluster_noise=None,
                  noverlap_rows=0,
                  noverlap_cols=None,
                  shuffle=None):
    """
    Make ISA-style data.

    Generates a dataset using the 'isa2' R package's isa.in.silico
    function.

    If an argument is None, it is not included, and isa2's defaults are used.

    Requires that 'isa2' be installed.

    Args:
        * nrows: Number of rows in the data matrix.
        * ncols: Number of columns in the data matrix.
        * nclusts: Number of biclusters.
        * nclustrows: Rows in each bicluster.
            Defaults to round(0.5 * num_rows/num_fact)
        * nclustcols: Cols in each bicluster.
            Defaults to round(0.5 * num_cols/num_fact)
        * noise: Standard deviation of normal noise in background.
        * bicluster_signals: List of base signals for each bicluster.
            Defaults to 1's.
        * bicluster_noise: List of noise standard deviations for each bicluster.
            Defaults to 0's.
        * noverlap_rows: Number of bicluster rows that overlap.
        * noverlap_cols: Number of bicluster columns that overlap.
            Defaults to 'overlap_row'.
        * shuffle: If True, shuffle rows and columns.

    Returns:
        A tuple (data, expected): the generated data array and the list of
        Bicluster objects planted in it.

    """
    # Python argument -> isa.in.silico argument name.
    requested = dict(
        num_rows=nrows,
        num_cols=ncols,
        num_fact=nclusts,
        mod_row_size=nclustrows,
        mod_col_size=nclustcols,
        noise=noise,
        mod_signal=bicluster_signals,
        mod_noise=bicluster_noise,
        overlap_row=noverlap_rows,
        overlap_col=noverlap_cols,
    )

    # Leave out unset arguments so that isa2 applies its own defaults.
    # .items() works on both Python 2 and 3, unlike .iteritems().
    isa_args = {name: value
                for name, value in requested.items()
                if value is not None}

    # Per-bicluster lists must reach R as numeric vectors.
    for key in ('mod_signal', 'mod_noise'):
        if key in isa_args:
            isa_args[key] = robjects.FloatVector(list(isa_args[key]))

    robjects.r.library('isa2')

    #get data
    func = robjects.r['isa.in.silico']
    result = func(**isa_args)

    #convert to python
    data = numpy.array(robjects.Matrix(result[0])).copy()
    rows = numpy.array(robjects.Matrix(result[1])).copy()
    cols = numpy.array(robjects.Matrix(result[2])).copy()

    # Column i of `rows` / `cols` is nonzero at the members of bicluster i.
    nbiclusters = rows.shape[1]
    row_list = [list(rows[:, i].nonzero()[0]) for i in range(nbiclusters)]
    col_list = [list(cols[:, i].nonzero()[0]) for i in range(nbiclusters)]

    expected = [
        Bicluster(row_idx, col_idx, data)
        for row_idx, col_idx in zip(row_list, col_list)
    ]

    if shuffle:
        data, expected = _shuffle_(data, expected)
    return data, expected
Example #13
0
        for j in range(len(remained)):
            combination = np.zeros(bioN)
            tmpselected = np.append(selected, remained[j])
            combination[tmpselected - 1] = 1
            comb = np.concatenate(([combination], comb))
    return comb


## Implement the iteration.
# Each pass builds a batch of candidate combinations, scores it in R with
# func_bycb, and appends the scores to the accumulated result array npres.
# iterationT is incremented at the top of the body, so the passes run for
# iterationT = 3 .. thresholdN.
# NOTE(review): topres is not updated in the visible part of the loop;
# presumably that happens beyond this excerpt - confirm.
thresholdN = 10
iterationT = 2
while (iterationT < thresholdN):
    iterationT = iterationT + 1
    comb = prepareCombination(topres, bioN)
    # Convert the numpy array to an R matrix and publish it to R as 'rcomb'.
    rcomb = numpy2ri.py2ri(comb)
    rcomb = robjects.Matrix(rcomb)
    robjects.globalenv['rcomb'] = rcomb
    # R side: wrap rcomb in a data.frame, run func_bycb on it, and record
    # the elapsed time in ctimeC.
    rscript_calC = '''
    rcomb <- data.frame(rcomb)
    starttimeC<-Sys.time()
    resC<-func_bycb(rcomb)
    endtimeC<-Sys.time()
    ctimeC<-endtimeC-starttimeC
    '''
    robjects.r(rscript_calC)
    #print(robjects.r['head']('rcomb'))
    # Pull the R result back into numpy.
    npresC = np.array(robjects.r['resC'])
    # Reshape to the array's own first two dimensions, then transpose
    # before stacking the new rows under the earlier results.
    npresC = np.reshape(npresC, newshape=(npresC.shape[0], npresC.shape[1]))
    npresC = np.transpose(npresC)
    npres = np.concatenate((npres, npresC), axis=0)
    # Negated copy of all results so far.
    # NOTE(review): presumably used for a descending sort - confirm.
    tmpres = -1 * npres
Example #14
0
def isa(data,
        thr_row=None,
        thr_col=None,
        no_seeds=100,
        direction=None):
    """
    ISA biclustering algorithm.

    Requires the 'isa2' R package.

    Args:
        * data: numpy.ndarray.
        * thr_row: threshold value(s) for rows; a single value or an
            iterable. When None, R's seq(1, 3, by=0.5) is used.
        * thr_col: threshold value(s) for cols; handled like thr_row.
        * no_seeds: number of seeds to generate biclusters.
        * direction: sequence of strings, each either 'up' for upregulated,
            'down' for downregulated, or 'updown' for both.
            Defaults to ['updown', 'updown'].

    Returns:
        A list of biclusters.

    """
    # None sentinel instead of a mutable list default shared across calls.
    if direction is None:
        direction = ['updown', 'updown']

    #load the isa library
    robjects.r.library('isa2')

    #get an R object for the data
    r_data = robjects.Matrix(data)

    def handle_threshold(x):
        """Convert a threshold argument to the R vector passed to isa()."""
        if x is None:
            return robjects.r['seq'](1, 3, by=0.5)
        if not isiterable(x):
            x = [x]
        return robjects.FloatVector(list(x))

    thr_row = handle_threshold(thr_row)
    thr_col = handle_threshold(thr_col)

    direction = robjects.StrVector(direction)

    #run biclustering
    func = robjects.r('isa')
    result = func(r_data, thr_row, thr_col, no_seeds, direction)

    #get rowXnumber array
    row_matrix = numpy.array(robjects.Matrix(result[0]))

    #get columnXnumber array (indexed by column below, like row_matrix)
    col_matrix = numpy.array(robjects.Matrix(result[1]))

    num_biclusters = row_matrix.shape[1]
    assert num_biclusters == col_matrix.shape[1]

    #make list of biclusters: the truthy entries of column i mark the
    #members of bicluster i
    biclusters = []
    for i in range(num_biclusters):
        rows = [index for index, elt in enumerate(row_matrix[:, i]) if elt]
        cols = [index for index, elt in enumerate(col_matrix[:, i]) if elt]
        biclusters.append(Bicluster(rows, cols, data=data))

    return biclusters