コード例 #1
0
def SCCA_r(X,Y, n_components, pen):
	"""Run the R function ``CCA`` on X and Y and return its loadings.

	X and Y are wrapped in DataFrames, converted to R matrices and published
	in the R global environment under the names ``X`` and ``Y`` (overwriting
	any existing R variables with those names).  The R result is stored in
	the R variable ``out``.

	Parameters
	----------
	X, Y : anything accepted by ``pd.DataFrame``
	n_components : int
		Passed to R as ``K``.
	pen : sequence of two floats
		``pen[0]`` is passed as ``penaltyx`` and ``pen[1]`` as ``penaltyz``.
		NOTE(review): ``%f`` keeps only six decimal places.

	Returns
	-------
	loadings : tuple
		``(x_loadings, y_loadings)`` as numpy arrays.
	cors : numpy.ndarray
	"""
	df_X = pd.DataFrame(X)
	df_Y = pd.DataFrame(Y)

	rmat_X = com.convert_to_r_matrix(df_X)
	rmat_Y = com.convert_to_r_matrix(df_Y)

	ri.globalenv['X'] = rmat_X
	ri.globalenv['Y'] = rmat_Y

	com.r(
	    """
	    out <- CCA(x = X, z = Y, K = %i, niter = 100, standardize = FALSE,
	               penaltyx = %f, penaltyz = %f)
	    """ % (n_components, pen[0], pen[1]))

	# The result elements are fetched by position (1, 2, 16) and then
	# looked up by name in the converted object.
	df_u = com.convert_robj(com.r('out[1]'))['u']
	df_v = com.convert_robj(com.r('out[2]'))['v']
	cors = com.convert_robj(com.r('out[16]'))['cors']

	# ``.values`` replaces the deprecated ``DataFrame.as_matrix()``.
	x_loadings = df_u.values
	y_loadings = df_v.values
	cors = np.array(cors)

	loadings = (x_loadings, y_loadings)

	return loadings, cors
コード例 #2
0
ファイル: test_common.py プロジェクト: israelzuniga/pandas
    def test_convert_r_matrix(self):
        """Check that ``convert_to_r_matrix`` keeps labels, NaNs and values."""
        r_is_na = robj.baseenv.get("is.na")

        frame = pd.DataFrame(tm.getSeriesData(), columns=["D", "C", "B", "A"])
        # Add a column made only of missing values.
        frame["E"] = [np.nan] * len(frame["A"])

        r_matrix = com.convert_to_r_matrix(frame)

        row_names = com.convert_robj(r_matrix.rownames)
        assert np.array_equal(row_names, frame.index)
        col_names = com.convert_robj(r_matrix.colnames)
        assert np.array_equal(col_names, frame.columns)
        assert all(r_is_na(value) for value in r_matrix.rx(True, "E"))

        for name in ("A", "B", "C", "D"):
            r_column = com.convert_robj(r_matrix.rx(True, name))
            assert np.array_equal(r_column, frame[name])

        # Pandas bug 1282: a column mixing strings and NaN.  A TypeError is
        # tolerated; any other exception propagates and fails the test.
        frame["F"] = [np.nan if n % 2 else "text" for n in range(30)]

        try:
            com.convert_to_r_matrix(frame)
        except TypeError:
            pass
コード例 #3
0
ファイル: test_common.py プロジェクト: ubdsgroup/wikienergy
    def test_convert_r_matrix(self):
        """Round-trip a frame through ``convert_to_r_matrix`` and compare."""
        is_missing = robj.baseenv.get("is.na")

        source = tm.getSeriesData()
        frame = pd.DataFrame(source, columns=['D', 'C', 'B', 'A'])
        # Column "E" holds nothing but NaN.
        frame["E"] = [np.nan for _ in frame["A"]]

        converted = com.convert_to_r_matrix(frame)

        r_rows = com.convert_robj(converted.rownames)
        assert np.array_equal(r_rows, frame.index)
        r_cols = com.convert_robj(converted.colnames)
        assert np.array_equal(r_cols, frame.columns)
        assert all(is_missing(cell) for cell in converted.rx(True, "E"))

        for label in frame[["A", "B", "C", "D"]]:
            from_r = com.convert_robj(converted.rx(True, label))
            assert np.array_equal(from_r, frame[label])

        # Pandas bug 1282: converting a mixed text/NaN column may raise
        # TypeError, which is accepted; other exceptions are not caught.
        frame["F"] = ["text" if not idx % 2 else np.nan for idx in range(30)]

        try:
            com.convert_to_r_matrix(frame)
        except TypeError:
            pass
コード例 #4
0
ファイル: binom_test.py プロジェクト: brwnj/cu_projects
def main(args):
    """Binomial-test every gene column against every miR column.

    Reads the table at ``args.highlow`` (first row is the header).  Columns
    whose name starts with ``"hsa"`` are treated as miRs; every other column
    not ending in ``"_notest"`` is treated as a gene.  For a (miR, gene)
    pair a row counts as a success when

    * the miR value is 0 and the gene value is 0, or
    * the miR value is not 0 and the gene value is 1;

    every other row counts as a failure.  The success/fail counts are passed
    to ``stats.binom_test`` and element ``[2][0]`` of its result is stored
    as the p-value.  The resulting table (genes as rows, miRs as columns) is
    written tab-separated to stdout.
    """
    df = pd.io.parsers.read_table(args.highlow, header=0)
    columns = df.columns
    mirs = [name for name in columns if name.startswith("hsa")]
    genes = [name for name in columns
             if not (name.endswith("_notest") or name.startswith("hsa"))]
    n_rows = len(df)

    testing = {}
    for mir in mirs:
        testing[mir] = {}
        # Computed once per miR instead of once per row per gene.
        mir_is_zero = df[mir] == 0
        for gene in genes:
            gene_col = df[gene]
            agrees = (mir_is_zero & (gene_col == 0)) | (~mir_is_zero & (gene_col == 1))
            success = int(agrees.sum())
            fail = n_rows - success
            # create dataframe for testing
            temp_df = pd.DataFrame({'success': [success], 'fail': [fail]})
            r_matrix = com.convert_to_r_matrix(temp_df)
            p = stats.binom_test(r_matrix)[2][0]
            testing[mir][gene] = p
    testing_df = pd.DataFrame(testing)
    testing_df.to_csv(sys.stdout, sep="\t")
コード例 #5
0
ファイル: pca.py プロジェクト: strny007/miXGENE
def pca_test(exp, block, es):
    """Run the R ``mixPca`` routine on ``es`` and attach the result to ``block``.

    The assay data frame of ``es`` is converted to an R matrix and placed in
    the ``data`` slot of a new ``mixData`` object; the ``Sample_title``
    phenotype column becomes an R factor in the ``phenotype`` slot of a new
    ``mixPheno`` object.  The ``points`` slot of the PCA result is stored via
    ``PcaResult.store_pca`` with the phenotype frame's index.

    On success ``block.pca_result`` is set and the ``"success"`` action is
    fired.  Any exception (including the ``isinstance`` assertion) is
    appended to ``block.errors`` and the ``"error"`` action is fired instead;
    nothing is re-raised.
    """
    try:
        importr("miXGENE", lib_loc=R_LIB_CUSTOM_PATH)
        assert isinstance(es, ExpressionSet)
        dataset = R.r['new']('mixData')
        r_data = com.convert_to_r_matrix(es.get_assay_data_frame())
        dataset.do_slot_assign('data', r_data)

        dataset_factor = R.r.new('mixPheno')
        pheno_df = es.get_pheno_data_frame()
        r_phenotype = R.r.factor(R.StrVector(pheno_df['Sample_title'].tolist()))
        dataset_factor.do_slot_assign("phenotype", r_phenotype)

        pca = R.r['mixPca'](
            dataset=dataset,
            dataset_factor=dataset_factor,
            )

        r_points = pca.do_slot('points')
        np_points = rpyn.ri2numpy(r_points)
        df_points = pd.DataFrame(np_points)
        df_points.index = pheno_df.index
        res = PcaResult(
            base_dir=exp.get_data_folder(),
            base_filename= "%s_pca" % block.uuid
        )
        res.store_pca(df_points)

        block.pca_result = res
        block.do_action("success", exp)
    # "as" form is valid on Python 2.6+ and Python 3; the comma form is not.
    except Exception as e:
        block.errors.append(e)
        block.do_action("error", exp)
コード例 #6
0
ファイル: feature_selection.py プロジェクト: evilkost/miXGENE
def apply_ranking(
        exp, block,
        es, ranking_name,
        result_table,
        pheno_class_column=None, options=None
):
    """Rank the rows of the assay matrix with an R ranking function.

    Sources ``ranking.Methods.r`` from ``R_LIB_CUSTOM_PATH``, looks up the R
    function called ``ranking_name`` and calls it with the transposed assay
    matrix, the phenotype column and ``options`` as keyword arguments.

    The values returned by R are treated as 1-based row positions, ordered
    by rank: the row named first gets rank 0, the next rank 1, and so on.
    Rows never named keep the default rank ``len(assay_df)``.

    ``exp`` and ``block`` are not used by this function.

    Returns ``([result_table], {})`` after storing the one-column ``"rank"``
    frame in ``result_table``.
    """
    if not options:
        options = {}
    if not pheno_class_column:
        pheno_class_column = es.pheno_metadata["user_class_title"]

    R.r['source'](R_LIB_CUSTOM_PATH + '/ranking.Methods.r')
    func = R.r[ranking_name]

    assay_df = es.get_assay_data_frame()
    x = com.convert_to_r_matrix(assay_df)
    y = es.get_pheno_column_as_r_obj(pheno_class_column)

    log.debug("Computing ranking: `%s` options: `%s`", ranking_name, options)
    with stopwatch(name="Computing ranking: `%s` options: `%s`" % (ranking_name, options),
                   threshold=0.01):
        ranking_list = list(func(R.r['t'](x), y, **options))

    # Fill ranks strictly by position.  The previous ``df.ix[row, "rank"]``
    # is deprecated and would switch to label lookup on an integer index.
    n_rows = len(assay_df)
    ranks = [n_rows] * n_rows
    for rank, r_row in enumerate(ranking_list):
        ranks[int(r_row - 1)] = rank

    df = pd.DataFrame(index=assay_df.index, data=ranks, columns=["rank"])

    result_table.store_table(df)
    return [result_table], {}
コード例 #7
0
ファイル: analysis.py プロジェクト: idrdex/star_api
def combat(df, annotation_col="sample_class"):
    """Batch-correct a combined expression matrix with R's ``ComBat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Sample table with at least ``gsm_name``, ``gpl_name``, ``gse_name``
        and ``annotation_col`` columns.  NOTE: a ``"code"`` column is added
        to the caller's frame in place before it is re-indexed.
    annotation_col : str
        Column used as the phenotype factor in the model matrix.

    Returns
    -------
    (combat_matrix, samples)
        The corrected matrix, labelled like the input matrix, and the sample
        rows that were used (index reset).

    Side effect: the filtered input matrix is written to ``m.csv`` in the
    current working directory.
    """
    import pandas.rpy.common as com

    names = df[["gse_name", "gpl_name"]].drop_duplicates().to_records(index=False)
    df["code"] = df.gsm_name + "_" + df.gpl_name + "_" + df.gse_name
    df = df.set_index("code")

    combined_matrix = combine_matrix(names)
    # Drop rows that still contain missing values after sample filtering.
    m = drop_missing_samples(combined_matrix).dropna()
    samples_m = df.index.intersection(m.columns)
    m = m[samples_m]
    m.to_csv("m.csv")
    # Label-based lookup; ``.loc`` replaces the deprecated ``.ix``.
    samples = df.loc[m.columns].reset_index()
    edata = com.convert_to_r_matrix(m)
    # One batch per (series, platform) combination.
    batch = robjects.StrVector(samples.gse_name + "_" + samples.gpl_name)
    pheno = robjects.FactorVector(samples[annotation_col])
    r.library("sva")
    fmla = robjects.Formula("~pheno")
    fmla.environment["pheno"] = pheno
    mod = r["model.matrix"](fmla)
    r_combat_edata = r.ComBat(dat=edata, batch=batch, mod=mod)
    combat_matrix = pd.DataFrame(np.asmatrix(r_combat_edata))
    combat_matrix.index = m.index
    combat_matrix.columns = m.columns
    return combat_matrix, samples
コード例 #8
0
ファイル: feature_selection.py プロジェクト: klema/miXGENE
def apply_ranking(exp,
                  block,
                  es,
                  ranking_name,
                  result_table,
                  pheno_class_column=None,
                  options=None):
    """Compute a row ranking with the R function named ``ranking_name``.

    ``ranking.Methods.r`` is sourced from ``R_LIB_CUSTOM_PATH`` and the named
    function is called with the transposed assay matrix, the phenotype
    column as an R object, and ``options`` expanded as keyword arguments.

    Each value returned by R is treated as a 1-based row position; its place
    in the returned sequence becomes that row's rank (0 first).  Rows that
    are not returned keep the default rank ``len(assay_df)``.

    ``exp`` and ``block`` are accepted but not used here.

    Returns ``([result_table], {})``; the ``"rank"`` frame is stored in
    ``result_table`` first.
    """
    if not options:
        options = {}
    if not pheno_class_column:
        pheno_class_column = es.pheno_metadata["user_class_title"]

    R.r['source'](R_LIB_CUSTOM_PATH + '/ranking.Methods.r')
    func = R.r[ranking_name]

    assay_df = es.get_assay_data_frame()
    x = com.convert_to_r_matrix(assay_df)
    y = es.get_pheno_column_as_r_obj(pheno_class_column)

    log.debug("Computing ranking: `%s` options: `%s`", ranking_name, options)
    with stopwatch(name="Computing ranking: `%s` options: `%s`" %
                   (ranking_name, options),
                   threshold=0.01):
        ranking_list = list(func(R.r['t'](x), y, **options))

    # Positional assignment into a plain list avoids the deprecated ``.ix``
    # indexer, which is label-based on integer indexes.
    total = len(assay_df)
    rank_by_position = [total] * total
    for rank, one_based in enumerate(ranking_list):
        rank_by_position[int(one_based - 1)] = rank

    df = pd.DataFrame(index=assay_df.index,
                      data=rank_by_position,
                      columns=["rank"])

    result_table.store_table(df)
    return [result_table], {}
コード例 #9
0
 def from_dataframe(cls, dataframe, **kwargs):
     '''Build a Rollcall from a pandas.DataFrame.

     The frame is converted to an R matrix (the layout is described in the
     pscl docs, http://cran.r-project.org/web/packages/pscl/pscl.pdf) and
     handed to ``cls.from_matrix`` together with ``kwargs``.
     '''
     return cls.from_matrix(
         rpy_common.convert_to_r_matrix(dataframe), **kwargs)
コード例 #10
0
ファイル: rollcall.py プロジェクト: FiveGen/pypscl
 def from_dataframe(cls, dataframe, **kwargs):
     '''Alternate constructor taking a pandas.DataFrame.

     Converts ``dataframe`` to the R matrix described in the pscl docs
     (see http://cran.r-project.org/web/packages/pscl/pscl.pdf) and
     delegates to ``cls.from_matrix``, forwarding ``kwargs`` unchanged.
     '''
     as_r_matrix = rpy_common.convert_to_r_matrix(dataframe)
     rollcall = cls.from_matrix(as_r_matrix, **kwargs)
     return rollcall
コード例 #11
0
ファイル: gt.py プロジェクト: klema/miXGENE
    def gt_basic(es, gene_sets, pheno_class_column,
                 model="logistic",
                 permutations=100):
        """
            Run ``GlobalTest.gt`` on an expression set and return the
            ``result`` slot converted with ``com.convert_robj``.

            @param es: Expression set with defined user class in pheno
            @type es: ExpressionSet

            @type gene_sets: environment.structures.GeneSets

            @param pheno_class_column: Column name of target classes in phenotype table
            @type pheno_class_column: string or None

            @param model: forwarded to ``GlobalTest.gt`` as ``model``
            @param permutations: forwarded to ``GlobalTest.gt`` as ``permutations``
        """
        GlobalTest.gt_init()

        # Fetch the assay frame once and reuse it for both the R matrix and
        # the gene list (it used to be fetched twice).
        assay_df = es.get_assay_data_frame()
        dataset = com.convert_to_r_matrix(assay_df)
        response = es.get_pheno_column_as_r_obj(pheno_class_column)

        # Keep only gene sets restricted to genes present in the index.
        genes_in_es = assay_df.index.tolist()
        gs_filtered = filter_gs_by_genes(gene_sets.get_gs(), genes_in_es)

        gt_instance = GlobalTest.gt(
            response,
            R.r['t'](dataset),
            subsets=gs_filtered.to_r_obj(),
            model=model,
            permutations=permutations,
        )

        result = gt_instance.do_slot('result')
        result_df = com.convert_robj(result)
        return result_df
コード例 #12
0
ファイル: main.py プロジェクト: abhik1368/star_api
def impute_data(data):
    """Impute ``data`` with R's ``impute.knn`` and return a labelled frame.

    The first element of the R result is converted to a numpy array; the
    returned DataFrame carries the index and columns of ``data``.
    """
    r.library("impute")
    knn_result = r['impute.knn'](com.convert_to_r_matrix(data))
    imputed = pd.DataFrame(np.asarray(knn_result[0]))
    imputed.index = data.index
    imputed.columns = data.columns
    return imputed
コード例 #13
0
def impute_data(data):
    """Return ``data`` after imputation by the R function ``impute.knn``.

    Loads the R ``impute`` library, passes ``data`` as an R matrix, and
    rebuilds a DataFrame from element 0 of the result with the original
    row and column labels.
    """
    r.library("impute")
    r_matrix = com.convert_to_r_matrix(data)
    r_result = r['impute.knn'](r_matrix)
    values = np.asarray(r_result[0])
    frame = pd.DataFrame(values)
    frame.index, frame.columns = data.index, data.columns
    return frame
コード例 #14
0
ファイル: generic_ranking.py プロジェクト: strny007/miXGENE
def apply_ranking(
        exp, block,
        es, ranking_name,
        result_table,
        pheno_class_column=None, options=None
):
    """Rank assay columns with an R ranking function.

    Steps:

    1. Source ``ranking.Methods.r`` and look up the R function
       ``ranking_name``.
    2. When ``settings.CELERY_DEBUG`` is set, attach the pydevd remote
       debugger.
    3. Shorten each assay column name to the part before the first ``"."``
       and, when that short name was already seen, append ``"__<position>"``
       so names stay unique for R.  NOTE(review): the new names are assigned
       to the frame returned by ``es.get_assay_data_frame()`` in place;
       whether that frame is a copy is not visible here.
    4. Transpose the frame, convert it to an R matrix, and call the ranking
       function on its R-side transpose with the phenotype column and
       ``options``.
    5. Treat each returned value as a 1-based row position of the
       transposed frame; its place in the returned sequence becomes the
       rank.  Rows not returned keep the default rank ``len(assay_df)``.

    Returns ``([result_table], {})`` after storing the ``"rank"`` frame.
    """
    if not options:
        options = {}
    if not pheno_class_column:
        pheno_class_column = es.pheno_metadata["user_class_title"]

    R.r['source'](R_LIB_CUSTOM_PATH + '/ranking.Methods.r')
    func = R.r[ranking_name]
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    assay_df = es.get_assay_data_frame()
    cols = assay_df.columns

    # We must rename cols to be unique for R
    seen = set()
    out_cols = []
    for i, g in enumerate(cols):
        g = g.split('.')[0]
        if g in seen:
            out_cols.append(g + '__' + str(i))
        else:
            seen.add(g)
            out_cols.append(g)
    assay_df.columns = out_cols
    assay_df = assay_df.T

    x = com.convert_to_r_matrix(assay_df)
    y = es.get_pheno_column_as_r_obj(pheno_class_column)
    exp.log(block.uuid, "Computing ranking: `%s` options: `%s`" % (ranking_name, options))
    log.debug("Computing ranking: `%s` options: `%s`", ranking_name, options)
    with stopwatch(name="Computing ranking: `%s` options: `%s`" % (ranking_name, options),
                   threshold=0.01):
        ranking_list = list(func(R.r['t'](x), y, **options))

    # Fill ranks by position in a plain list; the deprecated ``.ix`` indexer
    # is label-based on integer indexes and no longer exists in new pandas.
    n_rows = len(assay_df)
    ranks = [n_rows] * n_rows
    for rank, r_row in enumerate(ranking_list):
        ranks[int(r_row - 1)] = rank

    df = pd.DataFrame(index=assay_df.index, data=ranks, columns=["rank"])

    result_table.store_table(df)
    return [result_table], {}
コード例 #15
0
ファイル: main.py プロジェクト: dhimmel/starapi
def impute_data(data):
    """Fill missing values in ``data`` using the R function ``impute.knn``.

    rpy2 and ``pandas.rpy.common`` are imported locally.  Element 0 of the
    R result is turned into a DataFrame that reuses the index and columns
    of ``data``.
    """
    import rpy2.robjects as robjects
    r = robjects.r
    import pandas.rpy.common as com
    r.library("impute")
    knn_output = r['impute.knn'](com.convert_to_r_matrix(data))
    result = pd.DataFrame(np.asarray(knn_output[0]))
    result.index = data.index
    result.columns = data.columns
    return result
コード例 #16
0
 def calculate(self, method, data_frame, positive_samples, negative_samples):
     """Run ``self.siggenes.sam`` on the selected samples.

     Columns of ``data_frame`` that appear in ``positive_samples`` or
     ``negative_samples`` are kept, in their ``data_frame`` order.  A sample
     found in both lists is labelled positive (1); negatives get 0.
     ``method`` is not used.

     Returns a pandas.DataFrame indexed by the row labels of ``data_frame``
     with columns Score, FalseCalls, Q-value, P-value and StdDev, read from
     the ``d``, ``vec.false``, ``q.value``, ``p.value`` and ``s`` attributes
     of the SAM result.
     """
     ## construct matrix_r
     r = robjects.r
     # Sets are built once instead of concatenating the lists per column.
     positives = set(positive_samples)
     negatives = set(negative_samples)
     samples = [sample for sample in data_frame.axes[1]
                if sample in positives or sample in negatives]
     features = data_frame.axes[0]
     matrix = data_frame[samples]
     matrix_r = common.convert_to_r_matrix(matrix)
     ## construct cls_r
     cls = {}
     for sample in samples:
         cls[sample] = 1 if sample in positives else 0
     # ``columns=samples`` pins the label order to the matrix column order;
     # without it pandas may sort the dict keys and misalign the labels.
     cls_r = common.convert_to_r_matrix(
         pandas.DataFrame([cls], columns=samples))
     ## generate signature with method
     sam_out = self.siggenes.sam(matrix_r, r.c(cls_r))
     sam_att = r.cbind(
         r.c(r.attributes(sam_out).rx2('d')),
         r.c(r.attributes(sam_out).rx2('vec.false')),
         r.c(r.attributes(sam_out).rx2('q.value')),
         r.c(r.attributes(sam_out).rx2('p.value')),
         r.c(r.attributes(sam_out).rx2('s'))
     )
     ## return results as a data_frame
     ocols = ['Score', 'FalseCalls', 'Q-value', 'P-value', 'StdDev']
     output = {}
     for j, col in enumerate(ocols):
         row = {}
         for i, n in enumerate(features):
             # R indices are 1-based.
             row[n] = sam_att.rx(i + 1, j + 1)[0]
         output[col] = row
     return(pandas.DataFrame(output))
コード例 #17
0
ファイル: default.py プロジェクト: synesthesiam/eyecode
def mcnemar_test(results_matrix):
    """Run R's ``mcnemar.test`` on a DataFrame.

    Returns
    -------
        c, p : tuple
            Elements ``[0][0]`` and ``[2][0]`` of the R result: the
            chi-squared statistic and the p-value.
    """
    import pandas.rpy.common as rcom

    r_mcnemar = rcom.r["mcnemar.test"]
    outcome = r_mcnemar(rcom.convert_to_r_matrix(results_matrix))
    chi_sq = outcome[0][0]
    p_value = outcome[2][0]
    return chi_sq, p_value
コード例 #18
0
def mcnemar_test(results_matrix):
    """McNemar chi-squared test, delegated to R.

    ``results_matrix`` is converted to an R matrix before the call.

    Returns
    -------
        c, p : tuple
            chi-sq stat (result element 0) and p-value (result element 2).
    """
    import pandas.rpy.common as rcom
    run_test = rcom.r["mcnemar.test"]

    r_input = rcom.convert_to_r_matrix(results_matrix)
    r_output = run_test(r_input)
    return (r_output[0][0], r_output[2][0])
コード例 #19
0
def chisq_test(results_matrix):
    """Run R's ``chisq.test`` on a DataFrame.

    Returns
    -------
        c, p : tuple
            Elements ``[0][0]`` and ``[2][0]`` of the R result: the
            chi-squared statistic and the p-value.
    """
    import pandas.rpy.common as rcom

    r_chisq = rcom.r["chisq.test"]
    outcome = r_chisq(rcom.convert_to_r_matrix(results_matrix))
    chi_sq = outcome[0][0]
    p_value = outcome[2][0]
    return chi_sq, p_value
コード例 #20
0
ファイル: default.py プロジェクト: synesthesiam/eyecode
def chisq_test(results_matrix):
    """Chi-square test, delegated to R.

    ``results_matrix`` is converted to an R matrix before the call.

    Returns
    -------
        c, p : tuple
            chi-sq stat (result element 0) and p-value (result element 2).
    """
    import pandas.rpy.common as rcom
    run_test = rcom.r["chisq.test"]

    r_input = rcom.convert_to_r_matrix(results_matrix)
    r_output = run_test(r_input)
    return (r_output[0][0], r_output[2][0])
コード例 #21
0
def show_allocated_portfolio(form):
    """Compute portfolio weights in R and render ``allocated.html``.

    ``form`` must provide ``'ticker_selection'`` (comma-separated tickers;
    a trailing ``' '`` entry is dropped) and ``'horizon'``.

    Price data is read from ``stock_data.csv`` (indexed and sorted by
    ``Date``, first row skipped), the selected columns are published to R
    as ``data``, ``calculations.R`` is sourced and
    ``function_make_everything_work(data, horizon)`` is called.  The weights
    are rounded to two decimals; whatever is left of 1 is reported as the
    ``'Risk-Free'`` allocation.

    Raises
    ------
    ValueError
        If ``form['horizon']`` is not a number.
    """
    selected_tickers = form['ticker_selection'].split(',')
    if selected_tickers[-1] == ' ':
        selected_tickers = selected_tickers[:-1]

    # The horizon comes from a submitted form and is spliced into R source,
    # so it is forced through float() to rule out injected R code.
    horizon = float(form['horizon'])

    data = pd.read_csv('stock_data.csv').set_index('Date').sort_index()[1:]
    ro.globalenv['data'] = com.convert_to_r_matrix(data[selected_tickers])
    ro.r('source("calculations.R")')
    raw = ro.r('function_make_everything_work(data,%r)' % horizon)
    weights = np.around(np.array(raw),2)
    # list(...) so that .append also works where zip returns an iterator.
    result = list(zip(selected_tickers,weights,weights > 0))
    rfalloc = 1 - weights.sum()
    result.append(('Risk-Free',rfalloc,rfalloc>0))
    return render_template('allocated.html',
                           horizon=form['horizon'],
                           result=result)
コード例 #22
0
ファイル: RpyExp.py プロジェクト: JudoWill/ResearchNotebooks
def quantile_norm_with_R(input_df):
    """Quantile-normalize ``input_df`` with R's ``normalize.quantiles``.

    Defines the R helper ``quantnorm``, applies it to the frame converted to
    an R matrix, and returns the result as a DataFrame carrying the index
    and columns of ``input_df``.  The input and the normalized frame are
    printed as debug output.

    NOTE(review): this function does not load the R package that provides
    ``normalize.quantiles``; it must already be attached.
    """
    R_norm_func = robjects.r("""quantnorm <- function(inputmatrix)
{
y<-normalize.quantiles(inputmatrix)
return(y)
}""" )

    R_matrix = com.convert_to_r_matrix(input_df)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(input_df)
    normed_matrix = R_norm_func(R_matrix)
    normed_df = com.convert_robj(normed_matrix)
    print(normed_df)
    normed_df.index = input_df.index
    normed_df.columns = input_df.columns

    return normed_df
コード例 #23
0
    def gt_basic(es, gene_sets, pheno_class_column,
                 model="logistic",
                 permutations=100):
        """
            Run the R ``globaltest`` function ``gt`` and return its
            ``result`` slot converted with ``com.convert_robj``.

            ``model`` and ``permutations`` are accepted but currently not
            forwarded to ``gt``.

            @param es: Expression set with defined user class in pheno
            @type es: ExpressionSet

            @type gene_sets: environment.structures.GeneSets

            @param pheno_class_column: Column name of target classes in phenotype table
            @type pheno_class_column: string or None
        """
        # Optional remote-debugger hook.
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
            import pydevd
            pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

        raw_gene_sets = gene_sets.get_gs()
        assay = es.get_assay_data_frame()
        assay, filtered_sets = preprocess_df_gs(assay, raw_gene_sets)

        # The frame is transposed in pandas and transposed back in R.
        r_matrix = com.convert_to_r_matrix(assay.T)
        response = es.get_pheno_column_as_r_obj(pheno_class_column)
        r_dataset = R.r['t'](r_matrix)
        r_subsets = filtered_sets.to_r_obj()

        try:
            R.r['library']("globaltest")
            run_gt = R.r['gt']
            gt_instance = run_gt(response, r_dataset, subsets=r_subsets)
        except:
            # Log the exception type, then let the error propagate.
            import sys
            log.error("Unexpected error: %s" % sys.exc_info()[0])
            raise

        return com.convert_robj(gt_instance.do_slot('result'))
コード例 #24
0
 def convert_to_r_series(self, resampled_df, start_date, data_freq):
     """Wrap ``resampled_df`` in an R ``ts`` object.

     The frame is converted to an R matrix first.  ``data_freq`` must be 12
     or 52: the start value comes from ``get_start_for_r_monthly`` for 12
     and from ``get_start_for_r_series_weekly`` for 52.  Any other value
     raises NotImplementedError.
     """
     r_matrix = com.convert_to_r_matrix(resampled_df)

     if data_freq == 12:
         series_start = self.get_start_for_r_monthly(start_date)
     else:
         if data_freq != 52:
             raise NotImplementedError( "Implemented only for other frequencies 12 and 52" )
         series_start = self.get_start_for_r_series_weekly(start_date)

     make_ts = robjects.r['ts']
     return make_ts(r_matrix, start=series_start, frequency=data_freq)
コード例 #25
0
ファイル: remote.py プロジェクト: narolez571/intuition
    def fetch_equities_daily(self, symbols, ohlc=False, r_type=False, returns=False, **kwargs):
        """Load daily data for ``symbols`` from Yahoo.

        ``symbols`` may be a sequence or a comma-separated string; an empty
        value yields an empty DataFrame.  With ``ohlc`` the result of
        ``load_bars_from_yahoo`` is returned untouched.  Otherwise the
        result of ``load_from_yahoo`` is optionally turned into returns
        (``returns``) and then into an R matrix (``r_type``).  Both options
        are ignored when ``ohlc`` is set.
        """
        if len(symbols) == 0:
            return pd.DataFrame()
        if isinstance(symbols, str):
            symbols = symbols.split(",")

        if ohlc:
            return load_bars_from_yahoo(stocks=symbols, **kwargs)

        data = load_from_yahoo(stocks=symbols, **kwargs)
        # NOTE Would it work with a pandas panel ?
        if returns:
            # Change since the previous row, divided by the current row.
            change = data - data.shift(1)
            data = (change / data).fillna(method="bfill")
        if r_type:
            data = convert_to_r_matrix(data)
        return data
コード例 #26
0
ファイル: Rtools.py プロジェクト: JudoWill/PySeqUtils
def quantile_norm_with_R(input_df):
    """Normalize a DataFrame with R's ``normalize.quantiles``.

    Loads ``preprocessCore`` in R, defines the helper ``quantnorm`` there,
    and returns its output as a DataFrame labelled like ``input_df``.
    """
    robjects.r('require(preprocessCore)')
    quantnorm = robjects.r("""quantnorm <- function(inputmatrix)
{
y<-normalize.quantiles(inputmatrix)
return(y)
}""")

    r_input = com.convert_to_r_matrix(input_df)
    result = com.convert_robj(quantnorm(r_input))

    # The R result carries no labels; restore the original ones.
    result.index = input_df.index
    result.columns = input_df.columns
    return result
コード例 #27
0
 def calculate(self, method, data_frame, positive_samples, negative_samples):
     """Run ``self.siggenes.sam`` on the selected, sorted samples.

     Columns of ``data_frame`` found in ``positive_samples`` or
     ``negative_samples`` are kept and sorted; rows are sorted as well.
     Positive samples are labelled 1 and negative ones 0.  ``method`` is
     not used.

     Returns a pandas.DataFrame with one row per feature and the columns
     Score, FalseCalls, Q-value, P-value and StdDev.
     """
     r = robjects.r

     ## construct matrix_r
     wanted = positive_samples + negative_samples
     samples = sorted(s for s in data_frame.axes[1] if s in wanted)
     features = sorted(data_frame.axes[0])
     matrix_r = common.convert_to_r_dataframe(data_frame[samples].loc[features])

     ## construct cls_r
     cls = {}
     for name in samples:
         if name in positive_samples:
             cls[name] = 1
         elif name in negative_samples:
             cls[name] = 0
     cls_r = common.convert_to_r_matrix(pandas.DataFrame([cls]))

     ## generate signature with method
     sam_out = self.siggenes.sam(matrix_r, r.c(cls_r))
     attribute_names = ("d", "vec.false", "q.value", "p.value", "s")
     sam_att = r.cbind(
         *[r.c(r.attributes(sam_out).rx2(key)) for key in attribute_names]
     )

     ## return results as a data_frame
     ocols = ["Score", "FalseCalls", "Q-value", "P-value", "StdDev"]
     output = {}
     for col_pos, col in enumerate(ocols, 1):
         # R indices are 1-based, hence enumerate(..., 1).
         output[col] = dict(
             (feature, sam_att.rx(row_pos, col_pos)[0])
             for row_pos, feature in enumerate(features, 1)
         )
     return pandas.DataFrame(output)
コード例 #28
0
    def fetch_equities_daily(self, equities, ohlc=False,
                             r_type=False, returns=False, **kwargs):
        """Load daily Yahoo data for ``equities``.

        ``equities`` may be a sequence or a comma-separated string; an
        empty value yields an empty DataFrame.  Each equity is mapped to a
        symbol with ``self.datafeed.guess_name``.  With ``ohlc`` the bars
        are loaded and their ``items`` relabelled with ``equities``.
        Otherwise the columns are relabelled and ``returns`` / ``r_type``
        are applied in that order.  Both options are ignored for ``ohlc``.
        """
        if len(equities) == 0:
            return pd.DataFrame()
        if isinstance(equities, str):
            equities = equities.split(',')
        symbols = list(map(self.datafeed.guess_name, equities))

        if ohlc:
            bars = load_bars_from_yahoo(stocks=symbols, **kwargs)
            bars.items = equities
            return bars

        data = load_from_yahoo(stocks=symbols, **kwargs)
        data.columns = equities

        #NOTE Would it work with a pandas panel ?
        if returns:
            # Change since the previous row, divided by the current row.
            change = data - data.shift(1)
            data = (change / data).fillna(method='bfill')
        if r_type:
            data = convert_to_r_matrix(data)
        return data
コード例 #29
0
# Drop the first column of the behavioral table.
behavioral_data = all_behavioral_data[:, 1:]

# Load the data stored at rscorrfn and keep the selected subjects.
rest_data = np.load(expanduser(rscorrfn))
X = rest_data[subjet_subset]
Y = behavioral_data
# Demean: subtract each column's mean from Y, in place.
# NOTE(review): Y is the same object as behavioral_data, so the in-place
# operations below modify it too.
S = Y.sum(axis=0) / Y.shape[0]
Y -= S[np.newaxis, :]
# Scale each column by its sum of squares; zero sums are replaced by 1 to
# avoid division by zero.
# NOTE(review): this divides by the sum of squares, not by its square root
# -- confirm that is the intended scaling.
var = (Y**2).sum(axis=0)
var[var == 0] = 1
Y /= var
# Replace NaN entries of X with 1.
X[np.isnan(X)] = 1

# Publish X and Y to the R global environment as matrices.
df_X = pd.DataFrame(X)
df_Y = pd.DataFrame(Y)
rmat_X = com.convert_to_r_matrix(df_X)
rmat_Y = com.convert_to_r_matrix(df_Y)

ri.globalenv['X'] = rmat_X
ri.globalenv['Y'] = rmat_Y

# explained variable
from sklearn.linear_model import LinearRegression
limit_exp_var = len(keys)  # upper bound of the component loop; save for later
exp_var_X = []
exp_var_Y = []
for i in range(1, limit_exp_var + 1):
    n_com = i
    com.r("""
	    out <- CCA(x = X, z = Y, K = %i, niter = 100, standardize = FALSE,
	               penaltyx = %f, penaltyz = %f)
コード例 #30
0
def peakFound(cubematrix,t2m,hdr,loc_IS,delta_drug,delta_tiss,hws,hws_t,loc_tiss):
    """Locate internal-standard, drug and tissue peaks for every pixel.

    For each of the ``hdr[0]*hdr[1]`` columns of ``cubematrix`` a two-column
    matrix (``t2m``, column values) is sent to R, where
    ``createMassSpectrum`` and ``detectPeaks`` are applied (presumably from
    the MALDIquant package -- confirm).  All intermediate values live in the
    R global environment and are overwritten on every iteration.

    Parameters (as used below)
    --------------------------
    cubematrix : 2-D array indexed as ``cubematrix[:, c]``
    t2m : 1-D array of mass values, one per row of ``cubematrix``
    hdr : sequence; ``hdr[0]*hdr[1]`` is the number of pixels
    loc_IS : two-element range searched for the internal-standard peak
    delta_drug : offset subtracted from the internal-standard position to
        find the drug peak
    delta_tiss : only exported to R; not referenced by the R code here
    hws, hws_t : half window sizes (in points) around the internal-standard
        and tissue peaks
    loc_tiss : two-element range searched for the tissue peak

    Returns
    -------
    tuple of six arrays with one row per pixel:
    ``(peakIS_matrix, peakdrug_matrix, peaktissue_matrix, range_ISmatrix,
    range_drugmatrix, range_tissmatrix)``.  Rows of pixels skipped by a
    ``continue`` keep their initial zeros.
    """

    # Mean spacing between consecutive t2m values; the last difference is
    # left out.  NOTE(review): bare ``mean`` must be in scope (not np.mean).
    devst=mean(t2m[1:t2m.size-1]-t2m[0:t2m.size-2]) 
    matrix_data=np.zeros([np.size(t2m),2]) 
    peakIS_matrix=np.zeros([hdr[0]*hdr[1],1])
    peakdrug_matrix=np.zeros([hdr[0]*hdr[1],1])
    peaktissue_matrix=np.zeros([hdr[0]*hdr[1],1])
    range_drugmatrix=np.zeros([hdr[0]*hdr[1],2])
    range_tissmatrix=np.zeros([hdr[0]*hdr[1],2])
    range_ISmatrix=np.zeros([hdr[0]*hdr[1],2])
    matrix_data[:,0]=t2m 
    
    for c in range(hdr[1] * hdr[0]): 
         matrix_data[:,1]=cubematrix[:,c]  # spectrum of the single pixel "c"
         matrix_data_DF = pd.DataFrame(matrix_data,dtype='float')
         data = com.convert_to_r_matrix(matrix_data_DF)
         # Export the spectrum and all parameters to R.  Only the spectrum
         # changes between iterations.
         ro.globalenv['matrix_data_DF']=data 
         ro.globalenv['hws']=hws
         ro.globalenv['loc_IS']=loc_IS
         ro.globalenv['hws_t']=hws_t
         ro.globalenv['loc_tiss']=loc_tiss
         ro.globalenv['delta_drug']=delta_drug
         ro.globalenv['delta_tiss']=delta_tiss
         ro.globalenv['devst']=devst
         ro.r('spectra<-createMassSpectrum(mass=matrix_data_DF[,c(1)], intensity=matrix_data_DF[,c(2)])')
         ro.r('peaks<-detectPeaks(spectra, SNR=2,halfWindowSize=hws)') 
         ## identification of the internal-standard peak
         ro.r('all_peaks_IS<-peaks@mass[(peaks@mass>=loc_IS[1])&(peaks@mass<=loc_IS[2])]')# all detected peaks inside the loc_IS range
         ro.r('posISs<-match(all_peaks_IS,spectra@mass)')
         ro.r('NpeaksIS<-length(all_peaks_IS)')
         NpeaksIS=ro.r('NpeaksIS')[0]
         # No internal-standard peak: leave this pixel at zero.
         if NpeaksIS==0:
             continue
         if NpeaksIS>1:
             # Several candidates: keep the position of the most intense one.
             ro.r('info_peaksIS<-matrix(data = 0,nrow = NpeaksIS ,ncol =3 )')
             ro.r('info_peaksIS[,c(1)]<-posISs')
             ro.r('info_peaksIS[,c(2)]<-all_peaks_IS')
             ro.r('info_peaksIS[,c(3)]<-spectra@intensity[posISs]')
             ro.r('pos_peakIS_moda<-info_peaksIS[match(max(info_peaksIS[,c(3)]),info_peaksIS[,c(3)]),c(1)]')
         else:
             ro.r('pos_peakIS_moda<-posISs')
         # NOTE(review): the value fetched on the next line is discarded.
         ro.r('pos_peakIS_moda')[0]    
         # Intensity-weighted mean mass over a window of hws points on each
         # side of the chosen peak, then the window point closest to it.
         ro.r('matrix_lav<-matrix(data = 0,nrow = hws*2+1 ,ncol =4 )')
         ro.r('matrix_lav[,c(1)]<-spectra@mass[(pos_peakIS_moda-hws):(pos_peakIS_moda+hws)]')
         ro.r('matrix_lav[,c(2)]<-spectra@intensity[(pos_peakIS_moda-hws):(pos_peakIS_moda+hws)]')
         ro.r('matrix_lav[,c(3)]<-(pos_peakIS_moda-hws):(pos_peakIS_moda+hws)')
         ro.r('peakIS_mean<-sum(matrix_lav[,c(1)]*matrix_lav[,c(2)])/sum(matrix_lav[,c(2)])')
         ro.r('matrix_lav[,c(4)]<-abs(matrix_lav[,c(1)]-peakIS_mean)')
         pos_peakIS=ro.r('pos_peakIS<-matrix_lav[match(min(matrix_lav[,c(4)]),matrix_lav[,c(4)]),c(3)]')[0]
         peaks_IS=ro.r('peakIS_mean')
         if np.isnan(peaks_IS):
             continue
         # NOTE(review): always true here, the zero case already continued.
         if NpeaksIS>0:
             ro.r('range_IS<-c(spectra@mass[pos_peakIS-hws], spectra@mass[pos_peakIS+hws])')
             rim=ro.r('range_IS')
             range_ISmatrix[c,0]=rim[0]
             range_ISmatrix[c,1]=rim[1]
             peakIS_matrix[c]=peaks_IS[0]
             # Drug candidates: spectrum masses within +/- devst of
             # (internal-standard mean - delta_drug).
             ro.r('all_peaks_drug<-spectra@mass[(spectra@mass>=peakIS_mean-delta_drug-devst)&(spectra@mass<=peakIS_mean-delta_drug+devst)]')
             ro.r('Npeaks_drug<-length(all_peaks_drug)')
             all_peaks_drug=ro.r('all_peaks_drug')
             Npeaks_drug=ro.r('Npeaks_drug')[0]
             if Npeaks_drug>1:
                 # Pick the candidate closest to the expected drug mass.
                 # NOTE(review): the loop relies on an exact float match
                 # with R's minimum; if none matches, p runs past the end.
                 peaks_drug=0
                 p=0
                 min_diff_peaks_drug=ro.r('min(abs(peakIS_mean-delta_drug-all_peaks_drug))')[0]
                 while peaks_drug<=0:
                     if abs(peaks_IS[0]-delta_drug-all_peaks_drug[p])==min_diff_peaks_drug:
                            peaks_drug=all_peaks_drug[p]  
                     p=p+1
             else:
                 # NOTE(review): this branch also covers Npeaks_drug == 0,
                 # where all_peaks_drug is empty.
                 peaks_drug=all_peaks_drug
             peakdrug_matrix[c]= peaks_drug   
             ro.globalenv['peaks_drug']=peaks_drug
             ro.r('pos_peakdrug<-match(peaks_drug,spectra@mass)')
             ro.r('range_drug<-c(spectra@mass[pos_peakdrug-hws], spectra@mass[pos_peakdrug+hws])')
             rdm=ro.r('range_drug')
             range_drugmatrix[c,0]=rdm[0]
             range_drugmatrix[c,1]=rdm[1]
             # identification of the tissue peak
             ro.r('all_peaks_tiss<-peaks@mass[(peaks@mass>=loc_tiss[1])&(peaks@mass<=loc_tiss[2])]')# all peaks inside the searched range
             ro.r('pos_tiss<-match(all_peaks_tiss,spectra@mass)')
             ro.r('Npeaks_tiss<-length(all_peaks_tiss)')
             NpeaksTiss=ro.r('Npeaks_tiss')[0]
             # No tissue peak: the tissue outputs of this pixel stay zero.
             if NpeaksTiss==0:
                 continue
             if NpeaksTiss>1:
                 # Several candidates: keep the most intense one.
                 ro.r('info_peaksTiss<-matrix(data = 0,nrow = Npeaks_tiss ,ncol =3 )')
                 ro.r('info_peaksTiss[,c(1)]<-pos_tiss')
                 ro.r('info_peaksTiss[,c(2)]<-all_peaks_tiss')
                 ro.r('info_peaksTiss[,c(3)]<-spectra@intensity[pos_tiss]')
                 ro.r('peaks_tiss<-info_peaksTiss[match(max(info_peaksTiss[,c(3)]),info_peaksTiss[,c(3)]),c(2)]')
                 ro.r('pos_peakTiss_moda<-info_peaksTiss[match(max(info_peaksTiss[,c(3)]),info_peaksTiss[,c(3)]),c(1)]')
             else:
                 ro.r('pos_peakTiss_moda<-pos_tiss')
             # NOTE(review): the value fetched on the next line is discarded.
             ro.r('pos_peakTiss_moda')[0]    
             # Same weighted-mean window as for the internal standard, with
             # half width hws_t.
             ro.r('matrix_lav<-matrix(data = 0,nrow = hws_t*2+1 ,ncol =4 )')
             ro.r('matrix_lav[,c(1)]<-spectra@mass[(pos_peakTiss_moda-hws_t):(pos_peakTiss_moda+hws_t)]')
             ro.r('matrix_lav[,c(2)]<-spectra@intensity[(pos_peakTiss_moda-hws_t):(pos_peakTiss_moda+hws_t)]')
             ro.r('matrix_lav[,c(3)]<-(pos_peakTiss_moda-hws_t):(pos_peakTiss_moda+hws_t)')
             ro.r('peaktiss_mean<-sum(matrix_lav[,c(1)]*matrix_lav[,c(2)])/sum(matrix_lav[,c(2)])')
             ro.r('matrix_lav[,c(4)]<-abs(matrix_lav[,c(1)]-peaktiss_mean)')    
             pos_peakTiss=ro.r('pos_peaktiss<-matrix_lav[match(min(matrix_lav[,c(4)]),matrix_lav[,c(4)]),c(3)]')[0]
             peaks_tiss=ro.r('peaktiss_mean')
             if np.isnan(peaks_tiss):
                 continue
             # NOTE(review): always true here, the zero case already continued.
             if NpeaksTiss>0:
                 ro.r('range_tiss<-c(spectra@mass[pos_peaktiss-hws_t], spectra@mass[pos_peaktiss+hws_t])')
                 rtm=ro.r('range_tiss')
                 range_tissmatrix[c,0]=rtm[0]
                 range_tissmatrix[c,1]=rtm[1]  
                 peaktissue_matrix[c]=peaks_tiss[0]
    return (peakIS_matrix,peakdrug_matrix,peaktissue_matrix,range_ISmatrix, range_drugmatrix, range_tissmatrix)
コード例 #31
0
ファイル: RpyExp.py プロジェクト: JudoWill/ResearchNotebooks
# <codecell>

# Load the R package preprocessCore before the helper below is called.
robjects.r('require(preprocessCore)')

# <codecell>

# Define the R helper ``quantnorm``, a thin wrapper around
# normalize.quantiles.
robjects.r("""quantnorm <- function(inputmatrix)
{
y<-normalize.quantiles(inputmatrix)
return(y)
}""" )

# <codecell>

# Select three columns of cyto_data, transpose, and convert to an R matrix.
small_dataframe = com.convert_to_r_matrix(cyto_data[['VEGF', 'HGF', 'Rantes']].T)

output = robjects.r['quantnorm'](small_dataframe)

# <codecell>

# Convert the R result back and undo the transpose applied above.
normed = com.convert_robj(output).T

# <codecell>

def quantile_norm_with_R(input_df):
    
    R_norm_func = robjects.r("""quantnorm <- function(inputmatrix)
{
y<-normalize.quantiles(inputmatrix)
return(y)
コード例 #32
0
# Drop the first column of the behavioral table.
behavioral_data = all_behavioral_data[:, 1:]

# Load the data stored at rscorrfn and keep the selected subjects.
rest_data = np.load(expanduser(rscorrfn))
X = rest_data[subjet_subset]
Y = behavioral_data
# Demean: subtract each column's mean from Y, in place.
# NOTE(review): Y is the same object as behavioral_data, so the in-place
# operations below modify it too.
S = Y.sum(axis=0) / Y.shape[0]
Y -= S[np.newaxis, :]
# Scale each column by its sum of squares; zero sums are replaced by 1 to
# avoid division by zero.
# NOTE(review): this divides by the sum of squares, not by its square root
# -- confirm that is the intended scaling.
var = (Y ** 2).sum(axis=0)
var[var == 0] = 1
Y /= var
# Replace NaN entries of X with 1.
X[np.isnan(X)] = 1

# Publish X and Y to the R global environment as matrices.
df_X = pd.DataFrame(X)
df_Y = pd.DataFrame(Y)
rmat_X = com.convert_to_r_matrix(df_X)
rmat_Y = com.convert_to_r_matrix(df_Y)

ri.globalenv['X'] = rmat_X
ri.globalenv['Y'] = rmat_Y

# explained variable
from sklearn.linear_model import LinearRegression
limit_exp_var = len(keys) # upper bound of the component loop; save for later
exp_var_X = []
exp_var_Y = []
for i in range(1, limit_exp_var+1):
	n_com = i
	com.r(
	    """
	    out <- CCA(x = X, z = Y, K = %i, niter = 100, standardize = FALSE,