Example #1
def get_data(numgroups):
    with localconverter(ro.default_converter + pandas2ri.converter):
        if numgroups == 2:
            r.source('~/Documents/rscripts/splatter-2.R')
        elif numgroups == 6:
            r.source('~/Documents/rscripts/splatter-6.R')
        counts = r2py(r['counts'])  # cell-by-gene dataframe
        cellinfo = r2py(r['cellinfo'])  # Cell, Batch, Group
        geneinfo = r2py(r['geneinfo'])  # Gene

        sim = sc.AnnData(counts.values, obs=cellinfo, var=geneinfo)
        sim.obs_names = cellinfo.Cell
        sim.var_names = geneinfo.Gene
        if numgroups == 2:
            sc.pp.filter_genes(
                sim, min_counts=1
            )  # omitted in 6 case so we can generalize to diff dropout %s

        truecounts = r2py(r['truecounts'])
        dropout = r2py(r['dropout'])
        print("percent dropout: {}".format(
            np.sum(dropout.values) / (sim.n_obs * sim.n_vars)))

        sim_true = sc.AnnData(truecounts.values, obs=cellinfo, var=geneinfo)
        sim_true.obs_names = cellinfo.Cell
        sim_true.var_names = geneinfo.Gene
        sim_true = sim_true[:, sim.var_names]

        return [sim, sim_true]
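A minimal usage sketch for get_data above (assuming the Splatter R scripts exist at the hard-coded paths and that sc/np/rpy2 are imported as in the function body):

sim, sim_true = get_data(numgroups=2)
print(sim)       # AnnData with simulated dropout counts
print(sim_true)  # AnnData with the matching true counts, subset to sim's genes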
Example #2
def stage2_calculate():

    print('Execute jpsurvRest/stage2_calculate')
    print('Yes, yes, yes...')
    print()

    print(OKGREEN + UNDERLINE + BOLD +
          "****** Stage 2: CALCULATE BUTTON ***** " + ENDC)

    jpsurvDataString = request.args.get('jpsurvData', False)
    jpsurvDataString = fix_jpsurv(jpsurvDataString)

    print(BOLD + "**** jpsurvDataString ****" + ENDC)
    print(jpsurvDataString)
    print(OKBLUE + "The jpsurv STRING::::::" + ENDC)
    print(jpsurvDataString)
    jpsurvData = json.loads(jpsurvDataString)
    print(BOLD + "**** jpsurvData ****" + ENDC)
    for key, value in jpsurvData.items():
        print("var: %s = %s" % (key, value))

    #Init the R Source
    r.source('./JPSurvWrapper.R')

    print(BOLD + "**** Calling getFittedResultsWrapper ****" + ENDC)
    r.getFittedResultWrapper(UPLOAD_DIR, jpsurvDataString)

    status = '{"status":"OK"}'
    mimetype = 'application/json'
    out_json = json.dumps(status)
    return current_app.response_class(out_json, mimetype=mimetype)
Example #3
def calc_norm_factors(counts_df, method):
    assert method in ['TMM', 'TMMwsp', 'RLE', 'upperquartile']
    from rpy2.robjects import numpy2ri, pandas2ri, r
    numpy2ri.activate()
    pandas2ri.activate()
    r.source(os.path.join('workflow','scripts','utils_calcNormFactors.R'))
    return r.calcNormFactors(counts_df, method=method)
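A hedged usage sketch for calc_norm_factors; it assumes utils_calcNormFactors.R wraps edgeR::calcNormFactors and that counts_df carries genes as rows and samples as columns, as edgeR expects:

import pandas as pd

counts = pd.DataFrame({'s1': [10, 0, 25], 's2': [12, 3, 30]},
                      index=['geneA', 'geneB', 'geneC'])
factors = calc_norm_factors(counts, method='TMM')
print(factors)  # one normalization factor per sample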
Example #4
def compute_degs(dataframe, method, samples, controls, constant_threshold=10, filter_low_expressed=False, min_counts=10):

    # Filter lowly expressed genes
    if filter_low_expressed:
        dataframe = dataframe.loc[[index for index, value in dataframe.sum(axis=1).items() if value > min_counts]]
        print(dataframe.shape)
    # Connect to R
    r.source('scripts/code_library.R')
    pandas2ri.activate()

    # Create design dict
    sample_dict = {'samples': samples, 'controls': controls}

    # Create design dataframe
    design_dataframe = pd.DataFrame({group_label: {sample:int(sample in group_samples) for sample in dataframe.columns} for group_label, group_samples in sample_dict.items()})

    # Convert to R
    dataframe_r = pandas2ri.py2ri(dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)

    # Run
    if method == 'CD':
        signature_dataframe_r = r.apply_characteristic_direction(dataframe_r, design_dataframe_r, constant_threshold)
    elif method == 'limma':
        signature_dataframe_r = r.apply_limma(dataframe_r, design_dataframe_r)
    else:
        raise ValueError('Wrong method supplied. Must be limma or CD.')
 
    # Convert to pandas and sort
    signature_dataframe = pandas2ri.ri2py(signature_dataframe_r)

    # Return
    return signature_dataframe
Example #5
def brt(fname, species_name, gbm_opts, weights):
    """
    Takes the name of a CSV file containing a data frame and a dict
    of options for gbm.step, runs gbm.step, and returns the results.
    """
    from rpy2.robjects import r
    import anopheles_brt
    r.source(os.path.join(anopheles_brt.__path__[0],'brt.functions.R'))
    
    heads = open(os.path.join('anopheles-caches', fname)).readline().split(',')
    weight_str = str(weights.tolist()).replace('[','c(').replace(']',')')
    base_argstr = 'data=read.csv("anopheles-caches/%s"), gbm.x=2:%i, gbm.y=1, family="bernoulli", site.weights=%s, silent=TRUE'%(fname, len(heads), weight_str)
    opt_argstr = ', '.join([base_argstr] + ['%s=%s' % t for t in gbm_opts.items()])

    varname = sanitize_species_name(species_name)

    brt_fname = hashlib.sha1(opt_argstr.encode()).hexdigest() + '.r'
    if brt_fname in os.listdir('anopheles-caches'):
        r('load')(os.path.join('anopheles-caches', brt_fname))
        return r(varname)
    else:
        r('%s<-gbm.step(%s)'%(varname,opt_argstr))
        if str(r(varname))=='NULL':
            raise ValueError('gbm.step returned NULL')
        r('save(%s, file="%s")'%(varname,os.path.join('anopheles-caches', brt_fname)))
        return r(varname)
Example #6
def compute_signature(rawcount_dataframe, method, experimental_samples, control_samples):

    # Connect to R
    r.source('scripts/signature.R')
    pandas2ri.activate()

    # Create design dict
    sample_dict = {'experimental': experimental_samples, 'control': control_samples}

    # Create design dataframe
    design_dataframe = pd.DataFrame({group_label: {sample:int(sample in group_samples) for sample in rawcount_dataframe.columns} for group_label, group_samples in sample_dict.items()})

    # Convert to R
    rawcount_dataframe_r = pandas2ri.py2ri(rawcount_dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)

    # Run
    if method == 'CD':
        signature_dataframe_r = r.run_characteristic_direction(rawcount_dataframe_r, design_dataframe_r)
    elif method == 'limma':
        signature_dataframe_r = r.run_limma(rawcount_dataframe_r, design_dataframe_r)
    else:
        raise ValueError('Wrong method supplied. Must be limma or CD.')

    # Convert to pandas and sort
    signature_dataframe = pandas2ri.ri2py(signature_dataframe_r)

    # Add
    return signature_dataframe
Example #7
def load_active_driver(local_ad=True):
    if local_ad:
        r.source("ActiveDriver/R/ActiveDriver.R")
        # ActiveDriver is in the global namespace now
        return r
    else:
        return importr("ActiveDriver")
Example #8
def ComBat(X, batch, covariate=None, parametric=False, empirical_bayes=True, save_dir=None):
    # Check X
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        if isinstance(X, (list, tuple, np.ndarray, Mapping)):
            df = pd.DataFrame(X)
        else:
            raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')
    else:
        df = X
    row_names = df.index
    r_df = pandas2ri.py2ri(df)
    # Check covariate
    if covariate is None:
        covariate = np.ones((len(batch), 1))
    else:
        if not isinstance(covariate, (list, tuple, np.ndarray)):
            if isinstance(covariate, pd.DataFrame) or isinstance(covariate, pd.Series):
                covariate = covariate.to_numpy()
            else:
                raise TypeError('covariate array must be an array like or pandas Dataframe/Series')
        else:
            covariate = np.array(covariate)
    if len(covariate.shape) == 1:
        covariate = covariate.reshape(-1, 1)
    elif len(covariate.shape) > 2:
        raise ValueError('covariate array must be 1D or 2D')
    nr, nc = covariate.shape
    r_covariate = r.matrix(covariate, nrow=nr, ncol=nc)
    # Check batch
    if not isinstance(batch, (list, tuple, np.ndarray)):
        if isinstance(batch, pd.DataFrame) or isinstance(batch, pd.Series):
            batch = batch.to_numpy()
        else:
            raise TypeError('batch array must be an array like or pandas Dataframe/Series')
    else:
        batch = np.array(batch)
    if len(batch.shape) != 1:
        if len(batch.shape) == 2 and batch.shape[1] == 1:
            batch = batch.reshape(-1)
        else:
            raise ValueError('batch array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(batch)) <= 1:
        raise ValueError('batch array must have at least 2 classes')
    r_batch = Vector(batch)
    # cwd = os.path.dirname(sys.argv[0])
    cwd = os.path.dirname(os.path.abspath(__file__))
    r.setwd(cwd)
    # r.source('./Statistical_analysis/R_scripts/ComBat.R')
    r.source('./R_scripts/ComBat.R')
    r_dr_results = r.ComBat_harmonization(r_df, r_covariate, r_batch, parametric, empirical_bayes)
    R_object_dict = {}
    keys = r_dr_results.names
    for i in range(len(keys)):
        R_object_dict[keys[i]] = np.array(r_dr_results[i])
    results = pd.DataFrame(R_object_dict)
    results.index = row_names
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'Features_ComBat.xlsx'))
    return results
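A toy invocation of the ComBat wrapper above; a sketch only, assuming the sva package and the ComBat.R wrapper script are in place and that the rows of X are samples aligned with batch:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(8, 3)), columns=['f1', 'f2', 'f3'])
batch = [0, 0, 0, 0, 1, 1, 1, 1]  # two batches of four samples
harmonized = ComBat(X, batch, parametric=True)
print(harmonized.shape)  # one row per sample, as in X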
Example #9
def plotIDR(output_file, input_prefixes):
    '''create IDR plots.

    This code is taken from the R script

    batch-consistency-plot.r

    within the IDR package.
    '''

    dirname = os.path.dirname(__file__)
    R.source(os.path.join(dirname, "WrapperIDR.r"))

    R('''df.txt = 10''')

    R('''uri.list <- list()
         uri.list.match <- list()
         ez.list <- list()
         legend.txt <- c()
         em.output.list <- list()
         uri.output.list <- list()''')

    npair = len(input_prefixes)
    for x, input_prefix in enumerate(input_prefixes):

        R.load(input_prefix + "-uri.sav")
        R.load(input_prefix + "-em.sav")
        i = x + 1

        R('''uri.output.list[[%(i)i]] <- uri.output;
              em.output.list[[%(i)i]] <- em.output;
              # reverse =T for error rate;''' % locals())
        R('''
              ez.list[[%(i)i]] <- get.ez.tt.all(em.output, uri.output.list[[%(i)i]]$data12.enrich$merge1,
                                        uri.output.list[[%(i)i]]$data12.enrich$merge2);'''
          % locals())
        R('''
              # URI for all peaks
              uri.list[[%(i)i]] <- uri.output$uri.n;

              # URI for matched peaks
              uri.match <- get.uri.matched(em.output$data.pruned, df=df.txt);
              uri.list.match[[%(i)i]] <- uri.match$uri.n;
         ''' % locals())

        legend = "%(i)i = %(input_prefix)s" % locals()
        R('''
              legend.txt[%(i)i] <- '%(legend)s';
        ''' % locals())

    R.pdf(output_file)
    R('''par(mfcol=c(2,3), mar=c(5,6,4,2)+0.1)''')
    R('''plot.uri.group(uri.list, NULL, file.name=NULL, c(1:%(npair)i), title.txt="all peaks");
         plot.uri.group(uri.list.match, NULL, file.name=NULL, c(1:%(npair)i), title.txt="matched peaks");
         plot.ez.group(ez.list, plot.dir=NULL, file.name=NULL, legend.txt=c(1:%(npair)i), y.lim=c(0, 0.6));
         plot(0, 1, type="n", xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", xaxt="n", yaxt="n"); 
         legend(0, 1, legend.txt, cex=0.6);''' % locals())
    R["dev.off"]()
Example #11
def load_predict_func(file_path):
    """Load Predict Function"""
    LOG.info("Loading predict function from rds file {}".format(file_path))
    try:
        r.source("../mlfmodelserver/deserialize_model.R")
        return r.get(r.deserialize_model(file_path))
    except Exception as generic_exception:
        LOG.error(
            "Exception occurred while deserializing {}".format(generic_exception))
        raise generic_exception
Example #12
def init_topGO():
    try:
        topGO = importr("topGO")
    except:
        print ("It looks like topGO is not installed. Trying to install topGO via" "Bioconductor...")
        try:
            R.source("http://bioconductor.org/biocLite.R")
            R.biocLite("topGO")
            topGO = importr("topGO")
        except:
            print "Problem installing topGO from Bioconductor!"
            print ("Please install manually from: " "http://www.bioconductor.org/packages/2.13/bioc/html/topGO.html")
    return topGO
Example #13
def apply_voom(dataframe):
    # Connect to R
    r.source('scripts/code_library.R')
    pandas2ri.activate()

    # Convert to R
    dataframe_r = pandas2ri.py2ri(dataframe)

    # Run
    signature_dataframe_r = r.apply_voom(dataframe_r)

    # Convert to pandas and sort
    signature_dataframe = pandas2ri.ri2py(signature_dataframe_r)

    # Return
    return signature_dataframe
Example #14
def r(code=None, path=None, rel=True, conda=True, convert=True,
      repo='https://cran.microsoft.com/', **kwargs):
    '''
    Runs the R script and returns the result.

    :arg str code: R code to execute.
    :arg str path: R script path. Cannot be used if code is specified
    :arg bool rel: True treats path as relative to the caller function's file
    :arg bool conda: True overrides R_HOME to use the Conda R
    :arg bool convert: True converts R objects to Pandas and vice versa
    :arg str repo: CRAN repo URL

    All other keyword arguments are passed as R global variables
    '''
    # Use Conda R if possible
    if conda:
        r_home = _conda_r_home()
        if r_home:
            os.environ['R_HOME'] = r_home

    # Import the global R session
    try:
        from rpy2.robjects import r, pandas2ri, globalenv
    except ImportError:
        app_log.error('rpy2 not installed. Run "conda install rpy2"')
        raise
    except RuntimeError:
        app_log.error('Cannot find R. Set R_HOME env variable')
        raise

    # Set a repo so that install.packages() need not ask for one
    r('local({r <- getOption("repos"); r["CRAN"] <- "%s"; options(repos = r)})' % repo)

    # Activate or de-activate automatic conversion
    # https://pandas.pydata.org/pandas-docs/version/0.22.0/r_interface.html
    if convert:
        pandas2ri.activate()
    else:
        pandas2ri.deactivate()

    # Pass all other kwargs as global environment variables
    for key, val in kwargs.items():
        globalenv[key] = val

    if code and path:
        raise RuntimeError('Use r(code=) or r(path=...), not both')
    if path:
        # if rel=True, load path relative to parent directory
        if rel:
            stack = inspect.getouterframes(inspect.currentframe(), 2)
            folder = os.path.dirname(os.path.abspath(stack[1][1]))
            path = os.path.join(folder, path)
        result = r.source(path, chdir=True)
        # source() returns a withVisible: $value and $visible. Use only the first
        result = result[0]
    else:
        result = r(code)

    return result
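Usage follows from the docstring: code or path picks what runs, and keyword arguments land in the R global environment. A sketch (analysis.R is a hypothetical script path):

import pandas as pd

total = r(code='sum(1:10)')[0]                           # 55.0
m = r(code='mean(x)', x=pd.Series([1.5, 2.5, 3.5]))[0]   # x becomes an R global
result = r(path='analysis.R', rel=True)                  # sourced relative to the caller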
Example #15
 def _process(self, *args, **kwargs):
     with localconverter(default_converter + pandas2ri.converter):
         globalenv['get_occurrence_dataframe'] = \
             self.get_occurrence_dataframe
         globalenv['get_plot_dataframe'] = self.get_plot_dataframe
         globalenv['get_plot_occurrence_dataframe'] = \
             self.get_plot_occurrence_dataframe
         globalenv['get_taxon_dataframe'] = self.get_taxon_dataframe
         globalenv['get_raster'] = self.get_raster
         r.source(self.r_script_path)
         process_func = r['process']
         df = pandas2ri.ri2py(process_func())
         if isinstance(df, pd.DataFrame):
             return int32_to_int64(fill_str_empty_with_nan(df)), [], {}
         if len(df) == 1:
             return df[0], [], {}
         return df, [], {}
Example #16
def init_qvalue():
    global __qvalue

    if __qvalue is None:
        try:
            print("Importing qvalue ...")
            qvalue = importr("qvalue")
        except:
            print("It looks like qvalue is not installed. Trying to install qvalue via "
                  "Bioconductor...")
            try:
                R.source("http://bioconductor.org/biocLite.R")
                R.biocLite("qvalue")
                qvalue = importr("qvalue")
            except:
                print "Problem installing qvalue from Bioconductor!"
                print ("Please install manually from: "
                       "http://www.bioconductor.org/packages/release/bioc/html/qvalue.html")
        __qvalue = qvalue
Example #17
def init_topGO():
    global __topGo

    if __topGo is None:
        try:
            print("Importing topGO ...")
            topGO = importr("topGO")
        except:
            print("It looks like topGO is not installed. Trying to install topGO via "
                  "Bioconductor...")
            try:
                R.source("http://bioconductor.org/biocLite.R")
                R.biocLite("topGO")
                topGO = importr("topGO")
            except:
                print "Problem installing topGO from Bioconductor!"
                print ("Please install manually from: "
                       "http://www.bioconductor.org/packages/release/bioc/html/topGO.html")
        __topGo = topGO
Example #18
def compute_prob(size_sample,
                 prior_pg,
                 r_read_file_name="20k_test_elbos.csv",
                 prob_result_file="20k_test_prob_result.csv",
                 working_folder="./"):
    # save args in a txt file for the R script to read
    # todo don't run compute_prob in parallel... filename.txt will be changed
    # names_file = prob_result_file[:-4] + '.txt'
    # save_path = 'filenames_for_r/'
    # if not os.path.exists(save_path):
    #     os.makedirs(save_path)
    with open('filename.txt', 'w') as text_file:
        for line in (r_read_file_name, prob_result_file, working_folder,
                     str(size_sample), str(prior_pg)):
            text_file.write(line + '\n')
    # kernel and local ELBOs
    # training_results = "20k_test_elbos.pkl",
    # with open(training_results, "rb") as fin:
    #     valid_ker = pickle.load(fin)
    #     elbos = pickle.load(fin)

    # save as csv file
    # "kernels" "L_i"
    # data = {'kernels': valid_ker,
    #         'L_i': np.array(elbos).reshape(-1)}
    # print(data)
    # df = pd.DataFrame(data)
    # print(df)
    # df.to_csv(r_read_file_name, index=None)
    # compute probability
    r.setwd('~/BKS/src/R_bks')
    r.source('bks_run_global_python.R')
Example #19
 def hierarchical_clust_parmar(self, X, y=None):
     """
     Consensus Clustering with hierarchical clustering as described in :
         Radiomic feature clusters and Prognostic Signatures specific for Lung and Head & Neck cancer.
         Parmar et al., Scientific Reports, 2015
     """
     df = pd.DataFrame(X)
     r_df = pandas2ri.py2ri(df)
     cwd = os.path.dirname(sys.argv[0])
     r.setwd(cwd)
     r.source(
         './Statistical_analysis/R_scripts/hierarchical_clustering_Parmar.R'
     )
     if self.cluster_reduction in self.cluster_reduction_methods:
         r_dr_results = r.hierarchical_clustering_parmar(
             r_df,
             max_k=20,
             threshold=1 - self.threshold,
             corr_metric=self.corr_metric,
             cluster_reduction=self.cluster_reduction)
     else:
         raise ValueError(
             'cluster_reduction must be one of : %s. '
             '%s was passed' %
             (self.cluster_reduction_methods, self.cluster_reduction))
     R_object_dict = {}
     keys = r_dr_results.names
     for i in range(len(keys)):
         R_object_dict[keys[i]] = np.array(r_dr_results[i])
     dr_results = pd.DataFrame(R_object_dict).to_numpy()
     self.cluster_labels = dr_results[:, 0]
     nb_cluster = np.amax(dr_results[:, 0]).astype(int)
     coefficient_matrix = np.zeros(
         (dr_results.shape[0],
          nb_cluster))  # Shape of (n_features, nb cluster)
     for i in range(nb_cluster):
         coefficient_matrix[:, i] = np.where(dr_results[:, 0] == i + 1,
                                             dr_results[:, 1], 0)
     coefficient_matrix = coefficient_matrix.T
     return coefficient_matrix
Example #20
def run_cd(rawcount_dataframe, experimental_samples, control_samples, method,
           signature_name):

    # Connect to R
    r.source('/Users/denis/Documents/Projects/scripts/Scripts.R')
    pandas2ri.activate()

    # Create design dict
    sample_dict = {
        'experimental': experimental_samples,
        'control': control_samples
    }

    # Create design dataframe
    design_dataframe = pd.DataFrame({
        group_label: {
            sample: int(sample in group_samples)
            for sample in rawcount_dataframe.columns
        }
        for group_label, group_samples in sample_dict.items()
    })

    # Convert to R
    rawcount_dataframe_r = pandas2ri.py2ri(rawcount_dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)

    # Run
    cd_dataframe_r = r.run_characteristic_direction(rawcount_dataframe_r,
                                                    design_dataframe_r)

    # Convert to pandas and sort
    cd_dataframe = pandas2ri.ri2py(cd_dataframe_r)

    # Add mean expression
    # signature_dataframe['AveExpr'] = rawcount_dataframe.loc[signature_dataframe.index].apply(np.average, axis=1)

    # Add
    return cd_dataframe
Example #21
def stage4_trends_calculate():

    print('Go')

    print(OKGREEN + UNDERLINE + BOLD + "****** Stage 4: Trends BUTTON ***** " +
          ENDC)
    print("Recalculating ...")
    print(BOLD + "**** Calling getTrendsData ****" + ENDC)

    jpsurvDataString = request.args.get('jpsurvData', False)
    jpsurvDataString = fix_jpsurv(jpsurvDataString)

    #Init the R Source
    r.source('./JPSurvWrapper.R')

    # The next line executes the R program
    r.getTrendsData(UPLOAD_DIR, jpsurvDataString)

    status = '{"status":"OK"}'
    mimetype = 'application/json'
    out_json = json.dumps(status)

    return current_app.response_class(out_json, mimetype=mimetype)
Example #22
def init_biomaRt():

    global __biomaRt
    global __mart
    if __biomaRt is None:

        try:
            print("Importing biomaRt ...")
            biomaRt = importr("biomaRt")
        except:
            print("It looks like biomaRt is not installed. Trying to install biomaRt via "
                  "Bioconductor...")
            try:
                R.source("http://bioconductor.org/biocLite.R")
                R.biocLite("biomaRt")
                biomaRt = importr("biomaRt")
            except:
                print "Problem installing biomaRt from Bioconductor!"
                print ("Please install manually from: "
                       "http://www.bioconductor.org/packages/release/bioc/html/biomaRt.html")

        __biomaRt = biomaRt
        __mart = R.useMart(biomart = "ensembl", dataset = __mart_dataset)
Example #23
def univariate_analysis(X, y, adjusted_method='BH', save_dir=None):
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        if isinstance(X, (list, tuple, np.ndarray, Mapping)):
            if len(np.array(X).shape) != 2:
                raise ValueError('X array must be 2D')
            X = pd.DataFrame(X)
        else:
            raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')
    if not isinstance(y, (list, tuple, np.ndarray)):
        if isinstance(y, pd.DataFrame) or isinstance(y, pd.Series):
            y = y.to_numpy()
        else:
            raise TypeError('y array must be an array like or pandas Dataframe/Series')
    else:
        y = np.array(y)
    if len(y.shape) != 1:
        if len(y.shape) == 2 and y.shape[1] == 1:
            y = y.reshape(-1)
        else:
            raise ValueError('y array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(y)) <= 1:
        raise ValueError('y array must have at least 2 classes')
    r_X = pandas2ri.py2ri(X)
    r_y = Vector(y)
    cwd = os.path.dirname(sys.argv[0])
    r.setwd(cwd)
    r.source('./Statistical_analysis/R_scripts/univariate_analysis.R')
    r_dr_results = r.univariate_analysis(r_X, r_y, adjusted_method=adjusted_method)
    R_object_dict = {}
    keys = r_dr_results.names
    for i in range(len(keys)):
        R_object_dict[keys[i]] = np.array(r_dr_results[i])
    results = pd.DataFrame(R_object_dict)
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'univariate_stats_analysis.xlsx'))
    return results
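A toy call for univariate_analysis (a sketch; it assumes the univariate_analysis.R script resolves on the relative path used above):

import numpy as np

X = np.random.normal(size=(20, 5))   # 20 samples, 5 features
y = [0] * 10 + [1] * 10              # two classes
stats = univariate_analysis(X, y, adjusted_method='BH')
print(stats.head())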
Example #24
def main():
    gn = Granatum()
    assay_df = gn.pandas_from_assay(gn.get_import('assay'))
    grdict = gn.get_import('groupVec')
    phe_dict = pd.Series(grdict)
    groups = set(parse(gn.get_arg('groups')))

    inv_map = {}
    for k, v in grdict.items():
        if v in groups:
            inv_map[v] = inv_map.get(v, []) + [k]
    cells = []
    for k, v in inv_map.items():
        cells.extend(v)
    assay_df = assay_df.loc[:, cells]
    assay_df = assay_df.sparse.to_dense().fillna(0)
    #assay_mat = r['as.matrix'](pandas2ri.py2ri(assay_df))
    # assay_mat = r['as.matrix'](conversion.py2rpy(assay_df))
    phe_vec = phe_dict[assay_df.columns]

    r.source('./drive_DESeq2.R')
    ret_r = r['run_DESeq'](assay_df, phe_vec)
    ret_r_as_df = r['as.data.frame'](ret_r)

    # ret_py_df = pandas2ri.ri2py(ret_r_as_df)
    # TODO: maybe rename the columns to be more self-explanatory?
    result_df = ret_r_as_df
    result_df = result_df.sort_values('padj')
    result_df.index.name = 'gene'
    gn.add_pandas_df(result_df.reset_index(),
                     description='The result table as returned by DESeq2.')
    gn.export(result_df.to_csv(), 'DESeq2_results.csv', raw=True)
    significant_genes = result_df.loc[
        result_df['padj'] < 0.05]['log2FoldChange'].to_dict()
    gn.export(significant_genes, 'Significant genes', kind='geneMeta')
    gn.commit()
Example #25
def run_limma(rawcount_dataframe, experimental_samples, control_samples,
              signature_name):

    # Connect to R
    # r.source('/Users/denis/Documents/Projects/scripts/Scripts.R')
    r.source(
        '/Users/maayanlab/Library/Mobile Documents/com~apple~CloudDocs/Documents/Projects/scripts/Scripts.R'
    )
    pandas2ri.activate()

    # Create design dict
    sample_dict = {
        'experimental': experimental_samples,
        'control': control_samples
    }

    # Create design dataframe
    design_dataframe = pd.DataFrame({
        group_label: {
            sample: int(sample in group_samples)
            for sample in rawcount_dataframe.columns
        }
        for group_label, group_samples in sample_dict.items()
    })

    # Convert to R
    rawcount_dataframe_r = pandas2ri.py2ri(rawcount_dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)

    # Run
    limma_dataframe_r = r.run_limma(rawcount_dataframe_r, design_dataframe_r)

    # Convert to pandas and sort
    limma_dataframe = pandas2ri.ri2py(limma_dataframe_r).sort_values('P.Value')

    return limma_dataframe
Example #27
def show1():
	open1()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')
	pp = ggplot2.ggplot(data)+ggplot2.aes_string(x='project', y='time',fill = 'project')+ggplot2.geom_bar(stat ='identity')+ggplot2.ggtitle("Today's project time distribution")+ggplot2.labs(x='Project',y='Time (min)')+ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle = 45)})
	pp.plot()
Example #28
def generate_var():  # FIXME: make a test?
    import pandas.rpy.common as prp
    from rpy2.robjects import r

    r.source("tests/var.R")
    return prp.convert_robj(r["result"], use_pandas=False)
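pandas.rpy was removed from pandas long ago; under current rpy2 the same idea would look roughly like this (a sketch, assuming tests/var.R leaves a result object in the R global environment):

from rpy2.robjects import r, default_converter, numpy2ri
from rpy2.robjects.conversion import localconverter

def generate_var_rpy2():
    r.source("tests/var.R")
    # convert the R object to numpy on access, mirroring use_pandas=False
    with localconverter(default_converter + numpy2ri.converter):
        return r["result"]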
Example #29
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv: argv = sys.argv

    # setup command line parser
    parser = E.OptionParser( version = "%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $", 
                                    usage = globals()["__doc__"] )

    parser.add_option("-a", "--gtf-a", dest="gtf_a", type="string",
                      help="supply a gtf file - will compress uncompressed files"  )
    parser.add_option("-b", "--gtf-b", dest = "gtf_b", type = "string",
                      help="supply a second gtf file - will compress uncompressed files")
    parser.add_option("-s", "--scripts-dir", dest = "scripts_dir", type = "string",
                      help="supply a location for accessory scripts")
    parser.add_option( "--no-venn", dest = "no_venn", action="store_true", 
                      help="set if no venn is to be drawn")

    
    ## add common options (-h/--help, ...) and parse command line 
    (options, args) = E.Start( parser, argv = argv )

    gtf_files = [options.gtf_a, options.gtf_b]

    merged_files = []
    prefices = []
    E.info("merging gtf files")
    for gtf in gtf_files:
        if gtf.endswith(".gtf.gz"):
            outfile = P.snip(gtf, ".gtf.gz") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf, ".gtf.gz"))
            merged_files.append(outfile)
            statement = '''zcat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip > %s''' % (gtf, options.scripts_dir, outfile, outfile)
            P.run()
        elif gtf.endswith(".gtf"):
            outfile = P.snip(gtf, ".gtf") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf,".gtf"))
            merged_files.append(outfile)
            statement = '''cat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip  > %s''' % (gtf, options.scripts_dir, outfile, outfile)
            P.run()
        else:
            raise ValueError("cannot perform merge on %s: is not a gtf file" % gtf)

    for prefix in prefices:
        if options.gtf_a.find(prefix) != -1:
            gtf_a = prefix + ".merged.gtf.gz"
            prefix_a = prefix
        elif options.gtf_b.find(prefix) != -1:
            gtf_b = prefix + ".merged.gtf.gz"
            prefix_b = prefix

    E.info("intersecting gtf files")
    # intersect the resulting merged files

    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join([prefix_a, prefix_b]) + ".intersection.gtf.gz" 
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa
                 | python %(scriptsdir)s/gtf2gtf.py --merge-transcripts --log=log | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..." % (options.gtf_a, options.gtf_b))
        # produce the venn diagram
        intersection_file = intersection_out
        gtf_a_merged = gtf_a
        gtf_b_merged = gtf_b

        # create dictionary key
        gtf_pair = (gtf_a_merged, gtf_b_merged)

        # containers for counts
        count_gtf_merged_a = 0
        count_gtf_merged_b = 0
        count_intersection = 0

        # create GTF iterator objects
        gtf_iterator_a = GTF.iterator(IOTools.openFile(gtf_pair[0]))
        gtf_iterator_b = GTF.iterator(IOTools.openFile(gtf_pair[1]))
        gtf_iterator_intersection = GTF.iterator(IOTools.openFile(intersection_file))

        # do the counts for each file
        E.info("counting entries in %s" % gtf_a)
        for entry in gtf_iterator_a:
            count_gtf_merged_a += 1
        print "counts for gtf-a: ",count_gtf_merged_a

        E.info("counting entries in %s" % gtf_b)
        for entry in gtf_iterator_b:
            count_gtf_merged_b += 1
        print "counts for gtf-b: ",count_gtf_merged_b

        E.info("counting entries in %s" % intersection_file)
        for entry in gtf_iterator_intersection:
            count_intersection += 1
        print "counts for intersection: ", count_intersection

        # this is the important bit - basically take an arbitrary list of numbers to represent the list of lincrna in the refnoncoding set
        # then use the intersection count to represent the overlapping section in the lincrna set and add a set of random numbers to this 
        # set to make up the remaining - non-overlapping set

        result = {}
        E.info("assembling count lists")
        result[gtf_pair] = {"gtf-b": [str(x) for x in range(count_gtf_merged_b)],
                            "gtf-a": ([str(x) for x in range(count_intersection)] +
                                      [str(random.random()) for i in range(count_intersection, count_gtf_merged_a)])}

        R_source = os.path.join(os.path.abspath(options.scripts_dir), "venn_diagram.R")
        R.source(R_source)

        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")
        
        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b) 
        E.info("drawing venn diagram to %s" % (prefix_a + "_vs_" + prefix_b + ".overlap.png"))
        
        R["venn.diagram2"](R.list( A = result[gtf_pair]["gtf-a"], B = result[gtf_pair]["gtf-b"])
        , prefix_a + "_vs_" + prefix_b + ".overlap.png"
        , **{'cat.cex': 1.5
             , 'main.fontfamily': "Arial"
             , 'cat.pos':FloatVector((0,0))
             , 'cat.fontfamily':"Arial"
             , 'main.cex':1.8                                                                                                                                                                                                              
             , 'height':1000
             , 'width':1000
             , 'cex':2                                                                                                                                                                                                                      
             , 'fontfamily':"Arial"                                                                                                                                                                                                         
             , 'lwd':R.c(1,1)                                                                                                                                                                                                               
             , 'fill':R.c(R.rgb(0,0,0.5,0.5), R.rgb(0.5,0,0,0.5))                                                                                                                                                         
             , 'category.names':R.c(prefix_a, prefix_b) 
             , 'margin' : R.c(0.1,0.1,0.1,0.1)
             })

    ## write footer and output benchmark information.
    E.Stop()
Example #30
#############################################
########## 2. General Setup
#############################################
##### 1. Variables #####
dataset_names = {
	'HMS_Dataset_20303': 'cytosolic-24h',
	'HMS_Dataset_20304': 'nuclear-24h',
	'HMS_Dataset_20305': 'cytosolic-48h',
	'HMS_Dataset_20306': 'nuclear-48h',
	'HMS_Dataset_20307': 'cytosolic-72h',
	'HMS_Dataset_20308': 'nuclear-72h',
}

##### 2. R Connection #####
rSource = 'pipeline/scripts/pipeline-mcf10a-cycif.R'
r.source(rSource)

##### 3. Files #####
concatenatedExpressionFile = 's1-data.dir/HMS_Datasets-merged_filtered.txt'
differentialExpressionFiles = glob.glob('s2-gene_differential_expression.dir/*')

#######################################################
#######################################################
########## S1. Process Data
#######################################################
#######################################################

#############################################
########## 1. Process dataset
#############################################
Example #31
 def _read_source(self):
     r.source(self.source_file)
Example #32
from flask import Flask, request
from rpy2.robjects import r
import traceback

app = Flask(__name__)
r.source('crosstalkWrapper.R')


@app.route('/calculate/', methods=['POST'])
def calculate():
    try:
        return r.calculate(request.stream.read())[0]

    except Exception as e:
        print('------------EXCEPTION------------')
        traceback.print_exc(1)
        return str(e), 400


@app.after_request
def after_request(response):
    response.headers.add('Access-Control-Allow-Origin', '*')
    response.headers.add('Access-Control-Allow-Headers',
                         'Content-Type,Authorization')
    response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE')
    return response


import argparse
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
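Exercising the /calculate/ endpoint from Python (a sketch; host, port, and input.json are hypothetical):

import requests

with open('input.json', 'rb') as f:
    resp = requests.post('http://localhost:5000/calculate/', data=f)
print(resp.status_code, resp.text)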
Example #33
import time
import json
import io
from flask import Flask, send_file, request, jsonify, make_response
# from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
# from rpy2.robjects.vectors import IntVector, FloatVector
from rpy2.robjects import r
import traceback
from socket import gethostname
import tempfile, os
import random
import os, base64
import uuid
from util import *

r.source('LCWrapper.R')

# Initialize the Flask application
if __name__ == '__main__':
    app = Flask(__name__,
                static_folder='.',
                static_url_path='',
                template_folder='.')
else:
    app = Flask(__name__)


@app.route('/')
def index():
    return send_file('{}/index.html'.format(CLIENT_APP_FOLDER))
Example #34
#################################################################
#################################################################

#############################################
########## 1. Load libraries
#############################################
##### 1. General support #####
import scipy.stats as ss
import numpy as np
import warnings
import os
from rpy2.robjects import r, pandas2ri
pandas2ri.activate()

##### 2. R #####
r.source(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'normalize.R'))

#######################################################
#######################################################
########## S1. Dataset Normalization
#######################################################
#######################################################

#############################################
########## 1. logCPM
#############################################


def logCPM(dataset):

    # Get raw data
Example #35
def runIDR(options, peakfile1, peakfile2):
    '''run IDR analysis.

    This code is taken from the R script

    batch-consistency-analysis.r
    '''

    if options.half_width is not None:
        R.assign("half.width", options.half_width)
    else:
        R('''half.width = NULL''')
    R.assign("overlap.ratio", options.overlap_ratio)
    R.assign("is.broadpeak", options.is_broadpeak)
    R.assign("sig.value", options.signal_value)

    dirname = os.path.dirname(__file__)
    R.source(os.path.join(dirname, "WrapperIDR.r"))

    # read the length of the chromosomes, which will be used to concatenate
    # chr's
    R('''chr.size <- read.table('%s', sep='\t')''' %
      options.filename_chromosome_table)

    output_prefix = options.output_prefix
    output_uri = output_prefix + "-uri.sav"
    output_em = output_prefix + "-em.sav"
    output_overlapped_peaks = output_prefix + "-overlapped-peaks.txt"
    output_peaks_above_idr = output_prefix + "-npeaks-aboveIDR.txt"

    # process data, summit: the representation of the location of summit
    E.info("loading data")
    R('''rep1 <- process.narrowpeak('%(peakfile1)s', chr.size, 
                     half.width=half.width, summit="offset", broadpeak=is.broadpeak)'''
      % locals())
    R('''rep2 <- process.narrowpeak('%(peakfile2)s', chr.size, 
                     half.width=half.width, summit="offset", broadpeak=is.broadpeak)'''
      % locals())

    E.info("replicate 1: read %s: %i peaks, %i after filtering" %
           (peakfile1, R('''nrow(rep1$data.ori)''')[0],
            R('''nrow(rep1$data.cleaned)''')[0]))
    E.info("replicate 2: read %s: %i peaks, %i after filtering" %
           (peakfile2, R('''nrow(rep2$data.ori)''')[0],
            R('''nrow(rep2$data.cleaned)''')[0]))

    E.info("computing correspondence profile (URI)")

    R('''uri.output <- compute.pair.uri(rep1$data.cleaned, rep2$data.cleaned, 
                                        sig.value1=sig.value, sig.value2=sig.value, 
                                        overlap.ratio=overlap.ratio)''')
    E.info("saving correspondence profile to %s" % output_uri)
    R('''save(uri.output, file='%(output_uri)s') ''' % locals())

    E.info("computing EM procedure for inference")
    R('''em.output <- fit.em(uri.output$data12.enrich, fix.rho2=T)''')
    E.info("saving EM to %s" % output_em)
    R('''save(em.output, file='%(output_em)s') ''' % locals())

    # write em output into a file
    # cat(paste("EM estimation for the following files\n", peakfile1, "\n", peakfile2, "\n", sep=""))

    options.stdout.write("em_estimation\n%s\n" %
                         str(R('''em.output$em.fit$para''')))

    # add on 3-29-10
    # output both local idr and IDR
    E.info("writing overlapped peaks to %s" % output_overlapped_peaks)
    R('''idr.local <- 1-em.output$em.fit$e.z''')
    R('''IDR <- c()''')
    R('''o <- order(idr.local)''')
    R('''IDR[o] <- cumsum(idr.local[o])/c(1:length(o))''')
    R('''
    write.out.data <- data.frame(chr1=em.output$data.pruned$sample1[, "chr"],
                                 start1=em.output$data.pruned$sample1[, "start.ori"],
                                 stop1=em.output$data.pruned$sample1[, "stop.ori"],
                                 sig.value1=em.output$data.pruned$sample1[, "sig.value"],
                                 chr2=em.output$data.pruned$sample2[, "chr"],
                                 start2=em.output$data.pruned$sample2[, "start.ori"],
                                 stop2=em.output$data.pruned$sample2[, "stop.ori"],
                                 sig.value2=em.output$data.pruned$sample2[, "sig.value"],
                                 idr.local=1-em.output$em.fit$e.z, IDR=IDR)
    ''')
    R('''write.table(write.out.data, file='%(output_overlapped_peaks)s')''' %
      locals())

    # number of peaks passing IDR range (0.01-0.25)
    E.info("computing number of peaks at various thresholds")
    R('''IDR.cutoff <- seq(0.01, 0.25, by=0.01)''')
    R('''idr.o <- order(write.out.data$idr.local)''')
    R('''idr.ordered <- write.out.data$idr.local[idr.o]''')
    R('''IDR.sum <- cumsum(idr.ordered)/c(1:length(idr.ordered))''')
    R('''
    IDR.count <- c()
    n.cutoff <- length(IDR.cutoff)
    for(i in 1:n.cutoff){
        IDR.count[i] <- sum(IDR.sum <= IDR.cutoff[i])
        }
    ''')

    # write the number of peaks passing various IDR ranges into a file
    E.info(
        "writing number of peaks above IDR cutoffs in range [0.01, 0.25] to %s"
        % output_peaks_above_idr)
    R('''idr.cut <- data.frame( cutoff=IDR.cutoff, count=IDR.count)''')
    R('''write.table(idr.cut, file='%(output_peaks_above_idr)s', quote=F, 
                     row.names=F, col.names=T, sep='\t')''' % locals())

    R('''mar.mean <- get.mar.mean(em.output$em.fit)''')
    options.stdout.write("marginal mean of two components\n%s\n)" %
                         R('''print(mar.mean)'''))
Example #36
def show4():
	open4()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')
	pp = ggplot2.ggplot(data)+ggplot2.aes_string(x='day', y='time',fill = 'factor(project)')+ggplot2.geom_bar(stat ='identity',position = 'dodge')+ggplot2.ggtitle("Time comparison between two projects")+ggplot2.labs(x='Date',y='Time (min)')+ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle = 45)})
	pp.plot()
Example #37
def compute_far(plot_pdf,
                data_rds,
                yvarname,
                xvarname,
                f_yhist,
                f_yrcp,
                f_xhist,
                f_xrcp,
                y_compute_ano=True,
                y_start_ano="1961-01-01T00:00:00",
                y_end_ano="1990-12-31T23:59:59",
                y_bbox='-127,-65,25,50',
                y_season='DJF',
                y_first_spatial=True,
                y_spatial_aggregator="mean",
                y_time_aggregator="mean",
                x_compute_ano=True,
                x_start_ano="1961-01-01T00:00:00",
                x_end_ano="1990-12-31T23:59:59",
                x_bbox='-127,-65,25,50',
                x_season='DJF',
                x_first_spatial=True,
                x_spatial_aggregator="mean",
                x_time_aggregator="mean",
                stat_model="gauss_fit",
                qthreshold=0.9,
                nbootstrap=250):

    LOGGER.debug('initialization')
    xname = "x_" + xvarname
    yname = "y_" + yvarname
    LOGGER.debug('bug0!!!')

    run_tokeep = select_run_tokeep(f_xhist, f_xrcp, f_yhist, f_yrcp)
    xvar, xyear = prepare_dat(varname=xvarname,
                              lfhist=f_xhist,
                              lfrcp=f_xrcp,
                              run_tokeep=run_tokeep,
                              compute_ano=x_compute_ano,
                              start_ano=x_start_ano,
                              end_ano=x_end_ano,
                              first_spatial=x_first_spatial,
                              spatial_aggregator=x_spatial_aggregator,
                              time_aggregator=x_time_aggregator,
                              bbox=x_bbox,
                              season=x_season)

    LOGGER.debug('bug2!!!')
    yvar, yyear = prepare_dat(varname=yvarname,
                              lfhist=f_yhist,
                              lfrcp=f_yrcp,
                              run_tokeep=run_tokeep,
                              compute_ano=y_compute_ano,
                              start_ano=y_start_ano,
                              end_ano=y_end_ano,
                              first_spatial=y_first_spatial,
                              spatial_aggregator=y_spatial_aggregator,
                              time_aggregator=y_time_aggregator,
                              bbox=y_bbox,
                              season=y_season)

    LOGGER.info('data prepared')
    dfx = {}
    dfx['year'] = xyear
    dfx[xname] = xvar
    dfx = pandas.DataFrame.from_dict(dfx)
    dfy = {}
    dfy['year'] = yyear
    dfy[yname] = yvar
    dfy = pandas.DataFrame.from_dict(dfy)
    if all(dfx['year'] == dfy['year']):
        df = pandas.concat([dfx, dfy[yname]], axis=1)[['year', yname, xname]]
    else:
        raise Exception("years of x and y not corresponding")

    Rsrc = config.Rsrc_dir()
    # import rpy2's package module
    import rpy2.robjects.packages as rpackages
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()
    from rpy2.robjects import r
    from rpy2.robjects.packages import importr
    # import R's utility package
    utils = importr('utils')
    r.source(join(Rsrc, "compute_and_plot_far.R"))
    farg = importr("FARg")
    LOGGER.debug('rcode prepared')

    if (stat_model == "gauss_fit"):
        far = r.compute_and_plot_far(mdata=df,
                                     yvar=yname,
                                     xvar=xname,
                                     tvar="year",
                                     xp=1.6,
                                     R=nbootstrap,
                                     stat_model=farg.gauss_fit,
                                     ci_p=0.9,
                                     pdf_name=plot_pdf)
    if (stat_model == "gev_fit"):
        far = r.compute_and_plot_far(mdata=df,
                                     yvar=yname,
                                     xvar=xname,
                                     tvar="year",
                                     xp=1.6,
                                     R=nbootstrap,
                                     stat_model=farg.gev_fit,
                                     ci_p=0.9,
                                     pdf_name=plot_pdf)
    if (stat_model == "gpd_fit"):
        far = r.compute_and_plot_far(mdata=df,
                                     yvar=yname,
                                     xvar=xname,
                                     tvar="year",
                                     xp=1.6,
                                     R=nbootstrap,
                                     stat_model=farg.gpd_fit,
                                     ci_p=0.9,
                                     qthreshold=qthreshold,
                                     pdf_name=plot_pdf)

    LOGGER.debug('far computed')

    r.saveRDS(far, file=data_rds)
Example #38
#################################################################
############### Generate Signature
#################################################################
#################################################################

#############################################
########## 1. Load libraries
#############################################
##### 1. General support #####
import os
import pandas as pd
from rpy2.robjects import r, pandas2ri
pandas2ri.activate()

##### 2. R #####
r.source(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'R', 'signature.R'))

#######################################################
#######################################################
########## S1. Design Matrix
#######################################################
#######################################################

def make_design_matrix(expression_dataframe, group_A, group_B):

	# Sample names
	group_A = [x.replace(':', '.').replace('-', '.') for x in group_A]
	group_B = [x.replace(':', '.').replace('-', '.') for x in group_B]
	expression_dataframe.columns = [x.replace(':', '.').replace('-', '.') for x in expression_dataframe.columns]

	# Get expression dataframe
Example #39
def run_cummeRbund_install():
    """
    Installs cummeRbund via R, surfacing all R output and prompts to the user.
    """
    r.source("http://bioconductor.org/biocLite.R")
    r.biocLite('cummeRbund')
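biocLite.R was retired with Bioconductor 3.8; on a current setup the equivalent install goes through BiocManager (a sketch):

from rpy2.robjects import r

r('if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")')
r('BiocManager::install("cummeRbund")')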
Example #40
from sklearn.cluster import KMeans
from bag import *
import numpy as np
from rpy2.robjects import r
from rpy2.robjects import pandas2ri
pandas2ri.activate()
from tabulate import tabulate
import time
#import warnings
#warnings.simplefilter("error")


t0 = time.perf_counter()
# set environment in order to access and call functions there
r.source("gennonnormal.r")
   
# Set parameters
k = 8   # num centroids
percent = .6    # percent to hold out from center.
n = 1

# pull in parameter matrix from gennonnormal.r
params = np.array(r.params())
print(params)
# store results in dict for comparison
prop_= [] 
prop_reg_ = [] 
rsq_ = []
rsq_reg_ = [] 
Example #41
from flask import Flask, request
from rpy2.robjects import r

app = Flask(__name__, static_folder='', static_url_path='')
r.source('apcWrapper.R')


@app.route('/calculate/', methods=['POST'], strict_slashes=False)
def calculate():
    return r.calculate(request.data.decode())[0]


@app.route('/apcRest/ping/', strict_slashes=False)
@app.route('/ping/', strict_slashes=False)
def ping():
    return r('"true"')[0]


@app.errorhandler(Exception)
def error_handler(e):
    """ Ensure errors are logged and returned """
    app.logger.error(str(e))
    return str(e), 400


@app.after_request
def after_request(response):
    response.headers.add('Access-Control-Allow-Origin', '*')
    response.headers.add('Access-Control-Allow-Headers',
                         'Content-Type,Authorization')
    response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE')