Exemple #1
0
def run_lrgs(x, y, err_x, err_y, _xycov=None, nmc=500, dirichlet=True):
    '''
    Fit y against x with the R "lrgs" Gibbs sampler, called through rpy2.

    x, y         -- data values (the original author expects scaled/log form).
    err_x, err_y -- per-point values placed on the diagonal of each 2x2
                    covariance matrix handed to the sampler.
    _xycov       -- accepted for signature compatibility; not used here, so
                    off-diagonal covariance terms stay at zero.
    nmc          -- length of the Markov chain.
    dirichlet    -- forwarded to Gibbs_regression as an R logical.

    Returns the tuple (intercept, slope, sigma) of chains, where sigma is the
    square root of the first element of the scatter chain.
    '''

    # pylint: disable = too-many-arguments
    # pylint: disable = too-many-locals

    # All four inputs must hold the same number of elements.
    assert np.size(x) == np.size(y)
    assert np.size(err_x) == np.size(err_y)
    assert np.size(x) == np.size(err_x)

    # R-side copies of the data and of the uncertainties.
    rx = robjects.FloatVector(x)
    ry = robjects.FloatVector(y)
    rx_err = robjects.FloatVector(err_x)
    ry_err = robjects.FloatVector(err_y)

    # One 2x2 matrix per data point, zero except for the diagonal.
    n_points = np.size(rx)
    M = RARRAY(0.0, dim=RC(2, 2, n_points))
    for point in range(n_points):
        r_index = point + 1  # R indexing starts at 1
        M.rx[1, 1, r_index] = rx_err[point]
        M.rx[2, 2, r_index] = ry_err[point]

    # R logical equivalent of the Python flag.
    if dirichlet:
        r_dirichlet = robjects.BoolVector([True])
    else:
        r_dirichlet = robjects.BoolVector([False])

    # Run the MCMC.
    posterior = RLRGS.Gibbs_regression(
        rx, ry, M, nmc,
        dirichlet=r_dirichlet,
        trace='bsg',
        mention_every=50)

    # posterior[0] is the parameter chain, posterior[1] the scatter chain
    # (only the intrinsic-scatter element [0][0] is used for the moment).
    param_chain = np.array(posterior[0])
    scatter_chain = np.array(posterior[1])[0][0]

    # Same return layout as run_linmix.
    return (param_chain[0][0], param_chain[1][0], np.sqrt(scatter_chain))
Exemple #2
0
def run_lme(signal, mask):
    """Fit a linear mixed-effects model to the masked part of `signal`.

    Entries where ``mask > 0.5`` are kept. The module-level R vectors
    `subject`, `visit` and `group` are subset with the same mask and bound
    into the formula environment.

    Returns a length-8 numpy array:
      [0]    number of selected entries
      [1]    'df' entry at index 2 of the coefficient table
      [2:5]  'Estimate' column
      [5:8]  't value' column
    Slots 1..7 stay zero when 4 or fewer entries are selected, or when R
    raises an RRuntimeError while fitting.
    """
    selected = mask > 0.5
    good_voxels = np.sum(selected)

    effects = ro.Formula('signal ~ group + visit +  (1|subject)')
    env = effects.environment
    rm = ro.BoolVector(selected)
    env["mask"] = rm
    env["signal"] = ro.FloatVector(signal).rx(rm)
    # Covariates, restricted to the same entries as the signal.
    env["subject"] = subject.rx(rm)
    env["visit"] = visit.rx(rm)
    env["group"] = group.rx(rm)

    # Output buffer; slot 0 always records how much data was available.
    result = np.zeros(8)
    result[0] = good_voxels

    if not good_voxels > 4:
        # Not enough information to fit anything.
        return result

    try:
        # Run the linear mixed-effects model and summarise it.
        summary = base.summary(lme.lmer(effects))
        # Degrees of freedom (for the visit).
        result[1] = summary.rx2('coefficients').rx(True, 'df')[2]
        # Coefficients.
        result[2:5] = summary.rx2('coefficients').rx(True, 'Estimate')[:]
        # t-values.
        result[5:8] = summary.rx2('coefficients').rx(True, 't value')[:]
    except RRuntimeError:
        # Probably the model did not converge; keep whatever was filled in.
        pass

    return result
    def get_response_matrix(self):
        """Build an R data frame with one logical column per question.

        Columns are keyed 1..test_length. A question flagged `discard` gets a
        column of NA values; every other question gets each student's
        `is_right(question_index)` result, in the order of `self.students`.

        Only the `discard` flag is checked here; questions that everybody got
        right (or wrong) are not filtered by this method.
        """
        student_count = len(self.students)
        columns = {}

        for question_index in range(self.test_length):
            if self.questions[question_index].discard:
                # Discarded question: a vector of NA objects.
                responses = [robjects.NA_Logical] * student_count
            else:
                # One response per student.
                responses = [student.is_right(question_index)
                             for student in self.students]

            # R-side column; keys are 1-based.
            columns[question_index + 1] = robjects.BoolVector(responses)

        # Convert the dictionary of vectors to a data frame.
        return robjects.DataFrame(columns)
Exemple #4
0
def convert_dict(obj):
    """Convert an iterable of Python values to the matching rpy2 vector.

    The element types decide the result:
      * all str                 -> ro.StrVector
      * all bool                -> ro.BoolVector
      * all int (bools allowed) -> ro.IntVector
      * all int/float           -> ro.FloatVector
      * anything else           -> ro.ListVector

    bool is tested before int because bool is a subclass of int, and a
    sequence containing any float becomes a FloatVector so that fractional
    values are never routed to an integer vector. An empty `obj` yields an
    empty StrVector, since the first test is vacuously true.
    """
    if all(isinstance(x, str) for x in obj):
        return ro.StrVector(obj)
    if all(isinstance(x, bool) for x in obj):
        return ro.BoolVector(obj)
    if all(isinstance(x, int) for x in obj):
        return ro.IntVector(obj)
    if all(isinstance(x, (int, float)) for x in obj):
        return ro.FloatVector(obj)

    return ro.ListVector(obj)
def pathifier(disease_name):
    """Score pathway deregulation for one disease with the R "pathifier" package.

    Reads the network model and the data set for `disease_name`, preprocesses
    the data with the 'metabolic-standard' pipeline, groups metabolites by
    reaction subsystem (skipping subsystems whose name starts with
    'Transport' or 'Exchange', and subsystems with no metabolite present in
    the data), and calls quantify_pathways_deregulation.

    The per-pathway scores are written, with the labels `y` as a leading
    'stage' column, to '../dataset/disease/<disease_name>_regulization.csv'.
    Nothing is returned.
    """
    model = DataReader().read_network_model()
    X, y = DataReader().read_data(disease_name)
    pre = DynamicPreprocessing(['metabolic-standard'])

    X = pre.fit_transform(X, y)

    df = pd.DataFrame(X)
    # R fills matrices column by column, hence the transpose before flattening.
    # `.values` is used because DataFrame.as_matrix() no longer exists in
    # current pandas releases.
    metabolite_fold_changes = robj.r.matrix(
        robj.FloatVector(df.values.T.ravel().tolist()),
        nrow=df.shape[1])
    all_metabolite_ids = robj.StrVector(list(df))

    # subsystem name -> ids of its metabolites that are columns of df
    subsystem_metabolite = defaultdict(set)
    for r in model.reactions:
        if r.subsystem and not r.subsystem.startswith(('Transport',
                                                       'Exchange')):
            subsystem_metabolite[r.subsystem] \
                .update(m.id for m in r.metabolites if m.id in df)

    # Keep only subsystems that ended up with at least one metabolite.
    non_empty = [(name, metabolites)
                 for name, metabolites in subsystem_metabolite.items()
                 if metabolites]
    pathway_names, pathway_metabolites = zip(*non_empty)

    pathway_metabolites = robj.r['list'](
        *[robj.StrVector(list(metabolites))
          for metabolites in pathway_metabolites])

    pathway_names = robj.StrVector(list(pathway_names))
    is_healthy = robj.BoolVector([label == 'h' for label in y])

    # Distinct local name so the R package does not shadow this function.
    pathifier_pkg = importr("pathifier")

    result = pathifier_pkg.quantify_pathways_deregulation(
        metabolite_fold_changes,
        all_metabolite_ids,
        pathway_metabolites,
        pathway_names,
        is_healthy,
        attempts=100,
        min_exp=0,
        min_std=0)

    reg_scores = {}
    for pathway, scores in dict(result.items())['scores'].items():
        reg_scores[pathway] = list(scores[:])

    df = pd.DataFrame(reg_scores)
    df.insert(0, 'stage', y)
    df.to_csv('../dataset/disease/%s_regulization.csv' % disease_name,
              index=False)
Exemple #6
0
 def as_r_vector(o, val_type):
     """Turn `o` into an R vector whose element type follows `val_type`.

     `val_type` of int, float or bool selects IntVector, FloatVector or
     BoolVector; any other value falls back to the generic RVector.
     A dict is converted from its values and the resulting vector is named
     after the dict's keys.
     """
     if not isinstance(o, dict):
         if val_type == int:
             return robjects.IntVector(o)
         if val_type == float:
             return robjects.FloatVector(o)
         if val_type == bool:
             return robjects.BoolVector(o)
         return robjects.RVector(o)

     # dict: build the vector from the values, then attach the keys as names.
     names = o.keys()
     named_vector = as_r_vector([o[name] for name in names],
                                val_type=val_type)
     named_vector.setnames(names)
     return named_vector
Exemple #7
0
def pyArrayToRVector(X, rName=None, nanToNA=True):
  """Flatten an array-like and assign it to a variable in the R workspace.

  Args:
    X (array-like): Data to convert. After ``np.asarray`` its dtype name
      must start with 'int', 'uint', 'float' or 'bool'.
    rName (str): Name of the R variable that receives the vector.
      When None, a name is obtained from ``genVarName()``.
    nanToNA (bool): When True, entries for which R's ``is.nan`` holds are
      replaced by NA after the assignment (nan is the usual Python stand-in
      for missing data, NA is R's).

  Returns:
    str: Name of the R variable holding the vector.

  Raises:
    NotImplementedError: If the dtype of X is none of the supported kinds.
  """
  flat = np.asarray(X).ravel()
  dtypeName = str(flat.dtype)

  if dtypeName.startswith(('int', 'uint')):
    rVector = ro.IntVector(flat)
  elif dtypeName.startswith('float'):
    rVector = ro.FloatVector(flat)
  elif dtypeName.startswith('bool'):
    rVector = ro.BoolVector(flat)
  else:
    raise NotImplementedError(
      'Only int, float, and bool are currently supported.' )

  targetName = genVarName() if rName is None else rName
  r.assign(targetName, rVector)

  if nanToNA:
    r( '%s[ is.nan(%s) ] <- NA' % (targetName, targetName) )

  return targetName
Exemple #8
0
    def heatmap(self, plotfile, zscore=False):
        ''' Draw a clustered heatmap of self.table into the PNG `plotfile`.

        zscore=True selects a divergent (PuOr) colour scale; otherwise a
        sequential GnBu scale is used. Plotting is done in R by gplots'
        heatmap.2 with average-linkage clustering.
        '''
        # to do: add option to parse design file and add coloured row for
        # variable specified in design file.

        # Only the output path is interpolated into the R source.
        r_source = '''
        function(df, zscore){

        library("Biobase")
        library("RColorBrewer")
        library("gplots")

        if(zscore[1]==TRUE){
        #hmcol <- colorRampPalette(colors = c("red", "white", "blue"))
        PuOr <- brewer.pal(11, "PuOr")
        hmcol <- c(colorRampPalette(c(PuOr[1], PuOr[6]))(100),
                   colorRampPalette(c(PuOr[6], PuOr[11]))(100)[-1])
        }
        else{
        hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
        }

        png("%(plotfile)s", width=1000, height=1000, units="px")

        heatmap.2(as.matrix(df),
                  col = hmcol,
                  scale="none", trace="none", margin=c(18, 10),
                  dendrogram="both", cexCol=2,
                  labRow = "",
                  hclustfun = function(x) hclust(x, method = 'average'),
                  distfun = dist)
        dev.off()
        }''' % {"plotfile": plotfile}

        draw_heatmap = R(r_source)

        # Hand the table and the flag over to R.
        table_in_r = pandas2ri.py2ri(self.table)
        use_zscore = ro.BoolVector([zscore])
        draw_heatmap(table_in_r, use_zscore)
Exemple #9
0
def make_rvector(col, ct=COLTYPE.FLOAT):
    """Make and return an R vector for data in `col` of COLTYPE ct.

    INT, FLOAT and BOOL map to the corresponding rpy2 vector. STR is wrapped
    in R's I() so it is not turned into a factor, while FACTOR is passed as a
    plain StrVector and left for R to convert. DATE columns are converted to
    a FloatVector: datetime.datetime values go through datetime_to_sec,
    float values are used as they are. The type of a DATE column is decided
    from its first element only.

    Returns:
      robjects.Vector
    Raises:
      TypeError if the type is unknown
      TypeError if it is COLTYPE.DATE but not parseable
      IndexError if it is COLTYPE.DATE and `col` is empty
    """
    if ct == COLTYPE.INT:
        return robjects.IntVector(col)
    if ct == COLTYPE.FLOAT:
        return robjects.FloatVector(col)
    if ct == COLTYPE.STR:
        # Use I() from R.base library to avoid conversion
        # into a factor. Usually though a factor is what you want.
        return base.I(robjects.StrVector(col))
    if ct == COLTYPE.BOOL:
        return robjects.BoolVector(col)
    if ct == COLTYPE.FACTOR:
        # conversion will happen automatically
        return robjects.StrVector(col)
    if ct == COLTYPE.DATE:
        field = col[0]
        if isinstance(field, datetime.datetime):
            # A real list (not a lazy map object) so the vector constructor
            # receives a sized sequence on Python 3 as well.
            tcol = [datetime_to_sec(value) for value in col]
        elif isinstance(field, float):
            tcol = col
        else:
            # The message only uses names that exist in this function; no
            # column index or column name is available here.
            raise TypeError("Bad date type '%s'. "
                            "Expected datetime.datetime or float." % (
                                type(field),))
        return robjects.FloatVector(tcol)

    raise TypeError("Unknown column type '%s'." % (ct,))
Exemple #10
0
def create_vector(v_list, desired_type=None):
    """Build the narrowest R vector able to hold every element of `v_list`.

    Element types are ranked bool < int < float < str. The vector type is the
    narrowest one that covers all elements, unless `desired_type` asks for a
    specific one, in which case that type is used if every element can be
    widened to it.

    v_list:       list of Python values (exact types bool/int/float/str are
                  recognised; anything else makes the list untyped).
    desired_type: None, or one of bool, int, float, str.

    Returns a BoolVector, IntVector, FloatVector or StrVector, or a generic
    RVector when the elements are untyped and no type was requested.
    Raises TypeException when `desired_type` is given but cannot hold the
    elements.
    """
    is_bool = is_int = is_float = is_str = True
    for elt in v_list:
        elt_type = type(elt)
        if elt_type is bool:
            # A bool fits every vector type, so nothing is ruled out. This
            # case must be tested explicitly: type(True) is not int, and
            # without it bools would be treated as an unknown type.
            continue
        if elt_type is int:
            is_bool = False
        elif elt_type is float:
            is_bool = is_int = False
        elif elt_type is str:
            is_bool = is_int = is_float = False
        else:
            # Unknown element type: no typed vector can be used.
            is_bool = is_int = is_float = is_str = False
            break

    if is_bool and (desired_type is None or desired_type == bool):
        return robjects.BoolVector(v_list)
    elif is_int and (desired_type is None or desired_type == int):
        res = [int(elt) for elt in v_list]
        return robjects.IntVector(res)
    elif is_float and (desired_type is None or desired_type == float):
        res = [float(elt) for elt in v_list]
        return robjects.FloatVector(res)
    elif is_str and (desired_type is None or desired_type == str):
        res = [str(elt) for elt in v_list]
        return robjects.StrVector(res)

    if desired_type is not None:
        raise TypeException("Cannot coerce vector to type '%s'" % desired_type)
    return robjects.RVector(v_list)
    octave.eval("test_prob = predict(bbq_model, test_scores, 1)",
                verbose=False)
    bbq_prob = octave.pull('test_prob', verbose=False)
    bbq_prob = np.array([item[0] for item in bbq_prob])
    bbq_metrics.append(isotonic.get_metrics(test_class, bbq_prob, k=k))
    # Create isotonic regression model
    ir_model = IsotonicRegression(y_min=y_min,
                                  y_max=y_max,
                                  out_of_bounds='clip')
    ir_model.fit(X=training_scores, y=training_class)
    ir_prob = isotonic.predict(ir_model, test_scores)
    ir_metrics.append(isotonic.get_metrics(test_class, ir_prob, k=k))
    # Create ENIR model using R:
    enir_model = enir.enir_build(
        robjects.FloatVector(training_scores.tolist()),
        robjects.BoolVector(training_class.tolist()))
    enir_prob = enir.enir_predict(enir_model,
                                  robjects.FloatVector(test_scores.tolist()))
    # Convert to numpy.array:
    enir_prob = np.array(enir_prob)
    enir_metrics.append(isotonic.get_metrics(test_class, enir_prob, k=k))

    # Create weighted (by likelihood) averaged bootstrapped isotonic regression.
    # I am using the identical IR models for BIR, which is basically also an
    # ensemble model but where all models have equal weight.
    wabir_model = isotonic.train_wabir(training_class, training_scores)
    wabir_prob = isotonic.predict_wabir(wabir_model, test_scores)
    wabir_metrics.append(isotonic.get_metrics(test_class, wabir_prob, k=k))
    # Estimating bir-probabilities using the same IR models as generated by wabir:
    bir_prob = isotonic.predict_wabir(wabir_model,
                                      test_scores,
Exemple #12
0
        raise TypeException("Cannot coerce vector to type '%s'" % desired_type)
    return robjects.RVector(v_list)

def vector_conv(v, desired_type=None):
    """Evaluate the Python expression text `v` and wrap the result in an R
    vector via create_vector, optionally forcing `desired_type`."""
    # NOTE(review): eval() runs arbitrary code. If `v` can come from an
    # untrusted source this is a code-execution risk -- confirm the origin of
    # the string before relying on this.
    return create_vector(eval(v), desired_type)

# Generic R vector constant type. The arguments to new_constant are
# presumably (name, string conversion, default value, validation check) --
# TODO confirm against new_constant's definition.
RVector = new_constant('RVector', staticmethod(vector_conv),
                       robjects.RVector([]),
                       staticmethod(lambda x: isinstance(x, robjects.RVector)))

def bool_vector_conv(v):
    """Convert the expression text `v` to an R vector, requesting bool."""
    return vector_conv(v, desired_type=bool)

# Logical-vector constant type derived from RVector; strings go through
# bool_vector_conv and an empty BoolVector is passed as the third argument.
# NOTE(review): the isinstance check accepts any robjects.RVector, not only
# BoolVector -- confirm this is intended.
RBoolVector = new_constant('RBoolVector' , staticmethod(bool_vector_conv), 
                            robjects.BoolVector([]),
                            staticmethod(lambda x: isinstance(x, robjects.RVector)),
                            base_class=RVector)
                       
def int_vector_conv(v):
    """Convert the expression text `v` to an R vector, requesting int."""
    return vector_conv(v, desired_type=int)

# Integer-vector constant type derived from RVector; strings go through
# int_vector_conv and an empty IntVector is passed as the third argument.
# NOTE(review): the isinstance check accepts any robjects.RVector, not only
# IntVector -- confirm this is intended.
RIntVector = new_constant('RIntVector' , staticmethod(int_vector_conv), 
                            robjects.IntVector([]),
                            staticmethod(lambda x: isinstance(x, robjects.RVector)),
                            base_class=RVector)

def float_vector_conv(v):
    """Convert the expression text `v` to an R vector, requesting float."""
    return vector_conv(v, desired_type=float)

RFloatVector = new_constant('RFloatVector' , staticmethod(float_vector_conv), 
Exemple #13
0
    def predict(self, Xtest, num_predicted_frames=8, ycol0=0):
        ''' Forecast the next frames of every column from ycol0 onwards.

        Each predicted column of Xtest is treated as an aggregated
        (cumulative) series: it is differenced in place, an ARIMA model is
        fitted to the differences through R's "forecast" package, and the
        forecast is accumulated again on top of the last observed total.

        Xtest: 2-D array indexed [row, column]. It is MODIFIED IN PLACE by
            the de-aggregation step.
        num_predicted_frames: forecast horizon passed to forecast().
        ycol0: first column to predict; columns before it stay zero in the
            result.

        Returns Ytest, which is allocated with the fixed shape (7, 57).
        The R package "forecast" is installed into rpy2's own directory if
        it is not found there.
        '''
        vprint(self.verbose, "Model :: ========= Making predictions =========")
        vprint(self.verbose, "===============================================")
        start = time.time()

        ######################
        # rpy2 is imported here so that loading the model does not require R.
        import rpy2
        import rpy2.robjects as robjects
        import rpy2.robjects.packages as rpackages
        from rpy2.robjects.packages import importr
        from rpy2.robjects import pandas2ri

        # import R's utility package
        utils = rpackages.importr('utils')

        # select a mirror for R packages
        utils.chooseCRANmirror(ind=1)  # select the first mirror in the list

        r_lib_dir = rpy2.__path__[0]
        if not rpackages.isinstalled('forecast', lib_loc=r_lib_dir):
            utils.install_packages('forecast', lib=r_lib_dir)
        forecast = importr('forecast', lib_loc=r_lib_dir)

        ts = robjects.r('ts')
        r_null = robjects.r("NULL")

        pandas2ri.activate()
        ######################

        # NOTE(review): the output shape is hard-coded, so the forecast
        # assigned below must have exactly 7 values and Xtest at most 57
        # columns -- confirm against the callers before generalising.
        Ytest = np.zeros((7, 57))

        # The ARIMA fit assumes daily (non-aggregated) data, so undo the
        # aggregation first and remember the last total of each column.
        future_starts = []
        for col in range(ycol0, Xtest.shape[1]):
            init = Xtest[0, col]
            for row in range(1, Xtest.shape[0]):
                Xtest[row, col] -= init
                init += Xtest[row, col]
            future_starts.append(init)

        for col in range(ycol0, Xtest.shape[1]):
            ndpat = num_predicted_frames  # number of days to predict at a time
            dat = Xtest[1:, col]
            f = ts(dat, frequency=1, start=1, end=len(dat))
            best_params = robjects.IntVector([0, 0, 0])
            best_RMSE = 1000000

            # Grid search over the ARIMA order (p, d, q).
            for p in range(1, 5):
                for q in range(0, 5):
                    for d in range(0, 3):
                        try:
                            t_order = robjects.IntVector([p, d, q])
                            fit2 = forecast.Arima(f,
                                                  order=t_order,
                                                  xreg=r_null,
                                                  include_mean=True,
                                                  include_drift=False,
                                                  biasadj=False,
                                                  method="ML",
                                                  model=r_null)
                            RMSE = forecast.accuracy(fit2)[0][2]
                            if RMSE < best_RMSE:
                                best_RMSE = RMSE
                                best_params = robjects.IntVector([p, d, q])
                        except Exception:
                            # Orders that R cannot fit are skipped;
                            # KeyboardInterrupt/SystemExit still propagate.
                            continue

            best_opts = robjects.BoolVector([True, False])
            possible_opts = robjects.BoolVector([True, False])
            # NOTE(review): range(0, 1) yields a single value, so only the
            # (mean=True, drift=True) combination is compared against the
            # default above. If all four combinations were intended this
            # needs range(0, 2) and separate loop/option names -- confirm.
            for mean_opt in range(0, 1):
                for drift_opt in range(0, 1):
                    mean_opt = possible_opts[mean_opt]
                    drift_opt = possible_opts[drift_opt]
                    fit2 = forecast.Arima(f,
                                          order=best_params,
                                          xreg=r_null,
                                          include_mean=mean_opt,
                                          include_drift=drift_opt,
                                          biasadj=False,
                                          method="ML",
                                          model=r_null)
                    RMSE = forecast.accuracy(fit2)[0][2]
                    if RMSE < best_RMSE:
                        best_RMSE = RMSE
                        best_opts = robjects.BoolVector([mean_opt, drift_opt])

            # Refit with the selected order and options, then forecast.
            fit2 = forecast.Arima(f,
                                  order=best_params,
                                  xreg=r_null,
                                  include_mean=best_opts[0],
                                  include_drift=best_opts[1],
                                  biasadj=False,
                                  method="ML",
                                  model=r_null)
            Ytest[:, col] = forecast.forecast(fit2, ndpat)[3]

        # Reconstruct aggregated predictions.
        for col in range(ycol0, Xtest.shape[1]):
            # future_starts has one entry per predicted column, so it is
            # indexed relative to ycol0, not by the absolute column number.
            init = future_starts[col - ycol0]
            for row in range(0, num_predicted_frames - 1):
                tinc = init
                init += Ytest[row, col]
                Ytest[row, col] += tinc

        end = time.time()
        vprint(self.verbose,
               "[+] Success, predictions made in %5.2f sec" % (end - start))
        vprint(self.verbose, "Model :: ======== Predictions finished ========")
        return Ytest
Exemple #14
0
def preprocess(analysis_id):
    """Prepare the uploaded gene expression matrix of one Analysis.

    Steps:
      1. Check that the file starts with "!Series_title<TAB>" (otherwise
         getGEO never returns) and parse it with GEOquery.
      2. Drop genes containing NA and genes whose row sum is zero, then
         write the matrix to <store_dir>/gem.csv.
      3. Clean the sample characteristics, drop uninformative columns,
         relabel rare values as "Other" and write
         <store_dir>/characteristics.csv.

    A failure in step 1 or 2 sets the analysis status to the "-2" failure
    text and returns "invalid gene expression matrix format". A failure in
    step 3 only clears `char_ok`; the analysis is still marked ready.

    Returns "success" or "invalid gene expression matrix format".
    """
    a = Analysis.objects.get(id=analysis_id)

    def fail_invalid_format():
        # Shared failure path: record the status and give the caller its
        # short error string.
        a.status = "-2. Preprocessing failed: invalid gene expression matrix format"
        a.save()
        return "invalid gene expression matrix format"

    # Get GEM directory
    gem_dir = os.path.join(settings.MEDIA_ROOT, a.gem.name)
    # Directory to store processed data
    store_dir = os.path.join(settings.MEDIA_ROOT,
                             'analyses/user_{0}/{1}'.format(a.user.id, a.id))

    ri.initr()

    # Import libraries
    base = importr('base')
    geoquery = importr('GEOquery')

    # Check if file starts with !Series_title, otherwise getGEO never stops
    with open(gem_dir) as f:
        first_line = f.readline()
    if not first_line.startswith("!Series_title\t"):
        return fail_invalid_format()

    # Get GEO series matrix and extract GEM
    try:
        gsm = geoquery.getGEO(filename=gem_dir, getGPL=False)
    except Exception:
        return fail_invalid_format()

    try:
        gem = gsm.slots['assayData']['exprs']
        # Remove any genes with NAs
        row_keep = ro.IntVector(
            np.argwhere(np.array(ro.r.rowSums(ro.r['is.na'](gem))) == 0) + 1)
        gem = gem.rx(row_keep, True)
        # Remove any genes with all 0s
        row_keep = ro.IntVector(np.nonzero(np.array(ro.r.rowSums(gem)))[0] + 1)
        gem = gem.rx(row_keep, True)
        # Write to CSV
        ro.r['write.table'](gem, file=os.path.join(store_dir, 'gem.csv'))
    except Exception:
        return fail_invalid_format()

    try:
        # Get pheno data
        pheno_data = gsm.slots['phenoData']
        pheno_data = pheno_data.slots['data']

        # Extract explicitly defined characteristics
        char_index = ro.r['!'](ro.r.grepl('characteristics|date',
                                          ro.r.names(pheno_data)))
        char = pheno_data.rx(True, char_index)
        gene_name = ro.r.rownames(char)
        char = ro.r['data.frame'](ro.r.lapply(char, ro.r['as.character']),
                                  stringsAsFactors=False)
        char = ro.r.cbind(char,
                          **{'gene.name': gene_name},
                          stringsAsFactors=False)

        # Data cleaning: strip whitespace, normalise empty and "none" values.
        # The last column (gene.name) is left untouched.
        for i in range(char.nrow):
            for j in range(char.ncol - 1):
                char_val = char.rx(i + 1, j + 1)[0].strip()
                if char_val == "":
                    char.rx[i + 1, j + 1] = "unknown"
                elif char_val in ("None", "NONE", "none"):
                    char.rx[i + 1, j + 1] = "none"
                else:
                    char.rx[i + 1, j + 1] = char_val

        # Remove columns where more than 80% of unique values have less than 5 occurrences
        col_keep = np.repeat([True], char.ncol)
        for i in range(char.ncol - 1):
            unique_count = ro.r.table(char.rx(True, i + 1))
            unique_total = ro.r.length(unique_count)[0]
            if unique_total == 1:
                col_keep[i] = False
                continue
            unique_names = ro.r.names(unique_count)
            # "unknown" is excluded from the denominator of the ratio.
            unique_length = unique_total
            count = 0
            for j in range(unique_total):
                if unique_names.rx(j + 1)[0] == "unknown":
                    unique_length = unique_length - 1
                elif unique_count.rx(j + 1)[0] < 5:
                    count = count + 1
            if count / unique_length > 0.8:
                col_keep[i] = False

        char = char.rx(True, ro.BoolVector(col_keep))

        # Change all values with less than 5 occurences to "Other"
        for i in range(char.ncol - 1):
            rare_char = np.where(
                np.array(ro.r.table(char.rx2(i + 1))) < 5)[0] + 1
            rare_char = ro.r.names(ro.r.table(char.rx2(i + 1))).rx(
                ro.IntVector(rare_char))
            # A single rare value is left as it is.
            if ro.r.length(rare_char)[0] == 1:
                rare_char = ro.IntVector([])
            if ro.r.length(rare_char)[0] != 0:
                rare_values = np.array(rare_char)
                for j in range(char.nrow):
                    if str(char.rx(j + 1, i + 1)[0]) in rare_values:
                        char.rx[j + 1, i + 1] = "Other"

        # Write to CSV
        ro.r['write.table'](char,
                            file=os.path.join(store_dir,
                                              'characteristics.csv'))
    except Exception:
        # Characteristics are optional: flag them as unusable and carry on.
        a.char_ok = False
        a.save()

    a.status = "2. Ready for analysis"
    a.save()

    return "success"
Exemple #15
0
# Run the sampler, then estimate every parameter as its mean over all
# walkers and steps.
sampler.run_mcmc(starting_guesses, nsteps)

sample = sampler.chain  # shape = (nwalkers, nsteps, ndim)

ests = [np.mean(sample[:, :, j]) for j in range(ndim)]
intercept = ests[0]
slope = ests[1]
# Parameters after the first two: one value per data point.
gs = [ests[j + 2] for j in range(len(x))]
# print() call form: valid on both Python 2 and Python 3.
print(gs)
# A point is "typical" when its value reaches the cut, which is the 15th
# percentile of the values capped at 0.5.
cut = min(0.5, np.percentile(gs, 15))
typical = [g >= cut for g in gs]


pdf = ro.DataFrame({'x': ro.FloatVector(x),
                    'y': ro.FloatVector(y),
                    'e': ro.FloatVector(e),
                    'ymin': ro.FloatVector(y - e),
                    'ymax': ro.FloatVector(y + e),
                    'yest': ro.FloatVector(slope * x + intercept),
                    'typical': ro.BoolVector(typical)})
rprint(pdf)
gpf = ggplot2.ggplot(pdf)
# Points coloured/shaped by the "typical" flag, error bars, and the fit line.
ppf = (gpf
       + ggplot2.geom_point(ggplot2.aes_string(x='x', y='y',
                                               color='typical',
                                               shape='typical'),
                            size=5)
       + ggplot2.geom_errorbar(ggplot2.aes_string(x='x', ymin='ymin',
                                                  ymax='ymax'))
       + ggplot2.geom_line(ggplot2.aes_string(x='x', y='yest')))
grdevices.png(file="fit.png", width=512, height=512)
print(ppf)
grdevices.dev_off()
Exemple #16
0
 def testNewBoolVector(self):
     """A BoolVector built from a list keeps its values and its length."""
     bool_vec = robjects.BoolVector([True, False])
     self.assertEqual(True, bool_vec[0])
     self.assertEqual(False, bool_vec[1])
     size = len(bool_vec)
     self.assertEqual(2, size)
Exemple #17
0
    def select_feature_lr_wrapper(n_para, x, y, model_type, fit_intercept = False):
        """Select `n_para` features and refit a logistic regression on them.

        model_type:
          'nr' -- rank features by absolute coefficient of a full logistic
                  regression and keep the `n_para` largest.
                  Returns (coefficients followed by intercept, error).
          'bs' -- best subset: try every combination of `n_para` columns and
                  keep the one with the lowest training error.
                  Returns (parameters, error, selected columns).
          'vs' -- use R's glmnet path and take the last lambda whose degrees
                  of freedom do not exceed `n_para`.
                  Returns (parameters, error, selected columns).

        For 'bs' and 'vs' the parameters are the intercept followed by the
        coefficients when `fit_intercept` is true, the coefficients alone
        otherwise. "error" is 1 - accuracy on the training data itself.
        Any other `model_type` falls through and returns None.
        """
        n_r, n_f = x.shape

        if model_type == 'nr':
            general_simple = LogisticRegression()
            general_simple.fit(x, y)
            original_model_paras = general_simple.coef_[0]
            # Feature indices ordered by decreasing |coefficient|.
            index = np.argsort(abs(original_model_paras))[::-1]
            list_of_select_features = index[:n_para]
            new_x = x[:, list_of_select_features]
            lr_refit = LogisticRegression()
            lr_refit.fit(new_x, y)
            return np.concatenate((lr_refit.coef_[0], lr_refit.intercept_)), 1 - lr_refit.score(new_x, y)

        if model_type == 'bs':
            min_err = float('inf')
            min_ls_f = None
            for ls_f in combinations(range(n_f), n_para):
                x_sub = x[:, ls_f]
                general_simple = LogisticRegression(fit_intercept = fit_intercept)
                general_simple.fit(x_sub, y)
                err = 1 - general_simple.score(x_sub, y)
                if err < min_err:
                    min_err = err
                    min_ls_f = ls_f

            x_sub = x[:, min_ls_f]
            # NOTE(review): the refit uses the default fit_intercept rather
            # than the caller's value -- confirm this is intended.
            lr_refit = LogisticRegression()
            lr_refit.fit(x_sub, y)

            if fit_intercept:
                return np.concatenate((lr_refit.intercept_, lr_refit.coef_[0])), 1 - lr_refit.score(x_sub, y), min_ls_f
            return lr_refit.coef_[0], 1 - lr_refit.score(x_sub, y), list(min_ls_f)

        if model_type == 'vs':

            import rpy2.robjects as ro
            # Import the conversion module itself: the function must not rely
            # on a module-level `rpy2` name that its own imports do not bind.
            from rpy2.robjects import numpy2ri
            r = ro.r

            numpy2ri.activate()
            r.library("glmnet")
            r_x = ro.r.assign('dummy', x)
            r_y = ro.r.assign('dummy', y)
            r_fit_intercept = ro.BoolVector((fit_intercept,))

            # NOTE(review): with fit_intercept the R code subtracts 1 from
            # positions in a coefficient vector that still contains the
            # intercept -- verify that the returned indices line up with the
            # columns of x in that case.
            r(
                '''
                var_select<-function(x,y,degree, fit_intercept){
                fit = glmnet(x, y, intercept = fit_intercept)
                df<-fit$df
                index = which(df<=degree)
                index = index[length(index)]
                lambda = fit$lambda[index]
                coefs = coef(fit, s=lambda)
                if(fit_intercept){
                    res = which(abs(coefs)>0)
                }else{
                    res = which(abs(coefs[2:length(coefs)])>0)
                }
                return(res-1)
                }
                '''
            )
            active_ind = np.asarray(r.var_select(r_x, r_y, n_para, r_fit_intercept[0])).tolist()
            # Loop variable deliberately not called `x`: on Python 2 a list
            # comprehension variable leaks and would overwrite the data.
            active_ind = [int(idx) for idx in active_ind]
            lr_refit = LogisticRegression(fit_intercept=fit_intercept)
            lr_refit.fit(x[:, active_ind], y)

            if fit_intercept:
                return np.concatenate((lr_refit.intercept_, lr_refit.coef_[0])), \
                       1 - lr_refit.score(x[:, active_ind], y), active_ind
            return lr_refit.coef_[0], 1 - lr_refit.score(x[:, active_ind], y), active_ind
Exemple #18
0
 def testNALogical(self):
     """Assigning NA_Logical to an element makes R's is.na report it."""
     bool_vec = robjects.BoolVector((True, False, True))
     bool_vec[0] = robjects.NA_Logical
     is_na = robjects.baseenv['is.na']
     self.assertTrue(is_na(bool_vec)[0])
import atddm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pytz
# from datetime import time
from constants import COLORS, TZONES, CODES, BEGDT, ENDDT

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

# Shorthand for the embedded R interpreter.
r = robjects.r
# Length-one R logical vectors holding True and False.
TRUE = robjects.BoolVector([True])
FALSE = robjects.BoolVector([False])
# Switch on rpy2's pandas conversion; this is a module-wide side effect that
# happens at import time.
pandas2ri.activate()
# Handles to the R packages 'dgof' and 'DiscreteWeibull'; importr fails at
# import time if a package is not installed in R.
dgof = importr('dgof')
dweib = importr('DiscreteWeibull')


def format_time_interval(t1, t2):
    """Return 'HH:MM--HH:MM' for the interval from `t1` to `t2`.

    Both arguments need integer `hour` and `minute` attributes; each field
    is zero-padded to two digits.
    """
    template = '{:02d}:{:02d}--{:02d}:{:02d}'
    return template.format(t1.hour, t1.minute, t2.hour, t2.minute)


def formatter_float_n_digits(x, n):
    """Format `x` in fixed-point notation with `n` digits after the point."""
    spec = '.{}f'.format(n)
    return format(x, spec)
Exemple #20
0
def test_nalogical():
    """Assigning NA_Logical to an element makes R's is.na return True for it."""
    bool_vec = robjects.BoolVector((True, False, True))
    bool_vec[0] = robjects.NA_Logical
    is_na = robjects.baseenv['is.na']
    assert is_na(bool_vec)[0] is True
Exemple #21
0
def test_init_boolvector():
    """A BoolVector built from a list yields Python bools and keeps its length."""
    bool_vec = robjects.BoolVector([True, False])
    assert bool_vec[0] is True
    assert bool_vec[1] is False
    size = len(bool_vec)
    assert size == 2