Example #1
def batch_adjust(expr_df, batch):
    # sample call: ir.batch_adjust(exprset, batch=[batch1.columns.to_list(), [1,1,1]])
    """
    TODO: a 0 or NA somewhere is causing rejection:
         Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
         contrasts can be applied only to factors with 2 or more levels
    Args:
        expr_df: expression matrix (genes x samples) to correct
        batch: batch assignment; batch[0] holds the labels and
            batch[1:] the factor levels (see the sample call above)

    Returns:
        The batch-corrected expression matrix from sva::ComBat.
    """
    ro.pandas2ri.activate()
    sva = importr('sva')
    base = importr('base')
    # batch can be a pd dataframe; it has to have multiple levels
    rbatch = ro.FactorVector(obj=ro.Vector(batch),
                             labels=ro.Vector(batch[0]),
                             levels=ro.Vector(batch[1:]))
    print(rbatch.slots['levels'].r_repr())
    # print(expr_df.slots['assayData'].r_repr())
    # NAs in corrected matrix caused by genes = 0?
    # remove na: batch1 = batch1[(batch1.T != 0).any()]
    # batch needs to have multiple list dimensions (multi-level)
    # df =base.as_matrix(rpd.py2ri_pandasdataframe(expr_df))
    batch_corrected = sva.ComBat(
        dat=expr_df, batch=rbatch)  #, mod=ro.NULL)#, mean_only=True)
    ro.pandas2ri.deactivate()
    return batch_corrected
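The TODO above points at a common ComBat failure mode: the batch factor collapses to a single level. A minimal pre-check sketch in plain Python (assuming, hypothetically, a flat list of per-sample batch labels):

def check_batch_levels(batch_labels):
    """Fail fast before sva::ComBat if the batch factor has < 2 levels."""
    levels = set(batch_labels)
    if len(levels) < 2:
        raise ValueError(
            "ComBat needs a batch factor with 2 or more levels, "
            "got only: %r" % sorted(levels))
    return levels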
Example #2
    def testNew(self):
        identical = ri.baseenv["identical"]
        py_a = array.array('i', [1, 2, 3])
        ro_v = robjects.Vector(py_a)
        self.assertEqual(ro_v.typeof, ri.INTSXP)

        ri_v = ri.SexpVector(py_a, ri.INTSXP)
        ro_v = robjects.Vector(ri_v)

        self.assertTrue(identical(ro_v, ri_v)[0])

        del ri_v
        self.assertEqual(ri.INTSXP, ro_v.typeof)
Example #3
def do_bicor(this_trait_vals, target_trait_vals):
    r_library = ro.r["library"]             # Map the library function
    r_options = ro.r["options"]             # Map the options function

    r_library("WGCNA")
    r_bicor = ro.r["bicorAndPvalue"]        # Map the bicorAndPvalue function

    r_options(stringsAsFactors=False)

    this_vals = ro.Vector(this_trait_vals)
    target_vals = ro.Vector(target_trait_vals)

    the_r, the_p, _fisher_transform, _the_t, _n_obs = [
        numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)]

    return the_r, the_p
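A hypothetical usage sketch (assumes R plus the WGCNA package are installed, and an rpy2 version where ro.Vector accepts plain Python lists, as above):

the_r, the_p = do_bicor([1.2, 3.4, 2.2, 5.0], [2.1, 3.0, 2.5, 4.8])
print(the_r, the_p)  # biweight midcorrelation and its p-value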
Example #4
    def testCall(self):
        ri_f = rinterface.baseenv.get('sum')
        ro_f = robjects.Function(ri_f)

        ro_v = robjects.Vector(array.array('i', [1, 2, 3]))

        s = ro_f(ro_v)
Example #5
    def two_var_intr_effects(self, target, vars, nval=100, plot=True):
        """ Loads first level interactions.
        Args:
          target - Variable identifier (column name or number) specifying the
                   target variable
          vars - List of variable identifiers (column names or numbers) specifying
                 other selected variables. Must not contain target
          nval - Number of evaluation points used for calculation.
          plot - Determines whether or not to plot results.
        Returns: Pandas dataframe of interaction effects
        """
        # Check if null.models have already been generated
        check_str = """
                function(){
                  if(exists("null.models")){
                    return(T)
                  } else {
                    return(F)
                  }
                }
                """
        if not robjects.r(check_str)()[0]:
            self.logger.info(
                'Null models not generated, generating null models '
                '(n=10)')
            self._generate_interaction_null_models(10, quiet=False)

        int_str = """
              function(target, vars, nval){
                interactions <- twovarint(tvar=target, vars=vars, null.models, 
                                          nval=nval, plot=F)
              }
              """
        # Check the input type: if int, add one (R indices are 1-based);
        # if string, do nothing.
        target = target if type(target) is str else target + 1
        vars = [var if type(var) is str else var + 1 for var in vars]
        r_interact = robjects.r(int_str)(target,
                                         robjects.Vector(np.array(vars)), nval)
        interact = pd.DataFrame(
            {
                'interact_str': list(r_interact[0]),
                'exp_null_int': list(r_interact[1]),
                'std_null_int': list(r_interact[2])
            },
            index=vars)

        if plot:
            int_effects = interact.reset_index().rename(
                columns={'index': 'vars'})
            int_effects_m = pd.melt(
                int_effects,
                id_vars='vars',
                value_vars=['interact_str', 'exp_null_int'])
            p = gg.ggplot(gg.aes(x='vars', fill='variable', weight='value'),
                          data=int_effects_m) \
                + gg.geom_bar() \
                + gg.labs(
                    title='Two-var interaction effects - {}'.format(target))
            print(p)
        return interact
Example #6
def test_call_with_sexp():
    ri_f = rinterface.baseenv.get('sum')
    ro_f = Function(ri_f)

    ro_v = robjects.Vector(array.array('i', [1, 2, 3]))

    s = ro_f(ro_v)
    assert s[0] == 6
Example #7
def vecterize(value_list):
    '''
        Convert a Python list into an R vector.
        input:
            value_list: a Python list
        output:
            An R vector
    '''
    return robjects.Vector(value_list)
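A hypothetical usage sketch (note that newer rpy2 releases prefer the concrete FloatVector/IntVector/StrVector classes over the generic Vector used here):

import rpy2.robjects as robjects

r_vec = vecterize([1.0, 2.0, 3.0])
print(robjects.r['sum'](r_vec)[0])  # 6.0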
Example #8
def ML_survivorship(times=np.arange(0, np.max(data['mortality']), 0.1)):
     
     ptime = np.array(times) 
     rtime = ro.Vector(ptime)
     
     return np.array(pgammagompertz(rtime,
          model_para['est']['beta'],
          model_para['est']['s'],
          model_para['est']['rate'],lower_tail=False))
Example #9
def ML_hazard(times=np.arange(0, np.max(data['mortality']), 0.1)):
     
     ptime = np.array(times) 
     rtime = ro.Vector(ptime)
     
     return np.array(hgammagompertz(rtime,
          model_para['est']['beta'],
          model_para['est']['s'],
          model_para['est']['rate']))
Example #10
    def testExtractByIndex(self):
        seq_R = robjects.baseenv["seq"]
        mySeq = seq_R(0, 10)
        # R indexing starts at one
        myIndex = robjects.Vector(array.array('i', range(1, 11, 2)))

        mySubset = mySeq.rx(myIndex)
        for i, si in enumerate(myIndex):
            self.assertEqual(mySeq[si - 1], mySubset[i])
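A standalone sketch of the indexing difference the comment refers to: Python-style [] is 0-based, while .rx() forwards to R's 1-based `[` operator.

import rpy2.robjects as robjects

s = robjects.baseenv["seq"](0, 10)
assert s[0] == s.rx(1)[0] == 0  # [] is 0-based, .rx() is R's 1-based [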
Example #11
    def testGetNames(self):
        vec = robjects.Vector(array.array('i', [1, 2, 3]))
        v_names = [robjects.baseenv["letters"][x] for x in (0, 1, 2)]
        #FIXME: simplify this
        r_names = robjects.baseenv["c"](*v_names)
        vec = robjects.baseenv["names<-"](vec, r_names)
        for i in range(len(vec)):
            self.assertEqual(v_names[i], vec.names[i])

        vec.names[0] = 'x'
Example #12
def ML_cumulative_hazard(times=np.arange(0, np.max(data['mortality']), 0.1)):
     
     ptime = np.array(times) 
     rtime = ro.Vector(ptime)
     
     return np.array(Hgammagompertzmakeham(rtime,
          model_para['est']['rate'],
          model_para['est']['beta'],
          model_para['est']['s'],
          model_para['est']['lambda']))
Example #13
    def testExtractIndexError(self):
        seq_R = robjects.baseenv["seq"]
        mySeq = seq_R(0, 10)
        # R indexing starts at one
        myIndex = robjects.Vector(['a', 'b', 'c'])

        def noconsole(x):
            pass
        robjects.rinterface.set_writeconsole(noconsole)

        self.assertRaises(ri.RRuntimeError, mySeq.rx, myIndex)
Example #14
    def testExtractByName(self):
        seq_R = robjects.baseenv["seq"]
        mySeq = seq_R(0, 25)

        letters = robjects.baseenv["letters"]
        mySeq = robjects.baseenv["names<-"](mySeq, letters)

        # R indexing starts at one
        myIndex = robjects.Vector(letters[2])

        mySubset = mySeq.rx(myIndex)

        for i, si in enumerate(myIndex):
            self.assertEqual(2, mySubset[i])
Example #15
    def single_partial_dependency(self, vars, nav=500):
        """ Display single-variable partial dependency plots
        Args:
          vars - A list of variable indices or column names indicating which
                 plots to print.
          nav - Maximum number of observations used for average calculations.
                Higher values give more accurate calculations with diminishing
                returns.
        """
        dep_str = """
              function(vars, nav){
                singleplot(vars)
              }
              """
        # R indices are 1-based, so shift integer identifiers by one
        vars = [var + 1 if isinstance(var, int) else var for var in vars]
        robjects.r(dep_str)(robjects.Vector(np.array(vars)), nav)
Example #16
def r_main():
    # couldn't get this to work with cbind, so dplyr::mutate is used instead
    from rpy2.robjects.lib.dplyr import DataFrame
    print("Loading file...")
    robj.r['load'](sys.argv[1])

    lang = []
    for i, chunk in enumerate(misc.chunks_from_r(sys.argv[2])):
        print("Chunk ", i + 1)
        lang += lang_det(chunk.text)

    robj.globalenv['tmp'] = robj.Vector(
        [x if x is not None else rpy2.rinterface.NULL for x in lang])
    robj.globalenv[sys.argv[2]] = DataFrame(
        robj.globalenv[sys.argv[2]]).mutate(lang2='tmp')

    print("Saving...")
    robj.r.save(sys.argv[2], file=sys.argv[1])
Example #17
def run_mccoil(barcode_file_lines, maf_file_lines=None, verbose=False):
    ## init barcode set
    barcode_set = Barcode.SetOfBarcodes()
    barcode_set.readBarcodeFileLines(barcode_file_lines)
    # validate
    is_valid_barcode_set, err_msg = barcode_set.validate()
    if not is_valid_barcode_set:
        print(err_msg)
        return ([])
    ## init mafs
    if not maf_file_lines:
        mafs = barcode_set.computeMAFFromBarcodes(1)
    else:
        mafs = MAF.MAF()
        mafs.readMAFFileLines(maf_file_lines)
    mafs_R_vector = robjects.Vector(mafs.minor_allele_freqs())
    # validate
    is_valid_mafs, err_msg = mafs.validate()
    if not is_valid_mafs:
        print(err_msg)
        return ({})
    ## compute zygosity matrix, then convert to R DataFrame
    zygosity_matrix = barcode_set.to_zygosity_matrix(mafs,
                                                     header=True,
                                                     index=True)
    if verbose: print(zygosity_matrix)
    data = to_R_zygosity_df(zygosity_matrix)
    if verbose: print(data)
    ## import MCCOIL
    with open(mccoil_R, 'r') as f:
        mccoil_R_code = f.read()
    mccoil = SignatureTranslatedAnonymousPackage(mccoil_R_code, 'mccoil')
    ## compute result
    result = mccoil.McCOIL_categorical(data, P=mafs_R_vector)
    #print result
    ## get sites/samples
    sites, samples = list(result[-2]), list(result[-1])
    ## get maf/coi predictions, which map 1-to-1 sites/samples
    # (i.e. maf_prediction[i] is prediction for site[i])
    maf_predictions, coi_predictions = list(result[6]), list(result[5])
    return ({
        'mafs': list(zip(sites, maf_predictions)),
        'cois': list(zip(samples, coi_predictions))
    })
Example #18
def cut_genes(cluster_data, diss_tom_path, min_module_size):
    tree = robjects.Vector([
        np.array(cluster_data['merge']),
        robjects.FloatVector(cluster_data['height']),
        robjects.IntVector(cluster_data['order'])
    ])
    tree.names = ['merge', 'height', 'order']
    diss_tom = np.loadtxt(diss_tom_path)
    modules = robjects.r.cutreeHybrid(dendro=tree,
                                      distM=diss_tom,
                                      deepSplit=2,
                                      pamRespectsDendro=False,
                                      minClusterSize=min_module_size)
    labels = pandas2ri.ri2py(modules.rx2('labels'))[tree.rx2('order')]
    colors = WGCNA.labels2colors(labels)
    rgb = robjects.r.col2rgb(colors)
    hex = robjects.r.rgb(rgb.rx(1, True),
                         rgb.rx(2, True),
                         rgb.rx(3, True),
                         maxColorValue=255)
    return {'modules': list(colors), 'hex': list(hex)}
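A sketch of the same named-list construction using rpy2's ListVector, which maps a Python dict directly onto an R named list (hypothetical toy data; assumes numpy2ri for the merge matrix):

import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri

numpy2ri.activate()
tree = robjects.ListVector({
    'merge': np.array([[-1, -2], [-3, 1]]),   # toy dendrogram merge matrix
    'height': robjects.FloatVector([0.1, 0.2]),
    'order': robjects.IntVector([1, 2, 3]),
})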
Example #19
def DGSA(A, B, name_A, num_cluster):
    '''
    Distance-based Generalized Sensitivity Analysis (DGSA)
    Args:
        A: (np.array) A in SA(A|B), # features * # realizations
        B: (np.array) B in SA(A|B), # features * # realizations
        name_A: (np.vector) Row names of A
        num_cluster: (int) number of clusters
    Output:
        SA_dataframe: (pd.DataFrame) StandardizedSensitivity data frame
    '''
    ## Load DGSA R script
    ro.r('source(\'./source_code/dgsa/dgsa_rightcolor.R\')')
    rpy2.robjects.numpy2ri.activate()
    ## Clustering
    score = B.T
    kmeans = KMeans(n_clusters=num_cluster, random_state=0).fit(score)
    clustering = kmeans.labels_
    clustering = ro.Vector(clustering + 1)

    ## DGSA
    pandas2ri.activate()
    A = pd.DataFrame(A.T, columns=name_A)
    r_A = pandas2ri.py2ri(A)

    r_dgsa = ro.r['dgsa']
    myDGSA = r_dgsa(clustering, r_A)

    SensitivityMatrix = np.asarray(myDGSA.rx2(1))
    SA_stats = np.diag(np.nan_to_num(SensitivityMatrix).max(axis=0))

    names = np.asarray(myDGSA.rx2(2))

    SA_dataframe = pd.DataFrame(SA_stats, index=names)

    return SA_dataframe
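A hypothetical usage sketch (assumes the imports of the example above plus the dgsa_rightcolor.R script on that relative path; random toy data):

import numpy as np

A = np.random.rand(5, 200)   # 5 features x 200 realizations
B = np.random.rand(3, 200)
sa = DGSA(A, B, name_A=['f1', 'f2', 'f3', 'f4', 'f5'], num_cluster=3)
print(sa)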
Example #20
 def testSetNames(self):
     vec = robjects.Vector(array.array('i', [1, 2, 3]))
     names = ['x', 'y', 'z']
     vec.names = names
     for i in range(len(vec)):
         self.assertEqual(names[i], vec.names[i])
Example #21
    def run_analysis(self, requestform):
        print("Starting WGCNA analysis on dataset")
        self.r_enableWGCNAThreads()  # Enable multi threading
        self.trait_db_list = [
            trait.strip() for trait in requestform['trait_list'].split(',')
        ]
        print("Retrieved phenotype data from database",
              requestform['trait_list'])
        helper_functions.get_trait_db_obs(self, self.trait_db_list)

        self.input = {
        }  # self.input contains the phenotype values we need to send to R
        strains = []  # All the strains we have data for (contains duplicates)
        traits = [
        ]  # All the traits we have data for (should not contain duplicates)
        for trait in self.trait_list:
            traits.append(trait[0].name)
            self.input[trait[0].name] = {}
            for strain in trait[0].data:
                strains.append(strain)
                self.input[trait[0].name][strain] = trait[0].data[strain].value

        # Transfer the load data from python to R
        uStrainsR = r_unique(ro.Vector(strains))  # Unique strains in R vector
        uTraitsR = r_unique(ro.Vector(traits))  # Unique traits in R vector

        r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
        r_cat("The number of unique traits:", r_length(uTraitsR), "\n")

        # rM is the datamatrix holding all the data in R /rows = strains columns = traits
        rM = ro.r.matrix(ri.NA_Real,
                         nrow=r_length(uStrainsR),
                         ncol=r_length(uTraitsR),
                         dimnames=r_list(uStrainsR, uTraitsR))
        for t in uTraitsR:
            trait = t[0]  # R uses vectors every single element is a vector
            for s in uStrainsR:
                strain = s[
                    0]  # R uses vectors every single element is a vector
                #DEBUG: print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
                rM.rx[strain, trait] = self.input[trait].get(
                    strain)  # Update the matrix location
                sys.stdout.flush()

        self.results = {}
        self.results['nphe'] = r_length(uTraitsR)[
            0]  # Number of phenotypes/traits
        self.results['nstr'] = r_length(uStrainsR)[0]  # Number of strains
        self.results['phenotypes'] = uTraitsR  # Traits used
        self.results['strains'] = uStrainsR  # Strains used in the analysis
        self.results[
            'requestform'] = requestform  # Store the user specified parameters for the output page

        # Calculate soft threshold if the user specified the SoftThreshold variable
        if requestform.get('SoftThresholds') is not None:
            powers = [
                int(threshold.strip()) for threshold in
                requestform['SoftThresholds'].rstrip().split(",")
            ]
            rpow = r_unlist(r_c(powers))
            print "SoftThresholds: {} == {}".format(powers, rpow)
            self.sft = self.r_pickSoftThreshold(rM,
                                                powerVector=rpow,
                                                verbose=5)

            print "PowerEstimate: {}".format(self.sft[0])
            self.results['PowerEstimate'] = self.sft[0]
            if self.sft[0][0] is ri.NA_Integer:
                print "No power is suitable for the analysis, just use 1"
                self.results['Power'] = 1  # No power could be estimated
            else:
                self.results['Power'] = self.sft[0][
                    0]  # Use the estimated power
        else:
            # The user clicked a button, so no soft threshold selection
            self.results['Power'] = requestform.get(
                'Power')  # Use the power value the user gives

        # Create the block wise modules using WGCNA
        network = self.r_blockwiseModules(
            rM,
            power=self.results['Power'],
            TOMType=requestform['TOMtype'],
            minModuleSize=requestform['MinModuleSize'],
            verbose=3)

        # Save the network for the GUI
        self.results['network'] = network

        # How many modules and how many gene per module ?
        print "WGCNA found {} modules".format(r_table(network[1]))
        self.results['nmod'] = r_length(r_table(network[1]))[0]

        # The iconic WCGNA plot of the modules in the hanging tree
        self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
        self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl']
        r_png(self.results['imgloc'], width=1000, height=600)
        mergedColors = self.r_labels2colors(network[1])
        self.r_plotDendroAndColors(network[5][0],
                                   mergedColors,
                                   "Module colors",
                                   dendroLabels=False,
                                   hang=0.03,
                                   addGuide=True,
                                   guideHang=0.05)
        r_dev_off()
        sys.stdout.flush()
Example #22
    def construct_rule_list(self,
                            train_df,
                            label_col,
                            tree_constructors,
                            nr_bootstraps=3):
        y_train = train_df[label_col]
        X_train = train_df.copy()
        X_train = X_train.drop(label_col, axis=1)

        importr('randomForest')
        importr('inTrees')

        ro.globalenv["X"] = com.convert_to_r_dataframe(X_train)
        ro.globalenv["target"] = ro.FactorVector(y_train.values.tolist())

        feature_mapping = {}
        feature_mapping_reverse = {}
        for i, feature in enumerate(X_train.columns):
            feature_mapping[feature] = i + 1
            feature_mapping_reverse[i + 1] = feature

        treeList = []
        for tree in bootstrap(train_df,
                              label_col,
                              tree_constructors,
                              nr_classifiers=nr_bootstraps):
            if tree.count_nodes() > 1:
                treeList.append(self.tree_to_R_object(tree, feature_mapping))

        ro.globalenv["treeList"] = ro.Vector(
            [len(treeList), ro.Vector(treeList)])
        ro.r('names(treeList) <- c("ntree", "list")')

        rules = ro.r(
            'buildLearner(getRuleMetric(extractRules(treeList, X), X, target), X, target)'
        )
        rules = list(rules)
        conditions = rules[int(0.6 * len(rules)):int(0.8 * len(rules))]
        predictions = rules[int(0.8 * len(rules)):]

        # Create an OrderedRuleList
        rulesets = []
        for idx, (condition,
                  prediction) in enumerate(zip(conditions, predictions)):
            # Split each condition in Rules to form a RuleSet
            rulelist = []
            condition_split = [
                x.lstrip().rstrip() for x in condition.split('&')
            ]
            for rule in condition_split:
                feature = feature_mapping_reverse[int(
                    re.findall(r',[0-9]+]', rule)[0][1:-1])]

                lte = re.findall(r'<=', rule)
                gt = re.findall(r'>', rule)
                eq = re.findall(r'==', rule)
                cond = lte[0] if len(lte) else (gt[0] if len(gt) else eq[0])

                extract_value = re.findall(r'[=>]-?[0-9\.]+', rule)
                if len(extract_value):
                    value = float(re.findall(r'[=>]-?[0-9\.]+', rule)[0][1:])
                else:
                    feature = 'True'
                    value = None

                rulelist.append(Rule(feature, cond, value))
            rulesets.append(RuleSet(idx, rulelist, prediction))

        return OrderedRuleList(rulesets)
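The regular expressions above assume inTrees condition strings of the form 'X[,3]<=0.5 & X[,7]>2'; a minimal standalone check of how the feature index and threshold get extracted:

import re

rule = 'X[,3]<=0.5'
feature_index = int(re.findall(r',[0-9]+]', rule)[0][1:-1])    # -> 3
threshold = float(re.findall(r'[=>]-?[0-9\.]+', rule)[0][1:])  # -> 0.5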
Example #23
import rpy2.robjects as robjects
r = robjects.r
with open("table.txt") as F:
    lines = F.readlines()
l1 = []
l2 = []
l3 = []
l4 = []
for line in lines:
    col = line.split(',')
    l1.append(float(col[0]))
    l2.append(float(col[1]))
    l3.append(col[2])
    l4.append(float(col[3]))

    #l.append(float(line.split()[0]))

R_vector1 = robjects.FloatVector(l1)
R_vector2 = robjects.FloatVector(l2)
R_vector3 = robjects.Vector(l3)
print("mean", (r.mean(R_vector1)), (r.mean(R_vector2)))
print(R_vector3)
print("2", "3")
Example #24
nameupdates = open(chrmreq + '.snpid.updates.txt', 'w')
#import time
#t0 = time.time()
for item in range(snpbim.shape[0]):
    #for item in range(20):
    snp = snpbim[item, ]
    chrm = snp[0]
    if str(chrm) != str(chrmreq):
        continue
    oldid = snp[1]
    if item % 1000 == 0:
        print(str(item) + ' snps updated...')
        sys.stdout.flush()
    snpregion = snp[0] + ':' + snp[3] + ':' + snp[3]
    filters = robjects.Vector([snpregion, True, chrm, "dbSNP"])
    filters.names = [
        "chromosomal_region", "with_validated", "chr_name", "variation_source"
    ]
    snprecords = biomart.getBM(attributes, filters=filters, mart=snpmart)
    #snprecords = snpper(snp[0] + ':' + snp[3] + ':' + snp[3],snp[0])
    if len(snprecords[0]) == 0:
        print(oldid + '\tncgi' + snp[1], file=nameupdates)
        continue
    counter = range(len(snprecords[0]))
    counterupdate = []
    for x, y in enumerate(counter):
        if snprecords[5][y] == 1:
            counterupdate.append(x)
    if len(counterupdate) == 0:
        print(oldid + '\tmcgi' + snp[1], file=nameupdates)
Example #25
    def construct_rule_list(self,
                            train_df,
                            label_col,
                            tree_constructors,
                            nr_bootstraps=3):
        """ Construct an `constructors.inTrees.OrderedRuleList` from an ensemble of decision trees

        **Params**
        ----------
          - `train_df` (pandas DataFrame) - the training data

          - `label_col` (string) - the column identifier for the class labels

          - `tree_constructors` (`constructors.treeconstructor.TreeConstructor`) - the decision tree induction algorithms used to create an ensemble with

          - `nr_bootstraps` (int) - how many times do we apply bootstrapping for each TreeConstructor? The size of the ensemble will be equal to
          |tree_constructors|*nr_bootstraps

        **Returns**
        -----------
            an OrderedRuleList
        """
        y_train = train_df[label_col]
        X_train = train_df.copy()
        X_train = X_train.drop(label_col, axis=1)

        importr('randomForest')
        importr('inTrees')

        ro.globalenv["X"] = com.convert_to_r_dataframe(X_train)
        ro.globalenv["target"] = ro.FactorVector(y_train.values.tolist())

        feature_mapping = {}
        feature_mapping_reverse = {}
        for i, feature in enumerate(X_train.columns):
            feature_mapping[feature] = i + 1
            feature_mapping_reverse[i + 1] = feature

        treeList = []
        for tree in ensemble.bootstrap(train_df,
                                       label_col,
                                       tree_constructors,
                                       nr_classifiers=nr_bootstraps):
            if tree.count_nodes() > 1:
                treeList.append(self._tree_to_R_object(tree, feature_mapping))

        ro.globalenv["treeList"] = ro.Vector(
            [len(treeList), ro.Vector(treeList)])
        ro.r('names(treeList) <- c("ntree", "list")')

        rules = ro.r(
            'buildLearner(getRuleMetric(extractRules(treeList, X), X, target), X, target)'
        )
        rules = list(rules)
        conditions = rules[int(0.6 * len(rules)):int(0.8 * len(rules))]
        predictions = rules[int(0.8 * len(rules)):]

        # Create an OrderedRuleList
        rulesets = []
        for idx, (condition,
                  prediction) in enumerate(zip(conditions, predictions)):
            # Split each condition in Rules to form a RuleSet
            rulelist = []
            condition_split = [
                x.lstrip().rstrip() for x in condition.split('&')
            ]
            for rule in condition_split:
                feature = feature_mapping_reverse[int(
                    re.findall(r',[0-9]+]', rule)[0][1:-1])]

                lte = re.findall(r'<=', rule)
                gt = re.findall(r'>', rule)
                eq = re.findall(r'==', rule)
                cond = lte[0] if len(lte) else (gt[0] if len(gt) else eq[0])

                extract_value = re.findall(r'[=>]-?[0-9\.]+', rule)
                if len(extract_value):
                    value = float(re.findall(r'[=>]-?[0-9\.]+', rule)[0][1:])
                else:
                    feature = 'True'
                    value = None

                rulelist.append(Condition(feature, cond, value))
            rulesets.append(Rule(idx, rulelist, prediction))

        return OrderedRuleList(rulesets)
Example #26
    elif is_str and (desired_type is None or desired_type == str):
        res = [str(elt) for elt in v_list]
        return robjects.StrVector(res)

    if desired_type is not None:
        raise TypeException("Cannot coerce vector to type '%s'" % desired_type)
    return robjects.RVector(v_list)


def vector_conv(v, desired_type=None):
    v_list = literal_eval(v)
    return create_vector(v_list, desired_type)
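Hypothetical usage, assuming the truncated create_vector above also carries the bool/int/float branches implied by the constants below:

words = vector_conv("['a', 'b', 'c']", str)       # -> robjects.StrVector
bools = vector_conv("[True, False, True]", bool)  # hypothetical bool branch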


RVector = new_constant('RVector', staticmethod(vector_conv),
                       robjects.Vector([]),
                       staticmethod(lambda x: isinstance(x, robjects.Vector)))


def bool_vector_conv(v):
    return vector_conv(v, bool)


RBoolVector = new_constant(
    'RBoolVector',
    staticmethod(bool_vector_conv),
    robjects.BoolVector([]),
    staticmethod(lambda x: isinstance(x, robjects.Vector)),
    base_class=RVector)

Example #27
def granger_causality(data,
                      cols,
                      y_var,
                      lags,
                      our_type,
                      list_subcausalities=False):
    y_subset = data[y_var]
    pandas2ri.activate()
    data = pandas2ri.py2ri(data)

    ### Define the R helper functions
    robjects.r('''
      is.installed <- function(mypkg){
        is.element(mypkg, installed.packages()[,1])
      } 

      # check if package "gtools" is installed
      if (!is.installed("gtools")){
        install.packages("gtools", INSTALL_opts = '--no-lock', repos='https://cloud.r-project.org')
      }
      if (!is.installed("vars")){
        install.packages("vars", INSTALL_opts = '--no-lock', repos='https://cloud.r-project.org')
      }      
         library("gtools")
         library("vars")
         for (k in .libPaths()){
           k <- paste0(k,"/00LOCK")
           unlink(k, recursive = TRUE)
         }
         get_p_value <- function(data,lags,y_values,causes,our_type){
         data <- as.data.frame(data)
         mycols <- c(as.character(unlist(causes)))
         mydata <- data[c(as.character(unlist(causes)))]
         mydata <- as.data.frame(mydata)
         mydata <- cbind(Temperatures = y_values,mydata)
         var.2c <- VAR(mydata, p = lags, type = our_type) ### In this case, we are using trended Granger causality
         my_vcov <- vcovHC(var.2c)
         mycause <- causality(var.2c, cause = mycols)
         return(c(mycause$Granger$p.value))
    }

    permuts <- function(data,order,y,columns,our_type){
      list_perms <- do.call("c", lapply(seq_along(columns), function(i) combn(columns, i, FUN = list)))
      d <- data.frame(x = NA, y = 1:length(list_perms))
      i <- 1
      columns <- unlist(columns)
      while (i<=length(list_perms)){
        myp <- get_p_value(data,order,y,list_perms[i][[1]],our_type = our_type)
        d[i,] <- c(toString(unlist(list_perms[i][[1]])),as.numeric(myp))
        i <- i + 1
      }
      colnames(d) <- c("Sets of variables","p-value")
      d$`p-value` <- as.numeric(d$`p-value`)
      return(d)
      #return(.libPaths())
      #return(unlist(list_perms[i-1][[1]]))
        }
            ''')

    r_f = robjects.globalenv['get_p_value']
    permuts = robjects.globalenv['permuts']
    robjects.r.library("vars")
    our_causes = robjects.r('as.data.frame')(cols)
    if list_subcausalities:
        mydf = permuts(data, lags, robjects.Vector(y_subset), our_causes,
                       our_type)
        return mydf

    return (r_f(data, lags, robjects.Vector(y_subset), our_causes, our_type))
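A hypothetical usage sketch (assumes R plus the 'vars' and 'gtools' packages; random toy data):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(100, 3)), columns=['y', 'x1', 'x2'])
p_value = granger_causality(df, cols=['x1', 'x2'], y_var='y',
                            lags=2, our_type='const')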
Example #28
 def testExtractRecyclingRule(self):
     # recycling rule
     v = robjects.Vector(array.array('i', range(1, 23)))
     m = robjects.r.matrix(v, ncol=2)
     col = m.rx(True, 1)
     self.assertEqual(11, len(col))
Example #29
 def testCallClosureWithRObject(self):
     ri_f = rinterface.baseenv["sum"]
     ro_vec = robjects.Vector(array.array('i', [1, 2, 3]))
     res = ri_f(ro_vec)
      self.assertEqual(6, res[0])