Ejemplo n.º 1
0
    prob = ProbSpace(data)

    N = prob.N
    vars = prob.fieldList
    cond = []
    # Get the conditional variables
    for i in range(len(vars)):
        var = vars[i]
        if var[0] != 'A':
            cond.append(var)
    # There is a target: 'A<dims>' for each conditional dimension.  So for 3D (2 conditionals),
    # we would use A3.
    target = 'A' + str(dims)

    smoothness=1.0
    R1 = RKHS(prob.ds, delta=None, includeVars=[target] + cond[:dims-1], s=smoothness)
    R2 = RKHS(prob.ds, delta=None, includeVars=cond[:dims-1], s=smoothness)
    evaluations = 0
    start = time.time()
    results = []
    totalErr_jp = 0
    totalErr_ps = 0
    conds = len(cond)
    tps = []
    evaluations = 0
    means = [prob.E(c) for c in cond]
    stds = [prob.distr(c).stDev() for c in cond]
    minvs = [means[i] - stds[i] * lim for i in range(len(means))]
    maxvs = [means[i] + stds[i] * lim for i in range(len(means))]

    # Generate the test points
Ejemplo n.º 2
0
    # Get the conditional variables
    for i in range(len(vars)):
        var = vars[i]
        if var[0] != 'A':
            cond.append(var)

    target = 'A'

    amean = prob.E(target)
    astd = prob.distr(target).stDev()
    amin = amean - lim * astd
    arange = lim * astd - lim * -astd
    aincr = arange / (evalpts - 1)
    #print('A: mean, std, range, incr = ', amean, astd, arange, aincr)
    R1 = RKHS(prob.ds,
              delta=None,
              includeVars=[target] + cond[:dims - 1],
              s=smoothness)
    R2 = RKHS(prob.ds, delta=None, includeVars=cond[:dims - 1], s=smoothness)
    evaluations = 0
    start = time.time()
    results = []
    totalErr_jp = 0
    totalErr_ps = 0
    conds = len(cond)
    tps = []
    numTests = numPts**(dims - 1)
    evaluations = 0
    means = [prob.E(c) for c in cond]
    stds = [prob.distr(c).stDev() for c in cond]
    minvs = [means[i] - stds[i] * lim for i in range(len(means))]
    incrs = [(std * lim - std * -lim) / (numPts - 1) for std in stds]
Ejemplo n.º 3
0
    sdg = synthDataGen.run(test, datSize)
    d = getData.DataReader(datFileName)
    data = d.read()

    prob = ProbSpace(data)

    lim = 3  # Std's from the mean to test conditionals
    numPts = 30  # How many eval points for each conditional
    print('Test Limit = ', lim, 'standard deviations from mean')
    print('Dimensions = ', dims, '.  Conditionals = ', dims - 1)
    print('Number of points to test for each conditional = ', numPts)
    N = prob.N
    cond = 'B'

    target = 'A2'
    R1 = RKHS(prob.ds, delta=None, includeVars=[target, cond], s=smoothness)

    # Do some general assessment of cumulative probabilities

    # Do some univariate CDF calculations.  F is normal(0,1)
    R0_1 = RKHS(prob.ds, includeVars=['F'], s=smoothness)
    for v in [-4, -3, -2, -1, 0, 1, 2, 3, 4]:
        psR = prob.P(('F', None, v))
        print('cdf(', v, ') = ', R0_1.CDF(v), psR)
    # Now some 2-D single conditional evals
    A2std = .3
    for v2 in [-1 * A2std, 0, 1 * A2std]:
        for v1 in [-4, -3, -2, -1, 0, 1, 2, 3, 4]:
            exp = 1 / (1 + e**(-v2 / A2std))
            ps = prob.P(('A2', None, v2 + tanh(v1)), ('B', v1))
            jp = R1.condCDF([v2 + tanh(v1), v1])
Ejemplo n.º 4
0
    # Get the conditional variables
    for i in range(len(vars)):
        var = vars[i]
        if var[0] != 'A':
            cond.append(var)

    target = 'A'

    amean = prob.E(target)
    astd = prob.distr(target).stDev()
    amin = amean - lim * astd
    arange = lim * astd - lim * -astd
    aincr = arange / (evalpts - 1)
    #print('A: mean, std, range, incr = ', amean, astd, arange, aincr)
    R1 = RKHS(prob.ds,
              delta=None,
              includeVars=[target] + cond[:dims - 1],
              s=smoothness)
    R2 = RKHS(prob.ds, delta=None, includeVars=cond[:dims - 1], s=smoothness)
    evaluations = 0
    start = time.time()
    results = []
    totalErr_jp = 0
    totalErr_ps = 0
    conds = len(cond)
    tps = []
    numTests = numPts**(dims - 1)
    evaluations = 0
    means = [prob.E(c) for c in cond]
    stds = [prob.distr(c).stDev() for c in cond]
    minvs = [means[i] - stds[i] * lim for i in range(len(means))]
    incrs = [(std * lim - std * -lim) / (numPts - 1) for std in stds]
Ejemplo n.º 5
0
    def condP(self, Vals, K=None):
        """Return the conditional probability P(X1=x1 | X2=x2, ...), or None.

        Vals is a list (x1, x2, ..., xn) such that the query is
        P(X1=x1 | X2=x2, ...); same calling convention as rkhsmv.

        K, when given, is the percentage of conditioning variables to handle
        by filtering the data. It replaces ``self.k`` persistently.

        The trailing ``filter_len`` variables of ``self.includeVars`` are
        applied as a ProbSpace filter and the remaining ones are evaluated
        with a cached RKHS (``self.R1``).

        Returns the probability when it is positive, otherwise None. When
        every conditioning variable is selected for filtering, the value of
        ``self.R1.P`` on the filtered data is returned as-is.
        """
        if K is not None:
            self.k = K
        filter_len = floor((len(self.includeVars) - 1) * self.k * 0.01)
        dims = len(Vals)
        print(dims, filter_len)
        if self.rangeFactor == 0:
            # A zero factor would divide by zero when computing maxPoints.
            self.rangeFactor = 0.5
        if filter_len != 0:
            filter_vars = self.includeVars[-filter_len:]
            filter_vals = Vals[-filter_len:]
            include_vars = self.includeVars[:-filter_len]
            target_points = ceil(self.N ** ((dims - filter_len) / dims))
            self.minPoints = target_points * self.rangeFactor
            self.maxPoints = target_points / self.rangeFactor
        else:
            filter_vars = []
            filter_vals = []
            include_vars = self.includeVars

        # Calculating R1
        zDim = floor(self.k * (dims - 1) * 0.01)
        minminpoints = 10
        if filter_len == len(self.includeVars) - 1:
            # Every conditioning variable was selected for filtering: try
            # progressively fewer filtered dimensions until enough points
            # survive the filter.
            for i in range(zDim, 0, -1):
                print("runing i=", i)
                filter_len = floor(i * self.k * 0.01)
                # NOTE(review): this scales N linearly (N * frac) while the
                # bounds computed above use a power (N ** frac) -- confirm
                # which one is intended.
                target_points = ceil(self.N * ((dims - i) / dims))
                self.minPoints = target_points * self.rangeFactor
                self.maxPoints = target_points / self.rangeFactor
                rkhsminpoints = minminpoints * (dims - i)
                if self.minPoints < rkhsminpoints:
                    print("minpoints < minminpoints", self.minPoints, rkhsminpoints)
                    continue
                P = ProbSpace(self.data)
                filter_data = [(filter_vars[j], filter_vals[j])
                               for j in range(filter_len)]
                print("filter metrics = ", filter_data)
                FilterData, parentProb, finalQuery = P.filter(
                    filter_data, self.minPoints, self.maxPoints)
                # Count the surviving points on the first included variable
                # rather than on a hard-coded 'B' column, which may not exist.
                n_filtered = len(FilterData[self.includeVars[0]])
                if n_filtered < self.minPoints:
                    print("not enough filter points for filterlen =", i,
                          n_filtered, self.minPoints, self.maxPoints)
                    continue
                print("filter len", filter_len)
                print("filtered datapoints:", n_filtered)
                print("include vars:", self.includeVars[:-filter_len])
                self.R1 = RKHS(FilterData,
                               includeVars=self.includeVars[:(dims - filter_len)],
                               delta=self.delta, s=self.s)
                self.r1filters = filter_vals
                return self.R1.P(Vals[:dims - filter_len])
            # No filter size produced enough points: use the unfiltered data.
            print("running empty")
            self.R1 = RKHS(self.data, includeVars=self.includeVars,
                           delta=self.delta, s=self.s)
            p = self.R1.condP(Vals)
            return p if p > 0 else None

        elif filter_len != 0 and self.r1filters != filter_vals:
            # Filter values changed since the cached R1 was built: rebuild it.
            P = ProbSpace(self.data)
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(
                filter_data, self.minPoints, self.maxPoints)
            self.R1 = RKHS(FilterData,
                           includeVars=self.includeVars[:-filter_len],
                           delta=self.delta, s=self.s)
            self.r1filters = filter_vals

        elif self.R1 is None:
            print("running empty")
            self.R1 = RKHS(self.data, includeVars=self.includeVars,
                           delta=self.delta, s=self.s)

        elif self.R1.varNames != include_vars:
            self.R1 = RKHS(self.data, includeVars=self.includeVars,
                           delta=self.delta, s=self.s)

        # Filtered variables are already applied to the data, so they are
        # dropped from the query passed to R1.
        query = Vals[:-filter_len] if filter_len != 0 else Vals
        p = self.R1.condP(query)
        return p if p > 0 else None
Ejemplo n.º 6
0
    def condE(self, target, Vals, K=None):
        """Return the conditional expectation E(Y | X1=x1, X2=x2, ...).

        target is passed through to the cached RKHS's ``condE``.
        Vals is a list (x1, x2, ..., xn) of conditioning values; same calling
        convention as rkhsmv.

        K is the percentage of conditioning variables to handle by filtering
        the data. It defaults to ``self.k`` and, unlike in condP, is not
        stored on the instance.

        When the filter leaves too few points, the method calls itself again
        with a smaller K so that one fewer variable is filtered.
        """
        if K is None:
            K = self.k
        filter_len = floor((len(self.includeVars) - 1) * K * 0.01)
        dims = len(Vals) + 1
        if self.rangeFactor is None:
            self.rangeFactor = 0.8
        minminpoints = 5

        if filter_len != 0:
            filter_vars = self.includeVars[-filter_len:]
            filter_vals = Vals[-filter_len:]
            include_vars = self.includeVars[1:-filter_len]
            target_points = self.N ** ((dims - filter_len) / dims)
            self.minPoints = target_points * self.rangeFactor
            self.maxPoints = target_points / self.rangeFactor
        else:
            filter_vars = []
            filter_vals = []
            # R2 is built from includeVars[1:] (the target is excluded), so
            # the cache check below must compare against that same list;
            # comparing against the full list forced a rebuild on every call.
            # Presumably RKHS.varNames mirrors the includeVars it was given --
            # TODO confirm.
            include_vars = self.includeVars[1:]

        if filter_len == len(self.includeVars) - 1:
            # Every conditioning variable is filtered: the expectation is the
            # plain mean of the target over the filtered data.
            P = ProbSpace(self.data)
            filter_vars = self.includeVars[1:]
            filter_vals = Vals
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(
                filter_data, self.minPoints, self.maxPoints)
            X = FilterData[self.includeVars[0]]
            if len(X) < self.minPoints:
                # Lower K so that one fewer variable is filtered, then retry.
                newk = ceil((((K * (dims - 1) * 0.01) - 1) / (dims - 1)) * 100)
                print("not enough datapoints, newk=", newk)
                return self.condE(target, Vals, newk)
            if len(X) != 0:
                return sum(X) / len(X)
            else:
                return 0

        elif filter_len != 0 and self.r2filters != filter_vals:
            # Filter values changed since the cached R2 was built: rebuild it.
            P = ProbSpace(self.data)
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(
                filter_data, self.minPoints, self.maxPoints)
            # Count surviving points on the target column rather than on a
            # hard-coded 'B' column, which may not exist in the data.
            X = FilterData[self.includeVars[0]]
            print("filter len", filter_len)
            print("filtered datapoints:", len(X))
            print("include vars:", self.includeVars[:-filter_len])
            if len(X) < self.minPoints or len(X) <= minminpoints:
                newk = ceil((((K * (dims - 1) * 0.01) - 1) / (dims - 1)) * 100)
                print("not enough datapoints, newk=", newk)
                return self.condE(target, Vals, newk)
            self.R2 = RKHS(FilterData,
                           includeVars=self.includeVars[1:-filter_len],
                           delta=self.delta, s=self.s)
            self.r2filters = filter_vals

        elif self.R2 is None:
            self.R2 = RKHS(self.data, includeVars=self.includeVars[1:],
                           delta=self.delta, s=self.s)

        elif self.R2.varNames != include_vars:
            self.R2 = RKHS(self.data, includeVars=self.includeVars[1:],
                           delta=self.delta, s=self.s)

        # Filtered variables are already applied to the data, so they are
        # dropped from the query passed to R2.
        if filter_len != 0:
            return self.R2.condE(target, Vals[:-filter_len])
        return self.R2.condE(target, Vals)
Ejemplo n.º 7
0
class UPROB:
    """Conditional probability / expectation estimator mixing filtering and RKHS.

    A percentage ``k`` of the conditioning variables is applied as a
    ProbSpace filter on the data; the remaining variables are evaluated with
    an RKHS built on the filtered data. The RKHS objects and the filter
    values they were built for are cached on the instance.
    """

    def __init__(self, data, includeVars=None, delta=3, s=1.0, k=50, rangeFactor=None):
        """Store the data set and pick ``k`` / ``rangeFactor`` automatically.

        data is a dictionary varName -> [val1, val2, ... , valN].
        includeVars lists the variables to use; when None, every key of
        ``data`` is used, in dictionary order.

        NOTE(review): the ``k`` and ``rangeFactor`` arguments are stored and
        then always overwritten by the automatic selection at the end of
        this method -- confirm whether caller-supplied values should win.
        """
        assert isinstance(data, dict), "Error -- Data must be in the form of a dictionary varName -> [val1, val2, ... , valN]"

        self.data = data                    # data in the form of a dictionary
        self.delta = delta                  # deviation of test points
        self.s = s                          # smoothness factor
        self.k = k                          # % of variables to be filtered
        self.R1 = None                      # cached rkhsMV class
        self.R2 = None                      # cached rkhsMV class2
        self.r1filters = None               # cached filter values in R1
        self.r2filters = None               # cached filter values in R2
        self.minPoints = None               # min and max points for filtration
        self.maxPoints = None
        if includeVars is None:
            self.varNames = list(data.keys())
        else:
            self.varNames = includeVars
        # condP / condE index and measure includeVars, so it must never stay
        # None when the caller relies on the default.
        self.includeVars = self.varNames    # variables to be included from data
        self.D = len(self.varNames)
        self.N = len(data[self.varNames[0]])
        self.rangeFactor = rangeFactor

        # Automatic selection of K. When the datapoints are abundant, K=100
        # (DPROB) is utilized, else K is reduced to predominantly use JPROB
        # (K=25 for example).
        self.Ndim = self.N ** (1 / self.D)
        self.tresh = 10
        if self.D > 4 and self.Ndim < self.tresh:
            self.k = ceil(100 / (self.D - 1))
            self.rangeFactor = 0.01
            print(self.k, self.rangeFactor)
        else:
            self.k = 100
            self.rangeFactor = 0.5

    def condP(self, Vals, K=None):
        """Return the conditional probability P(X1=x1 | X2=x2, ...), or None.

        Vals is a list (x1, x2, ..., xn) such that the query is
        P(X1=x1 | X2=x2, ...); same calling convention as rkhsmv.

        K, when given, is the percentage of conditioning variables to handle
        by filtering the data. It replaces ``self.k`` persistently.

        Returns the probability when it is positive, otherwise None. When
        every conditioning variable is selected for filtering, the value of
        ``self.R1.P`` on the filtered data is returned as-is.
        """
        if K is not None:
            self.k = K
        filter_len = floor((len(self.includeVars) - 1) * self.k * 0.01)
        dims = len(Vals)
        print(dims, filter_len)
        if self.rangeFactor == 0:
            # A zero factor would divide by zero when computing maxPoints.
            self.rangeFactor = 0.5
        if filter_len != 0:
            filter_vars = self.includeVars[-filter_len:]
            filter_vals = Vals[-filter_len:]
            include_vars = self.includeVars[:-filter_len]
            target_points = ceil(self.N ** ((dims - filter_len) / dims))
            self.minPoints = target_points * self.rangeFactor
            self.maxPoints = target_points / self.rangeFactor
        else:
            filter_vars = []
            filter_vals = []
            include_vars = self.includeVars

        # Calculating R1
        zDim = floor(self.k * (dims - 1) * 0.01)
        minminpoints = 10
        if filter_len == len(self.includeVars) - 1:
            # Every conditioning variable was selected for filtering: try
            # progressively fewer filtered dimensions until enough points
            # survive the filter.
            for i in range(zDim, 0, -1):
                print("runing i=", i)
                filter_len = floor(i * self.k * 0.01)
                # NOTE(review): this scales N linearly (N * frac) while the
                # bounds computed above use a power (N ** frac) -- confirm
                # which one is intended.
                target_points = ceil(self.N * ((dims - i) / dims))
                self.minPoints = target_points * self.rangeFactor
                self.maxPoints = target_points / self.rangeFactor
                rkhsminpoints = minminpoints * (dims - i)
                if self.minPoints < rkhsminpoints:
                    print("minpoints < minminpoints", self.minPoints, rkhsminpoints)
                    continue
                P = ProbSpace(self.data)
                filter_data = [(filter_vars[j], filter_vals[j])
                               for j in range(filter_len)]
                print("filter metrics = ", filter_data)
                FilterData, parentProb, finalQuery = P.filter(
                    filter_data, self.minPoints, self.maxPoints)
                # Count the surviving points on the first included variable
                # rather than on a hard-coded 'B' column, which may not exist.
                n_filtered = len(FilterData[self.includeVars[0]])
                if n_filtered < self.minPoints:
                    print("not enough filter points for filterlen =", i,
                          n_filtered, self.minPoints, self.maxPoints)
                    continue
                print("filter len", filter_len)
                print("filtered datapoints:", n_filtered)
                print("include vars:", self.includeVars[:-filter_len])
                self.R1 = RKHS(FilterData,
                               includeVars=self.includeVars[:(dims - filter_len)],
                               delta=self.delta, s=self.s)
                self.r1filters = filter_vals
                return self.R1.P(Vals[:dims - filter_len])
            # No filter size produced enough points: use the unfiltered data.
            print("running empty")
            self.R1 = RKHS(self.data, includeVars=self.includeVars,
                           delta=self.delta, s=self.s)
            p = self.R1.condP(Vals)
            return p if p > 0 else None

        elif filter_len != 0 and self.r1filters != filter_vals:
            # Filter values changed since the cached R1 was built: rebuild it.
            P = ProbSpace(self.data)
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(
                filter_data, self.minPoints, self.maxPoints)
            self.R1 = RKHS(FilterData,
                           includeVars=self.includeVars[:-filter_len],
                           delta=self.delta, s=self.s)
            self.r1filters = filter_vals

        elif self.R1 is None:
            print("running empty")
            self.R1 = RKHS(self.data, includeVars=self.includeVars,
                           delta=self.delta, s=self.s)

        elif self.R1.varNames != include_vars:
            self.R1 = RKHS(self.data, includeVars=self.includeVars,
                           delta=self.delta, s=self.s)

        # Filtered variables are already applied to the data, so they are
        # dropped from the query passed to R1.
        query = Vals[:-filter_len] if filter_len != 0 else Vals
        p = self.R1.condP(query)
        return p if p > 0 else None

    def condE(self, target, Vals, K=None):
        """Return the conditional expectation E(Y | X1=x1, X2=x2, ...).

        target is passed through to the cached RKHS's ``condE``.
        Vals is a list (x1, x2, ..., xn) of conditioning values; same calling
        convention as rkhsmv.

        K is the percentage of conditioning variables to handle by filtering
        the data. It defaults to ``self.k`` and, unlike in condP, is not
        stored on the instance.

        When the filter leaves too few points, the method calls itself again
        with a smaller K so that one fewer variable is filtered.
        """
        if K is None:
            K = self.k
        filter_len = floor((len(self.includeVars) - 1) * K * 0.01)
        dims = len(Vals) + 1
        if self.rangeFactor is None:
            self.rangeFactor = 0.8
        minminpoints = 5

        if filter_len != 0:
            filter_vars = self.includeVars[-filter_len:]
            filter_vals = Vals[-filter_len:]
            include_vars = self.includeVars[1:-filter_len]
            target_points = self.N ** ((dims - filter_len) / dims)
            self.minPoints = target_points * self.rangeFactor
            self.maxPoints = target_points / self.rangeFactor
        else:
            filter_vars = []
            filter_vals = []
            # R2 is built from includeVars[1:] (the target is excluded), so
            # the cache check below must compare against that same list;
            # comparing against the full list forced a rebuild on every call.
            # Presumably RKHS.varNames mirrors the includeVars it was given --
            # TODO confirm.
            include_vars = self.includeVars[1:]

        if filter_len == len(self.includeVars) - 1:
            # Every conditioning variable is filtered: the expectation is the
            # plain mean of the target over the filtered data.
            P = ProbSpace(self.data)
            filter_vars = self.includeVars[1:]
            filter_vals = Vals
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(
                filter_data, self.minPoints, self.maxPoints)
            X = FilterData[self.includeVars[0]]
            if len(X) < self.minPoints:
                # Lower K so that one fewer variable is filtered, then retry.
                newk = ceil((((K * (dims - 1) * 0.01) - 1) / (dims - 1)) * 100)
                print("not enough datapoints, newk=", newk)
                return self.condE(target, Vals, newk)
            if len(X) != 0:
                return sum(X) / len(X)
            else:
                return 0

        elif filter_len != 0 and self.r2filters != filter_vals:
            # Filter values changed since the cached R2 was built: rebuild it.
            P = ProbSpace(self.data)
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(
                filter_data, self.minPoints, self.maxPoints)
            # Count surviving points on the target column rather than on a
            # hard-coded 'B' column, which may not exist in the data.
            X = FilterData[self.includeVars[0]]
            print("filter len", filter_len)
            print("filtered datapoints:", len(X))
            print("include vars:", self.includeVars[:-filter_len])
            if len(X) < self.minPoints or len(X) <= minminpoints:
                newk = ceil((((K * (dims - 1) * 0.01) - 1) / (dims - 1)) * 100)
                print("not enough datapoints, newk=", newk)
                return self.condE(target, Vals, newk)
            self.R2 = RKHS(FilterData,
                           includeVars=self.includeVars[1:-filter_len],
                           delta=self.delta, s=self.s)
            self.r2filters = filter_vals

        elif self.R2 is None:
            self.R2 = RKHS(self.data, includeVars=self.includeVars[1:],
                           delta=self.delta, s=self.s)

        elif self.R2.varNames != include_vars:
            self.R2 = RKHS(self.data, includeVars=self.includeVars[1:],
                           delta=self.delta, s=self.s)

        # Filtered variables are already applied to the data, so they are
        # dropped from the query passed to R2.
        if filter_len != 0:
            return self.R2.condE(target, Vals[:-filter_len])
        return self.R2.condE(target, Vals)
Ejemplo n.º 8
0
def F(r,
      x=0,
      dat=None):
    """Return the mean of ``r.K(sample, x)`` over every sample in ``dat``.

    r must provide a two-argument method ``K``; x is passed as its second
    argument. dat must be a non-empty sized iterable: the default of None
    raises TypeError and an empty sequence raises ZeroDivisionError.
    """
    # Accumulate under a local name that does not shadow the builtin sum().
    total = 0.0
    for sample in dat:
        total += r.K(sample, x)
    return total / len(dat)


# Read the data set and keep its 'X' column for evaluation.
path = '../models/Mdist.csv'
d = getData.DataReader(path)
data = d.read()
dat = data['X']
# NOTE(review): `s` is not referenced in the visible code; the RKHS call
# below repeats the literal 1.3 instead of using it.
s = 1.3
r = RKHS(data, ['X'], s=1.3)
# NOTE(review): `max` and `min` shadow the Python builtins for the rest of
# the module.
max = 5
min = -5
num = 200
ker = []    # r.P(i) at each evaluation point
xax = []    # the evaluation points themselves
gdf = []    # pdf(i) at each evaluation point

# Spacing that divides [min, max] into `num` intervals.
step = (max - min) / num
#print(step)
# The stop value is max + step so that the range is intended to reach max;
# np.arange excludes its stop value.
for i in np.arange(min, max + step, step):
    ker.append(r.P(i))
    xax.append(i)
    gdf.append(pdf(i))

# plt.plot(xax,ker,label = 'F(X),sigma='+str(sigma))
Ejemplo n.º 9
0
        var = vars[i]
        if var[0] != 'A':
            cond.append(var)
    # There is a target: 'A<dims>' for each conditional dimension.  So for 3D (2 conditionals),
    # we would use A3.
    target = 'A' + str(dims)

    amean = prob.E(target)
    astd = prob.distr(target).stDev()
    amin = amean - lim * astd
    arange = lim * astd - lim * -astd
    aincr = arange / (evalpts - 1)
    #print('A: mean, std, range, incr = ', amean, astd, arange, aincr)
    smoothness = 1.0
    R1 = RKHS(prob.ds,
              delta=None,
              includeVars=[target] + cond[:dims - 1],
              s=smoothness)
    R2 = RKHS(prob.ds, delta=None, includeVars=cond[:dims - 1], s=smoothness)

    U = UPROB(prob.ds,
              includeVars=[target] + cond[:dims - 1],
              k=25,
              rangeFactor=RF)

    evaluations = 0
    start = time.time()
    results = []
    totalErr_jp = 0
    totalErr_up = 0
    totalErr_ps = 0
    conds = len(cond)