# --- Setup and residual caching ---
# Builds the RKHS pair over X1/Y1 and loads precomputed residuals from
# residual.txt; if the cache is missing or empty, recomputes them via
# residual() and writes the cache.
sigma = [0.2]
SigmaErrors = []
MaxErrors = []
traces = []
times = []
choice = [0, 1]
j = 0
e = []
file = 'residual.txt'  # NOTE(review): shadows the (py2) builtin name; kept for later chunks
s = 1 * size**(-1 / 5)  # bandwidth heuristic ~ n^(-1/5) — TODO confirm intent
R1 = RKHS(X1, kparms=[s])
R2 = RKHS(Y1, kparms=[s])
# FIX: the original opened the cache file unconditionally (crashing when it
# did not exist, despite the len(e) == 0 recompute path below) and never
# closed handles on error. Use context managers and treat a missing file
# the same as an empty cache.
try:
    with open(file, "r") as readfile:
        e = [float(line) for line in readfile.read().splitlines()]
except FileNotFoundError:
    e = []
if len(e) == 0:
    t1 = time.time()
    e = residual(R1, R2)
    with open(file, "w") as cachefile:
        np.savetxt(cachefile, e)
    t2 = time.time()
# Filter the probability space on the conditional, then build RKHS objects
# over the filtered variables, timing the RKHS construction.
# NOTE(review): this fragment is truncated — the final else-branch continues
# past the end of this chunk.
rtime1 = time.time()
# NOTE(review): an earlier chunk sets choice = [0, 1] (a list), so
# `choice == 1` would always be False here — verify which scalar is intended.
if choice == 1:
    FilterData, parentProb, finalQuery = P.filter([('X', conditional)])
else:
    FilterData, parentProb, finalQuery = P.filter([('Y', conditional)])
X = FilterData['X']
Y = FilterData['Y']
Z = FilterData['Z']
filterlen = len(Z)
s = 0.2 #sigma
# RKHS over the non-conditioned variable, paired with Z below.
if choice == 0:
    r1 = RKHS(X, kparms=[s])
else:
    r1 = RKHS(Y, kparms=[s])
r2 = RKHS(Z, kparms=[s])
rtime2 = time.time()
Rtime = rtime2 - rtime1  # wall-clock time to build the RKHS objects
Ptime = 0
testPoints = []
# Test-point range depends on which variable is conditioned on.
if choice == 1:
    testMin = -10
    testMax = 10
else:
    testMin = -6
# Load the synthetic conditional-probability dataset and set up both the
# RKHS estimators and an RFF (random Fourier features) Gaussian-process
# regressor for comparison.
path = '../models/Cprobdata.csv'
d = getData.DataReader(path)
data = d.read()
X1 = data['X']
Y1 = data['Y']
Datasize = 10000
Featsize = 10            # number of random Fourier features
Featsize2 = int(Datasize / 10)
# The RFF regressor expects column-vector inputs.
X = np.reshape(X1[:Datasize], (Datasize, 1))
Y = np.reshape(Y1[:Datasize], (Datasize, 1))
s = 0.2 #sigma value for RKHS
r1 = RKHS(X1, kparms=[s])
r2 = RKHS(Y1, kparms=[s])
R = RFFGaussianProcessRegressor(rff_dim=Featsize, sigma=0.2)
R.fit(X, Y)
#Z,W,b = get_rffs(X,Featsize)
# Random projection weights/offsets for a hand-rolled RFF feature map.
W = np.random.normal(loc=0, scale=1, size=(Featsize, 1))
b = np.random.uniform(0, 2 * np.pi, size=Featsize)
print(shape(W))  # NOTE(review): `shape` presumably from a star import — verify (np.shape?)
print(W)
# print("x=-1, y=",rff(2,W,X,Y,Datasize))
testPoints = []
tp = testMin  # NOTE(review): testMin is defined outside this fragment — confirm
# Build an inverse-CDF (quantile) lookup: estimate the CDF with an RKHS,
# sample it uniformly into (p, x) pairs, then interpolate via icdf().
testMin = -5
testMax = 5
tp = testMin
numTP = 200 # Number of test points for graphing
interval = (testMax - testMin) / numTP
X2 = [] # Resampled X
stdx = np.std(X) # Std Dev of X
# Generate a uniform range of test points, from testMin to testMax.
for i in range(numTP + 1):
    tps.append(tp)  # NOTE(review): `tps` is not initialized in this fragment — verify
    tp += interval
sigma = 1 / log(len(X), 4) # Heuristic value for Sigma in the Gaussian kernel.
delta = 3 # Ignore points more than delta from the mean. Optimization.
# RKHS for the cdf function. rcdf.F(x) = cdf(x)
rcdf = RKHS(X, f=Fcdf, k=kcdf, kparms=[sigma, delta])
# Uniformly sample the cdf to generate a series of p -> x mappings
U = np.random.uniform(-5, 5, 1000)
p2x = []
for i in range(len(U)):
    x = U[i]
    p = rcdf.F(x)
    p2x.append((p, x))
p2x.sort()  # sorted by p, enabling interpolation on probability
# Function to retrieve the interpolated mapping from p2x. icdf(p) = quantile(p)
def icdf(p):
    # NOTE(review): this function body is truncated at the chunk boundary.
    x = p2x[-1][1]
    prevT = p2x[0]
    for j in range(len(p2x)):
        t = p2x[j]
#Probpy Calculation t1 = time.time() ps = ProbSpace(data) tp = testMin for i in range(numTP + 1): p = ps.distr('Y', [('X', tp)]).E() probpy.append(p) tp += interval t2 = time.time() Probtime = t2 - t1 #RKHS Calculation - E(Y|X=x) t1 = time.time() r1 = RKHS(X1, kparms=[sigma]) r2 = RKHS(Y1, kparms=[sigma]) tp = testMin for i in range(numTP + 1): r = m(tp, r1, r2) rkhsEX.append(r) tp += interval t2 = time.time() RKHSEXtime = t2 - t1 #RKHS Calculation - P(Y=y|X=x) t1 = time.time() r1 = RKHS(X1, kparms=[sigma]) r2 = RKHS(Y1, kparms=[sigma]) tp = testMin
# Timing setup: build the RKHS estimators and the ProbSpace model over the
# first Datasize samples, accumulating per-method setup time.
Featsize = 100

# The RFF method requires the dataset reshaped into column vectors.
X = np.reshape(X1[:Datasize], (Datasize, 1))
Y = np.reshape(Y1[:Datasize], (Datasize, 1))

# Accumulated wall-clock time per method.
Trkhs, Trff, Tprob = 0, 0, 0

s = 0.2  # sigma value for RKHS

# Time RKHS construction.
t1 = time.time()
r1 = RKHS(X1[:Datasize], kparms=[s])
r2 = RKHS(Y1[:Datasize], kparms=[s])
t2 = time.time()
Trkhs += t2 - t1

# Time ProbSpace construction.
t1 = time.time()
P = ProbSpace(data)
t2 = time.time()
Tprob += t2 - t1

# Test-grid parameters and result accumulators.
testPoints = []
tp = testMin
numTP = 200
interval = (testMax - testMin) / numTP
sq = []
Probpy = []
# Evaluate RKHS F(x) over a uniform grid for several sigmas, recording the
# mean absolute error against the idealized testF and one trace per sigma.
testMax = 5
tp = testMin
numTP = 1000
interval = (testMax - testMin) / numTP
# Generate the uniform grid and its reference values in one pass.
# BUG FIX: the original advanced `tp` twice per iteration and sampled
# testF at the already-incremented point, so testPoints[i] and tfs[i]
# referred to different x values and the grid spanned twice the intended
# range. Sample testF at the same point that is appended, and advance once.
for i in range(numTP + 1):
    testPoints.append(tp)
    tfs.append(testF(tp))
    tp += interval
# Create chart traces of F(x) with various sigmas
start = time.time()
delta = 3
#delta = None
for sigma in sigmas:
    r = RKHS(X, kparms=[sigma, delta])
    fs = [] # The results of F(x) for each test point
    totalErr = 0
    for i in range(len(testPoints)):
        p = testPoints[i]
        fp = r.F(p)
        fs.append(fp)
        tfp = tfs[i]
        err = abs(fp - tfp) # F(x) - idealized cdf value
        totalErr += err
    errs[sigma] = totalErr / numTP  # mean absolute error for this sigma
    traces.append(fs)
end = time.time()
print('elapsed time = ', end - start)
print('total errors = ', errs)
#plt.plot(testPoints, ctfs, label='testF(x)', color='#000000', lineWidth=3)
# Generate a uniform range of test points. # While at it, generate our expected pdf and cdf for i in range(numTP + 1): testPoints.append(tp) tfp = testF(tp) tfs.append(tfp) ctfp = testFCDF(tp) ctfs.append(ctfp) tp += interval delta = None start = time.time() evals = 0 for size in dataSizes: # Choose a reasonable sigma based on data size. #sigma = 1 / log(size, 4) r1 = RKHS(X[:size],k = ksaw, f = fsaw, kparms=[2, delta]) #r2 = RKHS(X[:size], k=kcdf, f=Fcdf, kparms=[sigma, delta]) fs = [] # The results using a pdf kernel fsc = [] # Results using a cdf kernel totalErr = 0 deviations = [] for i in range(len(testPoints)): p = testPoints[i] fp = r1.F(p) evals += 1 fs.append(fp) fc = 0 fc = r2.F(p) evals += 1 fsc.append(fc)
[1, 2, 3, 4, 5, 6, 7], [2, 3, 4, 5], [1, 2, 3, 3, 3, 3, 3]] testPoints = [] testMin = -3 testMax = 10 tp = testMin numTP = 200 interval = (testMax - testMin) / numTP # Generate a uniform range of test points. for i in range(numTP + 1): testPoints.append(tp) tp += interval sigma = 1.0 traces = [] for j in range(len(FuncAddr)): # Choose a reasonable sigma based on data size. r1 = RKHS(FuncAddr[j], kparms=[sigma]) #r1 = RKHS(X[:size], kparms = [1], k=ksaw) fs = [] # The results using a pdf kernel totalErr = 0 deviations = [] for j in range(len(testPoints)): p = testPoints[j] fp = r1.F(p) fs.append(fp) traces.append(fs) # pdf trace for t in range(len(traces)): fs = traces[t] label = 'FuncAddr =' + str(FuncAddr[t]) plt.plot(testPoints, fs, label=label, linestyle='solid') plt.legend() plt.show()