from numpy import diag, dot, kron, matrix, sqrt
from numpy.linalg import svd
from sklearn.kernel_approximation import RBFSampler


def NaiveDecomposableGaussianORFF(X, A, gamma=1., D=100, eps=1e-5,
                                  random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.

    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite).

    gamma : {float},
        Gamma parameter of the RBF kernel.

    D : {integer},
        Number of random features.

    eps : {float},
        Cutoff threshold for the singular values of A.

    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    # Decompose A = B B^T
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF map as a dense matrix via the Kronecker product
    return matrix(kron(phiX, B))
from numpy import matrix
from sklearn.kernel_approximation import RBFSampler


def NaiveCurlFreeGaussianORFF(X, gamma=1., D=100, eps=1e-5, random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.

    gamma : {float},
        Gamma parameter of the RBF kernel.

    D : {integer},
        Number of random features.

    eps : {float},
        Cutoff threshold for the singular values of A.

    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Weight each random feature by the corresponding random frequency
    phiX = (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
            phi_s.random_weights_.reshape((1, -1, phiX.shape[1])))
    return matrix(phiX.reshape((-1, phiX.shape[2])))
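# Usage sketch (added for illustration, not from the original sources): build
# the naive decomposable ORFF map on toy data. Assumes the definition of
# NaiveDecomposableGaussianORFF above; kron(phiX, B) pairs the D scalar random
# features with the rows of the factor B of A.
import numpy as np

X = np.random.randn(10, 3)   # 10 samples, 3 features
A = np.eye(2)                # output operator with 2 targets (rank r = 2)
Phi = NaiveDecomposableGaussianORFF(X, A, gamma=1., D=50)
print(Phi.shape)             # (10 * 2, 50 * 2) = (n_samples * r, D * n_targets)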
def test_rbf_sampler():
    """test that RBFSampler approximates kernel on random data"""
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    assert_array_almost_equal(kernel, kernel_approx, 1)
def test_rbf_sampler():
    # test that RBFSampler approximates kernel on random data
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert_less_equal(np.abs(np.mean(error)), 0.01)  # close to unbiased
    np.abs(error, out=error)
    assert_less_equal(np.max(error), 0.1)   # nothing too far off
    assert_less_equal(np.mean(error), 0.05)  # mean is fairly close
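# Illustration (added; not part of the original tests): the Monte Carlo error
# of the random-feature approximation decays roughly as 1 / sqrt(n_components),
# which is why the tolerances above are loose for n_components=1000.
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.randn(50, 4)
K = rbf_kernel(X, gamma=10.)
for D in (10, 100, 1000, 10000):
    Z = RBFSampler(gamma=10., n_components=D, random_state=42).fit_transform(X)
    print(D, np.max(np.abs(K - Z.dot(Z.T))))  # max error shrinks as D grows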
def trainSGD(self):
    sgd = SGDClassifier(
        loss=self.loss,
        penalty=self.reg,
        alpha=self.alpha,
        n_iter=self.epochs,
        shuffle=True,
        n_jobs=self.multicpu,
        class_weight="auto",
    )
    # print("Classifier (sklearn SGD): training the model \t(%s)" % self.dspath)
    if self.kernel_approx is True:
        # n_components must be an integer (was 100.0 in the original)
        rbf_feature = RBFSampler(gamma=1, n_components=100, random_state=1)
        Xk = rbf_feature.fit_transform(self.X)
        self.glm = OneVsRestClassifier(sgd).fit(Xk, self.Y)
    else:
        self.glm = OneVsRestClassifier(sgd).fit(self.X, self.Y)
    print("Classifier (sklearn SGD): Done. \t(%s)" % self.dspath)
def train_models(X_train, y_train, X_test, y_test):
    clf = linear_model.SGDClassifier(penalty='elasticnet')
    print(clf)
    print("fitting a linear elasticnet (L1+L2 regularized linear classif.) with SGD")
    clf = clf.fit(X_train, y_train)
    print("score on the training set", clf.score(X_train, y_train))
    print("score on 80/20 split", clf.score(X_test, y_test))

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_train_feats = rbf_feature.fit_transform(X_train)
    X_test_feats = rbf_feature.transform(X_test)
    print("fitting a linear elasticnet with SGD on RBF sampled features")
    clf = clf.fit(X_train_feats, y_train)
    print("score on the training set", clf.score(X_train_feats, y_train))
    print("score on 80/20 split", clf.score(X_test_feats, y_test))

    clf2 = RandomForestClassifier(max_depth=None, min_samples_split=3)
    print(clf2)
    print("fitting a random forest")
    clf2 = clf2.fit(X_train, y_train)
    print("score on the training set", clf2.score(X_train, y_train))
    print("score on 80/20 split", clf2.score(X_test, y_test))

    clf3 = svm.SVC(kernel='linear')
    print(clf3)
    print("fitting an SVM with a linear kernel")
    clf3 = clf3.fit(X_train, y_train)
    print("score on the training set", clf3.score(X_train, y_train))
    print("score on 80/20 split", clf3.score(X_test, y_test))

    clf4 = svm.SVC(kernel='rbf')
    print(clf4)
    print("fitting an SVM with an RBF-kernel")
    clf4 = clf4.fit(X_train, y_train)
    print("score on the training set", clf4.score(X_train, y_train))
    print("score on 80/20 split", clf4.score(X_test, y_test))

    clf5 = linear_model.LogisticRegression(penalty='l1', tol=0.01)
    print(clf5)
    print("fitting a logistic regression reg. with L1")
    clf5 = clf5.fit(X_train, y_train)
    print("score on the training set", clf5.score(X_train, y_train))
    print("score on 80/20 split", clf5.score(X_test, y_test))
def __init__(self, X, y, dataset, policy_name, scale=True, n_iter=10,
             passive=True):
    seed = RandomState(1234)
    self.X = np.asarray(X, dtype=np.float64)
    self.y = np.asarray(y)
    self.X = StandardScaler().fit_transform(self.X) if scale else self.X
    self.policy_name = policy_name
    self.dataset = dataset
    self.passive = passive

    # estimate the kernel using the 90th percentile heuristic
    random_idx = seed.choice(X.shape[0], 1000)
    distances = pairwise_distances(self.X[random_idx], metric='l1')
    self.gamma = 1 / np.percentile(distances, 90)
    transformer = RBFSampler(gamma=self.gamma, random_state=seed,
                             n_components=100)
    self.X_transformed = transformer.fit_transform(self.X)

    n_samples = self.X.shape[0]
    train_size = min(10000, int(0.7 * n_samples))
    test_size = min(20000, n_samples - train_size)
    self.kfold = StratifiedShuffleSplit(self.y, n_iter=n_iter,
                                        test_size=test_size,
                                        train_size=train_size,
                                        random_state=seed)
from numpy import diag, dot, sqrt
from numpy.linalg import svd
from scipy.sparse.linalg import LinearOperator
from sklearn.kernel_approximation import RBFSampler


def EfficientDecomposableGaussianORFF(X, A, gamma=1., D=100, eps=1e-5,
                                      random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.

    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite).

    gamma : {float},
        Gamma parameter of the RBF kernel.

    D : {integer},
        Number of random features.

    eps : {float},
        Cutoff threshold for the singular values of A.

    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    # Decompose A = B B^T
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    cshape = (D, B.shape[0])
    rshape = (X.shape[0], B.shape[1])
    return LinearOperator(
        (phiX.shape[0] * B.shape[1], D * B.shape[0]),
        matvec=lambda b: dot(phiX, dot(b.reshape(cshape), B)),
        rmatvec=lambda r: dot(phiX.T, dot(r.reshape(rshape), B.T)),
        dtype=float)
def EfficientCurlFreeGaussianORFF(X, gamma=1., D=100, eps=1e-5,
                                  random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.

    gamma : {float},
        Gamma parameter of the RBF kernel.

    D : {integer},
        Number of random features.

    eps : {float},
        Cutoff threshold for the singular values of A.

    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)
    return LinearOperator(
        (phiX.shape[0] * X.shape[1], phiX.shape[1]),
        matvec=lambda b: dot(
            phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
            phi_s.random_weights_.reshape((1, -1, phiX.shape[1])), b),
        rmatvec=lambda r: dot(
            (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
             phi_s.random_weights_.reshape((1, -1, phiX.shape[1]))
             ).reshape(phiX.shape[0] * X.shape[1], phiX.shape[1]).T, r),
        dtype=float)
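# Quick check (added for illustration, not from the original sources): with
# A = I the naive dense map kron(phiX, B) and the efficient LinearOperator
# represent the same linear map, so applying both to the same parameter vector
# must agree. Assumes NaiveDecomposableGaussianORFF and
# EfficientDecomposableGaussianORFF as defined above, and the same random_state
# so both draw identical random features.
import numpy as np

X = np.random.randn(10, 3)
A = np.eye(2)
Phi_naive = NaiveDecomposableGaussianORFF(X, A, D=50, random_state=0)
Phi_eff = EfficientDecomposableGaussianORFF(X, A, D=50, random_state=0)
theta = np.random.randn(Phi_eff.shape[1])
print(np.allclose(np.ravel(Phi_naive.dot(theta)), Phi_eff.matvec(theta)))
# The operator never materialises the (n * p) x (D * r) matrix, which is the
# point of the "efficient" variant.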
def __init__(self, name: str, input_size: int, filter_size: int, gamma: float,
             m: int, R: float, r: int, lr: float):
    self.name = name
    self.input_size = input_size
    self.filter_size = filter_size
    self.patch_size = filter_size ** 2
    self.output_size = self.input_size - self.filter_size + 1
    self.n_patchs = self.output_size ** 2
    self.m = m
    self.R = R
    self.lr = lr
    self.rbf_feature = RBFSampler(gamma=gamma, n_components=m, random_state=1)
    self.svd = TruncatedSVD(n_components=r)
def get_orff_map(self, X, D=100, random_state=0):
    r"""Return the Random Fourier Feature map associated with the data X.

    .. math::
           K_x: Y \mapsto \tilde{\Phi}(X)

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Samples.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    self.r = 1
    if not hasattr(self, 'Xb_'):
        self.phi_ = RBFSampler(gamma=self.gamma, n_components=D,
                               random_state=random_state)
        self.phi_.fit(X)
        self.Xb_ = self.phi_.transform(X)
        self.Xb_ = (self.Xb_.reshape((self.Xb_.shape[0], 1,
                                      self.Xb_.shape[1])) *
                    self.phi_.random_weights_.reshape((1, -1,
                                                       self.Xb_.shape[1])))
        self.Xb_ = self.Xb_.reshape((-1, self.Xb_.shape[2]))

    D = self.phi_.n_components
    if X is self.Xb_:
        # (fix) dot takes two arguments; the original called dot(self.Xb_ * b)
        return LinearOperator(self.Xb_.shape,
                              matvec=lambda b: dot(self.Xb_, b),
                              rmatvec=lambda r: dot(self.Xb_.T, r))
    else:
        Xb = self.phi_.transform(X)
        # TODO:
        # w = self.phi_.random_weights_.reshape((1, -1, Xb.shape[1]))
        # wn = np.linalg.norm(w)
        # Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
        #       wn * np.eye()w np.dot(w.T, w) / wn)
        # (fix) mirror the random-weight scaling applied to the stored
        # features above, so the reshape below has a third axis to use
        Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
              self.phi_.random_weights_.reshape((1, -1, Xb.shape[1])))
        Xb = Xb.reshape((-1, Xb.shape[2]))
        return LinearOperator(Xb.shape,
                              matvec=lambda b: dot(Xb, b),
                              rmatvec=lambda r: dot(Xb.T, r))
                                       random_state=42, ngram_range=(2, 4))

# Fit the rbf_sampler with the similarity matrix.
column_transformer = make_column_transformer(
    (similarity_encoder, ['NONPROPRIETARYNAME']),
    (OneHotEncoder(handle_unknown='ignore'), ['DOSAGEFORMNAME', 'ROUTENAME']),
    sparse_threshold=1)

transformed_categories = column_transformer.fit_transform(X_encoder)

# gamma is a parameter of the rbf function that sets how fast the similarity
# between two points decreases as the distance between them grows. It is
# data-specific and needs to be chosen carefully, for example using
# cross-validation.
rbf_sampler = RBFSampler(gamma=0.5, n_components=n_out_rbf, random_state=42)
rbf_sampler.fit(transformed_categories)


def encode(X, y_int, one_hot_encoder, column_transformer, rbf_sampler):
    X_sim_encoded = column_transformer.transform(X)
    X_highdim = rbf_sampler.transform(X_sim_encoded.toarray())
    y_onehot = one_hot_encoder.transform(y_int.reshape(-1, 1))
    return X_highdim, y_onehot


# The inputs and labels of the val and test sets have to be pre-processed the
# same way the training set was processed:
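# Sketch (added for illustration): one way to choose gamma by cross-validation,
# as the comment above suggests. The pipeline and toy data here are
# hypothetical, not part of the original example.
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

X_toy = np.random.randn(200, 10)
y_toy = np.random.randint(0, 2, 200)
pipe = make_pipeline(RBFSampler(n_components=300, random_state=42),
                     SGDClassifier(max_iter=1000, tol=1e-3))
grid = GridSearchCV(pipe, {'rbfsampler__gamma': [0.01, 0.1, 0.5, 1.0, 10.0]},
                    cv=3)
grid.fit(X_toy, y_toy)
print(grid.best_params_)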
def run(args):
    # if __name__ == "__main__":
    # initialize parameters of interest
    # Method:
    #   0: linear policy
    #   1: RBF policy
    #   2: MLP policy
    # method = args[0]
    # RBF_components = args[1]
    # MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(process_index + 100)
    # process_index = 0
    # np.random.seed(process_index + 100)
    # vel_var = args[5]
    # num_targets = args[6]
    method = 0
    RBF_components = 20
    MLP_neurons = 50
    vel_var = .001
    num_targets = min(6, max(2, np.random.poisson(3)))
    num_targets = np.random.randint(2, 10)
    # num_targets = 4
    print("Starting Thread:" + str(process_index))

    # Initialize all the parameters
    params = {0: {}, 1: {}, 2: {}}
    if method == 0:
        params[0]["weight2"] = np.random.normal(0, .3, [2, num_states_layer2])
        # params[0]["weight2"] = np.array(
        #     [[3.97573312, 0.4639474, 2.27280486, 12.9085868,
        #       3.45722461, 6.36735166],
        #      [-11.87940874, 2.59549414, -5.68556954, 2.87746786,
        #       7.08059984, 5.5631133]])
        params[0]["weight"] = np.array(
            [[7.18777985, -13.68815256, 1.69010242, -5.62483187,
              -4.30451483, 10.09592853],
             [13.33104057, 13.60537864, 3.46939294, 0.8446329,
              -14.79733566, -4.78599648]])
        # params[0]["weight"] = np.array(
        #     [[1.45702249, -1.17664153, -0.11593174, 1.02967173, -0.25321044,
        #       0.09052774],
        #      [0.67730786, 0.3213561, 0.99580938, -2.39007038, -1.16340594,
        #       -1.77515938]])
    elif method == 1:
        featurizer = sklearn.pipeline.FeatureUnion(
            [("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components,
                                 random_state=1))])
        # Use this featurizer for normalization
        featurizer.fit(np.array(list_of_states))
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method == 2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0, 1, [MLP_neurons, 1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    weight_saver2_1 = []
    weight_saver2_2 = []
    # for episode_counter in range(0, N_max):

    # Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True

    result_folder = base_path + folder_name + "/"
    reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" +
                       str(process_index) + "_linear_6states.txt", "a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" +
                      str(process_index) + "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" +
                             str(vel_var) + "_" + str(process_index) +
                             "_linear_6states.txt", "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" +
                    str(process_index) + "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +
                          "_" + str(process_index) + "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" +
                       str(process_index) + "_linear_6states.txt", "a")

    # flatten initial weight and store the values
    if method == 0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = [str(x) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method == 2:
        pass
    # weight = np.reshape(np.array(weights[0]), [2, 6])

    init_max_target = 3
    num_targets = init_max_target
    while episode_counter < N_max:
        if episode_counter % 1000 == 0 and episode_counter > 0:
            init_max_target += 1
        init_max_target = min(20, init_max_target)
        if episode_counter % 100 == 0 and episode_counter > 0:
            num_targets = np.random.randint(3, init_max_target + 1)
        sigma = gen_learning_rate(episode_counter, sigma_max, .1, 5000)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        # print(episodes_counter)
        scen = scenario(1, 1)
        bearing_var = 1E-2  # variance of bearing measurement

        # Target information
        x = 10000 * np.random.random([num_targets]) - 5000  # initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000  # initial y-location
        xdot = 10 * np.random.random([num_targets]) - 5  # initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5  # initial ydot-value
        # TEMP
        # x = [2000, -2000]
        # y = [2000, 2000]
        # xdot = [1, 1]
        # ydot = [-1, -1]
        init_target_state = []
        init_for_smc = []
        for target_counter in range(0, num_targets):
            # initialize target state
            init_target_state.append([x[target_counter], y[target_counter],
                                      xdot[target_counter],
                                      ydot[target_counter]])
            # init state for the tracker (tracker doesn't know about the
            # initial state)
            init_for_smc.append([x[target_counter] + np.random.normal(0, 5),
                                 y[target_counter] + np.random.normal(0, 5),
                                 np.random.normal(0, 5),
                                 np.random.normal(0, 5)])
        # temp_loc = np.array(init_target_state[0:2]).reshape(2, 1)
        # init_location_estimate = temp_loc + 0 * np.random.normal(np.zeros([2, 1]), 10)
        # init_location_estimate = [init_location_estimate[0][0], init_location_estimate[1][0]]
        # init_velocity_estimate = [6 * random.random() - 3, 6 * random.random() - 3]
        # init_velocity_estimate = [init_target_state[2], init_target_state[3]]
        # init_estimate = init_location_estimate + init_velocity_estimate

        # initial covariance of state estimation
        init_covariance = np.diag([MAX_UNCERTAINTY, MAX_UNCERTAINTY,
                                   MAX_UNCERTAINTY, MAX_UNCERTAINTY])
        t = []
        for i in range(0, num_targets):
            # constant-velocity model for target motion
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var,
                            "CONS_V"))
        A, B = t[0].constant_velocity(1E-10)  # get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var
        tracker_object = []
        for i in range(0, num_targets):
            # create tracker object
            tracker_object.append(EKF_tracker(init_for_smc[i],
                                              np.array(init_covariance),
                                              A, B, x_var, y_var, bearing_var))
        # smc_object = smc_tracker(A, B, x_var, y_var, bearing_var, 1000, np.array(init_for_smc))

        # Initialize sensor object
        if method == 0:
            # create sensor object (stochastic policy)
            s = sensor("POLICY_COMM_LINEAR")
        elif method == 1:
            s = sensor("POLICY_COMM_RBF")
        elif method == 2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)  # create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = []; y_truth = []; x_vel_truth = []; y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0, num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n = 0
        violation = 0

        # store required information
        episode_state = []
        episode_state_out_layer = []
        episode_MLP_state = []
        episode_actions = []
        avg_uncertainty = []
        max_uncertainty = []

        while episode_condition:
            temp_m = []
            input_state_temp = []
            for i in range(0, num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,
                                                       s.current_location))
            m.append(temp_m)
            temp_reward = []
            target_actions = []
            for i in range(0, num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1]
                                         ) / tracker_object[i].innovation_var[-1]
                # print(normalized_innovation)
                # if (normalized_innovation < 1E-4 or n < 10) and n < 200:  # end of episode
                current_state = list(tracker_object[i].x_k_k.reshape(
                    len(tracker_object[i].x_k_k))) + list(s.current_location)
                # print(current_state)

                # state normalization
                x_slope = 2.0 / (scen.x_max - scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)
                x_slope_sensor = 2.0 / 40000
                y_slope_sensor = 2.0 / 40000
                vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                current_state[0] = -1 + x_slope * (current_state[0] - scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] - scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)

                # Refactor states based on the usage
                if method == 0 or method == 2:
                    input_state = current_state
                    input_state_temp.append(input_state)  # store input states
                elif method == 1:
                    # Generate states for the RBF input
                    input_state = featurizer.transform(
                        np.array(current_state).reshape(1, len(current_state)))
                    input_state = list(input_state[0])

                target_actions.append(s.generate_action(params, input_state, .01))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state)  #### Need to get modified
                if method == 2:
                    episode_MLP_state.append(extra_information)  # need to get modified
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(
                    estimate[2:4] - np.array([t[i].current_velocity[0],
                                              t[i].current_velocity[1]]).reshape(2, 1)))
                pos_error[i].append(np.linalg.norm(
                    estimate[0:2] - np.array(truth).reshape(2, 1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                # if unormalized_uncertainty > MAX_UNCERTAINTY:
                #     normalized_uncertainty = 1
                # else:
                #     normalized_uncertainty = (1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                # if len(uncertainty[i]) < window_size + window_lag:
                #     temp_reward.append(0)
                # else:
                #     current_avg = np.mean(uncertainty[i][-window_size:])
                #     prev_avg = np.mean(uncertainty[i][-(window_size + window_lag):-window_lag])
                #     if current_avg < prev_avg or uncertainty[i][-1] < .1:
                #         temp_reward.append(1)
                #     else:
                #         temp_reward.append(0)

            this_uncertainty = [uncertainty[x][-1] for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    reward.append(1)
                else:
                    reward.append(0)
            # voting
            # if np.mean(temp_reward) > .5:
            #     reward.append(np.mean(temp_reward))
            # else:
            #     reward.append(np.mean(temp_reward))
            # if sum(reward) > 1100 and num_targets > 2: sys.exit(1)

            # Do something on target_actions
            # Create feature-vector from generated target actions
            # Update the sensor location based on all individual actions.
            # index_matrix: an n_s x T matrix that shows the derivative of the
            # state in the output layer w.r.t. the action space in the
            # internal layer
            normalized_state, index_matrix1, index_matrix2, slope = \
                s.update_location_decentralized(target_actions, sigma, params)
            backpropagated_to_internal_1 = index_matrix1.dot(
                np.array(input_state_temp))  # 8 by 6
            backpropagated_to_internal_2 = index_matrix2.dot(
                np.array(input_state_temp))  # 8 by 6
            episode_state_out_layer.append(normalized_state)
            # each entry is a T x 6 matrix, with T the number of targets
            episode_state.append([backpropagated_to_internal_1,
                                  backpropagated_to_internal_2])
            # reward.append(-1 * uncertainty[-1])

            # update return
            discount_vector = gamma * np.array(discount_vector)
            discounted_return += (1.0 * reward[-1]) * discount_vector
            new_return = 1.0 * reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)
            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n > episode_length:
                break
            n += 1

        # Based on the return from the episode, update parameters of the
        # policy model. Normalize returns by the length of episode
        # if episode_counter % 10 == 0 and episode_counter > 0: print(weight_saver[-1])
        prev_params = dict(params)
        condition = True
        for i in range(0, num_targets):
            if np.mean(pos_error[i]) > 10000:
                condition = False
                break
                # unreachable in the original, kept as written:
                episode_condition = False
                episode_counter -= 1
        if not condition:
            # print("OOPSSSS...")
            continue
        # if episode_counter % 100 == 0 and training:
        #     print("Starting the evaluation phase...")
        #     training = False
        #     episode_condition = False
        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            # init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter, learning_rate, 1E-12, 20000)
            internal_rate = gen_learning_rate(episode_counter, 3 * 1E-5, 1E-15, 20000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0, len(episode_actions)):
                # calculate gradient
                # state = np.array(episode_state[e]).reshape(len(episode_state[e]), 1)
                out_state = np.array(episode_state_out_layer[e]).reshape(
                    len(episode_state_out_layer[e]), 1)
                backpropagated_terms = episode_state[e]
                if method == 0:
                    # 1 by n_s ==> derivative of F with respect to the
                    # output state-vector
                    deriv_with_out_state = (
                        episode_actions[e].reshape(2, 1) -
                        params[0]['weight2'].dot(out_state)
                    ).transpose().dot(params[0]['weight2'])
                    internal_gradiant1 = deriv_with_out_state.dot(
                        backpropagated_terms[0])  # 1 by 6
                    internal_gradiant2 = deriv_with_out_state.dot(
                        backpropagated_terms[1])  # 1 by 6
                    internal_gradiant = np.concatenate([internal_gradiant1,
                                                        internal_gradiant2])
                    # gradiant = ((episode_actions[e].reshape(2, 1) - params[0]['weight'].dot(state)).dot(state.transpose())) / sigma ** 2  # this is the gradient
                    # this is the gradient of the output layer
                    gradiant_out_layer = ((episode_actions[e].reshape(2, 1) -
                                           params[0]['weight2'].dot(out_state)).dot(
                        out_state.transpose())) / sigma ** 2
                elif method == 1:
                    # this is the gradient
                    gradiant = ((episode_actions[e].reshape(2, 1) -
                                 params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2
                elif method == 2:
                    # Gradient for MLP
                    pass
                if np.max(np.abs(gradiant_out_layer)) > 1E2 or \
                        np.max(np.abs(internal_gradiant)) > 1E2:
                    # print("OOPPSSSS...")
                    continue  # clip large gradients
                if method == 0:
                    # an unbiased sample of return
                    adjustment_term_out_layer = \
                        gradiant_out_layer * normalized_discounted_return[e]
                    adjustment_term_internal_layer = \
                        internal_gradiant * normalized_discounted_return[e]
                    params[0]['weight2'] += rate * adjustment_term_out_layer
                    params[0]['weight'] += internal_rate * adjustment_term_internal_layer
                elif method == 1:
                    # an unbiased sample of return
                    adjustment_term = gradiant * normalized_discounted_return[e]
                    params[1]['weight'] += rate * adjustment_term
                elif method == 2:
                    # Gradient for MLP
                    pass
            # if not condition:
            #     weight = prev_weight
            #     continue
            episode_counter += 1
            flatted_weights1 = list(params[0]['weight'][0, :]) + \
                list(params[0]['weight'][1, :])
            flatted_weights2 = list(params[0]['weight2'][0, :]) + \
                list(params[0]['weight2'][1, :])
            temp1 = [str(x) for x in flatted_weights1]
            temp2 = [str(x) for x in flatted_weights2]
            weight_file.write("\t".join(temp1) + "$$$" + "\t".join(temp2) + "\n")
            # flatted_weights = list(weight[0, :]) + list(weight[1, :])
            # temp = [str(x) for x in flatted_weights]
            # weight_file.write("\t".join(temp) + "\n")
            weight_saver1.append(params[0]['weight'][0][0])
            weight_saver2.append(params[0]['weight'][1][0])
            weight_saver2_1.append(params[0]['weight2'][0][0])
            weight_saver2_2.append(params[0]['weight2'][1][0])
        else:
            # print("garbage trajectory: no-update")
            pass

        # if not training:
        return_saver.append(sum(reward))
        error_saver.append(np.mean(pos_error))
        # print(len(return_saver), n)
        if episode_counter % 100 == 0 and episode_counter > 0:
            print(episode_counter, np.mean(return_saver), sigma)
            # print(params[method]['weight'])
            # weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            # print(weight)
            reward_file.write(str(np.mean(sorted(return_saver, reverse=True)[0:int(.95 * len(return_saver))])) + "\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95 * len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver, reverse=True)[0:int(.95 * len(return_saver))])) + "\n")
            # weight_file.write(str(np.mean(return_saver)) + "\n")
            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95 * len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95 * len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()
            reward_file = open(result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            weight_file = open(result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            return_saver = []
            error_saver = []
        num_episodes.append(n)
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=11)

# Average CV score on the training set was: 0.6341268075639599
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.9),
    XGBClassifier(learning_rate=0.01, max_depth=4, min_child_weight=7,
                  n_estimators=100, nthread=1, subsample=0.55))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
def approximate_smooth_weight_kernel_ridge_regression(
        data, labels, ind_mat, opts):  # , ITERS=100, THRESH=1e-6
    """
    Define a multitask regression problem in which tasks are locally smooth
    (e.g. bin size prediction), and introduce a penalty in which weights of
    regressors of related tasks are encouraged to be similar. Instead of
    optimizing the dual and using explicit kernel matrices, random kitchen
    sinks style kernel approximations are used.
    """
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.metrics.pairwise import rbf_kernel
    from sklearn.kernel_approximation import RBFSampler

    D = opts['APPROXIMATION_DIM']
    sm = RBFSampler(gamma=1. / opts['kpar'] ** 2, n_components=D,
                    random_state=666)

    def retrieve_neigh_norm(W, ind_w, ss):
        L = W.shape[1]
        hs = int(np.floor(ss / 2))
        if ind_w < hs:
            W_subs = W[:, 0:(ind_w + hs + 1)]
        elif ind_w > (L - hs - 1):
            W_subs = W[:, (ind_w - hs):L]
        else:
            W_subs = W[:, (ind_w - hs):(ind_w + hs + 1)]
        return 1 / ss * np.sum(W_subs, axis=1)

    W = np.random.rand(D, opts['TASKS'])  # init stuff
    k = 0
    epsi = np.Inf
    loss = []
    while (epsi > opts['THRESH']) & (k < opts['ITERS']):
        # schedule = np.random.permutation(range(T))
        schedule = range(opts['TASKS'])
        W_old = W.copy()
        t_loss = 0
        t_norm_w = 0
        for ind_w in schedule:
            X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
            trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
            trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]
            N = np.sum(ind_mat[:, ind_w] == 1)
            W_mt_n = retrieve_neigh_norm(W, ind_w, opts['WIN_SIZE'])
            A = np.dot(trn_X.T, trn_X) + opts['par1'] * np.eye(D) + \
                opts['par2'] * np.eye(D)
            A = np.linalg.inv(A)
            B = np.dot(trn_X.T, trn_Y) + opts['par2'] * W_mt_n
            W[:, ind_w] = np.dot(A, B)
            t_loss += 1 / N * np.sum(
                (trn_Y.values - np.dot(trn_X, W[:, ind_w])) ** 2)
            # + np.linalg.norm(W[:, ind_w])
            t_norm_w += np.linalg.norm(W[:, ind_w])
        emp_err = 1 / opts['TASKS'] * t_loss
        epsi = np.abs(np.sum(np.sum(W - W_old)))
        loss.append(emp_err)
        # print(f'iter {k}, size W {W.shape}, conv {epsi}, emp error {emp_err}, mean norm w {1/opts["TASKS"] * t_norm_w}')
        k += 1
    if k == opts['ITERS']:
        print(f'converged in iter {k}, size W {W.shape}, conv {epsi}, '
              f'emp error {emp_err}, mean norm w {1/opts["TASKS"] * t_norm_w}')

    y_hat = np.zeros((opts['DATA_SIZE'][0], opts['TASKS']))
    pred_stats = {}
    pred_stats['tr_RMSE'] = []  # np.inf * np.ones((T))
    pred_stats['tr_R2'] = []    # np.inf * np.ones((T))
    pred_stats['va_RMSE'] = []  # np.inf * np.ones((T))
    pred_stats['va_R2'] = []    # np.inf * np.ones((T))
    # if opts['VAL_MODE']:
    for ind_w in range(opts['TASKS']):
        X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
        trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
        trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]
        pred_tr_Y = np.dot(trn_X, W[:, ind_w])
        pred_stats['tr_RMSE'].append(
            np.sqrt(mean_squared_error(trn_Y, pred_tr_Y)))
        pred_stats['tr_R2'].append(r2_score(trn_Y, pred_tr_Y))
        y_hat[ind_mat[:, ind_w] == 1, ind_w] = pred_tr_Y
        if np.any(ind_mat[:, ind_w] == 2):
            val_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 2, :-1])
            val_Y = X_Y.iloc[ind_mat[:, ind_w] == 2, -1]
            pred_va_Y = np.dot(val_X, W[:, ind_w])
            pred_stats['va_RMSE'].append(
                np.sqrt(mean_squared_error(val_Y, pred_va_Y)))
            pred_stats['va_R2'].append(r2_score(val_Y, pred_va_Y))
            y_hat[ind_mat[:, ind_w] == 2, ind_w] = pred_va_Y
    return W, loss, ind_mat, pred_stats, y_hat
def rbk_RFF(X, σ, ɲ=1000):
    # Gaussian RBF kernel via random Fourier features: gamma = 1 / (2 * sigma^2)
    γ = 1.0 / (2 * σ * σ)
    rff = RBFSampler(gamma=γ, n_components=ɲ, random_state=None)
    Φₓ = rff.fit_transform(X)
    Ƙ = Φₓ.dot(Φₓ.T)  # approximate Gram matrix
    return Ƙ
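# Sanity-check sketch (added, not from the original source): compare rbk_RFF
# against the exact RBF Gram matrix for the same bandwidth. Assumes numpy and
# scikit-learn are available.
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

X = np.random.randn(100, 5)
sigma = 2.0
K_exact = rbf_kernel(X, gamma=1.0 / (2 * sigma * sigma))
K_rff = rbk_RFF(X, sigma, ɲ=5000)
print(np.max(np.abs(K_exact - K_rff)))  # small for large ɲ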
def main():
    type_of_problem = ""
    split = 0.3
    su_train = []
    su_test = []

    p = optparse.OptionParser()
    # take path of training data set
    p.add_option("--path_train", "-p",
                 default="/afs/cern.ch/user/s/sganju/private/2014_target.csv")
    # what type of problem is it? regression/classification/clustering/dimensionality reduction
    p.add_option("--type_of_problem", "-t", default="c")
    # include cross validation true/false
    p.add_option("--cross_validation", "-v", default="True")
    # take the numerical values
    # p.add_option('--numerical_values', '-n')
    # specify target column
    p.add_option("--target", "-y")
    options, arguments = p.parse_args()

    num_values = ("id cpu creator dataset dbs dtype era naccess nblk nevt "
                  "nfiles nlumis nrel nsites nusers parent primds proc_evts "
                  "procds rel1_0 rel1_1 rel1_2 rel1_3 rel1_4 rel1_5 rel1_6 "
                  "rel1_7 rel2_0 rel2_1 rel2_10 rel2_11 rel2_2 rel2_3 rel2_4 "
                  "rel2_5 rel2_6 rel2_7 rel2_8 rel2_9 rel3_0 rel3_1 rel3_10 "
                  "rel3_11 rel3_12 rel3_13 rel3_14 rel3_15 rel3_16 rel3_17 "
                  "rel3_18 rel3_19 rel3_2 rel3_20 rel3_21 rel3_22 rel3_23 "
                  "rel3_24 rel3_25 rel3_26 rel3_3 rel3_4 rel3_5 rel3_6 rel3_7 "
                  "rel3_8 rel3_9 relt_0 relt_1 relt_2 rnaccess rnusers rtotcpu "
                  "s_0 s_1 s_2 s_3 s_4size tier totcpu wct")
    num_values = num_values.split()

    # load from files
    train = pd.read_csv(options.path_train)
    # load target values
    target = train["target"]

    # TRAINING DATA SET
    data = train
    print("Performing imputation.")
    imp = data.dropna().mean()
    test = data.fillna(imp)
    data = data.fillna(imp)

    print("Splitting the training data with %f." % split)
    features_train, features_test, target_train, target_test = \
        train_test_split(data, target, test_size=split, random_state=0)

    print("Generating Model")
    # differentiate on the basis of type of problem

    # RANDOM FOREST CLASSIFIER
    rf = RandomForestClassifier(n_estimators=100)
    rf = rf.fit(features_train, target_train)
    cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test)

    # Ada boost
    clf_ada = AdaBoostClassifier(n_estimators=100)
    params = {
        "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5],
        "max_features": [0.25, 0.50, 0.75, 1],
        "max_depth": [3, 4, 5],
    }
    gs = GridSearchCV(clf_ada, params, cv=5, scoring="accuracy", n_jobs=4)
    clf_ada.fit(features_train, target_train)
    cal_score("ADABOOST", clf_ada, features_test, target_test)

    # RANDOM FOREST CLASSIFIER
    rf = RandomForestClassifier(n_estimators=100)
    rf = rf.fit(features_train, target_train)
    cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test)
    # predictions = rf.predict_proba(test)

    # Gradient Boosting
    gb = GradientBoostingClassifier(n_estimators=100, subsample=0.8)
    params = {
        "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5],
        "max_features": [0.25, 0.50, 0.75, 1],
        "max_depth": [3, 4, 5],
    }
    gs = GridSearchCV(gb, params, cv=5, scoring="accuracy", n_jobs=4)
    gs.fit(features_train, target_train)
    cal_score("GRADIENT BOOSTING", gs, features_test, target_test)

    # KERNEL APPROXIMATIONS - RBF
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(data)

    # SGD CLASSIFIER
    clf = SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1,
                        eta0=0.0, fit_intercept=True, l1_ratio=0.15,
                        learning_rate="optimal", loss="hinge", n_iter=5,
                        n_jobs=1, penalty="l2", power_t=0.5,
                        random_state=None, shuffle=True, verbose=0,
                        warm_start=False)
    clf.fit(features_train, target_train)
    cal_score("SGD Regression", clf, features_test, target_test)

    # KN Classifier
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(features_train, target_train)
    cal_score("KN CLASSIFICATION", neigh, features_test, target_test)
    # predictions = neigh.predict_proba(test)
    # Decision Tree classifier
    clf_tree = tree.DecisionTreeClassifier(max_depth=10)
    clf_tree.fit(features_train, target_train)
    cal_score("DECISION TREE CLASSIFIER", clf_tree, features_test, target_test)
from sklearn.kernel_approximation import RBFSampler

matplotlib.style.use('ggplot')

env = gym.envs.make("MountainCar-v0")

# Feature Preprocessing: Normalize to zero mean and unit variance.
# We use a few samples from the observation space to do this
observation_examples = np.array(
    [env.observation_space.sample() for x in range(10000)])
scaler = sklearn.preprocessing.StandardScaler()
scaler.fit(observation_examples)

# Used to convert a state to a featurized representation.
# We use RBF kernels with different variances to cover different parts of the space
featurizer = sklearn.pipeline.FeatureUnion([
    ("rbf1", RBFSampler(gamma=5.0, n_components=100)),
    ("rbf2", RBFSampler(gamma=2.0, n_components=100)),
    ("rbf3", RBFSampler(gamma=1.0, n_components=100)),
    ("rbf4", RBFSampler(gamma=0.5, n_components=100))
])
featurizer.fit(scaler.transform(observation_examples))


class Estimator():
    """
    Value Function approximator.
    """

    def __init__(self):
        # We create a separate model for each action in the environment's
        # action space. Alternatively we could somehow encode the action
        # into the features, but this way it's easier to code up.
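# Sketch (added for illustration): conceptually this belongs right after
# featurizer.fit above. The scaler + featurizer pair maps a raw 2-D MountainCar
# state to a 400-dimensional feature vector (4 samplers x 100 components each).
# Assumes the classic gym API where reset() returns the state array.
state = env.reset()
featurized = featurizer.transform(scaler.transform([state]))
print(featurized.shape)  # (1, 400)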
    elapTimeNys[i] = timeit.default_timer() - startTime
    XtrainT = kpls.transform(ktrain)
    XtestT = kpls.transform(ktest)
    if n == 573:
        kplsScoresNys[:, 0] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest)
    elif n == 1073:
        kplsScoresNys[:, 1] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest)
    elif n == 1573:
        kplsScoresNys[:, 2] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest)

# RBF sampler method
elapTimeRBFS = np.zeros(np.shape(nComponents))
kplsScoresRBFS = np.zeros((2, 3))
for i, n in enumerate(nComponents):
    rbfs = RBFSampler(n_components=n, gamma=gamma)
    rbfs.fit(Xtrain)
    ktrain = rbfs.transform(Xtrain)
    ktest = rbfs.transform(Xtest)

    startTime = timeit.default_timer()
    kpls.fit(ktrain, Ytrain)
    elapTimeRBFS[i] = timeit.default_timer() - startTime

    XtrainT = kpls.transform(ktrain)
    XtestT = kpls.transform(ktest)
    if n == 573:
        kplsScoresRBFS[:, 0] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest)
    elif n == 1073:
        kplsScoresRBFS[:, 1] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest)
    elif n == 1573:
        kplsScoresRBFS[:, 2] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest)
import sys
import os
import numpy as np
import itertools

from sklearn.svm import LinearSVC
from sklearn.kernel_approximation import RBFSampler
from sklearn.kernel_approximation import AdditiveChi2Sampler

pycharm_mode = True

N_FEATURES = 400  # Dimension of the original data.
BATCH_SIZE = 30000

chi = AdditiveChi2Sampler()
chi.fit(np.zeros(N_FEATURES).ravel())
rbf = RBFSampler(gamma=1, random_state=1337, n_components=5500)
rbf.fit(np.zeros(1200).ravel())


def transform(x_original):
    return rbf.transform(chi.transform(x_original)).ravel()


def lines(source):
    for line in source:
        line = line.strip()
        (label, x_string) = line.split(" ", 1)
        label = int(label)
        x_original = np.fromstring(x_string, sep=' ')
        yield label, transform(x_original)


def main():
    if pycharm_mode:
class RBFDivFreeKernel(object):
    r"""
    Divergence-free Operator-Valued Kernel of the form:

    .. math::
        X \mapsto K_X(Y) = \exp(-\gamma \|X - Y\|^2) A_{X,Y},

    where

    .. math::
        A_{X,Y} = 2 \gamma (X - Y)(X - Y)^T +
                  \left((d - 1) - 2 \gamma \|X - Y\|^2\right) I.

    Attributes
    ----------
    gamma : {float}
        RBF kernel parameter.

    References
    ----------

    See also
    --------
    RBFDivFreeKernelMap
        Divergence-free Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 2)
    >>> K = ovk.RBFDivFreeKernel(1.)
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, gamma):
        """Initialize the Divergence-free Operator-Valued Kernel.

        Parameters
        ----------
        gamma : {float}
            RBF kernel parameter.
        """
        self.gamma = gamma

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : RBFDivFreeKernelMap, callable
            .. math::
                K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import RBFDivFreeKernelMap
        return RBFDivFreeKernelMap(X, self.gamma)

    def get_orff_map(self, X, D=100, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        self.r = 1
        if not hasattr(self, 'Xb_'):
            self.phi_ = RBFSampler(gamma=self.gamma, n_components=D,
                                   random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X)
            self.Xb_ = (self.Xb_.reshape((self.Xb_.shape[0], 1,
                                          self.Xb_.shape[1])) *
                        self.phi_.random_weights_.reshape(
                            (1, -1, self.Xb_.shape[1])))
            self.Xb_ = self.Xb_.reshape((-1, self.Xb_.shape[2]))

        D = self.phi_.n_components
        if X is self.Xb_:
            # (fix) dot takes two arguments; the original called dot(self.Xb_ * b)
            return LinearOperator(self.Xb_.shape,
                                  matvec=lambda b: dot(self.Xb_, b),
                                  rmatvec=lambda r: dot(self.Xb_.T, r))
        else:
            Xb = self.phi_.transform(X)
            # TODO:
            # w = self.phi_.random_weights_.reshape((1, -1, Xb.shape[1]))
            # wn = np.linalg.norm(w)
            # Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
            #       wn * np.eye()w np.dot(w.T, w) / wn)
            # (fix) mirror the random-weight scaling applied to the stored
            # features above, so the reshape below has a third axis to use
            Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
                  self.phi_.random_weights_.reshape((1, -1, Xb.shape[1])))
            Xb = Xb.reshape((-1, Xb.shape[2]))
            return LinearOperator(Xb.shape,
                                  matvec=lambda b: dot(Xb, b),
                                  rmatvec=lambda r: dot(Xb.T, r))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
            K_x: \begin{cases}
            Y \mapsto K(X, Y) & \text{if } Y \text{ is None,} \\
            K(X, Y) & \text{otherwise.}
            \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
            default = None
            Samples.

        Returns
        -------
        K_x : RBFDivFreeKernelMap, callable or LinearOperator
            .. math::
                K_x: \begin{cases}
                Y \mapsto K(X, Y) & \text{if } Y \text{ is None,} \\
                K(X, Y) & \text{otherwise.}
                \end{cases}
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)
def get_orff_map(self, X, D=100, eps=1e-5, random_state=0):
    r"""Return the Random Fourier Feature map associated with the data X.

    .. math::
           K_x: Y \mapsto \tilde{\Phi}(X)

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Samples.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    u, s, v = svd(self.A, full_matrices=False, compute_uv=True)
    self.B_ = dot(diag(sqrt(s[s > eps])), v[s > eps, :])
    self.r = self.B_.shape[0]

    if (self.scalar_kernel is rbf_kernel) and not hasattr(self, 'Xb_'):
        if self.scalar_kernel_params is None:
            gamma = 1.
        else:
            gamma = self.scalar_kernel_params['gamma']
        self.phi_ = RBFSampler(gamma=gamma, n_components=D,
                               random_state=random_state)
        self.phi_.fit(X)
        self.Xb_ = self.phi_.transform(X).astype(X.dtype)
    # (fix) string comparison with == rather than identity with 'is'
    elif (self.scalar_kernel == 'skewed_chi2') and not hasattr(self, 'Xb_'):
        if self.scalar_kernel_params is None:
            skew = 1.
        else:
            skew = self.scalar_kernel_params['skew']
        self.phi_ = SkewedChi2Sampler(skewedness=skew, n_components=D,
                                      random_state=random_state)
        self.phi_.fit(X)
        self.Xb_ = self.phi_.transform(X).astype(X.dtype)
    elif not hasattr(self, 'Xb_'):
        raise NotImplementedError('ORFF map for kernel is not '
                                  'implemented yet')

    D = self.phi_.n_components
    if X is self.Xb_:
        cshape = (D, self.r)
        rshape = (self.Xb_.shape[0], self.p)
        oshape = (self.Xb_.shape[0] * self.p, D * self.r)
        # (fix) rmatvec used the undefined name Xb in this branch
        return LinearOperator(
            oshape, dtype=self.Xb_.dtype,
            matvec=lambda b: dot(dot(self.Xb_, b.reshape(cshape)), self.B_),
            rmatvec=lambda r: dot(self.Xb_.T, dot(r.reshape(rshape),
                                                  self.B_.T)))
    else:
        Xb = self.phi_.transform(X)
        cshape = (D, self.r)
        rshape = (X.shape[0], self.p)
        oshape = (Xb.shape[0] * self.p, D * self.r)
        return LinearOperator(
            oshape, dtype=self.Xb_.dtype,
            matvec=lambda b: dot(dot(Xb, b.reshape(cshape)), self.B_),
            rmatvec=lambda r: dot(Xb.T, dot(r.reshape(rshape), self.B_.T)))
class DecomposableKernel(object):
    r"""
    Decomposable Operator-Valued Kernel of the form:

    .. math::
        X, Y \mapsto K(X, Y) = k_s(X, Y) A

    where A is a symmetric positive semidefinite operator acting on the
    outputs.

    Attributes
    ----------
    A : {array, LinearOperator}, shape = [n_targets, n_targets]
        Linear operator acting on the outputs

    scalar_kernel : {callable}
        Callable which associate to the training points X the Gram matrix.

    scalar_kernel_params : {mapping of string to any}
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    References
    ----------

    See also
    --------
    DecomposableKernelMap
        Decomposable Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 10)
    >>> K = ovk.DecomposableKernel(np.eye(2))
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, A, scalar_kernel=rbf_kernel, scalar_kernel_params=None):
        """Initialize the Decomposable Operator-Valued Kernel.

        Parameters
        ----------
        A : {array, LinearOperator}, shape = [n_targets, n_targets]
            Linear operator acting on the outputs

        scalar_kernel : {callable}
            Callable which associate to the training points X the Gram matrix.

        scalar_kernel_params : {mapping of string to any}, optional
            Additional parameters (keyword arguments) for kernel function
            passed as callable object.
        """
        self.A = A
        self.scalar_kernel = scalar_kernel
        self.scalar_kernel_params = scalar_kernel_params
        self.p = A.shape[0]

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
               K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable
            .. math::
                K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import DecomposableKernelMap
        return DecomposableKernelMap(X, self.A, self.scalar_kernel,
                                     self.scalar_kernel_params)

    def get_orff_map(self, X, D=100, eps=1e-5, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
               K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        u, s, v = svd(self.A, full_matrices=False, compute_uv=True)
        self.B_ = dot(diag(sqrt(s[s > eps])), v[s > eps, :])
        self.r = self.B_.shape[0]

        if (self.scalar_kernel is rbf_kernel) and not hasattr(self, 'Xb_'):
            if self.scalar_kernel_params is None:
                gamma = 1.
            else:
                gamma = self.scalar_kernel_params['gamma']
            self.phi_ = RBFSampler(gamma=gamma, n_components=D,
                                   random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        # (fix) string comparison with == rather than identity with 'is'
        elif (self.scalar_kernel == 'skewed_chi2') and not hasattr(self, 'Xb_'):
            if self.scalar_kernel_params is None:
                skew = 1.
            else:
                skew = self.scalar_kernel_params['skew']
            self.phi_ = SkewedChi2Sampler(skewedness=skew, n_components=D,
                                          random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif not hasattr(self, 'Xb_'):
            raise NotImplementedError('ORFF map for kernel is not '
                                      'implemented yet')

        D = self.phi_.n_components
        if X is self.Xb_:
            cshape = (D, self.r)
            rshape = (self.Xb_.shape[0], self.p)
            oshape = (self.Xb_.shape[0] * self.p, D * self.r)
            # (fix) rmatvec used the undefined name Xb in this branch
            return LinearOperator(
                oshape, dtype=self.Xb_.dtype,
                matvec=lambda b: dot(dot(self.Xb_, b.reshape(cshape)),
                                     self.B_),
                rmatvec=lambda r: dot(self.Xb_.T, dot(r.reshape(rshape),
                                                      self.B_.T)))
        else:
            Xb = self.phi_.transform(X)
            cshape = (D, self.r)
            rshape = (X.shape[0], self.p)
            oshape = (Xb.shape[0] * self.p, D * self.r)
            return LinearOperator(
                oshape, dtype=self.Xb_.dtype,
                matvec=lambda b: dot(dot(Xb, b.reshape(cshape)), self.B_),
                rmatvec=lambda r: dot(Xb.T, dot(r.reshape(rshape),
                                                self.B_.T)))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
            K_x: \begin{cases}
            Y \mapsto K(X, Y) & \text{if } Y \text{ is None,} \\
            K(X, Y) & \text{otherwise.}
            \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
            default = None
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable or LinearOperator
            .. math::
                K_x: \begin{cases}
                Y \mapsto K(X, Y) & \text{if } Y \text{ is None,} \\
                K(X, Y) & \text{otherwise.}
                \end{cases}
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
X_test = X[0:nr_test]
Y_test = Y[0:nr_test]
X_train = X[nr_test + 1:len(X)]
Y_train = Y[nr_test + 1:len(X)]

X_train = robust_scaler.fit_transform(X_train)
# save standard scaler
joblib.dump(robust_scaler, base_path + 'data/rs-' + algorithm + '-' +
            str(ps[psi]) + '.pkl')
X_test = robust_scaler.transform(X_test)

if algorithm == 'kernel-approx':
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_train = rbf_feature.fit_transform(X_train)
    # (fix) reuse the map fitted on the training set; the original refit on
    # the test set with fit_transform
    X_test = rbf_feature.transform(X_test)
elif algorithm == 'mlp':
    n_output = len(set(Y))
    # n_output = 2460
    n_input = len(X_train[0]) + 1
    n_neurons = int(round(sqrt(n_input * n_output)))
    print("N input", n_input)
    print("N output", n_output)
    print("N neurons", n_neurons)
    classifier = MLPClassifier(solver='adam', alpha=1e-5,
                               hidden_layer_sizes=(n_input, n_neurons,
                                                   n_output),
                               random_state=1)

if classifier is not None or exists_be_file is True:
    if cv is True:
# Create the Gym environment
env = gym.make('TurtleBot3ObstacleAvoidance-v1')

# Loads parameters from the ROS param server. Parameters are stored in a
# .yaml file inside the /config directory. They are loaded at runtime by
# the launch file:
lr = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/learning_rate")
epsilon = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/epsilon")
gamma = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/gamma")
epsilon_discount = rospy.get_param(
    "/turtlebot3_obstacle_avoidance_v1/epsilon_discount")
min_epsilon = rospy.get_param(
    "/turtlebot3_obstacle_avoidance_v1/min_epsilon")
nepisodes = rospy.get_param("/turtlebot3_obstacle_avoidance_v1/nepisodes")

rbf_samplers = [("rbf1", RBFSampler(gamma=0.05, n_components=1000)),
                ("rbf2", RBFSampler(gamma=0.1, n_components=1000)),
                ("rbf3", RBFSampler(gamma=0.5, n_components=1000)),
                ("rbf4", RBFSampler(gamma=1.0, n_components=1000)),
                ("rbf5", RBFSampler(gamma=2.0, n_components=1000)),
                ("rbf6", RBFSampler(gamma=5.0, n_components=1000))]
observation_examples = np.array(
    [env.observation_space.sample() for x in range(20000)])

# Initialises Q-Learning
qlearn = qlearnRBF.QLearnRBF(n_actions=env.action_space.n,
                             epsilon=epsilon, lr=lr, gamma=gamma,
                             rbf_samplers=rbf_samplers,
                             observation_examples=observation_examples)
elif trans_name == 'pca':
    from sklearn.decomposition import PCA
    qt = PCA()
elif trans_name == 'nystronem':
    from sklearn.kernel_approximation import Nystroem
    qt = Nystroem()
elif trans_name == 'kernel_pca':
    from solnml.components.feature_engineering.transformations.utils import KernelPCA
    qt = KernelPCA()
elif trans_name == 'kitchen_sink':
    from sklearn.kernel_approximation import RBFSampler
    qt = RBFSampler()
elif trans_name == 'lda':
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    qt = LinearDiscriminantAnalysis()
else:
    raise ValueError('Unsupported transformation name: %s!' % trans_name)

qt.fit(X, y)
print(X.shape)

# Case 1: transform and split.
x1 = qt.transform(X)
_, x1_, _, _ = train_valid_split_X(x1, y)

# Case 2: split and transform.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='sparse'
)
train_data_n = (len(os.listdir(train_data_dir + '/1')) +
                len(os.listdir(train_data_dir + '/0')) +
                len(os.listdir(train_data_dir + '/2')))

chi_feature = AdditiveChi2Sampler()
clf = SGDClassifier(class_weight={0: 1.0, 1: 1.2, 2: 1.0})
classes_ = np.array([0, 1, 2])
rbf_feature = RBFSampler(gamma=4.0, n_components=3000)
# rbf_feature = Nystroem(n_components=100, gamma=1.0, random_state=1)

"""
feature_train_stack = np.zeros((100, 2048)) - 1
label_train_stack = np.zeros((100, 1)) - 1
for i in range(train_data_n // batch_size):
    # for i in range(2):
    print("======= data reading! =======")
    print("batch No." + str(i))
    feature_train, label_train = get_feature_and_label(model, train_generator, i)
    feature_train = normalize(feature_train)
    # print(feature_train.shape)
    # feature_train = rbf_feature.fit_transform(feature_train)
    # feature_train = chi_feature.fit_transform(feature_train, label_train)
    return X_train, X_test, y_train, y_test


ESTIMATORS = {
    "dummy": DummyClassifier(),
    "CART": DecisionTreeClassifier(),
    "ExtraTrees": ExtraTreesClassifier(),
    "RandomForest": RandomForestClassifier(),
    "Nystroem-SVM": make_pipeline(
        Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100)),
    "SampledRBF-SVM": make_pipeline(
        RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=100)),
    "LogisticRegression-SAG": LogisticRegression(solver="sag", tol=1e-1,
                                                 C=1e4),
    "LogisticRegression-SAGA": LogisticRegression(solver="saga", tol=1e-1,
                                                  C=1e4),
    "MultilayerPerceptron": MLPClassifier(
        hidden_layer_sizes=(100, 100),
        max_iter=400,
        alpha=1e-4,
        solver="sgd",
        learning_rate_init=0.2,
        momentum=0.9,
        verbose=1,
        tol=1e-4,
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=2)

# Average CV score on the training set was: 0.6341119762699294
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.1),
    XGBClassifier(learning_rate=0.001, max_depth=2, min_child_weight=13,
                  n_estimators=100, nthread=1, subsample=0.9500000000000001))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
def initialize(self):
    """Initializes RBFSampler for the detector"""
    self.kernel = RBFSampler(gamma=0.5, n_components=20000, random_state=290)
def rbf_kernel(self, matrix, n_components):
    rbf = RBFSampler(n_components=n_components)
    print(rbf)
    matrix_features = rbf.fit_transform(matrix)
    return matrix_features
    'learning_rate': [.05, .1, .2, .3, 2, 3, 5],
    'max_features': [.25, .50, .75, 1],
    'max_depth': [3, 4, 5],
}
gs = GridSearchCV(gb, params, cv=5, scoring='accuracy', n_jobs=4)
gs.fit(features_train, target_train)
# predictions = gs.predict_proba(test)
# print predictions
cal_score("GRADIENT BOOSTING", gs, features_test, target_test)
# sorted(gs.grid_scores_, key=lambda x: x.mean_validation_score)
# print gs.best_score_
# print gs.best_params_
# predictions = gs.predict_proba(test)

# KERNEL APPROXIMATIONS - RBF
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(data)

# SGD CLASSIFIER
clf = SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
                    fit_intercept=True, l1_ratio=0.15, learning_rate='optimal',
                    loss='hinge', n_iter=5, n_jobs=1, penalty='l2',
                    power_t=0.5, random_state=None, shuffle=True, verbose=0,
                    warm_start=False)
clf.fit(features_train, target_train)
cal_score("SGD Regression", clf, features_test, target_test)

# KN Classifier
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(features_train, target_train)
cal_score("KN CLASSIFICATION", neigh, features_test, target_test)
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
import pandas as pd

data = pd.read_csv('crop_tsc_balanced_imputed_2015.csv', index_col=None,
                   header=None)
X = data.iloc[:, 0:8]
y = data.iloc[:, 9]

rbf_feature = RBFSampler(gamma=10, random_state=1)
X_features = rbf_feature.fit_transform(X)

clf = SGDClassifier(max_iter=1000)
clf.fit(X_features, y)
print(clf.score(X_features, y))

df = pd.DataFrame(X_features)
df.insert(100, 100, y)
print(df.head())
df.to_csv("crop_tsc_balanced_imputed_rbf_2015.csv", header=False, index=False)
ytest = None
classa = [0, 1, 2, 3, 4]
num = len(x) - 10000
xtest, ytest = x[num:], y[num:]
x, y = x[:num], y[:num]
print(x[:10], y[:10])

clf = clf.fit(x, y)
clf2_RFC = RandomForestClassifier(random_state=0,
                                  class_weight={1: 0.25, 2: 0.56,
                                                3: 0.17, 4: 0.02})
clf2_RFC = clf2_RFC.fit(x, y)

rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(x)
# (fix) reuse the map fitted on the training data; the original refit on the
# test data with fit_transform
X_test = rbf_feature.transform(xtest)

clfK = linear_model.SGDClassifier()
clfK.fit(x, y)
print("SGD classifier", clfK.score(xtest, ytest))

# DECISION TREE
clft = tree.DecisionTreeClassifier(max_depth=7)
clft.fit(x, y)
print("Tree", clft.score(xtest, ytest))

# gen image
def __init__(self, state_dim, action_dim, number_of_features):
    Policy.__init__(self, state_dim, action_dim)
    self.rbf_feature = RBFSampler(gamma=25., n_components=number_of_features)
    self.rbf_feature.fit(np.random.randn(action_dim, state_dim))
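RBFSampler.fit only records the input dimensionality and draws the random weights, so fitting on np.random.randn(action_dim, state_dim) is just a shape hint. A minimal standalone sketch mirroring the constructor above (the dimensions are hypothetical):

import numpy as np
from sklearn.kernel_approximation import RBFSampler

state_dim, number_of_features = 4, 100
rbf_feature = RBFSampler(gamma=25., n_components=number_of_features)
rbf_feature.fit(np.random.randn(1, state_dim))           # shape hint only
phi = rbf_feature.transform(np.random.randn(1, state_dim))  # (1, 100) features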
# Calculation of a Kernel
# K(x, y) = exp(-gamma ||x-y||^2)
# sigma = sqrt( 1/(2*gamma) )
# gamma = 1/(2*sigma^2)
import time
import numpy as np
from sklearn.kernel_approximation import RBFSampler, Nystroem

num_of_samples = 14000
X = np.random.random((num_of_samples, 5))
sampling_percentage = 0.05

start_time = time.time()
RFF = RBFSampler(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = RFF.fit_transform(X)
RFF_estimated_kernel = V.dot(V.T)
print("--- RFF Time : %s seconds ---" % (time.time() - start_time))

start_time = time.time()
N = Nystroem(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = N.fit_transform(X)
estimated_kernel = V.dot(V.T)
print("--- Nystrom Time : %s seconds ---" % (time.time() - start_time))

start_time = time.time()
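The snippet cuts off at a third timing block. A plausible (hypothetical) completion compares both approximate Gram matrices against the exact RBF kernel; the exact kernel is computed on a subsample, since the full 14000 x 14000 matrix is expensive:

from sklearn.metrics.pairwise import rbf_kernel

# Exact kernel on a 1000-point subsample keeps the comparison tractable.
idx = np.random.choice(num_of_samples, 1000, replace=False)
K_exact = rbf_kernel(X[idx], gamma=1)
print("--- Exact (1000x1000) Time : %s seconds ---" % (time.time() - start_time))
err_rff = np.abs(K_exact - RFF_estimated_kernel[np.ix_(idx, idx)]).mean()
err_nys = np.abs(K_exact - estimated_kernel[np.ix_(idx, idx)]).mean()
print("mean abs error -- RFF: %.4f, Nystroem: %.4f" % (err_rff, err_nys))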
def compute_approximate_HSIC(X, Y, ncom=100, gamma=(None, None), ntrials=100,
                             random_state=1, sigma_prior=1):
    """
    Using approximations, computes the HSIC score between two different data
    series. The approximations are subsampling for the RBF kernel bandwidth
    selection and random kitchen sinks to approximate kernels (and therefore
    directly using inner products to estimate the cross-covariance operator in
    the approximate RKHS).

    :param X, Y: (multivariate) data series
    :param ncom: number of components to use in the random kernel approximation
    :param gamma: bandwidth for the RBF kernels
    :param ntrials: number of trials, over which HSIC is averaged
    :param random_state: set initial random state for reproducibility
    :param sigma_prior: scaling for the sigmas
    """
    from sklearn.kernel_approximation import RBFSampler
    import random
    if random_state is not None:
        random.seed(random_state)
    gamma = list(gamma)  # copy, so a mutable default is never modified in place

    def centering(K):
        """ center kernel matrix """
        n = K.shape[0]
        unit = np.ones([n, n])
        I = np.eye(n)
        Q = I - unit / n
        return np.dot(np.dot(Q, K), Q)

    def rbf(X, sigma=None):
        """ define RBF kernel + its parameter """
        GX = np.dot(X, X.T)
        KX = np.diag(GX) - GX + (np.diag(GX) - GX).T
        if sigma is None:
            mdist = np.median(KX[KX != 0])
            sigma = np.sqrt(mdist)
        KX *= -0.5 / sigma / sigma
        np.exp(KX, KX)
        return KX

    if gamma[0] is None:
        if X.shape[0] > 1000:
            yy = np.random.choice(len(X), 1000)
            x_ = X[yy]
            del yy
        else:
            x_ = X
        GX = np.dot(x_, x_.T)
        KX = np.diag(GX) - GX + (np.diag(GX) - GX).T
        mdist = np.median(KX[KX != 0])
        gamma[0] = 1 / (np.sqrt(sigma_prior * mdist)**2)
        del GX, KX, mdist
    if gamma[1] is None:
        if Y.shape[0] > 1000:
            yy = np.random.choice(len(Y), 1000)
            y_ = Y[yy]
            del yy
        else:
            y_ = Y
        GY = np.dot(y_, y_.T)
        KY = np.diag(GY) - GY + (np.diag(GY) - GY).T
        mdist = np.median(KY[KY != 0])
        gamma[1] = 1 / (np.sqrt(sigma_prior * mdist)**2)
        del GY, KY, mdist

    hs_a = 0
    # note: with a fixed integer random_state the samplers redraw identical
    # weights on every fit, so all trials are the same draw
    rbf_feature_x = RBFSampler(gamma=gamma[0], random_state=random_state,
                               n_components=ncom)
    rbf_feature_y = RBFSampler(gamma=gamma[1], random_state=random_state,
                               n_components=ncom)
    for trial in range(ntrials):
        if (X.shape[0] < 1) | (Y.shape[0] < 1):
            continue
        X_f = rbf_feature_x.fit_transform(X)
        X_f -= np.mean(X_f, axis=0)
        Y_f = rbf_feature_y.fit_transform(Y)
        Y_f -= np.mean(Y_f, axis=0)
        A = X_f.T.dot(Y_f)
        B = Y_f.T.dot(X_f)
        C = A.dot(B)
        hs_a += 1 / X_f.shape[0]**2 * np.trace(C)
    return hs_a / ntrials
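A quick smoke test on synthetic data (hypothetical usage, not from the original source): a series that is a noisy linear function of X should score noticeably higher than an independent one.

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(500, 2)
Y_dep = X @ rng.randn(2, 2) + 0.1 * rng.randn(500, 2)   # dependent on X
Y_ind = rng.randn(500, 2)                               # independent of X

print("HSIC (dependent):  ", compute_approximate_HSIC(X, Y_dep, ncom=50, ntrials=5))
print("HSIC (independent):", compute_approximate_HSIC(X, Y_ind, ncom=50, ntrials=5))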
    f.write(header)
    size = header.count(',')
    for (id, label) in zip(ids, labels):
        f.write('%d' % int(id))
        for i in range(0, size):
            if i == label:
                f.write(',1')
            else:
                f.write(',0')
        f.write('\n')


if __name__ == '__main__':
    # get X and y
    train_x, train_y = loadDataHelper('train_data.txt')
    test_x, test_id = loadDataHelper('test_data.txt')
    print('train size: %d %d' % (len(train_x), len(train_y)))
    print('test size: %d %d' % (len(test_x), len(test_id)))

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(train_x)

    model = SGDClassifier(loss="hinge", alpha=0.01, n_iter=200, fit_intercept=True)
    # model = SGDClassifier()
    model.fit(X_features, train_y)
    print(model)

    # map the test set with transform (not fit_transform), so it uses the same
    # random features the model was trained on
    X_features = rbf_feature.transform(test_x)
    predicted = model.predict(X_features)
    saveResult('result-sgd.csv', test_id, predicted)
def smooth_weight_approximate_gaussian_process_regression(data, labels, ind_mat, opts):
    """
    Define a Bayesian multitask regression problem in which tasks are locally
    smooth (e.g. bin size prediction), and introduce a penalty in which weights
    of regressors of related tasks are encouraged to be similar.
    See https://icml.cc/Conferences/2005/proceedings/papers/128_GaussianProcesses_YuEtAl.pdf
    """
    import scipy.stats as stats
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.kernel_approximation import RBFSampler

    D = opts['approximation_dim']
    sm = RBFSampler(gamma=1. / opts['kpar']**2, n_components=D, random_state=666)
    T = opts['TASKS']
    W = np.random.rand(D, T)
    tau = opts['par1']
    pi = opts['par2']
    C_w = stats.invwishart.rvs(tau, np.eye(D))
    mu_w = np.random.multivariate_normal(np.zeros((D)), 1 / pi * C_w)
    sigma = D / T
    k = 0
    epsi = 1000
    loss = []
    while (epsi > opts['THRESH']) & (k < opts['ITERS']):
        # E-step:
        schedule = range(T)
        W_old = W.copy()
        C_w_temp = 0
        w_l_ce_temp = 0
        sigma_l = 0
        n_l = 0
        t_loss = 0
        t_norm_w = 0
        for ind_w in schedule:
            X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
            trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
            trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]
            N = trn_X.shape[0]
            A = 1 / sigma * np.dot(trn_X.T, trn_X) + np.linalg.inv(C_w)
            A = np.linalg.inv(A)
            C_w_temp += A
            B = 1 / sigma * np.dot(trn_X.T, trn_Y).T + np.dot(mu_w.T, np.linalg.inv(C_w))
            W[:, ind_w] = np.dot(A, np.squeeze(B))
            w_ = W[:, ind_w] - mu_w
            w_l_ce_temp += np.dot(w_, w_.T)
            n_l += N
            sigma_l += np.sum((trn_Y - trn_X.dot(W[:, ind_w]))**2) + np.trace(
                np.dot(trn_X, np.dot(C_w, trn_X.T)))
            t_loss += 1 / N * np.sum((trn_Y.values - np.dot(trn_X, W[:, ind_w]))**2)
            t_norm_w += np.linalg.norm(W[:, ind_w])
        # M-step
        mu_w = 1 / (pi + T) * np.sum(W, axis=1)
        C_w = 1 / (tau + T) * (pi * np.dot(mu_w, mu_w.T) + tau * np.eye(D) + w_l_ce_temp)
        sigma = 1 / n_l * sigma_l
        emp_err = 1 / T * t_loss
        epsi = np.abs(np.sum(np.sum(W - W_old)))
        loss.append(emp_err)
        k += 1
    print(f'iter {k}, size W {W.shape}, conv {epsi}, emp error {emp_err}, '
          f'mean norm w {1/T * t_norm_w}')

    y_hat = np.zeros((opts['DATA_SIZE'][0], T))
    pred_stats = {}
    pred_stats['tr_RMSE'] = []
    pred_stats['tr_R2'] = []
    pred_stats['va_RMSE'] = []
    pred_stats['va_R2'] = []
    for ind_w in range(T):
        X_Y = data.assign(y=labels.iloc[:, ind_w]).copy()
        trn_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 1, :-1])
        trn_Y = X_Y.iloc[ind_mat[:, ind_w] == 1, -1]
        pred_tr_Y = np.dot(trn_X, W[:, ind_w])
        pred_stats['tr_RMSE'].append(np.sqrt(mean_squared_error(trn_Y, pred_tr_Y)))
        pred_stats['tr_R2'].append(r2_score(trn_Y, pred_tr_Y))
        y_hat[ind_mat[:, ind_w] == 1, ind_w] = pred_tr_Y
        if np.any(ind_mat[:, ind_w] == 2):
            val_X = sm.fit_transform(X_Y.iloc[ind_mat[:, ind_w] == 2, :-1])
            val_Y = X_Y.iloc[ind_mat[:, ind_w] == 2, -1]
            pred_va_Y = np.dot(val_X, W[:, ind_w])
            pred_stats['va_RMSE'].append(np.sqrt(mean_squared_error(val_Y, pred_va_Y)))
            pred_stats['va_R2'].append(r2_score(val_Y, pred_va_Y))
            y_hat[ind_mat[:, ind_w] == 2, ind_w] = pred_va_Y
    return W, loss, ind_mat, pred_stats, y_hat
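A hypothetical smoke test wiring up the opts dictionary the function expects. All key names are taken from the body above; the data is synthetic, and note that invwishart requires the degrees of freedom ('par1') to be at least the approximation dimension:

import numpy as np
import pandas as pd

n, p, T = 200, 5, 3
data = pd.DataFrame(np.random.randn(n, p))
labels = pd.DataFrame(np.random.randn(n, T))
ind_mat = np.random.choice([1, 2], size=(n, T))   # 1 = train row, 2 = validation row
opts = {'approximation_dim': 50, 'kpar': 1.0, 'TASKS': T,
        'par1': 60, 'par2': 1.0, 'THRESH': 1e-6, 'ITERS': 20,
        'DATA_SIZE': (n, p)}

W, loss, ind_mat, pred_stats, y_hat = \
    smooth_weight_approximate_gaussian_process_regression(data, labels, ind_mat, opts)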
def transform(x_original, make_np=True): orig = x_original MEAN = [ 0.00213536, 0.00324656, 0.00334724, 0.00175428, 0.00349227, 0.0035413 , 0.00188289, 0.00216241, 0.00184026, 0.00351317, 0.00520942, 0.00450718, 0.00346782, 0.00300477, 0.00223811, 0.00180039, 0.00216675, 0.00381716, 0.00258565, 0.00291358, 0.00616643, 0.00237084, 0.00440006, 0.00729192, 0.00369302, 0.00058215, 0.00312047, 0.00629086, 0.00184585, 0.0018266 , 0.00329771, 0.00352135, 0.00246634, 0.00261958, 0.00357113, 0.00307333, 0.00211512, 0.00125184, 0.00212255, 0.00307451, 0.00171408, 0.0126576 , 0.00252346, 0.00528872, 0.0026387 , 0.00283739, 0.00394586, 0.00207473, 0.00307515, 0.002017 , 0.00408066, 0.00185709, 0.00316201, 0.00349098, 0.00415104, 0.00348125, 0.00069981, 0.00128145, 0.0023404 , 0.00396659, 0.00240324, 0.01251434, 0.00125352, 0.00266113, 0.00435828, 0.00066137, 0.00221134, 0.00083185, 0.00278664, 0.00118505, 0.00335414, 0.00340527, 0.0026939 , 0.00096786, 0.00214149, 0.0026521 , 0.00155538, 0.00300255, 0.0040405 , 0.00275396, 0.00077404, 0.00257667, 0.00268743, 0.00279948, 0.0018655 , 0.00239569, 0.0032419 , 0.00288355, 0.00123361, 0.00220135, 0.0021836 , 0.00225123, 0.00366629, 0.00279189, 0.00058814, 0.00310452, 0.00276981, 0.00128716, 0.00074161, 0.00358908, 0.003292 , 0.00233592, 0.00317694, 0.00381526, 0.00269197, 0.00098085, 0.00231831, 0.00133682, 0.00460957, 0.00387842, 0.0004473 , 0.0015644 , 0.00247717, 0.00179484, 0.00281831, 0.00053689, 0.00415889, 0.00232736, 0.00361601, 0.00192624, 0.00224487, 0.00210838, 0.00140079, 0.00608319, 0.00211861, 0.00230604, 0.00124033, 0.0029389 , 0.00227564, 0.00086638, 0.0035496 , 0.00228789, 0.00361703, 0.00270277, 0.00196611, 0.00206865, 0.00146788, 0.00019011, 0.00222272, 0.00351472, 0.00305718, 0.00239471, 0.00040766, 0.00299186, 0.00368983, 0.00244158, 0.00084154, 0.00109796, 0.00278565, 0.00135904, 0.00424855, 0.00323784, 0.00255397, 0.00234946, 0.00210558, 0.00291688, 0.00172516, 0.00284473, 0.00308164, 0.00316225, 0.0041659 , 0.00055891, 0.00303591, 0.00028217, 0.00261526, 0.00196658, 0.00264379, 0.00018002, 0.00227361, 0.00190785, 0.00344782, 0.00305479, 0.00057851, 0.00115452, 0.00365707, 0.0009598 , 0.00184313, 0.00286183, 0.00400594, 0.0003848 , 0.00086102, 0.00277779, 0.00214625, 0.00329827, 0.00129511, 0.00114751, 0.00249452, 0.00236266, 0.00353646, 0.00319208, 0.00540883, 0.00323167, 0.00299791, 0.00025745, 0.00227873, 0.00228826, 0.0040653 , 0.00238598, 0.00483883, 0.00054585, 0.00091663, 0.00037232, 0.0008229 , 0.00073563, 0.00283771, 0.0035899 , 0.00578833, 0.0032107 , 0.0014048 , 0.00401052, 0.002748 , 0.00229416, 0.00130351, 0.00308403, 0.00146506, 0.00188529, 0.00236308, 0.00259649, 0.00185155, 0.00230195, 0.00421584, 0.00231917, 0.00227335, 0.00296253, 0.00077996, 0.0001668 , 0.00069015, 0.00220702, 0.00238395, 0.00034903, 0.00303323, 0.00407338, 0.00178655, 0.00456887, 0.00254606, 0.00215019, 0.00306377, 0.00134979, 0.00112832, 0.00350681, 0.00253643, 0.00431348, 0.00094915, 0.00150396, 0.00043838, 0.00207101, 0.00301119, 0.00057716, 0.00062709, 0.00543404, 0.00061686, 0.00237189, 0.00522715, 0.00321869, 0.00172645, 0.00244482, 0.00334951, 0.00183201, 0.00038157, 0.0023022 , 0.00418559, 0.00329119, 0.00411452, 0.00089033, 0.00283673, 0.00210368, 0.00222242, 0.00213262, 0.0033576 , 0.00250707, 0.00423595, 0.00237407, 0.00127654, 0.00387341, 0.00216695, 0.00325004, 0.00246333, 0.00396034, 0.0031676 , 0.00354552, 0.00227099, 0.00205363, 0.00128859, 0.00290737, 0.00301655, 0.00319576, 0.00072449, 0.00230528, 0.00326406, 0.00283315, 
0.00338869, 0.00212552, 0.00135612, 0.00250613, 0.00045907, 0.0014009 , 0.00177951, 0.00042544, 0.00073249, 0.00303487, 0.0013664 , 0.00248306, 0.00025601, 0.00435174, 0.00443799, 0.00479944, 0.0009997 , 0.00275155, 0.00286969, 0.00244896, 0.00177604, 0.00278218, 0.00078876, 0.00142078, 0.00186949, 0.0018215 , 0.0027254 , 0.00316367, 0.00192957, 0.00176559, 0.00289111, 0.00048977, 0.00411342, 0.00130383, 0.00250934, 0.00324275, 0.00159243, 0.00334068, 0.00324279, 0.00158259, 0.00041714, 0.00161102, 0.00145149, 0.00222112, 0.00296289, 0.00282892, 0.00123731, 0.00281891, 0.00016613, 0.0014267 , 0.00262089, 0.00367506, 0.00281706, 0.00318947, 0.00090315, 0.00230826, 0.00310803, 0.00889549, 0.00197781, 0.00160006, 0.00307063, 0.00176858, 0.00252353, 0.00141795, 0.00047073, 0.00241224, 0.00165672, 0.00138939, 0.00257068, 0.00148445, 0.00193734, 0.004368 , 0.00247817, 0.00249266, 0.00329317, 0.00078468, 0.00045822, 0.00259324, 0.00298367, 0.00335009, 0.00307879, 0.00325237, 0.00254531, 0.00749495, 0.0026701 , 0.00100689, 0.00184948, 0.00317616, 0.00255977, 0.00112342, 0.00165774, 0.00227449, 0.00064219, 0.00269639, 0.00114312, 0.00203549, 0.00064574, 0.00130932, 0.00304631, 0.00131053, 0.00174587, 0.0027975 , 0.00461148, 0.0015227 , 0.0027072 , 0.00210673, 0.00323388, 0.00028426, 0.00113429, 0.00315131] VAR = [ 3.87111312e-06, 1.29838726e-05, 1.23895436e-05, 5.11051819e-06, 1.87834728e-05, 5.81101229e-05, 1.22431672e-05, 3.14238203e-06, 6.15186426e-06, 1.16054974e-05, 2.61629851e-05, 1.51823678e-05, 3.20501352e-05, 6.75625364e-06, 6.90383937e-06, 7.10772563e-06, 3.93108356e-06, 1.38147699e-05, 9.45390664e-06, 6.18869987e-06, 1.23460353e-03, 5.15741591e-06, 1.27185867e-05, 7.62148434e-05, 9.61369316e-06, 3.59794999e-06, 4.49714597e-05, 1.15313013e-04, 2.51027515e-06, 3.23518027e-06, 1.15175054e-05, 5.55007797e-05, 3.61287015e-06, 4.24901217e-06, 1.57731133e-05, 8.83739880e-06, 4.11832891e-06, 4.51594425e-06, 5.66233716e-06, 2.76312055e-05, 3.10286633e-05, 2.06523833e-04, 4.99679342e-06, 3.59423460e-05, 5.53408014e-06, 5.02979264e-06, 2.29845095e-05, 3.52580303e-06, 4.74110466e-06, 2.77776825e-06, 1.15279947e-05, 4.78634098e-06, 8.24242505e-06, 1.65141090e-05, 1.84669015e-05, 1.65851869e-05, 9.69125917e-07, 4.07269628e-06, 4.79411492e-06, 7.95185399e-06, 6.05491604e-06, 2.30133633e-04, 2.43045915e-06, 9.99138675e-06, 1.61846281e-05, 1.36250194e-06, 3.83900385e-06, 4.03501076e-06, 4.49190746e-06, 2.20133970e-06, 1.40571788e-05, 1.23973871e-05, 1.91642968e-05, 1.83384119e-06, 3.55110501e-06, 6.38707023e-06, 7.58389225e-06, 9.66052931e-06, 1.33459561e-05, 6.01834583e-06, 1.75975058e-06, 9.93625536e-06, 5.57880408e-06, 5.20632392e-06, 2.63891241e-06, 4.96341232e-06, 1.35361419e-05, 5.09588225e-06, 2.13213362e-06, 3.67884149e-06, 4.02580880e-06, 3.36118966e-06, 1.23913905e-05, 1.19327162e-05, 1.33013390e-06, 1.56844681e-05, 5.05235129e-06, 3.27510379e-06, 4.18496352e-06, 1.32615022e-05, 8.00089632e-06, 5.24889508e-06, 7.61725520e-06, 2.45732025e-05, 4.73942392e-06, 3.26874106e-06, 4.19502445e-06, 4.67408597e-06, 4.07529951e-05, 1.85623369e-05, 1.42640177e-06, 9.02420306e-06, 3.99465979e-06, 2.91695819e-06, 7.51525182e-06, 3.28339831e-06, 9.23579413e-06, 8.82938566e-06, 1.67017625e-05, 7.18046179e-06, 6.67502140e-06, 4.53568390e-06, 4.59241197e-06, 9.71055426e-05, 4.06108283e-06, 3.21309715e-06, 2.83145362e-06, 1.30979068e-05, 4.30934096e-06, 1.33494112e-06, 1.23067054e-05, 4.55467345e-06, 4.16151366e-05, 4.39300907e-06, 3.81081336e-06, 3.57599046e-06, 2.44792045e-06, 1.04884156e-06, 5.66646773e-06, 
1.38454953e-05, 7.03958785e-06, 7.96561298e-06, 1.15832827e-06, 5.34098000e-06, 1.08664502e-05, 5.33706713e-06, 1.58029233e-06, 4.16948014e-06, 1.10410603e-05, 3.08923185e-06, 3.60056097e-05, 1.35575315e-05, 7.21297470e-06, 5.46186866e-06, 3.83067878e-06, 4.93382163e-06, 8.74249160e-06, 6.95763983e-06, 8.57639945e-06, 1.99238085e-05, 2.06143616e-05, 4.15158574e-06, 6.98539924e-06, 7.29978665e-07, 1.05324242e-05, 4.03610511e-06, 4.54024757e-06, 1.12380259e-06, 7.25149490e-06, 4.68609708e-06, 4.47583007e-05, 5.73128000e-06, 1.55383559e-06, 6.10201277e-06, 1.56226083e-05, 2.07417481e-06, 3.92362694e-06, 5.07511158e-06, 1.91527526e-05, 1.23196439e-06, 2.78105795e-06, 6.20886459e-06, 9.77619759e-06, 4.54569998e-05, 3.69801329e-06, 3.90055801e-06, 8.95043365e-06, 4.62714915e-06, 8.59072207e-06, 7.93476416e-06, 2.94461267e-05, 1.27513460e-05, 6.37168538e-06, 1.42869302e-06, 3.88169829e-06, 3.73479924e-06, 3.41961106e-05, 5.99249536e-06, 3.52894229e-05, 3.60535269e-06, 1.97432492e-06, 1.08726206e-06, 6.34745318e-06, 1.85853697e-06, 4.88355657e-06, 1.45421337e-05, 4.71209759e-05, 9.75886239e-06, 1.92188254e-06, 2.44175182e-05, 6.48665880e-06, 3.77833988e-06, 4.94021824e-06, 1.11375076e-05, 2.48913056e-06, 7.50221434e-06, 7.71706724e-06, 4.40449246e-06, 5.01260110e-06, 7.55913298e-06, 9.61114153e-06, 4.71524238e-06, 5.71612330e-06, 5.35067657e-06, 1.24371020e-06, 1.05315411e-06, 3.93981671e-06, 4.10917913e-06, 4.50131192e-06, 1.41029887e-06, 5.21404239e-06, 3.10300539e-05, 2.86295992e-06, 3.14574375e-05, 4.13089781e-06, 3.94511845e-06, 5.21837923e-06, 1.86040011e-06, 4.33877122e-06, 6.79169351e-06, 7.34233345e-06, 2.46684357e-05, 6.04518227e-06, 3.50075336e-06, 1.22008735e-06, 3.82670787e-06, 1.29928488e-05, 1.30317263e-06, 1.82923403e-06, 1.68159694e-04, 1.39570985e-06, 6.82018782e-06, 2.77705938e-05, 5.50219803e-06, 6.94297855e-06, 5.56691651e-06, 4.40913139e-05, 8.64954832e-06, 1.13623461e-06, 3.91895303e-06, 2.90528320e-05, 8.95829181e-06, 2.13802762e-05, 1.45383845e-06, 2.19748855e-05, 2.92403666e-06, 4.11580346e-06, 3.79422424e-06, 1.01354981e-05, 1.12666398e-05, 2.12954971e-05, 4.73278161e-06, 2.26826965e-06, 2.45301255e-05, 5.86185180e-06, 6.92235736e-06, 8.42678526e-06, 2.47795958e-05, 6.25412728e-06, 1.41974527e-05, 3.95337688e-06, 7.16912125e-06, 2.00884144e-06, 2.00349034e-05, 5.97662651e-06, 3.01450892e-05, 4.63002816e-06, 4.09857661e-06, 1.23373959e-05, 5.62286236e-06, 1.23868932e-05, 7.79128188e-06, 4.02737664e-06, 4.26867074e-06, 1.30633550e-06, 2.16092242e-06, 2.53344988e-06, 1.55130629e-06, 1.20587686e-06, 8.47719131e-06, 1.72865161e-06, 8.85885938e-06, 1.36250583e-06, 3.02467214e-05, 2.85941868e-05, 1.68684969e-05, 2.17024274e-06, 9.09429716e-06, 1.12517072e-05, 5.39997088e-06, 3.16738113e-06, 7.44227101e-06, 1.39521345e-06, 1.80325624e-06, 3.23437991e-06, 4.12906812e-06, 6.51981136e-06, 7.28606378e-06, 4.44469608e-06, 4.00705337e-06, 1.34244753e-05, 1.34953189e-06, 3.86701616e-05, 4.30733919e-06, 4.29618197e-06, 1.67568650e-05, 5.39451612e-06, 8.50733433e-06, 1.04900918e-05, 4.68246794e-06, 2.92591087e-06, 2.54589900e-06, 6.68970689e-06, 3.68698856e-06, 5.70542637e-06, 1.57329410e-05, 3.45199222e-06, 7.27799975e-06, 8.64176250e-07, 5.59882582e-06, 4.16052401e-06, 1.73753080e-05, 7.85748797e-06, 6.46626446e-06, 2.23241624e-06, 6.79217908e-06, 6.18545939e-06, 5.41203600e-04, 2.75355566e-06, 5.01654998e-06, 9.55004050e-06, 3.36241075e-06, 4.95540827e-06, 4.38650100e-06, 2.19975452e-06, 4.99878215e-06, 2.08615031e-06, 6.57349770e-06, 6.07825138e-06, 1.82116637e-05, 3.98356104e-06, 
        3.02862803e-05, 1.45275531e-05, 1.80111343e-05, 1.81263109e-05,
        1.37630960e-06, 1.01588605e-06, 1.09961427e-05, 7.09189456e-06,
        8.63553483e-06, 1.28377215e-05, 1.15539997e-05, 4.30247032e-06,
        3.69651334e-05, 1.13411365e-05, 1.43191945e-06, 2.76733205e-06,
        7.03730009e-06, 4.93027252e-06, 2.72768641e-06, 3.15867713e-06,
        3.51786262e-06, 1.33668414e-06, 5.15268762e-06, 2.24808552e-06,
        3.91888753e-06, 1.96848802e-06, 5.96948656e-06, 6.72807533e-06,
        2.52024742e-06, 4.64795350e-06, 6.00152269e-06, 4.42994740e-05,
        2.59223022e-06, 4.76032620e-06, 3.15249648e-06, 1.02942457e-05,
        7.54992395e-07, 2.48130225e-06, 5.97253972e-06]

    x_original = np.array(x_original)
    x_original -= MEAN
    x_original /= VAR

    def extend_x(arr, additions=True, extension=True):
        if extension:
            x.extend(arr)
        if additions:
            x.append(scipy.std(arr))
            x.append(scipy.var(arr))
            x.append(sum(arr) / len(arr))
            x.append(sum(np.abs(arr)) / len(arr))
            x.append(min(arr))
            x.append(max(arr))
            x.append(scipy.mean(arr))
            x.append(scipy.median(arr))

    x = []
    extend_x(x_original)
    extend_x(np.abs(x_original))
    # extend_x(np.sqrt(np.abs(x_original)))

    # sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50, random_state=1)
    # zzz1 = sampler1.fit_transform(np.abs(np.array(orig)))[0]
    # sampler2 = SkewedChi2Sampler(skewedness=8.5, n_components=50, random_state=1)
    # zzz2 = sampler2.fit_transform(np.abs(np.array(x)))[0]
    sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
    zzz3 = sampler3.fit_transform(np.array(x))[0]
    # zzz1/zzz2 come from the SkewedChi2Sampler lines above; with those samplers
    # commented out, the next two calls would raise NameError, so they are
    # disabled as well
    # extend_x(list(zzz1))
    # extend_x(list(zzz2))
    extend_x(list(zzz3))

    if make_np:
        return np.array(x)
    return x
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=28)

# Average CV score on the training set was: 0.6521171672228402
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.55),
    RandomForestClassifier(bootstrap=False, criterion="gini", max_features=0.4,
                           min_samples_leaf=19, min_samples_split=13,
                           n_estimators=100))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
class CCNNLayer:
    def __init__(self, name: str, input_size: int, filter_size: int, gamma: float,
                 m: int, R: float, r: int, lr: float):
        self.name = name
        self.input_size = input_size
        self.filter_size = filter_size
        self.patch_size = filter_size ** 2
        self.output_size = self.input_size - self.filter_size + 1
        self.n_patchs = self.output_size ** 2
        self.m = m
        self.R = R
        self.lr = lr
        self.rbf_feature = RBFSampler(gamma=gamma, n_components=m, random_state=1)
        self.svd = TruncatedSVD(n_components=r)

    def initPars(self, n_classes: int, batch_size: int):
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.lr /= batch_size
        self.A = np.random.normal(0, 0.1, size=(n_classes, self.n_patchs, self.m))

    def getZMatrix(self, X):
        """
        Input: (n_instances, n_channels, input_size, input_size)
        Output: (n_instances, n_patchs, m)
        """
        Z = view_as_windows(X, (1, X.shape[1], self.filter_size, self.filter_size))
        Z = Z.reshape(np.prod(Z.shape[:4]), np.prod(Z.shape[4:]))
        Q = self.rbf_feature.transform(Z).astype(np.float16)
        return Q.reshape(X.shape[0], self.n_patchs, -1)

    def predict(self, X, transform: bool = False):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        Transformed input: (batch_size, n_patchs, m)
        Output: (batch_size, n_classes)
        """
        Z = self.getZMatrix(X) if transform else X
        p = np.exp(np.tensordot(Z, self.A, axes=[(1, 2), (1, 2)]))
        return (p.T / np.sum(p, axis=1)).T

    def fit(self, X, ylabel, n_epoch: int):
        assert X.shape[2] == X.shape[3] == self.input_size
        n = X.shape[0]
        self.rbf_feature.fit(np.zeros((1, X.shape[1] * self.filter_size ** 2)))
        print("Preparing patches...")
        Z_batches = [self.getZMatrix(X[i: i + self.batch_size])
                     for i in range(0, n, self.batch_size)]
        y_batches = ylabel.reshape(-1, self.batch_size)
        print("Starting PSGD...")
        loss = np.inf
        rhat = self.m
        for epoch in range(n_epoch):
            print("{0}: Epoch {1}: loss = {2}, r_hat = {3}".format(
                self.name, epoch + 1, loss / n, rhat))
            loss = 0
            for i, (Z_batch, y_batch) in enumerate(zip(Z_batches, y_batches)):
                p_batch = self.predict(Z_batch)
                loss += np.sum(-np.log(p_batch[np.arange(self.batch_size), y_batch]))
                dL_batch = -p_batch
                dL_batch[np.arange(self.batch_size), y_batch] += 1
                self.A += self.lr * np.tensordot(dL_batch, Z_batch, axes=[0, 0])
                A_unfold = self.A.reshape(-1, self.A.shape[2]).T
                U = self.svd.fit_transform(A_unfold)
                self.U = U.copy()
                d = np.linalg.norm(U, axis=0)
                U *= 1 / d
                d_cum = np.cumsum(d)
                rhat = np.searchsorted(
                    d_cum - self.R > np.append(d[1:] * np.arange(1, d.size), 0),
                    True) + 1
                if rhat >= d.size:
                    print("Warning: Hard-thresholding applied")
                if rhat <= d.size:
                    scale = np.maximum(0, d - (d_cum[rhat - 1] - self.R) / rhat)
                    U = U[:, :rhat]
                    d = d[:rhat]
                    self.U = U * scale[:rhat]
                self.A = ((self.U * (1 / d)) @ (U.T @ A_unfold)).T.reshape(*self.A.shape)
        Z_batches = None
        y_batches = None

    def transform(self, X):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        Output: (batch_size, n_output_channels, output_size, output_size)
        """
        Z = np.rollaxis(np.tensordot(self.U, self.getZMatrix(X), axes=[0, 2]), 0, 2)
        return Z.reshape(Z.shape[0], Z.shape[1], self.output_size, self.output_size)
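A hypothetical end-to-end smoke test of the layer on random MNIST-like data (shapes and hyperparameters are assumptions; view_as_windows and TruncatedSVD are imported in the snippet's module). Note that initPars must be called before fit, since it allocates A:

import numpy as np

layer = CCNNLayer(name="conv1", input_size=28, filter_size=5, gamma=0.1,
                  m=64, R=10.0, r=16, lr=0.5)
layer.initPars(n_classes=10, batch_size=50)

X = np.random.rand(100, 1, 28, 28)          # 100 single-channel 28x28 images
y = np.random.randint(0, 10, size=100)
layer.fit(X, y, n_epoch=2)
features = layer.transform(X[:10])          # (10, kept-rank channels, 24, 24)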
    train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: -3.6343523092856613
exported_pipeline = make_pipeline(
    StackingEstimator(
        estimator=KNeighborsRegressor(n_neighbors=48, p=1, weights="uniform")),
    RobustScaler(),
    MinMaxScaler(),
    StackingEstimator(estimator=LinearSVR(C=25.0, dual=True, epsilon=0.01,
                                          loss="epsilon_insensitive", tol=0.0001)),
    StackingEstimator(estimator=DecisionTreeRegressor(
        max_depth=8, min_samples_leaf=17, min_samples_split=9)),
    FeatureAgglomeration(affinity="l2", linkage="average"),
    RBFSampler(gamma=0.75),
    StackingEstimator(estimator=LinearSVR(C=1.0, dual=True, epsilon=1.0,
                                          loss="squared_epsilon_insensitive", tol=0.1)),
    StackingEstimator(
        estimator=KNeighborsRegressor(n_neighbors=9, p=1, weights="uniform")),
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    SelectPercentile(score_func=f_regression, percentile=26),
    StandardScaler(),
    PCA(iterated_power=7, svd_solver="randomized"),
    StackingEstimator(estimator=LinearSVR(C=10.0, dual=True, epsilon=0.01,
                                          loss="squared_epsilon_insensitive", tol=1e-05)),
    ZeroCount(),
from sklearn.kernel_approximation import RBFSampler
from torch.distributions import MultivariateNormal
from scipy.stats import multivariate_normal
import numpy as np
import linear_trpo_config as C
import torch

rbf_feature = RBFSampler(gamma=1, n_components=C.extracted_feature_size,
                         random_state=12345)


def extract_features(state, num_actions):
    """
    This function computes the RFF features for a state for all the discrete actions

    :param state: column vector of the state we want to compute phi(s,a) of (shape |S|x1)
    :param num_actions: number of discrete actions you want to compute the RFF features for
    :return: phi(s,a) for all the actions (shape 100x|num_actions|)
    """
    s = state.reshape(1, -1)
    s = np.repeat(s, num_actions, 0)
    a = np.arange(0, num_actions).reshape(-1, 1)
    sa = np.concatenate([s, a], -1)
    feats = rbf_feature.fit_transform(sa)
    feats = feats.T
    return feats


def compute_action_distribution(theta, phis, mode):
    """
    compute probability distribution over actions

    :param theta: model parameter (shape d x 1)
    :param phis: RFF features of the state and actions (shape d x |A|)
    """
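The body of compute_action_distribution is cut off in this excerpt. A minimal softmax sketch consistent with its docstring (a hypothetical completion that ignores the `mode` argument, not the original implementation):

def compute_action_distribution_sketch(theta, phis):
    """Softmax over theta^T phi(s, a); theta is d x 1, phis is d x |A|."""
    logits = theta.T @ phis          # shape (1, |A|)
    logits = logits - np.max(logits)  # subtract the max for numerical stability
    probs = np.exp(logits)
    return probs / np.sum(probs)     # shape (1, |A|), rows sum to 1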
scaler_action = sklearn.preprocessing.StandardScaler()
scaler_action.fit(action_examples)

# featurizer_action = sklearn.pipeline.FeatureUnion([
#     ("rbf1", RBFSampler(gamma=5.0, n_components=100)),
#     ("rbf2", RBFSampler(gamma=2.0, n_components=100)),
#     ("rbf3", RBFSampler(gamma=1.0, n_components=100)),
#     ("rbf4", RBFSampler(gamma=0.5, n_components=100))
# ])
# featurizer_action.fit(scaler_action.transform(action_examples))
featurizer_action = sklearn.pipeline.FeatureUnion([
    ("rbf1", RBFSampler(gamma=5.0, n_components=1)),
    ("rbf2", RBFSampler(gamma=2.0, n_components=1))
])
featurizer_action.fit(scaler_action.transform(action_examples))


def featurize_action(action):
    # action = np.array([action])
    scaled = scaler_action.transform([action])
    featurized_action = featurizer_action.transform(scaled)
    return featurized_action[0]
def real_data_error_profile(data_name, sketch_size):
    '''
    CoverType uses a polynomial feature map, which generates
    {feature_size + degree \choose degree} new features; w8a uses an RBF
    random feature map instead.
    '''
    # * Experimental parameters
    n = 20000
    trials = 5
    fd_iterations = 15
    rp_iterations = 45
    # ds = DataFactory(n=n)
    if data_name == 'CoverType':
        _ = np.load('../../datasets/covertype.npy')
        _X, _y = _[:, :-1], _[:, -1]
        feature_expansion = 'Polynomial'
        features = [2]
    elif data_name == 'w8a':
        _ = np.load('../../datasets/w8a.npy')
        _X, _y = _[:, :-1], _[:, -1]
        feature_expansion = 'RBF'
        features = [2500]

    # * Results data structures
    exact_results = {
        'solve time': {_: np.zeros(trials, dtype=float) for _ in features}
    }
    fd_results = {
        'errors': {_: np.zeros((fd_iterations + 1, trials), dtype=float) for _ in features},
        'build times': {_: np.zeros(trials, dtype=float) for _ in features},
        'iteration times': {_: np.zeros(trials, dtype=float) for _ in features},
        'all_times': {_: np.zeros((fd_iterations + 1, trials), dtype=float) for _ in features}
    }
    rfd_results = {
        'errors': {_: np.zeros((fd_iterations + 1, trials), dtype=float) for _ in features},
        'build times': {_: np.zeros(trials, dtype=float) for _ in features},
        'iteration times': {_: np.zeros(trials, dtype=float) for _ in features},
        'all_times': {_: np.zeros((fd_iterations + 1, trials), dtype=float) for _ in features}
    }
    rp_srht_results = {
        'errors': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features},
        'build times': {_: np.zeros(trials, dtype=float) for _ in features},
        'iteration times': {_: np.zeros(trials, dtype=float) for _ in features},
        'all_times': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features}
    }
    rp_cntsk_results = {
        'errors': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features},
        'build times': {_: np.zeros(trials, dtype=float) for _ in features},
        'iteration times': {_: np.zeros(trials, dtype=float) for _ in features},
        'all_times': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features}
    }
    ihs_srht_results = {
        'errors': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features},
        'build times': {_: np.zeros((rp_iterations, trials), dtype=float) for _ in features},
        'iteration times': {_: np.zeros(trials, dtype=float) for _ in features},
        'all_times': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features}
    }
    ihs_sjlt_results = {
        'errors': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features},
        'build times': {_: np.zeros((rp_iterations, trials), dtype=float) for _ in features},
        'iteration times': {_: np.zeros(trials, dtype=float) for _ in features},
        'all_times': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features}
    }
    ihs_countsketch_results = {
        'errors': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features},
        'build times': {_: np.zeros((rp_iterations, trials), dtype=float) for _ in features},
        'iteration times': {_: np.zeros(trials, dtype=float) for _ in features},
        'all_times': {_: np.zeros((rp_iterations + 1, trials), dtype=float) for _ in features}
    }

    mean_iter_time_single = lambda a: np.mean(a['all_times'][1:] - a['sketch time'])
    mean_iter_time_multi = lambda a, its: np.mean(a['all_times'][1:] - a['sketch time'] / its)

    for t in range(trials):
        print('*' * 10, '\t TRIAL ', t, '\t', '*' * 10)
        np.random.seed(t)
        sample = np.random.choice(_X.shape[0], size=n, replace=False)
        X_sample, y = _X[sample], _y[sample]
        for i, feature_hyper in enumerate(features):
            print('######### FEATURIZING #########')
            if feature_expansion == 'Polynomial':
                X_poly = PolynomialFeatures(degree=feature_hyper).fit_transform(X_sample)
                if X_poly.shape[1] > X_poly.shape[0]:
                    # nkeep is negative here, so the slice drops |nkeep| columns
                    # from the end
                    nkeep = int(1.5 * (X_poly.shape[0] - X_poly.shape[1]))
                    X_poly = X_poly[:, :nkeep]
            else:
                X_poly = RBFSampler(gamma=0.0001, random_state=t,
                                    n_components=feature_hyper).fit_transform(X_sample)
            X = StandardScaler().fit_transform(X_poly)
            N, D = X.shape
            X_train_sparse = coo_matrix(X)
            g = np.linalg.norm(X, ord='fro')**2 / sketch_size
            print('#' * 10, f'\t GAMMA: G={g}, i={i} d={feature_hyper}\t', '#' * 10)

            # ! Optimal solution
            print('#' * 60)
            print('Solving exactly: Data shape: ', X.shape)
            solve_start = timer()
            x_opt = svd_ridge_solve(X, y, g)
            solve_time = timer() - solve_start
            exact_results['solve time'][feature_hyper][t] = solve_time

            # ! FD Sketching
            print('#' * 10, '\t FREQUENT DIRECTIONS \t', '#' * 10)
            # fdr = FDRidge(fd_dim=sketch_size, gamma=g)
            fdr = IterativeRidge(N, D, sk_dim=sketch_size, sk_mode='FD', gamma=g)
            _, all_x, fd_measured = fdr.fit(X, y, fd_iterations)
            fd_results['errors'][feature_hyper][:, t] = get_euclidean_errors(all_x, x_opt)
            fd_results['build times'][feature_hyper][t] = fd_measured['sketch time']
            fd_results['iteration times'][feature_hyper][t] = np.mean(
                fd_measured['update time'])  # mean_iter_time_single(fd_measured)
            fd_results['all_times'][feature_hyper][:, t] = fd_measured['all_times']

            # ! RFD Sketching
            print('#' * 10, '\t ROBUST FREQUENT DIRECTIONS \t', '#' * 10)
            # rfdr = FDRidge(fd_dim=sketch_size, fd_mode='RFD', gamma=g)
            rfdr = IterativeRidge(N, D, sk_dim=sketch_size, sk_mode='RFD', gamma=g)
            _, rfd_all_x, rfd_measured = rfdr.fit(X, y, fd_iterations)
            rfd_results['errors'][feature_hyper][:, t] = get_euclidean_errors(rfd_all_x, x_opt)
            rfd_results['build times'][feature_hyper][t] = rfd_measured['sketch time']
            rfd_results['iteration times'][feature_hyper][t] = np.mean(
                rfd_measured['update time'])  # mean_iter_time_single(rfd_measured)
            rfd_results['all_times'][feature_hyper][:, t] = rfd_measured['all_times']

            # ! Single Random sketches
            print('#' * 10, '\t SRHT SINGLE \t', '#' * 10)
            srht_single = IterativeRidge(N, D, sk_dim=sketch_size, sk_mode='SRHT', gamma=g)
            _, srht_single_all_x, srht_single_measured = srht_single.fit(X, y, rp_iterations, seed=i)
            rp_srht_results['errors'][feature_hyper][:, t] = get_euclidean_errors(srht_single_all_x, x_opt)
            rp_srht_results['build times'][feature_hyper][t] = srht_single_measured['sketch time']
            rp_srht_results['iteration times'][feature_hyper][t] = np.mean(
                srht_single_measured['update time'])  # mean_iter_time_single(srht_single_measured)
            rp_srht_results['all_times'][feature_hyper][:, t] = srht_single_measured['all_times']

            print('#' * 10, '\t CountSketch SINGLE \t', '#' * 10)
            # ! Sparse methods using NUMBA need to compile the sketch, so do that
            # ! ahead of time so that the timing experiment is not compromised.
            cntsk_single = IterativeRidge(N, D, sk_dim=sketch_size, sk_mode='CountSketch',
                                          gamma=g, sparse_data=X_train_sparse)
            if t == 0:
                _, cntsk_single_all_x, cntsk_single_measured = cntsk_single.fit(X, y)
            _, cntsk_single_all_x, cntsk_single_measured = cntsk_single.fit(X, y, rp_iterations, seed=i)
            rp_cntsk_results['errors'][feature_hyper][:, t] = get_euclidean_errors(cntsk_single_all_x, x_opt)
            rp_cntsk_results['build times'][feature_hyper][t] = cntsk_single_measured['sketch time']
            rp_cntsk_results['iteration times'][feature_hyper][t] = np.mean(
                cntsk_single_measured['update time'])  # mean_iter_time_single(cntsk_single_measured)
            rp_cntsk_results['all_times'][feature_hyper][:, t] = cntsk_single_measured['all_times']

            # ! Multi Random sketches
            print('#' * 10, '\t CountSketch IHS \t', '#' * 10)
            ihs_cntsk = IterativeRidge(N, D, sk_dim=sketch_size, sk_mode='CountSketch',
                                       gamma=g, sparse_data=X_train_sparse, ihs_mode='multi')
            if t == 0:
                # ! NUMBA warm-up, as above
                _, ihs_cntsk_all_x, ihs_cntsk_measured = ihs_cntsk.fit(X, y)
            _, ihs_cntsk_all_x, ihs_cntsk_measured = ihs_cntsk.fit(X, y, rp_iterations, seed=i)
            ihs_countsketch_results['errors'][feature_hyper][:, t] = get_euclidean_errors(ihs_cntsk_all_x, x_opt)
            ihs_countsketch_results['build times'][feature_hyper][:, t] = ihs_cntsk_measured['sketch time']
            ihs_countsketch_results['iteration times'][feature_hyper][t] = np.mean(
                ihs_cntsk_measured['update time'])  # mean_iter_time_multi(ihs_cntsk_measured, rp_iterations)
            ihs_countsketch_results['all_times'][feature_hyper][:, t] = ihs_cntsk_measured['all_times']

            print('#' * 10, '\t SJLT IHS \t', '#' * 10)
            ihs_sjlt = IterativeRidge(N, D, sk_dim=sketch_size, sk_mode='SJLT',
                                      sjlt_sparsity=5, gamma=g,
                                      sparse_data=X_train_sparse, ihs_mode='multi')
            if t == 0:
                # ! NUMBA warm-up, as above
                _, _, _ = ihs_sjlt.fit(X, y)
            _, ihs_sjlt_all_x, ihs_sjlt_measured = ihs_sjlt.fit(X, y, rp_iterations, seed=i)
            ihs_sjlt_results['errors'][feature_hyper][:, t] = get_euclidean_errors(ihs_sjlt_all_x, x_opt)
            ihs_sjlt_results['build times'][feature_hyper][:, t] = ihs_sjlt_measured['sketch time']
            ihs_sjlt_results['iteration times'][feature_hyper][t] = np.mean(
                ihs_sjlt_measured['update time'])  # mean_iter_time_multi(ihs_sjlt_measured, rp_iterations)
            ihs_sjlt_results['all_times'][feature_hyper][:, t] = ihs_sjlt_measured['all_times']

            print('#' * 10, '\t SRHT IHS \t', '#' * 10)
            ihs_srht = IterativeRidge(N, D, sk_dim=sketch_size, sk_mode='SRHT',
                                      gamma=g, ihs_mode='multi')
            _, ihs_srht_all_x, ihs_srht_measured = ihs_srht.fit(X, y, rp_iterations, seed=i)
            ihs_srht_results['errors'][feature_hyper][:, t] = get_euclidean_errors(ihs_srht_all_x, x_opt)
            ihs_srht_results['build times'][feature_hyper][:, t] = ihs_srht_measured['sketch time']
            ihs_srht_results['iteration times'][feature_hyper][t] = np.mean(
                ihs_srht_measured['update time'])  # mean_iter_time_multi(ihs_srht_measured, rp_iterations)
            ihs_srht_results['all_times'][feature_hyper][:, t] = ihs_srht_measured['all_times']

    # ! Prepare and save the results in json format
    pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(fd_results['errors'])           # FD
    # pp.pprint(rp_srht_results['errors'])      # SRHT-Single
    # pp.pprint(ihs_srht_results)               # ihs:SRHT
    # pp.pprint(rp_cntsk_results)               # CountSketch
    # pp.pprint(ihs_countsketch_results)        # ihs:CountSketch
    results_file_name = 'results/real_data/error_profile-' + data_name + '.json'
    for d in [exact_results, fd_results, rfd_results, rp_srht_results,
              rp_cntsk_results, ihs_srht_results, ihs_sjlt_results,
              ihs_countsketch_results]:
        for k, v in d.items():
            for v_key, v_val in v.items():
                if type(v_val) == np.ndarray:
                    d[k][v_key] = v_val.tolist()
    all_results = {
        'Exact': exact_results,
        'FD': fd_results,
        'RFD': rfd_results,
        'SRHT': rp_srht_results,
        'CountSketch': rp_cntsk_results,
        'ihs:SRHT': ihs_srht_results,
        'ihs:SJLT': ihs_sjlt_results,
        'ihs:CountSketch': ihs_countsketch_results
    }
    with open(results_file_name, 'w') as fp:
        json.dump(all_results, fp, sort_keys=True, indent=4)
def __init__(self, dataset, obs_dim, act_dim, gamma, horizon, policy_net, value_reg,
             hidden_layers, activation, output_transform, default_length_scale=0.1,
             random_feature_per_obs_dim=250, norm='std',
             scale_length_adjustment='median', input_mode='sa', seed=1):
    self.obs_dim = obs_dim
    self.act_dim = act_dim
    self.gamma = gamma
    self.horizon = horizon
    self.norm = norm
    self.policy_net = policy_net
    # self.model_reg = model_reg
    # self.reward_reg = reward_reg
    self.value_reg = value_reg
    self.input_mode = input_mode
    self.n_samples = dataset['obs'].shape[0]
    self.n_episode = dataset['init_obs'].shape[0]

    if self.policy_net is not None:
        self.pi_current = self.policy_net.get_probabilities(dataset['obs'])
        self.pi_next = self.policy_net.get_probabilities(dataset['next_obs'])
        self.pi_init = self.policy_net.get_probabilities(dataset['init_obs'])
        self.pi_term = self.policy_net.get_probabilities(dataset['term_obs'])
    else:
        self.pi_current = dataset['target_prob_obs']
        self.pi_next = dataset['target_prob_next_obs']
        self.pi_init = dataset['target_prob_init_obs']
        self.pi_term = dataset['target_prob_term_obs']

    if self.norm == 'std':
        self.obs_mean = np.mean(dataset['obs'], axis=0, keepdims=True)
        self.obs_std = np.std(dataset['obs'], axis=0, keepdims=True)
        self.obs = (dataset['obs'] - self.obs_mean) / self.obs_std
        self.next_obs = (dataset['next_obs'] - self.obs_mean) / self.obs_std
        self.init_obs = (dataset['init_obs'] - self.obs_mean) / self.obs_std
        self.term_obs = (dataset['term_obs'] - self.obs_mean) / self.obs_std
    elif self.norm is None:
        self.obs = dataset['obs']
        self.next_obs = dataset['next_obs']
        self.init_obs = dataset['init_obs']
        self.term_obs = dataset['term_obs']
    else:
        raise NotImplementedError

    if scale_length_adjustment == 'median':
        sample_num = 5000
        idx1 = np.random.choice(self.n_samples, sample_num)
        idx2 = np.random.choice(self.n_samples, sample_num)
        med_dist = np.median(np.square(self.obs[None, idx1, :] -
                                       self.obs[idx2, None, :]), axis=(0, 1))
        # enforce an upper bound on the scale-length of the action component
        med_dist[med_dist < 0.01] = 0.01
        scale_length_vector = 1.0 / med_dist
    else:
        scale_length_vector = np.ones(self.obs_dim)

    # * set the fourier feature
    transformer_list = []
    self.z_dim = random_feature_per_obs_dim * self.obs_dim
    models = [RBFSampler(n_components=random_feature_per_obs_dim,
                         gamma=default_length_scale * dist)
              for dist in scale_length_vector]
    for model in models:
        model.fit([self.obs[0]])
        transformer_list.append((str(model), model))
    self.rff = FeatureUnion(transformer_list)

    # * separate action set indexing
    act_idx = []
    for i in range(self.act_dim):
        act_idx.append(np.where(dataset['acts'] == i)[0])

    # * apply transformation
    Z = self.rff.transform(self.obs)
    Z_prime = self.rff.transform(self.next_obs)
    Z_init = self.rff.transform(self.init_obs)
    Z_term = self.rff.transform(self.term_obs)
    assert self.z_dim == Z.shape[1]

    self.Phi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim))
    self.Phi_pi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim))
    self.Phi_prime_pi = np.zeros((Z_prime.shape[0], Z_prime.shape[1] * self.act_dim))
    self.Phi_init_pi = np.zeros((Z_init.shape[0], Z_init.shape[1] * self.act_dim))
    self.Phi_term_pi = np.zeros((Z_term.shape[0], Z_term.shape[1] * self.act_dim))
    for i in range(self.act_dim):
        self.Phi[act_idx[i], i * self.z_dim:(i + 1) * self.z_dim] = Z[act_idx[i]]
        self.Phi_pi[:, i * self.z_dim:(i + 1) * self.z_dim] = \
            self.pi_current[:, i][:, None] * Z
        self.Phi_prime_pi[:, i * self.z_dim:(i + 1) * self.z_dim] = \
            self.pi_next[:, i][:, None] * Z_prime
        self.Phi_init_pi[:, i * self.z_dim:(i + 1) * self.z_dim] = \
            self.pi_init[:, i][:, None] * Z_init
        self.Phi_term_pi[:, i * self.z_dim:(i + 1) * self.z_dim] = \
            self.pi_term[:, i][:, None] * Z_term

    # * Some commonly used variables
    self.I_sa = np.eye(self.act_dim * self.z_dim)
    self.rews = dataset['rews']
    self.init_idx = np.arange(0, self.n_samples, self.horizon)
    self.end_idx = np.arange(self.horizon - 1, self.n_samples, self.horizon)
    # * make sure that the importance weights are already calculated
    self.rho = dataset['ratio']

    # * set-up network
    # ! consider representing the actions better
    if self.input_mode == 'sa':
        if self.act_dim == 2:
            # turn the actions into [-1, 1] for the binary action case
            acts = dataset['acts'] * 2 - 1
            self.x = torch.tensor(np.concatenate((self.obs, acts), axis=1))
            self.w_net = Simple_MLP(input_dim=self.obs_dim + 1, output_dim=1,
                                    hidden_layers=hidden_layers,
                                    activation=activation,
                                    output_transform=output_transform)
        else:
            raise NotImplementedError
    elif self.input_mode == 's':
        self.x = torch.tensor(self.obs)
        self.w_net = Simple_MLP(input_dim=self.obs_dim, output_dim=1,
                                hidden_layers=hidden_layers,
                                activation=activation,
                                output_transform=output_transform)

    self.form_td_ball()
    self.prepare_torch_tensor()
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector
from xgboost import XGBClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=42)

# Average CV score on the training set was: 0.7117612161661105
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=4, subset_list="subsets.csv"),
    RBFSampler(gamma=0.8),
    XGBClassifier(learning_rate=0.1, max_depth=7, min_child_weight=4,
                  n_estimators=100, nthread=1, subsample=1.0)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
train_X = np.random.rand(n) * 0.5 + 0.1
train_X = np.expand_dims(train_X, axis=1)
train_y = np.sqrt(train_X * (1 - train_X)) * np.sin(1.1 * np.pi / (train_X + 0.05))

n = 100
test_X = np.random.rand(n) * 0.5 + 0.1
test_X = np.expand_dims(test_X, axis=1)
test_y = np.sqrt(test_X * (1 - test_X)) * np.sin(2.1 * np.pi / (test_X + 0.05))

for gamm in [500000]:
    feature_map_fourier = RBFSampler(gamma=gamm, random_state=1)
    fourier_approx_linear = pipeline.Pipeline([("feature_map", feature_map_fourier),
                                               ("svm", LinearRegression())])
    train_acc = []   # train/test RMSE, despite the *_acc names
    test_acc = []
    # note: np.linspace(1, 300, 301) cast to int repeats some component counts;
    # np.arange(1, 301) would visit each integer exactly once
    features = np.linspace(1, 300, 301, dtype=int)
    for D in features:
        fourier_approx_linear.set_params(feature_map__n_components=D)
        fourier_approx_linear.fit(train_X, train_y)
        '''
        X_plot = np.expand_dims(np.linspace(0.1, 0.6, 10000), axis=1)
        y_plot = fourier_approx_linear.predict(X_plot)
        plt.plot(X_plot, y_plot, label=str(D))
        plt.scatter(train_X, train_y, label=str(D))
        plt.show()
        '''
        train_acc.append(np.sqrt(np.mean(
            (fourier_approx_linear.predict(train_X) - train_y) ** 2)))
        test_acc.append(np.sqrt(np.mean(
            (fourier_approx_linear.predict(test_X) - test_y) ** 2)))
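After the sweep, the RMSE arrays can be reduced to the best component count; a small hypothetical follow-up inside the same gamma loop:

    best = int(np.argmin(test_acc))
    print("best n_components = %d, test RMSE = %.4f, train RMSE = %.4f"
          % (features[best], test_acc[best], train_acc[best]))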
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=39)

# Average CV score on the training set was: 0.6592436040044494
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=4, subset_list="subsets.csv"),
    RBFSampler(gamma=0.30000000000000004),
    DecisionTreeClassifier(criterion="gini", max_depth=1, min_samples_leaf=15,
                           min_samples_split=20))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

env = gym.envs.make('MountainCar-v0')
env.observation_space.sample()

observation_examples = np.array(
    [env.observation_space.sample() for x in range(10000)])
scaler = sklearn.preprocessing.StandardScaler()
featurizer = sklearn.pipeline.FeatureUnion([
    ('rbf1', RBFSampler(gamma=5.0, n_components=100)),
    ('rbf2', RBFSampler(gamma=2.0, n_components=100)),
    ('rbf3', RBFSampler(gamma=1.0, n_components=100)),
    ('rbf4', RBFSampler(gamma=0.5, n_components=100))
])
featurizer.fit(scaler.fit_transform(observation_examples))


class FunctionApproximator():
    def __init__(self):
        self.models = []
        for i in range(env.action_space.n):
            model = SGDRegressor(learning_rate="constant")
            model.partial_fit([self.featurize_state(env.reset())], [0])
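The class above calls self.featurize_state, which is cut off in this excerpt. A minimal sketch of what such a method typically looks like, given the module-level scaler and featurizer defined above (a hypothetical completion, not the original):

    def featurize_state(self, state):
        # Scale the raw observation, then map it through the four RBFSampler
        # blocks; the result is a 400-dimensional feature vector.
        scaled = scaler.transform([state])
        featurized = featurizer.transform(scaled)
        return featurized[0]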
import numpy as np
from sklearn.kernel_approximation import RBFSampler


class LSTDQ_Kernel:

    def __init__(self, dataset, obs_dim, act_dim, gamma, horizon, value_reg,
                 default_length_scale=0.2, random_feature_per_obs_dim=250,
                 norm=None, scale_length_adjustment='median', dtype=np.float32,
                 policy_net=None, separate_action_indexing=False,
                 action_encoding_scheme='continuous'):
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.horizon = horizon
        self.norm = norm
        self.policy_net = policy_net
        self.value_reg = value_reg
        self.dtype = dtype
        self.separate_action_indexing = separate_action_indexing
        self.action_encoding_scheme = action_encoding_scheme

        self.n_samples = dataset['obs'].shape[0]
        self.n_episode = dataset['init_obs'].shape[0]

        # Keep only the non-terminal transitions.
        self.non_terminal_idx = (dataset['info'] == False)[:, 0]
        self.n_samples_non_terminal = self.non_terminal_idx.sum()
        self.data_acts = dataset['acts'][self.non_terminal_idx]

        # Target-policy action probabilities: recompute them from a policy
        # network if one is given, otherwise read them from the dataset.
        if self.policy_net is not None:
            self.pi_current = self.policy_net.get_probabilities(dataset['obs'])
            self.pi_next = self.policy_net.get_probabilities(dataset['next_obs'])
            self.pi_init = self.policy_net.get_probabilities(dataset['init_obs'])
            self.pi_term = self.policy_net.get_probabilities(dataset['term_obs'])
        else:
            self.pi_current = dataset['target_prob_obs'][self.non_terminal_idx]
            self.pi_next = dataset['target_prob_next_obs'][self.non_terminal_idx]
            self.pi_init = dataset['target_prob_init_obs']
            self.pi_term = dataset['target_prob_term_obs']

        if self.norm is None:
            self.obs = dataset['obs'][self.non_terminal_idx]
            self.next_obs = dataset['next_obs'][self.non_terminal_idx]
            self.init_obs = dataset['init_obs']
            self.term_obs = dataset['term_obs']
        elif self.norm == 'std':
            self.obs_mean = np.mean(dataset['obs'], axis=0, keepdims=True)
            self.obs_std = np.std(dataset['obs'], axis=0, keepdims=True)
            self.obs = (dataset['obs'] - self.obs_mean) / self.obs_std
            self.next_obs = (dataset['next_obs'] - self.obs_mean) / self.obs_std
            self.init_obs = (dataset['init_obs'] - self.obs_mean) / self.obs_std
            self.term_obs = (dataset['term_obs'] - self.obs_mean) / self.obs_std
        else:
            raise NotImplementedError

        # Whiten the observations using the statistics of the non-terminal
        # tuples only.
        non_terminal_idx = (dataset['info'] == False)[:, 0]
        obs_mean = np.mean(dataset['obs'][non_terminal_idx], axis=0, keepdims=True)
        obs_std = np.std(dataset['obs'][non_terminal_idx], axis=0, keepdims=True)
        self.obs = (self.obs - obs_mean) / obs_std
        self.next_obs = (self.next_obs - obs_mean) / obs_std
        self.init_obs = (self.init_obs - obs_mean) / obs_std
        self.term_obs = (self.term_obs - obs_mean) / obs_std

        # Without separate action indexing, the input is the concatenated
        # (s, a) pair.
        if not self.separate_action_indexing:
            if self.action_encoding_scheme == 'continuous':
                # Encode the discrete actions as evenly spaced values in
                # [-1, 1], standardized over the dataset.
                encoded_actions = np.linspace(-1, 1, self.act_dim)
                mean_action = np.mean(encoded_actions[self.data_acts])
                std_action = np.std(encoded_actions[self.data_acts])
                self.encoded_actions = (encoded_actions - mean_action) / std_action
                self.act = self.encoded_actions[self.data_acts]
                self.input = np.concatenate((self.obs, self.act), axis=1)
                self.input_dim = self.input.shape[1]
            else:
                raise NotImplementedError
        else:
            self.input = self.obs
            self.input_dim = self.obs.shape[1]

        if scale_length_adjustment == 'median':
            # Median heuristic: one inverse squared length scale per input
            # dimension, from the median pairwise squared distance over a
            # random subsample.
            sample_num = 5000
            idx1 = np.random.choice(self.n_samples_non_terminal, sample_num)
            idx2 = np.random.choice(self.n_samples_non_terminal, sample_num)
            med_dist = np.median(np.square(
                self.input[None, idx1, :] - self.input[idx2, None, :]),
                axis=(0, 1))
            # Floor the median distance, which enforces an upper bound on the
            # scale length of the action component.
            med_dist[med_dist < 0.01] = 0.01
            self.scale_length_vector = 1.0 / med_dist
        else:
            self.scale_length_vector = np.ones(self.input_dim)

        # A single random Fourier feature map over the whole input. (A
        # per-dimension FeatureUnion of RBFSamplers scaled by
        # scale_length_vector was tried here and left commented out.)
        self.z_dim = random_feature_per_obs_dim * self.input_dim
        self.rff = RBFSampler(n_components=self.z_dim, gamma=default_length_scale)
        self.rff.fit([self.input[0]])

        # Commonly used quantities; the importance weights in dataset['ratio']
        # are assumed to be precomputed.
        self.rews = dataset['rews'][self.non_terminal_idx]
        self.rho = dataset['ratio'][self.non_terminal_idx]

    def estimate(self):
        if self.separate_action_indexing:
            value_est = self.estimate_LSTDQ_separate_action_indexing()
        else:
            value_est = self.estimate_LSTDQ_concat_sa_input()
        return value_est

    def estimate_LSTDQ_concat_sa_input(self):
        # Build (s', a') pairs for every action so that the next-state
        # features can be averaged under the target policy.
        a_prime = np.tile(self.encoded_actions,
                          self.n_samples_non_terminal)[:, np.newaxis]
        x_prime = np.concatenate(
            (np.repeat(self.next_obs, self.act_dim, axis=0), a_prime), axis=1)
        a0_expanded = np.tile(self.encoded_actions, self.n_episode)[:, np.newaxis]
        x0 = np.concatenate(
            (np.repeat(self.init_obs, self.act_dim, axis=0), a0_expanded), axis=1)
        aterm_expanded = np.tile(self.encoded_actions,
                                 self.n_episode)[:, np.newaxis]
        xterm = np.concatenate(
            (np.repeat(self.term_obs, self.act_dim, axis=0), aterm_expanded),
            axis=1)

        Z = self.rff.transform(self.input).astype(self.dtype)
        Z_prime = self.rff.transform(x_prime).astype(self.dtype)
        aprime_probs = self.pi_next.flatten()[:, np.newaxis]
        Z_prime = Z_prime * aprime_probs
        Z_prime = Z_prime.reshape(
            (self.n_samples_non_terminal, self.act_dim, self.z_dim)).sum(axis=1)

        # Solve the regularized LSTD-Q system for the Q-function coefficients.
        reg = self.value_reg
        regularized_inverse = np.linalg.inv(
            np.matmul(Z.T, Z - self.gamma * Z_prime) + reg * np.eye(self.z_dim))
        featurized_reward = np.matmul(Z.T, self.rews)
        value_coef = np.matmul(regularized_inverse, featurized_reward)

        Z0 = self.rff.transform(x0)
        Q0 = np.matmul(Z0, value_coef)
        Z_term = self.rff.transform(xterm)
        Q_term = np.matmul(Z_term, value_coef)

        V_init = (Q0 * self.pi_init.flatten()[:, np.newaxis]).reshape(
            (self.n_episode, self.act_dim)).sum(axis=1)
        V_term = (Q_term * self.pi_term.flatten()[:, np.newaxis]).reshape(
            (self.n_episode, self.act_dim)).sum(axis=1)
        V_traj = V_init - V_term * self.gamma ** self.horizon
        value_est = np.mean(V_traj)
        return value_est

    def estimate_LSTDQ_separate_action_indexing(self):
        # Index the samples separately per action.
        act_idx = [np.where(self.data_acts == i)[0] for i in range(self.act_dim)]

        # Apply the random feature transformation.
        Z = self.rff.transform(self.obs).astype(self.dtype)
        Z_prime = self.rff.transform(self.next_obs).astype(self.dtype)
        Z_init = self.rff.transform(self.init_obs).astype(self.dtype)
        Z_term = self.rff.transform(self.term_obs).astype(self.dtype)
        assert self.z_dim == Z.shape[1]

        # Block features: one z_dim-wide block per action. Phi places each
        # sample's features in the block of the action actually taken; the
        # *_pi variants weight every block by the target-policy probabilities.
        Phi = np.zeros((Z.shape[0], Z.shape[1] * self.act_dim), dtype=self.dtype)
        Phi_prime_pi = np.zeros((Z_prime.shape[0], Z_prime.shape[1] * self.act_dim),
                                dtype=self.dtype)
        Phi_init_pi = np.zeros((Z_init.shape[0], Z_init.shape[1] * self.act_dim),
                               dtype=self.dtype)
        Phi_term_pi = np.zeros((Z_term.shape[0], Z_term.shape[1] * self.act_dim),
                               dtype=self.dtype)
        for i in range(self.act_dim):
            block = slice(i * self.z_dim, (i + 1) * self.z_dim)
            Phi[act_idx[i], block] = Z[act_idx[i]]
            Phi_prime_pi[:, block] = self.pi_next[:, i][:, None] * Z_prime
            Phi_init_pi[:, block] = self.pi_init[:, i][:, None] * Z_init
            Phi_term_pi[:, block] = self.pi_term[:, i][:, None] * Z_term

        I_sa = np.eye(self.act_dim * self.z_dim, dtype=self.dtype)
        regularized_inverse = np.linalg.inv(
            np.matmul(Phi.T, Phi - self.gamma * Phi_prime_pi) + self.value_reg * I_sa)
        featurized_reward = np.matmul(Phi.T, self.rews)
        value_coef = np.matmul(regularized_inverse, featurized_reward)

        V_init = Phi_init_pi @ value_coef
        V_term = Phi_term_pi @ value_coef
        V_traj = V_init - V_term * self.gamma ** self.horizon
        value_est = np.mean(V_traj)
        return value_est
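
# A minimal usage sketch for the estimator above, on an entirely synthetic
# dataset dictionary (the keys mirror those read in __init__; all shapes and
# values here are hypothetical, and scale_length_adjustment=None skips the
# memory-hungry median heuristic).
import numpy as np

n, h, d, k = 10, 5, 3, 2                      # samples, horizon, obs dim, actions
rng = np.random.RandomState(0)
dataset = {
    'obs': rng.randn(n, d), 'next_obs': rng.randn(n, d),
    'init_obs': rng.randn(n // h, d), 'term_obs': rng.randn(n // h, d),
    'acts': rng.randint(k, size=(n, 1)), 'rews': rng.randn(n, 1),
    'info': np.zeros((n, 1), dtype=bool),     # no terminal transitions
    'ratio': np.ones((n, 1)),                 # precomputed importance weights
    'target_prob_obs': np.full((n, k), 1.0 / k),
    'target_prob_next_obs': np.full((n, k), 1.0 / k),
    'target_prob_init_obs': np.full((n // h, k), 1.0 / k),
    'target_prob_term_obs': np.full((n // h, k), 1.0 / k),
}
estimator = LSTDQ_Kernel(dataset, obs_dim=d, act_dim=k, gamma=0.99, horizon=h,
                         value_reg=1e-3, scale_length_adjustment=None)
print(estimator.estimate())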
# prop['probability_' + ps] = []
# prop['class_' + ps] = []

# Restore the classifier, the robust scaler, and the class labels from file.
classifier = joblib.load('data/' + algorithm + '-' + ps + '.pkl')
robust_scaler = joblib.load('data/rs-' + algorithm + '-' + ps + '.pkl')
classes = joblib.load('data/classes-' + algorithm + '-' + ps + '.pkl')

cstatus = robust_scaler.transform(cstatus_orig)
if algorithm == 'kernel-approx':
    # The fixed random_state reproduces the random features used at training
    # time.
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    cstatus = rbf_feature.fit_transform(cstatus)

prob = None
if algorithm == 'one-vs-rest' or algorithm == 'linear-svm':
    # Map the decision-function margins through a Platt-style sigmoid, then
    # normalize each row so the class probabilities sum to one.
    f = np.vectorize(platt_func)
    raw_predictions = classifier.decision_function(cstatus)
    platt_predictions = f(raw_predictions)
    prob = platt_predictions / platt_predictions.sum(axis=1)[:, np.newaxis]
else:
    prob = classifier.predict_proba(cstatus).tolist()

for i in range(len(classes)):
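
# platt_func is not defined in this fragment; a minimal sketch of the
# Platt-style link it plausibly implements (the name comes from the code
# above; the constants are illustrative only).
import numpy as np

def platt_func(x):
    # Platt's sigmoid 1 / (1 + exp(A*x + B)) with the illustrative choice
    # A = -1, B = 0; the real coefficients would be fitted on held-out data.
    return 1.0 / (1.0 + np.exp(-x))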
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=87)

# Average CV score on the training set was: 0.6941861327400816
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=17, subset_list="subsets.csv"),
    RBFSampler(gamma=0.65),
    RandomForestClassifier(bootstrap=True, criterion="gini", max_features=0.5,
                           min_samples_leaf=1, min_samples_split=11,
                           n_estimators=100)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
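
# The exported TPOT scripts in this file all stop at predict; a small
# follow-up sketch, reusing the variables defined above, to score the
# held-out predictions.
from sklearn.metrics import accuracy_score

print(accuracy_score(testing_target, results))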
import sys

import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn import cross_validation
from sklearn import svm
from sklearn.kernel_approximation import RBFSampler
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.grid_search import GridSearchCV

DIMENSION = 400       # Dimension of the original data.
CLASSES = (-1, +1)    # The classes that we are trying to predict.

# Fit both feature maps once on a dummy sample so that transform() can be
# applied to every incoming example.
chi_feature = AdditiveChi2Sampler(sample_steps=1)
chi_feature.fit(np.zeros([1, 400]))
rbf = RBFSampler(n_components=15 * DIMENSION, random_state=1)
rbf.fit(np.zeros([1, 400]))


def transform(x_original):
    # Expand a sample with the additive chi^2 map followed by the RBF map,
    # and prepend a constant bias term.
    out = np.concatenate(([1], rbf.transform(chi_feature.transform(x_original)[0])[0]))
    return out


if __name__ == "__main__":
    X = []
    Y = []
    # Initialize the stochastic gradient descent classifier.
    cls = SGDClassifier(alpha=0.0001, fit_intercept=False, n_iter=15,
                        penalty="l2", warm_start=True)
    for line in sys.stdin:
        line = line.strip()
        (label, x_string) = line.split(" ", 1)
        label = int(label)
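        # The fragment stops right after parsing the label; a hypothetical
        # continuation of the loop (not from the original project): parse the
        # feature string, expand it, and update the linear model online.
        x = np.fromstring(x_string, sep=' ')
        phi = transform(x)
        X.append(phi)
        Y.append(label)
        cls.partial_fit([phi], [label], classes=list(CLASSES))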
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=63)

# Average CV score on the training set was: 0.7008379681127179
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=1, subset_list="subsets.csv"),
    RBFSampler(gamma=0.2),
    DecisionTreeClassifier(criterion="gini", max_depth=6, min_samples_leaf=3,
                           min_samples_split=8)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy

from nab.detectors.base import AnomalyDetector
from sklearn.kernel_approximation import RBFSampler


class ExposeDetector(AnomalyDetector):
  """ This detector is an implementation of the EXPoSE (EXPected Similarity
  Estimation) algorithm as described in Markus Schneider, Wolfgang Ertel,
  Fabio Ramos, "Expected Similarity Estimation for Large-Scale Batch and
  Streaming Anomaly Detection", arXiv:1601.06602 (2016).

  EXPoSE calculates the likelihood of a data point being normal by using
  the inner product of its feature map with the kernel embedding of previous
  data points. This measures the similarity of a data point to previous
  points without assuming an underlying data distribution.

  There are three EXPoSE variants: incremental, windowing and decay. This
  implementation is based on EXPoSE with decay. All three variants have been
  tried on NAB, but decay gives the best results. Parameters for this
  detector have been tuned to give the best performance.
  """

  def __init__(self, *args, **kwargs):
    super(ExposeDetector, self).__init__(*args, **kwargs)
    self.kernel = None
    self.previousExposeModel = []
    self.decay = 0.01
    self.timestep = 0

  def initialize(self):
    """Initializes RBFSampler for the detector"""
    self.kernel = RBFSampler(gamma=0.5, n_components=20000, random_state=290)

  def handleRecord(self, inputData):
    """Returns a list [anomalyScore] calculated using a kernel-based
    similarity method described in the comments below."""

    # Transform the input by approximating the feature map of a Radial Basis
    # Function kernel using the Random Kitchen Sinks approximation.
    inputFeature = self.kernel.fit_transform(numpy.array([[inputData["value"]]]))

    # Compute the expose model as a weighted sum of the new data point's
    # feature map and the previous data points' kernel embedding. The
    # influence of older data points declines with the decay factor.
    if self.timestep == 0:
      exposeModel = inputFeature
    else:
      exposeModel = ((self.decay * inputFeature) +
                     (1 - self.decay) * self.previousExposeModel)

    # Update the previous expose model.
    self.previousExposeModel = exposeModel

    # Compute the anomaly score as one minus the similarity of the new data
    # point with the expose model. The similarity, calculated via the inner
    # product, is the likelihood of the data point being normal. Resulting
    # anomaly scores are in the range of -0.02 to 1.02.
    anomalyScore = (1 - numpy.inner(inputFeature, exposeModel)).item()
    self.timestep += 1

    return [anomalyScore]
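
# A standalone sketch of the decay update above, outside the NAB harness: the
# expose model is an exponential moving average of feature maps, and each new
# point is scored against it (stream values and parameters are illustrative).
import numpy
from sklearn.kernel_approximation import RBFSampler

kernel = RBFSampler(gamma=0.5, n_components=200, random_state=290)
decay = 0.01
model = None
for value in [0.10, 0.11, 0.09, 5.0]:        # toy stream; 5.0 is anomalous
    phi = kernel.fit_transform(numpy.array([[value]]))
    model = phi if model is None else decay * phi + (1 - decay) * model
    print((1 - numpy.inner(phi, model)).item())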
import numpy as np
import pandas as pd
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was: -0.6922854322689991
exported_pipeline = make_pipeline(
    RBFSampler(gamma=0.8),
    # solver="liblinear" is required for the L1 penalty on recent
    # scikit-learn versions (it was the default when this was exported).
    LogisticRegression(C=0.01, dual=False, penalty="l1", solver="liblinear")
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
def transform(x_original, make_np=True): orig = x_original variances_str = "0.0021246993507595866 0.0032713784391997795 0.0033522806931598247 0.0017432450192796278 0.0034743692038798537 0.003637888546929857 0.0019210039127597624 0.0021841610994196136 0.0018762718393396005 0.0034590054363498003 0.0052604099446999682 0.004508790286140099 0.0035272400244497799 0.0030404807453598324 0.0022447918038096385 0.0017851536926196112 0.0021643550482296344 0.0037976255097098874 0.0025753731081197833 0.0029230906247597055 0.0060828219621099217 0.0023575999971396813 0.0043864294801700945 0.0071589655821691772 0.0036986840015399082 0.00057556662468004468 0.0030184163825898096 0.0062797556933995476 0.0018388575003994976 0.0018222650139394971 0.0032805952842698042 0.0035132540814598752 0.0024659598304896477 0.0026319448493497136 0.003572205969799843 0.0030648003435798008 0.0021365654833496528 0.0012356635529695108 0.0021261889005796605 0.0030134591283298012 0.0016100815367798148 0.012523000339860027 0.002519218599329652 0.0052571679389798714 0.0026606913287896975 0.0028296754183797139 0.0039323969569099605 0.0020691205227195992 0.0030826525382697508 0.0020232189983895653 0.0040679867872599708 0.0018371556472196301 0.0031808009477497599 0.0034889724135098699 0.0041241983089198644 0.003466312111199805 0.00070525738208999413 0.0012962120699994075 0.0023748498468496439 0.0039468429845199238 0.0024428431670496745 0.012215355168679928 0.0012535008249493743 0.0026764566235297597 0.0043243784063398552 0.00065200872076008631 0.0022265717804095869 0.00081018893256987797 0.0027757838127496974 0.0011937874021293784 0.0033124457059298595 0.0033779817461398022 0.0026583629339898352 0.00096654598538961438 0.0021773139189896237 0.002624655562289701 0.0015705430665195477 0.0030252402714297136 0.0040940954038199478 0.0027594978981697318 0.00079096095234988185 0.0026036506797997572 0.0027190828795197546 0.0027920414767097406 0.0018699793252895133 0.002401434445989645 0.0031948320317497989 0.0028928477797297309 0.001254727068959367 0.0022096979193596154 0.0021709718136396741 0.0022725767293796106 0.0036734258169697923 0.0028088068982497589 0.00058128786511008252 0.0030860261422598389 0.0028005311404197221 0.0013144850578592786 0.00075680244248994735 0.003594669478579891 0.0032807255223097792 0.0023280524667396774 0.00318162350717981 0.0038591178877899067 0.0027019215482496691 0.00097254474824969451 0.0023080437106096615 0.0013613457456093684 0.0045951612643399054 0.0038485342457099387 0.00043219164003003777 0.001528150938759669 0.0024822021413396867 0.0018061700621995042 0.0028432498431096936 0.00055539853847006056 0.004173783897349969 0.0023134058954397316 0.0035923805665898493 0.001944158411359583 0.0022174885522996423 0.0021200232347196586 0.0014086675440495285 0.0060588732600395838 0.0020999206563196006 0.002311535350179601 0.0012081675861494046 0.0029662122591298679 0.0023064668532896651 0.00086526146860972403 0.0035453290259598483 0.0022721631862096265 0.003677016888759915 0.0027193153269396897 0.0019698620481495626 0.002072663196939612 0.0014700221401894075 0.00017158202360999703 0.0022463464680696336 0.0035194326419099174 0.0030686680423197867 0.002374867405639663 0.00042710055163003362 0.0030035550561797468 0.0037270432987298683 0.0024282900953096712 0.00086048954793975898 0.0011186456857096038 0.0027912283038996942 0.0013746583237494142 0.0043072999357398533 0.0032034503423598666 0.0025760441755196838 0.0023421858856196836 0.002131599313139612 0.0029099423010796777 0.0016998768135196812 0.0028229397603697181 
0.0030535556897598208 0.003180828002529861 0.0041489816552998261 0.00056885910910004086 0.0030288286590998306 0.0002859839918500021 0.0025907458249397565 0.0019840401991995621 0.0026709580203396733 0.00018365706286999837 0.0023102737736697076 0.0019214511389595858 0.0035872736249698512 0.0030397738456597189 0.00058895044087008347 0.0011302665188195724 0.0036135427626998772 0.00096930456685965713 0.0018706273234795688 0.0028471338214996859 0.0040263350593498478 0.00041504772780003257 0.00088363138039978097 0.0027967429290597077 0.0021579785680196756 0.0032100556617598404 0.0012821952431594156 0.0011697489935395071 0.0024514963691797428 0.0024098468797296444 0.0035879574826698079 0.003169685177989759 0.0053416716965498916 0.0031958328667698248 0.003017081933489743 0.00025151594039000199 0.0022886760678696417 0.0022956408480896266 0.0041254424031998971 0.0023694221563096735 0.0047916681473398276 0.00051616048678002784 0.0009364954557196728 0.00036740167022002141 0.00078959433233993142 0.0007410161818699483 0.0028233597298397656 0.0035765694441198263 0.0057271246152496317 0.0031925037529198339 0.0014168537242193022 0.0040282638127298667 0.0027408330144697043 0.0022817147531596685 0.0013110019340695283 0.0031049698000498423 0.0014794847673093696 0.0019060075812395761 0.0023860511557697102 0.0025873434738996485 0.0018797211826496064 0.0022561836261797042 0.0041991871207300085 0.0023698767044296855 0.0022702994190196093 0.0029535219055797368 0.00079702808800984168 0.00017141315798999718 0.00071072931258999632 0.0022027503444296218 0.0023522028982396696 0.00034261818457001714 0.0030124186968896794 0.0040563730303498731 0.0018014168708095377 0.0045389503904098493 0.0025631905209596659 0.0021709940360196437 0.0031014667275497628 0.0013724805472092871 0.0011206960384995625 0.0035493743115597959 0.0025190975770797062 0.0042803605014598489 0.0009058812431398496 0.0015261301214595528 0.00043206103726003953 0.002057161621769605 0.0029676093005998037 0.00059245340563008307 0.00060303803797007931 0.0055309290333298089 0.0006063130560400961 0.0024001375326397033 0.0051570050648799921 0.003216390780179791 0.001684353076369542 0.0024379539857596923 0.0033096221900098537 0.001808786421229587 0.00036056760674001951 0.0022999638755596282 0.0042300521607298008 0.0033374014801298532 0.0041061077925497727 0.00090300806356967953 0.0027771389140698217 0.0020966763969595594 0.0022364965134396191 0.0021630341014396426 0.003362866027789783 0.0025631540862897312 0.004191926116449857 0.0023811464991296992 0.0013004726735392649 0.0038548863857898333 0.0021571933421396868 0.0032544925816697214 0.0024967996225797357 0.0039128733433798774 0.0032033546653597454 0.0035349716580698469 0.0022774309789496266 0.0020827816616296431 0.001280163236199224 0.0029566993924298487 0.0030278382394197082 0.0031458574724698 0.00073484413224997748 0.0023053594018396508 0.0032629178035998552 0.0028317322999097433 0.0033847674035998084 0.0021507182045496622 0.0013635142890994728 0.0025417732184397166 0.00046798538031004748 0.0014196850140693168 0.001775496716359453 0.00041823802366003517 0.00072539019745996431 0.0030393665008997704 0.0013936213581092793 0.0024650105378997201 0.0002448311107500009 0.0043274930097698871 0.0045290280761799487 0.0047295668273101684 0.0010168427077595955 0.0027754963934396339 0.0028934546900597821 0.0024947583902996968 0.0017947966152195337 0.002808371739829744 0.00080562592018981933 0.0014184058297892733 0.0018558152750695453 0.0018534208896895739 0.0027403346575797425 0.0031581041628497997 0.0019250669095596151 
0.0017553527272695774 0.002912743471719791 0.00051881062016005577 0.0041509390442198381 0.0013269250644194269 0.002515913493569724 0.0032034703723998357 0.0015867479873494805 0.0033147417203898185 0.0032343107633697474 0.0016084849715195411 0.00041333437351003248 0.0015982072633194113 0.0014028860576195891 0.0022158183125796393 0.0029487353931697447 0.0028615529172198303 0.0012540566466694289 0.0028261495420197243 0.00017822631116999813 0.0014531231202394163 0.0025906615127396855 0.0036318312786498171 0.002825987395589701 0.0032132990932597881 0.00093148496318973544 0.0022986618991797251 0.0031201742482197584 0.0088757592945090114 0.0019739854059195429 0.0015964743898695729 0.0030620168350797899 0.0017549143672195243 0.0025403744949397296 0.0013998610671793503 0.00050658872377004334 0.0024219329259397276 0.0016578000335194041 0.0014255931402395057 0.0025947821308797258 0.0015455710208097471 0.0019424337106196282 0.0043638276133198444 0.0024791513534598046 0.002471546965979776 0.0032594199180097532 0.00081732890395981583 0.00047798563291005168 0.0026265644132597047 0.0029957660721997665 0.0033466747844698567 0.0030434931783497998 0.0032186603864098446 0.0025580746428896777 0.0074381240438289309 0.0026177068932397522 0.0010374525766094667 0.0018484145568895259 0.0032105816832397539 0.0025588880273796702 0.0011027058149395553 0.00165028316301944 0.0022621210840096185 0.00063843135713010388 0.002677249425599694 0.0011529594838495104 0.0020757956716295806 0.00063164132836008679 0.0012984328854694727 0.0030668599805997697 0.0013209850432293402 0.0017350537225995246 0.0027999960618096992 0.0045968238896799086 0.0015396509469794125 0.0026842448170297231 0.0020969214423495791 0.0032249556936598013 0.00029111348006000424 0.0011551860431694666 0.0031812251568797824" #mn = np.array(map(float, means_str.split(" "))) #mn = np.fromstring(means_str) variances = np.fromstring(variances_str, sep=' ') means_str = "3.8753948237858108e-06 1.2972946111794674e-05 1.2594051521366083e-05 5.0841523278404734e-06 1.8774317409263048e-05 6.2913210996917487e-05 1.269807222669888e-05 3.2193349475262057e-06 6.5226200570272061e-06 1.1473588836338628e-05 2.7180466935587737e-05 1.4762302565458717e-05 3.3722317512532468e-05 6.8216505240041436e-06 7.1028116499628903e-06 6.5493827073439618e-06 3.80367131264172e-06 1.4028847130371071e-05 9.3773632055309283e-06 6.493323349342037e-06 0.0012533506935897218 4.9911335763841195e-06 1.2793399333055094e-05 7.251611930188133e-05 9.5489822043414659e-06 3.8895300628186868e-06 4.173457402556971e-05 0.00011347419063456421 2.5715278760111459e-06 3.2518257183024889e-06 1.1746203655396577e-05 5.564016383146592e-05 3.6296631509353909e-06 4.3289811407316681e-06 1.6025500646546836e-05 8.7246747361516438e-06 4.2410327327645271e-06 4.3732089713098806e-06 5.9073865563619062e-06 2.4944097977347468e-05 2.6986158170267078e-05 0.00019357426874984057 5.1764074423215301e-06 3.5213588425492417e-05 5.6548098935816624e-06 4.9935937088475483e-06 2.3828362907972465e-05 3.521023866293484e-06 4.9870702736337188e-06 2.7658266039366798e-06 1.1424139609302174e-05 4.6380793952958809e-06 8.1857174384998292e-06 1.6642225648910047e-05 1.8268643132929127e-05 1.5473118685259949e-05 9.7616078787441458e-07 4.1097607144367696e-06 5.0459663323074957e-06 8.1752036387080678e-06 6.2517426726346483e-06 0.00021128251533625498 2.4441154311918049e-06 1.0193291769369655e-05 1.6000078417860217e-05 1.3360615760691735e-06 3.9318274983244583e-06 3.7424801978201094e-06 4.5859948912655592e-06 2.1863893895928264e-06 1.4465960374765088e-05 
1.226800721873276e-05 1.8464105024954982e-05 1.6648636202068534e-06 3.6936226607579947e-06 6.5624020308052344e-06 8.1339303452353934e-06 9.5047711128428641e-06 1.4246167594118415e-05 6.0140973294197884e-06 1.8256200156735017e-06 1.0903757639504039e-05 5.5080914174679564e-06 5.2142169736994904e-06 2.6292604236996645e-06 4.9623024158512934e-06 1.3171420269231491e-05 5.1064782563443342e-06 2.2201233797532346e-06 3.5523146873797785e-06 4.0447453033151591e-06 3.4393314844283629e-06 1.2283374778942664e-05 1.2292876875817127e-05 1.3500473667799135e-06 1.5982740863426082e-05 5.1149263226338105e-06 3.6545265412690049e-06 4.4324293930103502e-06 1.3464151551507424e-05 8.2607323905827565e-06 5.3487969307959027e-06 7.699747933440781e-06 2.6028092053793074e-05 4.6160336251911396e-06 3.4679078250202434e-06 4.1733322591036512e-06 5.3685295356327671e-06 4.1690461279070458e-05 1.8175584863744415e-05 1.4529974714941822e-06 8.9646541680474962e-06 3.8638936584656166e-06 2.9622882868516527e-06 7.0496709821419259e-06 3.1582263769680431e-06 9.405912339046591e-06 9.0755581225100531e-06 1.6325319116706371e-05 7.4249528783198223e-06 6.4142049677004635e-06 4.5308256388559377e-06 4.3379101302365048e-06 0.00010082767573262403 3.8073220474859233e-06 3.2462395975613701e-06 2.7311928376618711e-06 1.3798802536934602e-05 4.3141812822167945e-06 1.3418830948478911e-06 1.2429912124862659e-05 4.5075176921294976e-06 5.1646366657811792e-05 4.5044907401523191e-06 3.8984503442526084e-06 3.5443432542494581e-06 2.4525978397502771e-06 9.3143290305042167e-07 5.5977615024444758e-06 1.4190797086073543e-05 6.9561233764939789e-06 8.0114861452901582e-06 1.2454920191746878e-06 5.5587154982870272e-06 1.0799672251505274e-05 5.2959102834492533e-06 1.5685688647449261e-06 4.0529428722210623e-06 1.1678512895855624e-05 3.2192802988981066e-06 3.7209970472627806e-05 1.3342539819491425e-05 7.8622903069455567e-06 5.2192321914900928e-06 3.9052134579505441e-06 5.0680769571043553e-06 7.9552828837898563e-06 6.7762118492538826e-06 8.5875102642240075e-06 2.0992545616427373e-05 2.0487505271743291e-05 4.3745997535029968e-06 7.1046977878669946e-06 7.7167495498190023e-07 1.0141932308464567e-05 4.2219873766408028e-06 4.5710190852658248e-06 1.1970402654479661e-06 7.6102614732724262e-06 4.6239298630603015e-06 4.9995946799371758e-05 5.7956634809724437e-06 1.5024720589152287e-06 6.0635032731039673e-06 1.5391627780641011e-05 2.178652052162647e-06 4.2030056647134055e-06 5.0822379579415565e-06 1.9836303495641017e-05 1.8930994307717652e-06 3.0604158961858623e-06 6.5280625603827021e-06 1.0265727137904331e-05 4.0302422231094213e-05 3.7750836192671517e-06 4.0367914908354297e-06 8.0446362665366717e-06 4.7656248380853414e-06 8.6978972436276061e-06 7.9679700766206762e-06 2.9451374286812033e-05 1.3111273739035649e-05 6.6028118897700181e-06 1.4941804584231896e-06 3.9528326512917906e-06 3.847295383196301e-06 3.5756600152130488e-05 6.10565382283349e-06 3.5891435340776665e-05 3.6066217532076844e-06 2.0888559126779404e-06 1.0755002920858641e-06 5.8998610038911923e-06 1.9512692088167549e-06 5.0713400804749472e-06 1.4585512351101608e-05 4.822908984311966e-05 9.8016096252778945e-06 1.9911328814957375e-06 2.4764204976600043e-05 6.4805250037636707e-06 3.7935478658080509e-06 5.1083549212952252e-06 1.1189053457458745e-05 2.5200508287861594e-06 7.8373349366817099e-06 7.9847294470099685e-06 4.3095275213819756e-06 5.0268163315597379e-06 7.4832981742681862e-06 9.2408501776852945e-06 4.784135850487231e-06 5.6532252724841891e-06 5.3930817570733614e-06 1.2687973462442569e-06 
1.0372124095824449e-06 4.1435096417718113e-06 3.9981959056867675e-06 4.3520178967986713e-06 1.4659748060826231e-06 5.3366902864809163e-06 3.1416765924193689e-05 2.9749844077512922e-06 3.1381515491784522e-05 4.3260417959669591e-06 4.197030498717592e-06 5.306570430382929e-06 1.8883854746421685e-06 4.1937519548496871e-06 6.9194038197555032e-06 7.1767073252994241e-06 2.4833484439498967e-05 5.8383610252210572e-06 3.6243330253608428e-06 1.18902799300137e-06 3.8963636200265115e-06 1.2883918919165478e-05 1.3605525456692033e-06 1.7407965336936251e-06 0.00016857768522088627 1.4100686994311071e-06 7.1668903489840609e-06 2.7108318215380169e-05 5.4590558436375845e-06 6.2033647867466643e-06 5.6859033955868132e-06 4.3241078188076546e-05 9.0432098151017242e-06 1.0594888618529579e-06 4.0484870451845699e-06 2.9548153849811755e-05 9.410471996079331e-06 2.1009809505791367e-05 1.4939978216919125e-06 2.1026313371938338e-05 2.912760631269843e-06 4.1130865661336849e-06 4.0964425120045752e-06 1.0334704132812778e-05 1.1639088987295558e-05 2.0866544215744135e-05 4.7665503013673322e-06 2.4282885077105844e-06 2.4696946110127049e-05 5.8943453758772547e-06 7.0559765519393299e-06 8.6495232917104309e-06 2.4674527585132413e-05 6.5466440985476235e-06 1.4291488938382783e-05 4.0363838996778781e-06 7.5171096440058871e-06 1.7659216070078882e-06 2.3552682868282767e-05 6.0075484731317116e-06 2.9678689121826856e-05 4.5688281985000224e-06 4.2587818969459276e-06 1.2282850125910679e-05 5.6981633973611215e-06 1.2193919548692016e-05 7.7909581862542261e-06 4.1995999932004883e-06 4.2310001927379966e-06 1.4034983645177226e-06 2.2253775626039904e-06 2.5484625453534006e-06 1.5024773624760737e-06 1.1886813960082901e-06 8.943485028332714e-06 1.802211533446878e-06 8.7804607030574995e-06 1.4714171056899874e-06 3.039182778117474e-05 2.9469599285561173e-05 1.6190782721728404e-05 2.1980748656966054e-06 9.1492500963304843e-06 1.3139192984142854e-05 5.5841754669416901e-06 3.2663084979403296e-06 7.8300182408015622e-06 1.4650747681293603e-06 1.8418132244867557e-06 3.1634249051793445e-06 4.2879811205541378e-06 6.821776038991282e-06 7.2547994800721606e-06 4.5762861000866325e-06 4.0033741553487421e-06 1.3944663969273685e-05 1.5205123797572826e-06 3.7950333845819879e-05 4.6914603422440762e-06 4.3642212832058213e-06 1.6888537402380868e-05 5.3097299301474431e-06 8.5974973592752354e-06 1.0183715675617148e-05 4.4233012671049924e-06 2.8020268713604479e-06 2.4903519176724564e-06 6.0367933560789913e-06 3.6066482258866671e-06 5.5465358638439433e-06 1.6406145480373579e-05 3.6475034103942783e-06 7.545922378704344e-06 1.0510117913470496e-06 6.383917613657175e-06 4.1469930879045612e-06 1.5104979761103841e-05 7.9249357960338965e-06 6.6303162793237734e-06 2.3058946881412919e-06 6.93384276789908e-06 6.2217404008410318e-06 0.00053927751612010478 2.7688222907463807e-06 5.1593082062395665e-06 9.4327080926443393e-06 3.3336519843947502e-06 5.1198323130590842e-06 4.5094438342118166e-06 2.6237274608190453e-06 5.1693775448212788e-06 2.1082108591551617e-06 6.8329929120308474e-06 6.2018823452726071e-06 2.1240415994925091e-05 4.0243827456115514e-06 3.0522891049621393e-05 1.4011920974680818e-05 1.7239640547533074e-05 1.7993086639426091e-05 1.4355334226673438e-06 1.1012319919274514e-06 1.0614538321433708e-05 7.2890435254277739e-06 8.5872764781091643e-06 1.3084966706891505e-05 1.1094006709484758e-05 4.2456925142930984e-06 3.6872244517667462e-05 1.0859154502284048e-05 1.5319903891298572e-06 2.7727900163087534e-06 7.2483213769211959e-06 5.1159362377455894e-06 2.6822480525986132e-06 
2.889767166323531e-06 3.55288675821463e-06 1.3380456162305463e-06 5.2278105015869388e-06 2.3031150972921671e-06 4.1508531796520333e-06 1.8528326040776206e-06 6.3815646996712558e-06 6.9338240811962186e-06 2.5793558575700516e-06 4.3737400474318956e-06 6.0837447954729297e-06 4.7903414469400619e-05 2.8013740155544375e-06 4.7622560053896967e-06 3.250652556381526e-06 9.5664014501971676e-06 8.2542503434926804e-07 2.5912870572853299e-06 6.0526418572379129e-06"
    means = np.fromstring(means_str, sep=' ')

    # Standardize the sample. (Note: the values parsed from variances_str are
    # variances, so this divides by the variance rather than the standard
    # deviation, as in the original.)
    x_original = np.array(x_original)
    x_original -= means
    x_original /= variances

    x = []

    def sqr(x):
        return x * x

    def sqr3(x):
        return x * x * x

    def e_pow(x):
        return math.exp(x)

    def me_pow(x):
        return math.exp(-x)

    def fred(x):
        return round(math.fabs(x) * 1000)

    def extend_x(arr, additions=True, extension=True):
        # Append the raw values and a handful of summary statistics.
        if extension:
            x.extend(arr)
        if additions:
            x.append(scipy.std(arr))
            x.append(scipy.var(arr))
            x.append(sum(arr) / len(arr))
            x.append(sum(np.abs(arr)) / len(arr))
            x.append(min(arr))
            x.append(max(arr))
            x.append(scipy.mean(arr))
            x.append(scipy.median(arr))

    def count_smaller_ratio(arr, delta):
        return sum(1 if el <= delta else 0 for el in arr) / len(arr)

    if True:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))

    if False:
        # Alternative feature set: raw values plus SkewedChi2 and RBF random
        # features (left disabled).
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))
        sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50,
                                     random_state=1)
        zzz1 = sampler1.fit_transform(np.array(orig))[0]
        sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
        zzz3 = sampler3.fit_transform(np.array(x))[0]
        x = []
        extend_x(x_original)
        extend_x(list(zzz1))
        extend_x(list(zzz3))

    if False:
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))
        # Count-below-threshold ratios for a grid of thresholds were tried
        # here and left commented out, e.g.:
        # x.append(count_smaller_ratio(x_original, 0.1))
        # ...
        # x.append(count_smaller_ratio(x_original, -1.0))
        # for i in x_original: print i

    # Many further feature-expansion experiments (expm1, square, exp(-x),
    # tanh, cos, sin, sqrt, log, sign indicators, ...) were tried and left
    # commented out in the original.

    if make_np:
        return np.array(x)
    return x
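
# The transform() fragment above uses math, scipy, and the sklearn samplers
# without showing its imports; a plausible preamble, assuming a vintage scipy
# that still exposed std/var/mean/median at the top level.
import math

import numpy as np
import scipy
from sklearn.kernel_approximation import RBFSampler, SkewedChi2Sampler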