def __init__(self, name, actions):
    self.debug = True
    self.actions = actions
    self.initial_data = np.array([[3, 3.80, 0.40, 1.50],
                                  [3.0783744000000004, 1.7999999999999998, 0.04, 2.5],
                                  [3.6603792000000004, 5.8500000000000005, 0.08, -1.5],
                                  [2.8383936000000003, 5.8500000000000005, 0.04, -3.0],
                                  [4.5679104000000015, 5.8500000000000005, 0.04, -2.0],
                                  [2.885976, 4.05, 0.04, 1.0]])
    self.initial_labels = np.array([[-1.0], [-0.2], [-0.5], [-.25], [0.0], [-2.1]])
    self.model_name = str(name)
    self.observation_samples = np.array([self.sample_states() for obv in range(10000)])
    self.featurizer = sklearn.pipeline.FeatureUnion([
        ("rbf1", RBFSampler(gamma=5.0, n_components=100)),
        ("rbf2", RBFSampler(gamma=2.0, n_components=100)),
        ("rbf3", RBFSampler(gamma=1.0, n_components=100)),
        ("rbf4", RBFSampler(gamma=0.5, n_components=100))])
    self.featurizer.fit(self.observation_samples)
    self.feature_scaler = StandardScaler()
    self.feature_scaler.fit(self.observation_samples)
    if self.model_name == 'svr':
        self.model = SVR(kernel='rbf')
        # self.model.fit(self.initial_data, self.initial_labels)
    elif self.model_name == 'extra_trees':
        self.model = ExtraTreesRegressor().fit(self.initial_data, self.initial_labels)
    elif self.model_name == 'sgd':
        self.models = {}
        for a in range(len(self.actions)):
            model = SGDRegressor(learning_rate="constant")
            model.partial_fit([self.featurize_state([3.6603792000000004, 2.8500000000000005, 0.08])], [0])
            self.models[a] = model
        # self.model = SGDRegressor(penalty='none')
        # self.model.fit(self.feature_scaler.transform(self.initial_data), self.initial_labels)
    else:
        self.model = None
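# featurize_state is called above but not shown in this excerpt. A minimal sketch of
# what it might look like, given the fitted feature_scaler and RBF featurizer from
# __init__ (this is an assumption for illustration, not the author's implementation):
def featurize_state(self, state):
    # scale the raw state, then map it through the stacked RBF kernels
    scaled = self.feature_scaler.transform([state])
    featurized = self.featurizer.transform(scaled)
    return featurized[0]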
def __init__(self, env, feature_transformer, learning_rate):
    self.env = env
    self.models = []
    self.feature_transformer = feature_transformer
    for i in range(env.action_space.n):
        model = SGDRegressor(learning_rate=learning_rate)
        model.partial_fit(feature_transformer.transform([env.reset()]), [0])
        self.models.append(model)
def __init__(self):
    # We create a separate model for each action in the environment's
    # action space. Alternatively we could somehow encode the action
    # into the features, but this way it's easier to code up.
    self.models = []
    for _ in range(env.action_space.n):
        model = SGDRegressor(learning_rate="constant")
        # We need to call partial_fit once to initialize the model
        # or we get a NotFittedError when trying to make a prediction.
        # This is quite hacky.
        model.partial_fit([self.featurize_state(env.reset())], [0])
        self.models.append(model)
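# The per-action estimator above is usually paired with predict/update helpers.
# A minimal sketch of what they might look like (method names are assumptions,
# numpy is assumed imported as np, featurize_state is the same helper used above):
def predict(self, s, a=None):
    features = self.featurize_state(s)
    if a is None:
        # one Q-value per action
        return np.array([m.predict([features])[0] for m in self.models])
    return self.models[a].predict([features])[0]

def update(self, s, a, y):
    # single SGD step toward the TD target y for action a
    features = self.featurize_state(s)
    self.models[a].partial_fit([features], [y])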
def sgd(X_train, y_train, X_validate, y_validate, X_test, cw, alpha, regression=False):
    # cw = 2.5
    if regression:
        clf = SGDRegressor(alpha=alpha)
    else:
        # clf = SGDClassifier(class_weight={1: cw}, alpha=alpha)
        clf = SGDClassifier(class_weight={1: cw}, alpha=alpha, loss='log')
    print(clf)
    training_data_size = y_train.shape[0]
    n_iter = 3
    mb_size = 100
    iter_mb = minibatch_generator(training_data_size, mb_size=mb_size, n_iter=n_iter)
    total = 0
    n_total_batch = n_iter * training_data_size // mb_size
    t0 = time()
    recent_auc = []
    for n_batch, batch in enumerate(iter_mb):
        x, y = X_train[batch], y_train[batch]
        if regression:
            # up-weight the positive class via sample weights
            sw = np.ones(y.shape[0])
            sw[np.where(y == 1)[0]] = cw
            clf.partial_fit(x, y, sample_weight=sw)
        else:
            clf.partial_fit(x, y, classes=[1, 0])
        total += y.shape[0]
        if (n_batch + 1) % 1000 == 0:
            if regression:
                # y_pred_validate_val = clf.decision_function(X_validate)
                y_pred_validate_val = clf.predict(X_validate)
            else:
                # y_pred_validate_val = clf.decision_function(X_validate)
                y_pred_validate_val = clf.predict_proba(X_validate)[:, 1]
            print('auc:%.3f, %d samples in %ds (cw: %.2f)' % (AUC(y_validate, y_pred_validate_val), total, time() - t0, cw))
        if n_batch > n_total_batch - 100:
            if regression:
                y_pred_validate_val = clf.predict(X_validate)
            else:
                y_pred_validate_val = clf.predict_proba(X_validate)[:, 1]
            recent_auc.append(AUC(y_validate, y_pred_validate_val))
    latest_auc_avg = np.mean(recent_auc)
    print('cw=%.2f, avg auc of last %d batches: %.3f' % (cw, len(recent_auc), latest_auc_avg))
    if regression:
        return clf.predict(X_test)
    else:
        return clf.predict_proba(X_test)[:, 1]
def test_multi_target_regression_partial_fit():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    half_index = 25
    for n in range(3):
        sgr = SGDRegressor(random_state=0)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0))
    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])
    y_pred = sgr.predict(X_test)
    assert_almost_equal(references, y_pred)
    batchSeparator += sizeB

batches = createBatches(sizeB=1)
Model = SGDRegressor(learning_rate='constant', alpha=0, eta0=0.01, shuffle=True)
chunks = list(batches)
for _ in range(4):
    random.shuffle(chunks)
    for X_chunk, y_chunk in chunks:
        Model.partial_fit(X_chunk, y_chunk)  # partially fit the model using the current batch
        # y_predicted = Model.predict(X)

# Scikit-learn SGD regressor trained in one shot (normal SGD)
"""n_iter = 1000
Model = SGDRegressor(max_iter=n_iter)
Model.fit(trainInput, trainOutput)"""

yPredicted_SK_SGD = Model.predict(testInput)
plt.scatter(testOutput, yPredicted_SK_SGD)
plt.grid()
plt.xlabel('Actual y')
plt.ylabel('Predicted y')
plt.title('Scatter plot of actual y and predicted y')  # should form a diagonal line (best case)
random_state=42, learning_rate='optimal', warm_start=False, average=False ) total_clicks = train['clicks'].sum() k = 2 # number of folds for validaton cwsd = 0 # placeholder for click-weighted squared distance coefs = DataFrame(index=train_d.columns, columns=range(k)) # container for coefficient results from folds n = 0 kf = KFold(n_splits=k, random_state=42, shuffle=True) for train_index, test_index in kf.split(train_d): print n + 1, y_train, y_test = train[outcome_cols].iloc[train_index], train[outcome_cols].iloc[test_index] x_train, x_test = train_d.iloc[train_index, :], train_d.iloc[test_index, :] # do partial_fit to accomodate a large data set for i in xrange(0, x_train.shape[0], 100000): end = i + 100000 if (i + 100000) < x_train.shape[0] else x_train.shape[0] # print x_train.shape[0] - end sgdr.partial_fit(x_train.iloc[i:end, :], train['log_revenue_per_click'].iloc[i:end]) coefs.loc[:, n] = sgdr.coef_ # click weighted squared distance cwsd += ((y_test['revenue_per_click'] - exp(sgdr.predict(x_test) - 1)).pow(2) * y_test['clicks']).sum() n += 1 # divide click-weighted squared distance by total clicks to get average acwsd = cwsd / float(total_clicks)
for i in range(0, 200): #len(stftFilePathList)): tmp = loadmat(stftFilePathList[i]) X_song = np.ndarray.transpose(tmp['X']) tmp = loadmat(pseudoLabelFilePathList[i]) Y_song = np.ndarray.transpose(tmp['HD']) assert (len(X) == len( Y)), 'dimensionality mismatch between STFT and Pseudo-Labels!' ''' ==== Concatenating matrices ''' X = np.concatenate((X, X_song), axis=0) Y = np.concatenate((Y, Y_song), axis=0) ''' ==== Training ''' [y_hh_scaled, dump, dump] = scaleData(Y[:, 0]) [y_bd_scaled, dump, dump] = scaleData(Y[:, 1]) [y_sd_scaled, dump, dump] = scaleData(Y[:, 2]) #y_all = np.concatenate((y_hh_scaled, y_kd_scaled, y_sd_scaled), axis=1) print '==== training ====\n' clf_hh.partial_fit(X, y_hh_scaled) clf_bd.partial_fit(X, y_bd_scaled) clf_sd.partial_fit(X, y_sd_scaled) svrModel = [clf_hh, clf_bd, clf_sd] ''' ==== Save the trained DNN model ''' np.save(savepath, svrModel)
def partial_fit(self, X, y, *args, **kw):
    X = sp.csr_matrix(X)
    return SGDRegressor.partial_fit(self, X, y, *args, **kw)
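# The override above converts the input to CSR before delegating to the parent class.
# A minimal, self-contained sketch of how such a subclass might be assembled and used
# (the class name and the random data are assumptions for illustration only):
import numpy as np
import scipy.sparse as sp
from sklearn.linear_model import SGDRegressor

class SparseSGDRegressor(SGDRegressor):
    def partial_fit(self, X, y, *args, **kw):
        X = sp.csr_matrix(X)  # ensure sparse input on every incremental step
        return SGDRegressor.partial_fit(self, X, y, *args, **kw)

reg = SparseSGDRegressor()
X_batch = np.random.rand(32, 10)
y_batch = np.random.rand(32)
reg.partial_fit(X_batch, y_batch)  # the dense batch is converted to CSR internally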
class SGDRegressorMod(LocalModelLinear): def __init__( self, x_explain, chi_explain, y_p_explain, feature_names, target_names, class_index, r, tol_importance=0.001, tol_error=0.001, scale_data=False, save_samples=False, grid_search=False, l1_ratio=0.15, max_iter=10000, tol=0.001, learning_rate="adaptive", eta0=0.001, early_stopping=False, n_iter_no_change=10000, average=False, **kwargs ): super().__init__( x_explain, chi_explain, y_p_explain, feature_names, target_names, class_index, r, tol_importance, tol_error, scale_data, save_samples, ) self.model = SGDRegressor( l1_ratio=l1_ratio, alpha=0.001, max_iter=max_iter, tol=1.0e-3, learning_rate=learning_rate, eta0=eta0, n_iter_no_change=n_iter_no_change, early_stopping=early_stopping, average=average, warm_start=True, **kwargs ) self.grid_search = grid_search def partial_fit(self, x_set, y_set, weight_set=None): super().partial_fit(x_set, y_set, weight_set) self.scaler.partial_fit(x_set) x_set = self.scaler.transform(x_set) if self.grid_search: self.grid_search = False parameters = { "alpha": 10.0 ** (-np.arange(2, 7)), "eta0": 1, "loss": ["squared_loss", "huber", "epsilon_insensitive"], } grid_search = GridSearchCV(model, parameters, n_jobs=-1) grid_search.fit(x_train, y_train) self.model.partial_fit(x_set, y_set, sample_weight=weight_set)
for e in range(n_epochs): for i in range(0, rat_num, batch_size): given = all_V[i:min(i + batch_size, rat_num)] u_mean = user_avg[all_I[i:min(i + batch_size, rat_num)]] m_mean = movie_avg[all_J[i:min(i + batch_size, rat_num)]] times = np.log(R_u_t[all_I[i:min(i + batch_size, rat_num)], all_J[i:min(i + batch_size, rat_num)]]) u_mean = np.array([u_mean]).T m_mean = np.array([m_mean]).T times = times.T preding = np.concatenate((u_mean, m_mean, times), axis=1) lin_model.partial_fit(preding, given) joblib.dump(lin_model, baseline_fitter_filename) print 'incorporating learned deviations' num_rats = len(I) for i in range(0, num_rats, batch_size): u_mean = user_avg[I[i:min(i + batch_size, num_rats)]] m_mean = movie_avg[J[i:min(i + batch_size, num_rats)]] times = np.log(R_u_t[I[i:min(i + batch_size, num_rats)], J[i:min(i + batch_size, num_rats)]]) u_mean = np.array([u_mean]).T m_mean = np.array([m_mean]).T times = times.T V[i:min(i + batch_size, num_rats )] = V[i:min(i + batch_size, num_rats)] - lin_model.predict(
def train(self): if (self.existing_model): model = joblib.load(self.model_file) else: model = SGDRegressor() lowest_err = float('inf') stop_iter = 0 manager = Manager() results = manager.Queue() total_processes = int(self.train_members // (self.chunk_size * self.max_num_processes)) remain_ligands = self.train_members % (self.chunk_size * self.max_num_processes) remain_processes = remain_ligands // (self.chunk_size) final_process_ligands = remain_ligands % self.chunk_size while True: self.shuffle_train_data() jobs = [] process_count = 0 for i in range(self.train_db_index, self.chunk_size * self.max_num_processes, self.chunk_size): p = Process(target=self.next_train_chunk, args=(i, results)) jobs.append(p) p.start() print("starting process: ", process_count) process_count += 1 self.train_db_index += self.chunk_size * self.max_num_processes processing_epoch = True chunk_num = 1 while (chunk_num < total_processes + 1): self.train_receiver = results.get(True) if results.empty() and chunk_num < total_processes - 1: for p in jobs: p.terminate() p.join() print("did we at least finish joining holy f**k") for i in range(self.train_db_index, self.chunk_size * self.max_num_processes, self.chunk_size): print("are we getting to p assignment") p = Process(target=self.next_train_chunk, args=(i, results)) jobs.append(p) print("is this where the deadlock is") p.start() print("starting process: ", process_count) process_count += 1 self.train_db_index += self.chunk_size * self.max_num_processes chunk_num += 1 if chunk_num == total_processes - 1: for i in range(self.train_db_index, self.chunk_size * remain_processes, self.chunk_size): p = Process(target=self.next_train_chunk, args=(i, results)) jobs.append(p) p.start() chunk_num += 1 self.train_db_index = 0 chunk_size = self.train_receiver[1].shape[0] self.train_chunk_index = 0 for batch in tqdm(range(self.train_steps), desc="Training Model " + str(self.id) + " - Epoch " + str(self.epochs + 1)): ligands, labels = self.next_train_batch(chunk_size) model.partial_fit(ligands, labels) print("reached validation") #val_err = self.validate(model) val_err = 5 self.epochs += 1 if (val_err < lowest_err): lowest_err = val_err joblib.dump(model, self.model_file) stop_iter = 0 self.optimal_epochs = self.epochs else: stop_iter += 1 if (stop_iter > self.stop_threshold): print("Finished Training...\n") print("\nValidation Set Error:", lowest_err) return
class SGDPolyDualCartPoleSolver: def __init__(self, n_episodes=1000, max_env_steps=None, gamma=0.9, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.005, alpha=0.0001, batch_size=32, c=10, monitor=False): self.memory = deque(maxlen=100000) self.env = gym.make('CartPole-v0') if monitor: # whether or not to display video self.env = gym.wrappers.Monitor(self.env, '../data/cartpole-1', force=True) # hyper-parameter setting self.gamma = gamma self.epsilon = epsilon self.epsilon_min = epsilon_min self.epsilon_decay = epsilon_decay self.alpha = alpha self.n_episodes = n_episodes self.batch_size = batch_size self.c = c self.feature_tuning = PolynomialFeatures(interaction_only=True) if max_env_steps is not None: self.env._max_episode_steps = max_env_steps # Init model self.model = SGDRegressor(alpha=self.alpha, learning_rate='optimal', shuffle=False, warm_start=True) # Init dual model self.model2 = SGDRegressor(alpha=self.alpha, learning_rate='optimal', shuffle=False, warm_start=True) # Initialize feature tunning self.feature_tuning.fit( np.reshape(np.hstack((self.env.reset(), 0)), [1, 5])) # Initialize model self.model.partial_fit(self.preprocess_state(self.env.reset(), 0), [0]) # Initialize dual model self.model2.partial_fit(self.preprocess_state(self.env.reset(), 0), [0]) def remember(self, state, action, reward, next_state, done): """In this method, the (s, a, r, s') tuple is stored in the memory""" self.memory.append((state, action, reward, next_state, done)) def choose_action(self, state, epsilon): """Chooses the next action according to the model trained and the policy""" qsa = np.asarray([ self.model.predict(self.preprocess_state(state, a)) for a in range(self.env.action_space.n) ]).flatten() return self.env.action_space.sample() if (np.random.random() <= epsilon) else \ np.argmax(qsa) # exploits the current knowledge if the random number > epsilon, otherwise explores def get_epsilon(self, episode): """Returns an epsilon that decays over time until a minimum epsilon value is reached; in this case the minimum value is returned""" return max(self.epsilon_min, self.epsilon * math.exp(-self.epsilon_decay * episode)) def preprocess_state(self, state, action): """State and action are stacked horizontally and its features are combined as a polynomial to be passed as an input of the approximator""" # poly_state converts the horizontal stack into a combination of its parameters i.e. # [1, s_1, s_2, s_3, s_4, a_1, s_1 s_2, s_1 s_3, ...] poly_state = self.feature_tuning.transform( np.reshape(np.hstack((state, action)), [1, 5])) return poly_state def replay(self, batch_size): """Previously stored (s, a, r, s') tuples are replayed (that is, are added into the model). 
The size of the tuples added is determined by the batch_size parameter""" x_batch, y_batch = [], [] minibatch = random.sample(self.memory, min(len(self.memory), batch_size)) for state, action, reward, next_state, done in minibatch: # q(s', a) is predicted by model 2 qsa_s_prime = np.asarray([ self.model2.predict(self.preprocess_state(next_state, a)) for a in range(self.env.action_space.n) ]) qsa_s = reward if done \ else reward + self.gamma * np.max(qsa_s_prime) x_batch.append(self.preprocess_state(state, action)[0]) y_batch.append(qsa_s) # the replayed experience is fit into model 1 self.model.partial_fit(np.array(x_batch), np.array(y_batch)) def run(self): """Main loop that controls the execution of the agent""" scores100 = deque(maxlen=100) scores = [] j = 0 # used for model2 update every c steps for e in range(self.n_episodes): state = self.env.reset() done = False i = 0 while not done: action = self.choose_action(state, self.get_epsilon(e)) next_state, reward, done, _ = self.env.step(action) self.remember(state, action, reward, next_state, done) self.replay(self.batch_size) state = next_state i += 1 j += 1 # update second model if j % self.c == 0: self.model2.coef_ = self.model.coef_ self.model2.intercept_ = self.model.intercept_ scores100.append(i) scores.append(i) mean_score = np.mean(scores100) if e % 100 == 0: print( '[Episode {}] - Mean survival time over last 100 episodes was {} ticks.' .format(e, mean_score)) # noinspection PyUnboundLocalVariable print( '[Episode {}] - Mean survival time over last 100 episodes was {} ticks.' .format(e, mean_score)) return scores
    priorPrices = seq[index:(index + 8)].reshape(1, -1)
    targetPrice = np.array(seq[index + 8 + 7]).reshape(1, -1)
    currentPrice = seq[index + 8]
    X = (priorPrices - currentPrice) / currentPrice
    Y = (targetPrice - currentPrice) / currentPrice
    if hasFit:
        Yp = regressor.predict(X)[0]
        actualSign = Y / abs(Y) if Y != 0 else 0
        predictedSign = Yp / abs(Yp) if Yp != 0 else 0
        score += actualSign * predictedSign
        if j % 1000 == 0:
            print(score)
    regressor.partial_fit(X, Y)
    hasFit = True

hasFit = False
score = 0
for index in range(len(seq) - 356, len(seq) - 8 - 7):
    priorPrices = seq[index:(index + 8)].reshape(1, -1)
    targetPrice = np.array(seq[index + 8 + 7]).reshape(1, -1)
    currentPrice = seq[index + 8]
    X = (priorPrices - currentPrice) / currentPrice
    Y = (targetPrice - currentPrice) / currentPrice
    if hasFit:
        Yp = regressor.predict(X)[0]
    if (1 - epislon) <= np.random.uniform(0, 1):
        return np.random.choice(env.action_space.n, 1)[0]
    else:
        return np.argmax(predict(s))

# In[78]:

MAX_EPISODE = 500
EPISLON = 0.5
EPISLON_DECAY = 0.99
GAMMA = 0.95

for _ in range(env.action_space.n):
    model = SGDRegressor(learning_rate="constant")
    model.partial_fit([featurelize(env.reset())], [0])
    models.append(model)

# In[79]:

for episode in range(MAX_EPISODE + 1):
    state = env.reset()
    EPISLON *= EPISLON_DECAY
    for t in itertools.count():
        # env.render()
        action = epislon_policy(state, EPISLON)
        next_state, reward, done, _ = env.step(action)
        td_error = reward + GAMMA * np.max(predict(next_state))
        update(state, action, td_error)
        print("\rStep {} @ Episode {}/{} ({})".format(t, episode + 1,
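# featurelize, predict and update are referenced above but not shown in this excerpt.
# A minimal sketch of what they might look like, assuming the usual fitted scaler and
# RBF featurizer plus one SGDRegressor per action (all of these names are assumptions):
def featurelize(state):
    scaled = scaler.transform([state])
    return featurizer.transform(scaled)[0]

def predict(state):
    features = featurelize(state)
    return np.array([m.predict([features])[0] for m in models])

def update(state, action, target):
    # one incremental SGD step toward the TD target for the chosen action
    features = featurelize(state)
    models[action].partial_fit([features], [target])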
knn = KNeighborsRegressor(n_neighbors=5, weights="distance")
knn.fit(X_train, y_train)
print(knn, knn.score(X_vali, y_vali))
knn_y_pred = knn.predict(X_vali)

'''
mlp = MLPRegressor(hidden_layer_sizes=(32,))
for iter in range(1000):
    mlp.partial_fit(X_train, y_train)
print(mlp, mlp.score(X_vali, y_vali))
'''

sgd = SGDRegressor()
for iter in range(1000):
    sgd.partial_fit(X_train, y_train)
print(sgd, sgd.score(X_vali, y_vali))

## Lab TODO:
# Mandatory:
#  - Try some other regression models.
# Options:
#  - Try all the other regression models.
#  - Research the AirQualityUCI dataset to see what the best approaches are!
#  - Try at least one, plot a (y_pred, y_actual) scatter plot (e.g., visualize correlation / R**2)
#  - [Difficult] see the brute-force kNN below, try to refactor the loops out of python.

import matplotlib.pyplot as plt
def main(): emails = list( cur.execute("""SELECT ( COALESCE(email_from, '') || ' ' || COALESCE(email_to, '') || ' ' || COALESCE(email_cc, '') || ' ' || COALESCE(email_bcc, '') || ' ' || COALESCE(email_subject, '') || ' ' || COALESCE(email_message, '') ) FROM emails_main WHERE folder_directory IS NOT NULL""")) emails = [item[0] for item in emails] emails_folder = list( cur.execute("""SELECT folder_directory FROM emails_main WHERE folder_directory IS NOT NULL""")) emails_folder = [item[0] for item in emails_folder] emails_tobeprocessed = list( cur.execute("""SELECT ( COALESCE(email_from, '') || ' ' || COALESCE(email_to, '') || ' ' || COALESCE(email_cc, '') || ' ' || COALESCE(email_bcc, '') || ' ' || COALESCE(email_subject, '') || ' ' || COALESCE(email_message, '') ) FROM emails_main WHERE folder_directory IS NULL""")) emails_tobeprocessed = [item[0] for item in emails_tobeprocessed] emails_tobeprocessed_messageid = list( cur.execute("""SELECT message_id FROM emails_main WHERE folder_directory IS NULL""" )) emails_tobeprocessed_messageid = [ item[0] for item in emails_tobeprocessed_messageid ] emails_tobeprocessed_tuple = list( zip(emails_tobeprocessed_messageid, emails_tobeprocessed)) X_train = emails y_train = emails_folder labelencoder = LabelEncoder() labelencoder.fit(emails_folder) labelencoder_dict = dict( zip(labelencoder.classes_, labelencoder.transform(labelencoder.classes_))) y_train = labelencoder.transform(emails_folder) from sklearn.feature_extraction.text import CountVectorizer vectorizer = CountVectorizer() vectorizer.fit(X_train) X_train = vectorizer.transform(X_train) X_predict = [email[1] for email in emails_tobeprocessed_tuple] X_predict = vectorizer.transform(X_predict) # OneClassSVM - to weed out outliers from sklearn.svm import OneClassSVM oneclasssvm = OneClassSVM() oneclasssvm.fit(X_train) oneclass_preds = list(oneclasssvm.predict(X_predict)) # Get indexes of outliers # Outliers = -1, Inliers = 1 outliers_indexes = [i for i, x in enumerate(oneclass_preds) if x == -1] if len(oneclass_preds) != len(outliers_indexes): # Delete the outliers, in reverse order so it doesn't throw off the subsequent indexes for index in sorted(outliers_indexes, reverse=True): del emails_tobeprocessed_tuple[index] # New value for X_predict after deletion of outliers X_predict = [email[1] for email in emails_tobeprocessed_tuple] X_predict = vectorizer.transform(X_predict) if path.exists('supervised_model.pkl') == False: # SGDRegressor Model model = SGDRegressor(warm_start=True) model.partial_fit(X_train, y_train) else: model = joblib.load('supervised_model.pkl') model.partial_fit(X_train, y_train) folder_directory = list(model.predict(X_predict)) folder_directory = list( labelencoder.inverse_transform(folder_directory)) message_id, email = zip(*emails_tobeprocessed_tuple) supervised_temp_df = pd.DataFrame({ 'message_id': message_id, 'folder': folder_directory }) # Create a temporary table to store the results of supervised learning supervised_temp_df.to_sql('supervised_temp', con, if_exists='replace') # Update folder_directory in emails table and delete temporary table for supervised learning cur.executescript("""UPDATE emails_main SET folder_directory = ( SELECT folder FROM supervised_temp WHERE message_id = emails_main.message_id) WHERE emails_main.folder_directory IS NULL; DROP TABLE supervised_temp;""") con.commit() joblib.dump(model, 'supervised_model.pkl')
def create(X, X_column_types, y, y_column_types, arm, **kwargs): categorical_cols = [ c for c, t in zip(X.columns, X_column_types) if t in [DATATYPE_CATEGORY_INT, DATATYPE_CATEGORY_STRING] ] numerical_cols = [ c for c, t in zip(X.columns, X_column_types) if t == DATATYPE_NUMBER ] categorical = X[categorical_cols] numerical = X[numerical_cols] # discritize the numerical features num_discretizer = pd.DataFrame() for i in range(numerical.shape[1]): d_f = pd.DataFrame(pd.cut(numerical.iloc[:, i], 10, labels=False)) d_f2 = pd.DataFrame(pd.cut(numerical.iloc[:, i], 5, labels=False)) d_f3 = pd.DataFrame(pd.cut(numerical.iloc[:, i], 4, labels=False)) d_f4 = pd.DataFrame(pd.cut(numerical.iloc[:, i], 3, labels=False)) num_discretizer = pd.concat([num_discretizer, d_f, d_f2, d_f3, d_f4], axis=1) # function to rename the duplicate columns def df_column_uniquify(df): df_columns = df.columns new_columns = [] for item in df_columns: counter = 0 newitem = item while newitem in new_columns: counter += 1 newitem = "{}_{}".format(item, counter) new_columns.append(newitem) df.columns = new_columns return df num_discretizer = df_column_uniquify(num_discretizer) # Categorical features encoding cat_list = [] for i in range(categorical.shape[1]): if (len(categorical.iloc[:, i].unique()) >= 2): cat_list.append(categorical.keys()[i]) categorical = categorical[cat_list] # One hot encode the categorical_features #Data_cat = pd.get_dummies(categorical) enc = OneHotEncoder() enc.fit(categorical) Data_cat = pd.DataFrame(enc.transform(categorical).toarray()) original_feats = pd.concat([numerical, Data_cat], axis=1) num_discret = pd.concat([numerical, Data_cat, num_discretizer], axis=1) #Select the best half of discretized features by Mini batch gradient descent #clf = SGDClassifier(loss="log", penalty="l1") mini_batches = [] batch_size = 32 data = np.hstack((num_discret, (y.values).reshape(-1, 1))) #data =pd.concat([num_discretizer, y], axis=1) np.random.shuffle(data) n_minibatches = data.shape[0] // batch_size i = 0 for i in range(n_minibatches + 1): mini_batch = data[i * batch_size:(i + 1) * batch_size, :] X_mini = mini_batch[:, :-1] Y_mini = mini_batch[:, -1].reshape((-1, 1)) mini_batches.append((X_mini, Y_mini)) if data.shape[0] % batch_size != 0: mini_batch = data[i * batch_size:data.shape[0]] X_mini = mini_batch[:, :-1] Y_mini = mini_batch[:, -1].reshape((-1, 1)) mini_batches.append((X_mini, Y_mini)) if (y_column_types[0] == DATATYPE_NUMBER): model = SGDRegressor(loss="squared_loss", penalty="l1") for X_mini, Y_mini in mini_batches: model.partial_fit(X_mini, Y_mini) coefs = model.coef_ else: model = SGDClassifier(loss="log", penalty="l1") for X_mini, Y_mini in mini_batches: model.partial_fit(X_mini, Y_mini, classes=np.unique(y)) coefs = model.coef_[0] num = len(numerical.columns) + len(Data_cat.columns) #coefs=model.coef_ h = np.argsort(coefs[num:])[::-1][:int(num_discretizer.shape[1] / 2)] best_half_sorted = [x + num for x in h] best_dicretized = num_discret.iloc[:, best_half_sorted] total = pd.concat([categorical, best_dicretized], axis=1) # one hot encode the interger discretized features enc = OneHotEncoder() enc.fit(best_dicretized) dicretized_ohe = pd.DataFrame(enc.transform(best_dicretized).toarray()) # combine cat_ohe and disretized_ohe features Data = pd.concat([Data_cat, dicretized_ohe], axis=1) # Rename the features which has duplicates Data = df_column_uniquify(Data) second_order = pd.DataFrame() final_feats = pd.DataFrame() cnt = 0 cnt_1 = 0 for i in range(len(total.columns) - 1): a = Data.iloc[:, [ o 
for o in range(cnt, cnt + len(total.iloc[:, i].unique())) ]] cnt = cnt + len(total.iloc[:, i].unique()) cnt_1 = cnt for j in range(i + 1, len(total.columns)): b = Data.iloc[:, [ p for p in range(cnt_1, cnt_1 + len(total.iloc[:, j].unique())) ]] cnt_1 = cnt_1 + len(total.iloc[:, j].unique()) first = pd.DataFrame() for k in range(a.shape[0]): c = a.iloc[[k]].values d = b.iloc[[k]].values result = np.outer(c, d).ravel() first = first.append(pd.Series(result), ignore_index=True) second_order = pd.concat([second_order, first], axis=1) second_order = df_column_uniquify(second_order) firstorder_select = pd.concat([original_feats, second_order], axis=1) # slect the second order features using Logistic regression #clf = SGDClassifier(loss="log", penalty="l1") mini_batches = [] batch_size = 32 data = np.hstack((firstorder_select, (y.values).reshape(-1, 1))) #data = pd.concat([second_order, y], axis=1) np.random.shuffle(data) n_minibatches = data.shape[0] // batch_size i = 0 for i in range(n_minibatches + 1): mini_batch = data[i * batch_size:(i + 1) * batch_size, :] X_mini = mini_batch[:, :-1] Y_mini = mini_batch[:, -1].reshape((-1, 1)) mini_batches.append((X_mini, Y_mini)) if data.shape[0] % batch_size != 0: mini_batch = data[i * batch_size:data.shape[0]] X_mini = mini_batch[:, :-1] Y_mini = mini_batch[:, -1].reshape((-1, 1)) mini_batches.append((X_mini, Y_mini)) #create_mini_batches(gen_feats, y, 32) if (y_column_types[0] == DATATYPE_NUMBER): model = SGDRegressor(loss="squared_loss", penalty="l1") for X_mini, Y_mini in mini_batches: model.partial_fit(X_mini, Y_mini) coefs = model.coef_ else: model = SGDClassifier(loss="log", penalty="l1") for X_mini, Y_mini in mini_batches: model.partial_fit(X_mini, Y_mini, classes=np.unique(y)) coefs = model.coef_[0] num1 = len(original_feats.columns) #selected top 10 features g = np.argsort(coefs[num1:])[::-1][:10] selected_sorted = [x + num1 for x in g] selected_best = firstorder_select.iloc[:, selected_sorted] selected = selected_best.copy() new_col_types = X_column_types + [DATATYPE_CATEGORY_INT] * len( selected_best.columns) total_feats = pd.concat([original_feats, selected_best], axis=1) final_feats = pd.concat([X, selected_best], axis=1) # higher order features generation if len(categorical.columns) > 2: for i in range(len(categorical.columns) - 2): cnt = 0 Higher_order = pd.DataFrame() for i in range(len(total.columns)): a = Data.iloc[:, [ o for o in range(cnt, cnt + len(total.iloc[:, i].unique())) ]] cnt = cnt + len(total.iloc[:, i].unique()) for j in range(selected_best.shape[1]): b = selected_best.iloc[:, j] second = pd.DataFrame() for k in range(a.shape[0]): c = a.iloc[[k]].values d = b.iloc[[k]].values result_1 = np.outer(c, d).ravel() second = second.append(pd.Series(result_1), ignore_index=True) Higher_order = pd.concat([Higher_order, second], axis=1) Higher_order = df_column_uniquify(Higher_order) High_order_sel = pd.concat([total_feats, Higher_order], axis=1) mini_batches = [] batch_size = 32 data = np.hstack((High_order_sel, (y.values).reshape(-1, 1))) #data = pd.concat([Higher_order, y], axis=1) np.random.shuffle(data) n_minibatches = data.shape[0] // batch_size i = 0 for i in range(n_minibatches + 1): mini_batch = data[i * batch_size:(i + 1) * batch_size, :] X_mini = mini_batch[:, :-1] Y_mini = mini_batch[:, -1].reshape((-1, 1)) mini_batches.append((X_mini, Y_mini)) if data.shape[0] % batch_size != 0: mini_batch = data[i * batch_size:data.shape[0]] X_mini = mini_batch[:, :-1] Y_mini = mini_batch[:, -1].reshape((-1, 1)) 
mini_batches.append((X_mini, Y_mini)) #create_mini_batches(gen_feats, y, 32) if (y_column_types[0] == DATATYPE_NUMBER): model = SGDRegressor(loss="squared_loss", penalty="l1") for X_mini, Y_mini in mini_batches: model.partial_fit(X_mini, Y_mini) coefs = model.coef_ else: model = SGDClassifier(loss="log", penalty="l1") for X_mini, Y_mini in mini_batches: model.partial_fit(X_mini, Y_mini, classes=np.unique(y)) coefs = model.coef_[0] #coefs=model.coef_ num2 = len(total_feats.columns) sort = np.argsort(coefs[num2:])[::-1][:5] selected_sorted = [x + num2 for x in sort] selected_best = High_order_sel.iloc[:, selected_sorted] selected = pd.concat([selected, selected_best], axis=1) total_feats = pd.concat([total_feats, selected_best], axis=1) final_feats = pd.concat([final_feats, selected_best], axis=1) transformed_X = final_feats new_col_types = X_column_types + [DATATYPE_CATEGORY_INT] * len( selected.columns) else: transformed_X = final_feats return None, transformed_X, new_col_types
class epsilonGreedyContextualBandit(object): def __init__(self, epsilon=0.1, fit_intercept=True, penalty='l2', ips=True, learning_rate=0.01, n_features=1024, mode='online', batch_size=128, burn_in=1000): self.config = { 'epsilon': epsilon, 'fit_intercept': fit_intercept, 'penalty': penalty, 'learning_rate': learning_rate, 'mode': mode, 'batch_size': batch_size, 'ips': ips, 'burn_in': burn_in } self.arms = {} self.n_arms = 0 self.n_features = n_features self.vectorizer = HashingVectorizer(self.n_features) self.batch = [] self.batch_counter = 0 self.epoch = 0 self.model = SGDRegressor(fit_intercept=self.config['fit_intercept'], penalty=self.config['penalty'], max_iter=1, eta0=self.config['learning_rate'], learning_rate='constant', tol=None) def _explode_features(self, context, choice, return_array=True): prefixed_words = [choice + '_' + w for w in context.split(' ')] context = ' '.join(prefixed_words) if return_array: return [context] else: return context def _explode_features_batch(self, context, choices): exploded_contexts = [] for c in choices: prefixed_words = [c + '_' + w for w in context.split(' ')] exploded_features = ' '.join(prefixed_words) exploded_contexts.append(exploded_features) return exploded_contexts def _weight(self, reward, prob): if self.config['ips']: return self._ips_weight(reward, prob) else: return -reward def _ips_weight(self, reward, prob): return (-reward) / prob def _prob_dist(self, n, opt_idx, randomise=False): epsilon = self.config['epsilon'] dist = np.full(n, epsilon / n) if randomise: opt_idx = numpy.random.randint(0, n) dist[opt_idx] = (1 - epsilon) + (epsilon / n) return dist def _get_prob(self, n, choice, opt_choice): epsilon = self.config['epsilon'] if choice == opt_choice: return (1 - epsilon) + (epsilon / n) else: return epsilon / n def select_arm(self, context, choices): self.epoch += 1 contexts = self._explode_features_batch(context, choices) contexts = self.vectorizer.fit_transform(contexts) try: predictions = self.model.predict(contexts) opt_idx = np.argmin(predictions) except NotFittedError: predictions = [] opt_idx = 0 choice = np.random.choice(choices, p=self._prob_dist(len(choices), opt_idx)) decision = base64.b64encode( json.dumps({ 'choices': choices, 'choice': choice, 'prob': self._get_prob(len(choices), choice, choices[opt_idx]) }).encode()) return (choice, predictions, decision) def reward(self, context, reward, decision): decision = json.loads(base64.b64decode(decision)) choice = decision['choice'] choices = decision['choices'] choices.remove(choice) cost = self._weight(reward, decision['prob']) if self.config['mode'] == 'online': exploded_context = self._explode_features(context, choice) self.model.partial_fit( self.vectorizer.fit_transform(exploded_context), [cost]) if self.config['ips']: exploded_contexts = self._explode_features_batch( context, choices) self.model.partial_fit( self.vectorizer.fit_transform(exploded_contexts), np.full(len(choices), 0)) else: exploded_context = self._explode_features(context, choice, return_array=False) self.batch.append((exploded_context, cost)) if self.config['ips']: exploded_contexts = self._explode_features_batch( context, choices) for item in exploded_contexts: self.batch.append((item, 0)) self.batch_counter += 1 if self.batch_counter == self.config['batch_size']: self._batch_reward(self.batch) self.batch = [] self.batch_counter = 0 def _batch_reward(self, batch): contexts, costs = map(list, zip(*batch)) self.model.partial_fit(self.vectorizer.fit_transform(contexts), costs) def reset(self): 
self.__init__(epsilon=self.config['epsilon'], fit_intercept=self.config['fit_intercept'], penalty=self.config['penalty'], learning_rate=self.config['learning_rate'], n_features=self.n_features, mode=self.config['mode'], batch_size=self.config['batch_size'], ips=self.config['ips'], burn_in=self.config['burn_in'])
action_space = 4
state_space = 8


# Return scaled and featurized state
def preprocess(state):
    return featurizer.transform(scaler.transform([state]))


# Keep a separate Q function for each action
# (implementation detail)
q_functions = []
for a in range(action_space):
    m = SGDRegressor(learning_rate="constant")
    m.partial_fit(preprocess(np.zeros(state_space)), [0])
    q_functions.append(m)


# Return an estimation of Q(s, a)
def get_q_estimation(state, action):
    preprocessed = preprocess(state)
    return q_functions[action].predict(preprocessed)[0]


# Perform an SGD step to bring Q(s, a) closer to the given value
def update_estimation(state, action, value):
    preprocessed = preprocess(state)
    q_functions[action].partial_fit(preprocessed, [value])
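# A minimal sketch of how these helpers are typically driven by a Q-learning loop.
# The surrounding loop is not part of the snippet above; env (a gym-style environment),
# gamma and epsilon are assumptions for illustration:
def q_learning_episode(env, gamma=0.99, epsilon=0.1):
    state = env.reset()
    done = False
    while not done:
        # epsilon-greedy action selection over the per-action Q estimates
        if np.random.rand() < epsilon:
            action = np.random.randint(action_space)
        else:
            action = int(np.argmax([get_q_estimation(state, a) for a in range(action_space)]))
        next_state, reward, done, _ = env.step(action)
        # TD(0) target; bootstrap only if the episode continues
        target = reward
        if not done:
            target += gamma * max(get_q_estimation(next_state, a) for a in range(action_space))
        update_estimation(state, action, target)  # one partial_fit step toward the target
        state = next_state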
def main(): updates_file = '/scratch/cluster/aish/rl_for_oal/linear/updates.txt' with open(updates_file) as handle: updates = list() for line in handle: line = re.sub('array\(\[', '', line.strip()) line = re.sub('\]\)', '', line.strip()) updates.append(ast.literal_eval(line.strip())) features = [update['feature'] for update in updates] targets = [update['target'] for update in updates] for i in range(len(targets)): if targets[i] == 100: targets[i] = 400 # if targets[i] == -100: # targets[i] = -1000 feature_names = \ [ 'Min kappa in current predicates', 'Max kappa in current predicates', 'Second max kappa in current predicates', 'Avg kappa in current predicates', 'Positive score (normalized) of top region', 'Positive score (normalized) of second best region', 'Avg positive score (normalized) of regions', 'Decision of top classifier for top region', 'Decision of second best classifier for top region', 'Decision of top classifier for second best region', 'Decision of second best classifier for second best region', 'Avg decision of top classifier', 'Avg decision of second best classifier', 'Evaluating score of make_guess (0-1)', 'Evaluating score of ask_positive_example (0-1)', 'Question is on-topic (0-1)', 'Predicate has a classifier (0-1)', 'Margin of object', 'Density of object', 'Fraction of k nearest neighbours of the object which are unlabelled', 'Prev kappa of classifier of predicate', 'Frequency of use of the predicate - normalized', 'Number of system turns used - normalized', 'Fraction of previous dialogs using this predicate that have succeeded' ] # Feature 13 is 0-1 for make guess print 'Selected:', feature_names[13], feature_names[22] init_weights = np.zeros(len(feature_names)) init_weights[13] = init_weights[22] = 1.0 random_features = np.random.randn(10, len(feature_names)) random_targets = np.random.randn(10) features_np = np.array(features) features_min = np.amin(features_np, axis=0) features_max = np.amax(features_np, axis=0) for (idx, feature_name) in enumerate(feature_names): print feature_name, ':', features_min[idx], '-', features_max[idx] print 'Target :', min(targets), '-', max(targets) x = raw_input() indices_evaluating_guess = [idx for idx in range(len(features)) if features[idx][13] == 1] print 'Num indices evaluating make guess =', len(indices_evaluating_guess) x = raw_input() num_batches = 100 batch_size = len(features) / num_batches classifier = SGDRegressor() # classifier.partial_fit(random_features, random_targets) # classifier.coef_ = init_weights for batch_num in range(num_batches): batch_features = features[batch_num * batch_size: (batch_num + 1) * batch_size] batch_targets = targets[batch_num * batch_size: (batch_num + 1) * batch_size] batch_features_evaluating_guess = [batch_features[idx - (batch_num * batch_size)] for idx in indices_evaluating_guess if idx >= batch_num * batch_size and idx < (batch_num + 1) * batch_size] batch_targets_evaluating_guess = [batch_targets[idx - (batch_num * batch_size)] for idx in indices_evaluating_guess if idx >= batch_num * batch_size and idx < (batch_num + 1) * batch_size] if batch_num > 0: preds = classifier.predict(batch_features) mse = calc_mse(preds, batch_targets) print 'Batch', batch_num, ', test mse =', mse print 'preds :', preds[:5] print 'targets :', batch_targets[:5] print preds = classifier.predict(batch_features_evaluating_guess) mse = calc_mse(preds, batch_targets_evaluating_guess) print 'Batch', batch_num, ', test guess mse =', mse print print 'preds :', preds[:5] print 'targets :', 
batch_targets_evaluating_guess[:5] print # x = raw_input() classifier.partial_fit(batch_features, batch_targets) preds = classifier.predict(batch_features) mse = calc_mse(preds, batch_targets) print 'Batch', batch_num, ', train mse =', mse preds = classifier.predict(batch_features_evaluating_guess) mse = calc_mse(preds, batch_targets_evaluating_guess) print 'Batch', batch_num, ', train guess mse =', mse
    yield x_points
    yield y_points

batch_num = int(len(y) / size)
n_itr = 1000
x_batch, y_batch = create_batch(X, y, size)

# Creating SGD
mb_sgd = SGDRegressor(eta0=0.001, random_state=15, tol=1e-2)
for ep in range(n_itr):
    batch_indexer = 0
    for i in range(1, batch_num + 1):
        mb_sgd.partial_fit(x_batch[batch_indexer:i * size].reshape(-1, 1) / min(X),
                           y_batch[batch_indexer:i * size] / min(y))
        batch_indexer += size

# Getting the coefficients and intercepts
pre_coef = mb_sgd.coef_
pre_inter = mb_sgd.intercept_ * min(y)
print(' COEF :: ', pre_coef)
print(' INTERCEPT :: ', pre_inter)

y_pred = mb_sgd.predict(X / min(X))
y_regressor = pre_inter + pre_coef * X
r2 = met.r2_score(y / min(y), y_pred)
rmse = calc_rmse(y, y_regressor)
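# The top of create_batch is cut off above (only its final yields are visible), and
# calc_rmse is referenced but not shown. A minimal sketch of both, under the assumption
# that create_batch simply shuffles the data once and yields the two arrays:
def create_batch(X, y, size):
    idx = np.random.permutation(len(y))
    x_points = X[idx]
    y_points = y[idx]
    yield x_points
    yield y_points

def calc_rmse(y_true, y_hat):
    return np.sqrt(np.mean((np.asarray(y_true) - np.asarray(y_hat)) ** 2))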
class Model(object): def __init__(self, params): self.model_class = params['class'] self.model = {} self.feature_constructor = None self.all_possible_decisions = [] self.X = [] self.y = [] self.buffer = 0 def initialize(self): if self.model_class == 'scikit': self.model = SGDRegressor(loss='squared_loss', alpha=0.1, n_iter=10, shuffle=True, eta0=0.0001) self.feature_constructor = FeatureHasher(n_features=200, dtype=np.float64, non_negative=False, input_type='dict') elif self.model_class == 'lookup': self.model = {} def clean_buffer(self): self.X = [] self.y = [] self.buffer = 0 def return_design_matrix(self, all_decision_states, reward=None): if self.model_class == 'lookup_table': return all_decision_states, reward elif self.model_class == 'scikit': X, y = [], [] for decision_state in all_decision_states: information, decision_taken = decision_state tr = {} tr['-'.join([str(information[1]), decision_taken])] = 1 tr['-'.join([str(information[0]), decision_taken])] = 1 tr['-'.join( [str(information[0]), str(information[1]), decision_taken])] = 1 X.append(tr) y.extend([reward]) X = self.feature_constructor.transform(X).toarray() return X, y def fit(self, X, y): if self.model_class == 'scikit': # X, y = self.shuffle_data(X, y) self.model.partial_fit(X, y) print self.model.score(X, y) if self.model_class == 'lookup_table': for decision_state in X: if decision_state not in self.model: for d in self.all_possible_decisions: self.model[(decision_state[0], d)] = DecisionState() self.model[decision_state].count += 1 updated_value = self.model[decision_state].value_estimate + ( 1.0 / self.model[decision_state].count) * ( y - self.model[decision_state].value_estimate) self.model[decision_state].value_estimate = updated_value def predict(self, X): if self.model_class == 'scikit': return self.model.predict(X) if self.model_class == 'lookup_table': if X not in self.model: for d in self.all_possible_decisions: self.model[(X[0], d)] = DecisionState() return self.model[X].value_estimate @staticmethod def shuffle_data(a, b): assert len(a) == len(b) p = np.random.permutation(len(a)) return a[p], b[p]
def __init__(self):
    self.models = []
    for _ in range(env.action_space.n):
        model = SGDRegressor(learning_rate="constant")
        model.partial_fit([self.feature_state(env.reset())], [0])
        self.models.append(model)
clf = SGDRegressor(penalty='l1', alpha=i, eta0=.00000001, l1_ratio=1, random_state=0)
for j in range(epochs):
    for k in range(num_steps):
        if k != num_steps - 1:
            batch_x = transformer.transform(relData[k * batch_size:(k + 1) * batch_size, :])
            batch_y = target[k * batch_size:(k + 1) * batch_size]
        else:
            batch_x = transformer.transform(relData[k * batch_size:relData.shape[0], :])
            batch_y = target[k * batch_size:target.shape[0]]
        clf.partial_fit(batch_x, batch_y)
    f = open("./output.txt", "a")
    f.write(str(j) + "\n")
    f.close()
    pred = clf.predict(batch_x)
    mse = mean_squared_error(batch_y, pred)
    print("The MSE of the last batch for epoch " + str(j + 1) + " was " + str(mse))

# --------------
# Print model evaluation statistics
# --------------
# See how many of the coefficients of the model were zero, and how many were close to zero:
params = clf.coef_
numZero = 0
total = 0
def epsilonGreedyPolicy(epsilon, nA):
    def policy_fn(observation):
        A = np.ones(nA, dtype=float) * epsilon / nA
        q_values = predict(observation)
        best_action = np.argmax(q_values)
        A[best_action] += (1.0 - epsilon)
        return A
    return policy_fn

models = []
for _ in range(env.action_space.n):
    model = SGDRegressor(learning_rate="constant")
    model.partial_fit([approximate.transform(scaler.transform([env.reset()]))[0]], [0])
    models.append(model)

graph = itersGraph(iter_lengths=np.zeros(iters), iter_rewards=np.zeros(iters))

# Q-learning
for i in range(iters):
    policy = epsilonGreedyPolicy(epsilon * epsilon_decay**i, env.action_space.n)
    last_reward = graph.iter_rewards[i - 1]
    sys.stdout.flush()
    state = env.reset()
class Model(object): def __init__(self, params): self.model_class = params['class'] self.model = {} self.feature_constructor = None self.all_possible_decisions = [] self.X = [] self.y = [] self.buffer = 0 self.base_folder_name = params['base_folder_name'] self.design_matrix_cache = {} self.exists = False def finish(self): "Let's pickle only if we are running vw" if self.model_class == 'vw_python': # Want python object for later use self.X = [ex.finish() for ex in self.X] self.model.finish() self.X = None self.y = None self.model = None self.design_matrix_cache = {} with open(self.base_folder_name + '/model_obs.pkl', mode='wb') as model_file: pickle.dump(self, model_file) def initialize(self): if self.model_class == 'scikit': self.model = SGDRegressor(loss='squared_loss', alpha=0.1, n_iter=10, shuffle=True, eta0=0.0001) self.feature_constructor = FeatureHasher(n_features=200, dtype=np.float64, non_negative=False, input_type='dict') elif self.model_class == 'lookup': self.model = {} # This thing crawls,, too much python overhead for subprocess and pipe elif self.model_class == 'vw': self.model = None self.model_path = self.base_folder_name + "/model.vw" self.cache_path = self.base_folder_name + "/temp.cache" self.f1 = open(self.base_folder_name + "/train.vw", 'a') self.train_vw_cmd = [ '/usr/local/bin/vw', '--save_resume', '--holdout_off', '-c', '--cache_file', self.cache_path, '-f', self.model_path, '--passes', '20', '--loss_function', 'squared' ] self.train_vw_resume_cmd = [ '/usr/local/bin/vw', '--save_resume', '-i', self.model_path, '-f', self.model_path ] # self.remove_vw_files() elif self.model_class == 'vw_python': # TODO interactions, lrq, dropout etc commands go here # TODO Need to pass model path and throw finish somewhere to store the final model self.model_path = self.base_folder_name + "/model.vw" self.cache_path = self.base_folder_name + "/temp.cache" #self.f1 = open(self.base_folder_name + "/train.vw", 'a') self.model = pyvw.vw(quiet=True, l2=0.00000001, loss_function='squared', passes=1, holdout_off=True, cache=self.cache_path, f=self.model_path, lrq='sdsd7', lrqdropout=True) def remove_vw_files(self): if os.path.isfile(self.cache_path): os.remove(self.cache_path) if os.path.isfile(self.f1): os.remove(self.f1) if os.path.isfile(self.model_path): os.remove(self.model_path) # def if_model_exists(self): # exists = False # if self.model_class == 'lookup_table': # if self.model: # self.exists = True # # elif self.model_class == 'scikit': # if hasattr(self.model, 'intercept_'): # self.exists = True # # elif self.model_class == 'vw': # if os.path.isfile(self.model_path): # self.exists = True # # elif self.model_class == 'vw_python': # return self.exists # # return exists def clean_buffer(self): self.X = [] self.y = [] self.buffer = 0 # TODO Store design matrix in cache so we don't have to compute it all the time # @do_profile def return_design_matrix(self, decision_state, reward=None): """ Design matrix can simply return catesian product of state and decision For now all categorical features """ # TODO Kill game specific features if self.model_class == 'lookup_table': return decision_state, reward else: train_test_mode = 'train' if reward else 'test' cache_key = (decision_state, train_test_mode) if cache_key in self.design_matrix_cache: fv, reward = self.design_matrix_cache[cache_key] else: state, decision_taken = decision_state # Decision pixel tuple is our design matrix # TODO Do interaction via vw namespaces may be? 
# Right now features are simply state X decision interaction + single interaction feature representing state try: _ = len(state[0]) all_features = [ 'feature' + str(idx) + '-' + '-'.join(str(x) for x in obs) + '-' + decision_taken for idx, obs in enumerate(state) ] # Hmm design matrix for blackjack is different except TypeError: all_features = [ '-'.join([i, str(j), decision_taken]) for i, j in zip(state._fields, state) ] tag = '_'.join(all_features) all_features_with_interaction = all_features + [tag] if self.model_class == 'scikit': tr = { fea_value: 1 for fea_value in all_features_with_interaction } fv = self.feature_constructor.transform([tr]).toarray() fv = fv[0] elif self.model_class == 'vw' or self.model_class == 'vw_python': input = " ".join(all_features_with_interaction) if reward: output = str(reward) #+ " " + tag fv = output + " |sd " + input + '\n' #self.f1.write(fv) else: fv = " |sd " + input + '\n' if self.model_class == 'vw_python': fv = self.model.example(fv) # Store only training examples # TODO: pyvw for blackjack is somehow still screwed up for cache # TODO Something is messed here NEED TO FIX HOw COME ONLY blackjack fails? if 'hit' not in self.all_possible_decisions: self.design_matrix_cache[cache_key] = (fv, reward) return fv, reward #@do_profile def fit(self, X, y): if self.model_class == 'scikit': # X, y = self.shuffle_data(X, y) self.model.partial_fit(X, y) print self.model.score(X, y) self.exists = True elif self.model_class == 'lookup_table': for decision_state in X: if decision_state not in self.model: for d in self.all_possible_decisions: self.model[(decision_state[0], d)] = bandit.DecisionState() self.model[decision_state].count += 1 updated_value = self.model[decision_state].value_estimate + ( 1.0 / self.model[decision_state].count) * ( y - self.model[decision_state].value_estimate) self.model[decision_state].value_estimate = updated_value self.exists = True elif self.model_class == 'vw': # if model file exists do --save resume # http://stackoverflow.com/questions/13835055/python-subprocess-check-output-much-slower-then-call with NamedTemporaryFile() as f: cmd = self.train_vw_resume_cmd if os.path.isfile( self.model_path) else self.train_vw_cmd p = Popen(cmd, stdout=f, stdin=PIPE, stderr=STDOUT) tr = '\n'.join(X) res = p.communicate(tr) f.seek(0) res = f.read() print res self.exists = True elif self.model_class == 'vw_python': # TODO create example and fit # TODO Remember X is list of examples (not a single example) SO How to go about that? 
# TODO Or just use Scott's awesome scikit learn interface # vw = pyvw.vw(quiet=True, lrq='aa7', lrqdropout=True, l2=0.01) # Let's use vw as good'old sgd solver for _ in xrange(10): # May be shuffling not necessary here #random.shuffle(X) res = [fv.learn() for fv in X] self.exists = True #print 'done' # @do_profile def predict(self, test): if self.model_class == 'scikit': test = test.reshape(1, -1) # Reshape for single sample return self.model.predict(test)[0] elif self.model_class == 'lookup_table': if test not in self.model: for d in self.all_possible_decisions: self.model[(test[0], d)] = bandit.DecisionState() return self.model[test].value_estimate elif self.model_class == 'vw': with NamedTemporaryFile() as f: cmd = [ '/usr/local/bin/vw', '-t', '-i', self.model_path, '-p', '/dev/stdout', '--quiet' ] p = Popen(cmd, stdout=f, stdin=PIPE, stderr=STDOUT) res = p.communicate(test) f.seek(0) res = f.readline().strip() return float(res) elif self.model_class == 'vw_python': # TODO Create example and fit test.learn( ) # Little wierd that we have to call learn at all for a prediction res = test.get_simplelabel_prediction() return res @staticmethod def shuffle_data(a, b): assert len(a) == len(b) p = np.random.permutation(len(a)) return a[p], b[p]
class Defender: ''' Makes Q estimates based on entire state of the network. ''' def __init__(self, network, linear=False, learning="sarsa", feature_set="full", defend=True): self.attacker_node = network.start_node self.linear = linear self.Qnet = None self.feature_set = feature_set self.reward = 0 self.network = network self.f = 0 self.defense_val = 1 if not defend: self.defense_val = 0 if self.feature_set == 'full': self.perc_act_size = network.n_nodes * network.v_per_node #full security of network if self.feature_set == 'slim': self.perc_act_size = network.n_nodes # security level of vulnerability per node self.learning = learning if not linear: inputs = Input(shape=(self.perc_act_size, )) h1 = Dense(units=10, activation='relu')(inputs) y = Dense(units=1, name='output', activation='linear')(h1) self.Qnet = tf.keras.Model(inputs=inputs, outputs=y) self.Qnet.compile(loss='mse', optimizer='adam', metrics=['mse']) init_x = np.random.random(size=self.perc_act_size).reshape(1, -1) init_y = np.random.random(1).reshape(-1, 1) self.Qnet.fit(init_x, init_y, epochs=1, verbose=0) else: self.Qnet = SGDRegressor(loss='squared_loss', max_iter=5, learning_rate='constant', eta0=0.1) self.Qnet.partial_fit( np.random.random(size=self.perc_act_size).reshape(1, -1) * 10, np.random.random(size=1).reshape(-1) * 10) self.Qnet.coef_ = np.zeros(self.perc_act_size) self.Qnet.intercept_ = np.zeros(1) return def reset(self): self.attacker_node = self.network.start_node def observe(self, network, node=None): if self.feature_set == "full": w = network.V.reshape(-1).copy() w[np.where(w == 2)] = 2 return w elif self.feature_set == "slim": o = [] for i in range(network.n_nodes): o.append(min(network.V[i])) return np.array(o) def get_weights(self): return self.Qnet.coef_ def make_obs_action_pairs( self, actlist, network): # what the network security will look like if self.feature_set == 'full': o = self.observe(network) observations = [] for i in actlist: tmp = o.copy() if tmp[i] == 1: tmp[i] = 2 if tmp[i] == 0: tmp[i] = 1 observations.append(tmp) return np.array(observations) elif self.feature_set == 'slim': o = self.observe(network) observations = [] for i in actlist: tmp = o.copy() if tmp[i] == 1: tmp[i] = 2 if tmp[i] == 0: tmp[i] = 1 observations.append(tmp) return np.array(observations) def select_action(self, network, e): actlist = None if self.feature_set == 'full': actlist = list(range(network.v_per_node * network.n_nodes)) elif self.feature_set == 'slim': actlist = list(range(network.n_nodes)) pairs = self.make_obs_action_pairs(actlist, network) x = np.random.random(1) if x <= e: action = np.random.randint(self.perc_act_size) return action else: vals = self.Qnet.predict(pairs) action = np.argmax(vals) return action def set_reward(self, r): self.reward = r def do_action(self, network, action): #return reward if self.feature_set == 'full': node = int(np.floor(action / network.v_per_node)) #print(node,action) v = action % network.v_per_node network.V[node][v] = min(2, network.V[node][v] + self.defense_val) self.set_reward(0) if np.sum(network.V[1:]) == (network.n_nodes - 1) * network.v_per_node * 2: #print("all") self.set_reward(100) return elif self.feature_set == 'slim': v = np.argmin(network.V[action]) network.V[action][v] = min(2, network.V[action][v] + self.defense_val) self.set_reward(0) if np.sum(network.V[1:]) == (network.n_nodes - 1) * network.v_per_node * 2: #print("all") self.set_reward(100) return def QsarsaUpdate(self, sarsa, gamma): perc_act, reward, next_perc_act = sarsa a = 0.1 #print(perc_act) curr 
= self.Qnet.predict(perc_act.reshape(1, -1)) next_ = self.Qnet.predict(next_perc_act.reshape(1, -1)) ret = np.array([curr + a * (reward + gamma * next_ - curr) ]).reshape(-1) if self.linear: self.Qnet.partial_fit(perc_act.reshape(1, -1), ret) else: self.Qnet.fit(perc_act.reshape(1, -1), ret, verbose=0) def _calculate_returns(self, sarsas, gamma, alpha=1): all_rets = [] s = 0 _sarsas = sarsas.copy() _sarsas.reverse() x = [] for sarsa in _sarsas: perc_act, reward, next_perc_act = sarsa s = 0 if len(all_rets) > 0: s = alpha * (gamma * all_rets[-1] + reward) else: s = reward all_rets.append(s) x.append(perc_act) all_rets.reverse() x.reverse() return np.array(all_rets), np.array(x) def QMonteCarlo(self, sarsas, gamma): returns, x = self._calculate_returns(sarsas, gamma) if self.linear: self.Qnet.partial_fit(x, returns) else: self.Qnet.fit(x, returns, verbose=0)
    before_action_observation = observation
    observation, reward, done, info = env.step(action)
    n_frames_reward += reward
    rewards = np.hstack((rewards, reward))
    train_data_n_frames = u.concatNewStep(train_data_n_frames, before_action_observation, action)
    # input('---Press any key...')

train_data_n_frames = train_data_n_frames[1:]
rewards = rewards[1:]
print('---preparations finished')
rf.fit([u.to1D(train_data_n_frames)], [n_frames_reward])  # ttt

# START FITTING
for step in range(10000 - nframes):
    print('---step ' + str(step))
    rf.partial_fit([u.to1D(train_data_n_frames)], [n_frames_reward])
    # forget the oldest
    train_data_n_frames = train_data_n_frames[1:]
    render()
    n_frames_reward -= rewards[0]
    rewards = rewards[1:]
    # try predict all actions
    action = 0  # env.action_space.sample()
    curr_max_reward_for_action = 0.
    before_action_observation = observation
    for try_action in range(env.action_space.n):
        try_data = u.concatNewStep(train_data_n_frames, observation, try_action)
print(features.head(3))
print(submission_features.head(3))

# # Prediction

# In[58]:

X_test = np.asarray(submission_features)[:, :-2]
y_true = np.asarray(submission_features)[:, -2]

clf = SGDRegressor()
y_pred = np.zeros(len(X_test))

local_df = features[features.DATE < df2.DATE[0] - DateOffset(days=3)]
X_train = np.asarray(local_df)[:, :-2]
y_train = np.asarray(local_df)[:, -2]
clf.partial_fit(X_train, y_train)
y_pred[0] = clf.predict([X_test[0]])[0]

for i in trange(1, len(X_test)):
    local_df = features[(features.DATE > df2.DATE[i - 1])
                        & (features.DATE < (df2.DATE[i] - DateOffset(days=3)))]
    X_train = np.asarray(local_df)[:, :-2]
    y_train = np.asarray(local_df)[:, -2]
    if X_train.shape[0] != 0:
        clf.partial_fit(X_train, y_train)
    y_pred[i] = clf.predict([X_test[i]])[0]

# In[59]:

y_pred_round = [int(math.ceil(x)) if x > 0 else 0 for x in y_pred]
# print(y_pred_round)
scores = []
for train_index, test_index in kf:
    logging.info("Training")
    # Train using chunks
    for chunk in chunks(train_index, chunksize):
        X = pd.read_hdf('output/train.h5', 'train', where=pd.Index(chunk))
        X = X.drop('Unnamed: 0', axis=1)
        cols = X.columns.tolist()
        cols.remove('duration')
        y = np.ravel(X['duration'])
        X = X.drop('duration', axis=1)
        clf.partial_fit(X, y)

    logging.info("Validating on holdout fold")
    # Test on the holdout set
    for chunk in chunks(test_index, chunksize):
        X = pd.read_hdf('output/train.h5', 'train', where=pd.Index(chunk))
        X = X.drop('Unnamed: 0', axis=1)
        cols = X.columns.tolist()
        cols.remove('duration')
        y_true = np.ravel(X['duration'])
        X = X.drop('duration', axis=1)
        score = clf.score(X, y_true)
        scores.append(score)
                         batch_size=1,
                         shuffle=True,
                         num_workers=2,
                         pin_memory=True)
validloader = DataLoader(valid_set,
                         batch_size=1,
                         shuffle=False,
                         num_workers=2,
                         pin_memory=True)


def rmse(y, yhat):
    return np.sqrt(np.mean((y - yhat)**2))


if __name__ == "__main__":
    total_loss = 0
    for j, (x, y) in enumerate(trainloader):
        x = x.squeeze().view(-1, c.IMG_SIZE**2).numpy().T
        y = y.squeeze().view(c.IMG_SIZE**2).numpy()
        model.partial_fit(x, y)

    total_loss = 0
    for j, (x, y) in enumerate(validloader):
        x = x.squeeze().view(-1, c.IMG_SIZE**2).numpy().T
        y = y.squeeze().view(c.IMG_SIZE**2).numpy()
        predictions = model.predict(x)
        total_loss += rmse(y, predictions)
    # average over the number of validation batches (j is zero-based)
    print("Test loss:", total_loss / (j + 1))
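# The `model` trained above is not defined in this excerpt. A minimal sketch of what it
# might be, assuming a plain incrementally trained linear regressor (hyperparameters
# here are illustrative assumptions, not the author's settings):
from sklearn.linear_model import SGDRegressor

model = SGDRegressor(penalty="l2", eta0=0.01)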
scaling = StandardScaler()
scaling.fit(polyX)
scaled_X = scaling.transform(polyX)

###################### Partial Fit ####################
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.20, random_state=2)

SGD = SGDRegressor(loss='squared_loss', penalty='l2', alpha=0.0001, l1_ratio=0.15, max_iter=2000)
improvements = list()
for z in range(1000):
    SGD.partial_fit(X_train, y_train)
    improvements.append(mean_squared_error(y_test, SGD.predict(X_test)))

import matplotlib.pyplot as plt
plt.subplot(1, 2, 1)
plt.plot(range(1, 11), np.abs(improvements[:10]), 'o--')
plt.xlabel('Partial fit initial iterations')
plt.ylabel('Test set mean squared error')
plt.subplot(1, 2, 2)
plt.plot(range(100, 1000, 100), np.abs(improvements[100:1000:100]), 'o--')
plt.xlabel('Partial fit ending iterations')
plt.show()