import torch

def train_epoch(rollouts):
    # `actor`, `critic`, their optimizers, `bootstrap`, `normalize` and
    # `discounting` are assumed to be defined at module level.
    actor.train()
    critic.train()
    loss = 0
    total_len = 0
    for states, actions, rewards, next_states in rollouts:
        # 2nd step: use the advantage function, estimated by the critic;
        # bootstrap estimated next-state values with the rewards (TD(1))
        values = critic(states)
        last_state = next_states[-1].unsqueeze(0)
        last_value = critic(last_state).item()
        next_values = bootstrap(rewards, last_value, discounting)
        advantage = normalize(next_values - values).flatten()
        loss_critic = .5 * (advantage ** 2).sum()
        # Get probabilities, shape (episode_length, num_actions),
        # then select only the probabilities of the sampled actions
        probabilities = actor(states)
        probabilities = probabilities[range(states.shape[0]), actions.flatten()]
        # Detach the advantage so the policy gradient does not flow
        # back into the critic
        loss_actor = (-torch.log(probabilities) * advantage.detach()).sum()
        # Accumulate; dividing by total_len below takes the weighted
        # average over all transitions (helps convergence)
        loss += loss_critic + loss_actor
        total_len += states.shape[0]
    loss = loss / total_len
    optim_actor.zero_grad()
    optim_critic.zero_grad()
    loss.backward()
    optim_actor.step()
    optim_critic.step()
    actor.eval()
    critic.eval()
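# A minimal sketch of the `bootstrap` helper that `train_epoch` assumes: it
# rolls discounted returns backwards from a bootstrapped value of the final
# state. Only the name and call signature are taken from the code above; the
# original codebase's implementation may differ.
def bootstrap(rewards, last_value, discounting):
    """Return discounted return targets, seeded with the critic's last value."""
    returns = torch.empty_like(rewards)
    running = last_value
    for t in reversed(range(len(rewards))):
        # G_t = r_t + gamma * G_{t+1}, with G_T bootstrapped from the critic
        running = rewards[t] + discounting * running
        returns[t] = running
    return returns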
import random
# `split_tree`, `bootstrap` and `choose_feature` come from the surrounding module.

def random_forest_classifier(dataset, random_number=None, tree_number=10):
    rf = split_tree.RandomForest()
    end, feature = dataset.shape
    feature = range(feature - 1)
    # number of rows per bootstrap sample: the caller's value, or half the
    # dataset by default (integer division so downstream calls get an int)
    if random_number is None:
        dataset_number = end // 2
    else:
        dataset_number = random_number
    for i in range(tree_number):
        sample_set = bootstrap(dataset, dataset_number)
        # each tree sees a random half of the features
        sample_feature = random.sample(feature, len(feature) // 2)
        tree = choose_feature(sample_set, 0, end // 2, sample_feature)
        rf.Add_Tree(tree)
    return rf
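# A possible implementation of the `bootstrap` sampler used above: draw `n`
# rows from `dataset` with replacement. This is an assumption about the
# helper, not the project's actual code.
import numpy as np

def bootstrap(dataset, n):
    """Return a bootstrap sample of n rows drawn with replacement."""
    idx = np.random.randint(0, dataset.shape[0], size=int(n))
    return dataset[idx]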
def main():
    bootstrap()
  ('/skia-telemetry/skia_telemetry_info_page?', skia_telemetry.TelemetryInfoPage),
  ('/skia-telemetry/skia_try', skia_telemetry.SkiaTryPage),
  ('/skia-telemetry/update_admin_tasks?', skia_telemetry.UpdateAdminTasksPage),
  ('/skia-telemetry/update_chromium_build_tasks?',
   skia_telemetry.UpdateChromiumBuildTasksPage),
  ('/skia-telemetry/update_chromium_try_tasks?',
   skia_telemetry.UpdateChromiumTryTasksPage),
  ('/skia-telemetry/update_skia_try_tasks?',
   skia_telemetry.UpdateSkiaTryTasksPage),
  ('/skia-telemetry/update_telemetry_tasks?',
   skia_telemetry.UpdateTelemetryTasksPage),
  ('/skia-telemetry/update_lua_tasks?', skia_telemetry.UpdateLuaTasksPage),
  ('/skia-telemetry/update_telemetry_info?', skia_telemetry.UpdateInfoPage),
  ('/update_gpu_sheriffs_schedule', sheriff.UpdateGpuSheriffsSchedule),
  ('/update_robocops_schedule', sheriff.UpdateRobocopsSchedule),
  ('/update_sheriffs_schedule', sheriff.UpdateSheriffsSchedule),
  ('/update_troopers_schedule', sheriff.UpdateTroopersSchedule),
]

APPLICATION = webapp.WSGIApplication(URLS, debug=True)

# Do some one-time initializations.
base_page.bootstrap()
builder_status.bootstrap()
commit_queue.bootstrap()
status.bootstrap()
sheriff.bootstrap()
skia_telemetry.bootstrap()
utils.bootstrap()
def cli():
    bootstrap()
from unittest import mock

import numpy as np
# `X`, `z` and `bootstrap` come from the module under test.

def test_bootstrap():
    # Patch the index draw so the "resample" is the identity permutation;
    # the bootstrap subset must then equal the original data.
    with mock.patch("numpy.random.randint", return_value=np.arange(len(X))):
        X_subset, z_subset = bootstrap(X, z, 1)
        assert np.allclose(X_subset, X) and np.allclose(z, z_subset)
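# For context, a `bootstrap(X, z, random_state)` consistent with the test
# above: it draws row indices via numpy.random.randint, so patching that call
# to return arange(len(X)) makes the resample the identity. This is a sketch
# of what the tested function plausibly looks like, not the project's actual
# implementation.
def bootstrap(X, z, random_state):
    np.random.seed(random_state)
    idx = np.random.randint(0, len(X), size=len(X))
    return X[idx], z[idx]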
import copy

import numpy as np
from sklearn.ensemble import RandomForestClassifier
# `extractCSV` and `bootstrap` are helpers from the surrounding module.

def singleDriverTrainer2(file_to_classify, training_files, threshold=0.2,
                         in_model=RandomForestClassifier()):
    """
    Takes in the file path of the driver file we want to classify (the
    target), the paths of the files we will use as our 'noise' files, and
    the input model.

    First, trains the input model on all of the files, with file_to_classify
    as class 1 and training_files as class 0. Then, uses the model to make
    probabilistic predictions on file_to_classify.

    Changes:
    1. Upsamples target data to balance classes for model training
    2. Uses probabilistic predictions to relabel 1s to 0s based on threshold
    """
    # first, grab the target data
    x_target, y_target, id_target = extractCSV(file_to_classify, target=1)
    # remove NAs
    x_target = np.nan_to_num(x_target)
    y_target = np.nan_to_num(y_target)
    # copy target data
    x_target_upsampled = copy.copy(x_target)
    y_target_upsampled = copy.copy(y_target)
    # upsample target to balance classes
    if len(training_files) > 1:
        num_samples = len(x_target_upsampled) * len(training_files)
        x_target_upsampled, y_target_upsampled = bootstrap(
            x_target, y_target, num_samples)
    x_trains = None
    y_trains = None
    # loop through all of our training/noise files, keep separate from target
    for filepath in training_files:
        # open the file
        x_current, y_current, ids = extractCSV(filepath, target=0)
        # and add the contents to our training data
        if x_trains is None or y_trains is None:
            x_trains = x_current
            y_trains = y_current
        else:
            x_trains = np.concatenate((x_trains, x_current))
            y_trains = np.concatenate((y_trains, y_current))
    # remove NAs from train data
    x_trains = np.nan_to_num(x_trains)
    y_trains = np.nan_to_num(y_trains)
    # now combine with target data
    x_all = np.concatenate((x_target_upsampled, x_trains))
    y_all = np.concatenate((y_target_upsampled, y_trains))
    # with all of our data, now we can train our model
    in_model.fit(x_all, y_all)
    # now we are ready to provide class probabilities for our predictions
    predictions = in_model.predict_proba(x_target)
    # note that we must extract the index of the class 1 probability
    prob_idx = np.where(in_model.classes_ == 1)[0][0]
    class_probs = [pred[prob_idx] for pred in predictions]
    # get new data labels by comparing the threshold to the class probabilities
    new_labels = np.array([1 if p > threshold else 0 for p in class_probs])
    # redo the upsampling with the relabeled targets
    if len(training_files) > 1:
        num_samples = len(x_target) * len(training_files)
        x_target_relabeled, y_target_relabeled = bootstrap(
            x_target, new_labels, num_samples)
    else:
        x_target_relabeled = copy.copy(x_target)
        y_target_relabeled = copy.copy(new_labels)
    # combine with the non-target data from before
    x_all_new = np.concatenate((x_target_relabeled, x_trains))
    y_all_new = np.concatenate((y_target_relabeled, y_trains))
    # refit the model
    in_model.fit(x_all_new, y_all_new)
    # provide class probabilities for our predictions
    predictions = in_model.predict_proba(x_target)
    # extract the index of the class 1 probability
    prob_idx = np.where(in_model.classes_ == 1)[0][0]
    class_probs = [pred[prob_idx] for pred in predictions]
    # and return a matrix of the ids and the corresponding probabilities
    return_mat = [[id_target[idx], class_probs[idx]]
                  for idx in range(len(class_probs))]
    # report
    print('completed driver %s' % file_to_classify)
    return np.asarray(return_mat)
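# Hypothetical usage of singleDriverTrainer2: the paths and glob pattern below
# are placeholders for illustration, not files from the original project.
from glob import glob

noise_files = glob('drivers/noise/*.csv')       # placeholder paths
probs = singleDriverTrainer2(
    'drivers/target/driver_001.csv',            # placeholder path
    noise_files,
    threshold=0.2,
    in_model=RandomForestClassifier(n_estimators=100))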
import random
# `utils`, `test_remainder_stratified`, `train_test_tree` and `classify_tdidt`
# come from the surrounding module.

def random_forest(N, M, F, table, attr_indexes, attr_domains, class_index, strat_index):
    random.shuffle(table)
    test, remainder = test_remainder_stratified(table, strat_index)
    boot_samples = []
    attr_subsets = []
    trees = []
    # set up the bootstrap samples
    for _ in range(N):
        attr_subsets.append(utils.rand_attributes(attr_indexes, F))
        boot = utils.bootstrap(remainder)
        # build the validation set from the out-of-bag items
        valid = [item for item in remainder if item not in boot]
        boot_samples.append([boot, valid])
    # build the trees
    for i in range(N):
        # returns predictions, tree
        pred, tree = train_test_tree(boot_samples[i][0], boot_samples[i][1],
                                     attr_subsets[i], attr_domains, class_index)
        correct = 0
        for j in range(len(boot_samples[i][1])):
            if boot_samples[i][1][j][class_index] == pred[j]:
                correct += 1
        trees.append([tree, utils.div(correct, len(boot_samples[i][1]))])
    # keep the M most accurate trees
    trees.sort(key=lambda x: x[1])
    mtrees = trees[len(trees) - M:]
    # predict and determine accuracy
    print(" grouping test set")
    minutes, groups = utils.groupBy(test, 1)
    print(" running classifier")
    accuracies = []
    overall_correct = 0
    total_instance = len(test)
    for count in range(len(minutes)):
        correct = 0
        for item in groups[count]:
            votes = [classify_tdidt(tree[0], item) for tree in mtrees]
            vote = utils.majority_vote(votes)
            if item[class_index] == vote:
                correct += 1
                overall_correct += 1
        accuracies.append([minutes[count], correct / len(groups[count]),
                           correct, len(groups[count])])
    print("Sorting accuracies")
    accuracies.sort(key=lambda x: x[0])
    for item in accuracies:
        print('Minute: ', item[0])
        print(' Accuracy: ', item[1])
        print(' Correct: ', item[2])
        print(' Instances: ', item[3])
        print()
    print("Overall Accuracy: ", overall_correct / total_instance)
    print("Instances: ", total_instance)
    print("Correct: ", overall_correct)
    return accuracies
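# A sketch of the `utils.bootstrap(remainder)` call assumed above: sample
# len(table) rows with replacement, so the items never drawn form the
# out-of-bag validation set built right after. Assumed, not the actual
# utils module.
def bootstrap(table):
    return [random.choice(table) for _ in range(len(table))]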
import numpy as np
from sklearn.model_selection import train_test_split
# `bootstrap` and `ci` are helpers from the surrounding project.

def resample(models, lmd, X, z, nboots, split_size=0.2):
    # Dictionaries to keep track of the results
    z_test = {"ridge": [], "lasso": [], "ols": []}
    z_pred_test = {"ridge": [], "lasso": [], "ols": []}
    bias = {"ridge": [], "lasso": [], "ols": []}
    var = {"ridge": [], "lasso": [], "ols": []}
    beta = {"ridge": [], "lasso": [], "ols": []}
    mse_test = {"ridge": [], "lasso": [], "ols": []}
    # r2_test = {"ridge": [], "lasso": [], "ols": []}
    mse_train = {"ridge": [], "lasso": [], "ols": []}
    # r2_train = {"ridge": [], "lasso": [], "ols": []}

    np.random.seed(2018)

    # Split the data into train and test sets
    X_train, X_test, z_train, z_test_ = train_test_split(
        X, z, test_size=split_size)

    # # extract data from design matrix
    # x = X[:, 1]
    # y = X[:, 2]
    # x_test = X_test[:, 1]
    # y_test = X_test[:, 2]

    for name, model in models.items():
        # create a model with the previously found best lmd
        estimator = model(lmd[name])
        # train a model for this pair of lambda and random state,
        # keeping information for the test set
        estimator.fit(X_train, z_train)
        z_pred_test_ = np.empty((z_test_.shape[0], nboots))
        z_pred_train_ = np.empty((z_train.shape[0], nboots))
        beta_ = np.empty((X.shape[1], nboots))
        for i in range(nboots):
            # i is now also the random state for the bootstrap
            X_, z_ = bootstrap(X_train, z_train, i)
            estimator.fit(X_, z_)
            # evaluate the new model on the same test data each time
            z_pred_test_[:, i] = np.squeeze(estimator.predict(X_test))
            z_pred_train_[:, i] = np.squeeze(estimator.predict(X_train))
            beta_[:, i] = np.squeeze(estimator.coef_)
        beta[name] = beta_
        z_pred_test[name] = z_pred_test_
        z_test_ = z_test_.reshape((z_test_.shape[0], 1))
        z_test[name] = z_test_
        mse_test[name] = np.mean(
            np.mean((z_test_ - z_pred_test_) ** 2, axis=1, keepdims=True))
        bias[name] = np.mean(
            (z_test_ - np.mean(z_pred_test_, axis=1, keepdims=True)) ** 2)
        var[name] = np.mean(np.var(z_pred_test_, axis=1, keepdims=True))
        z_train = z_train.reshape((z_train.shape[0], 1))
        mse_train[name] = np.mean(
            np.mean((z_train - z_pred_train_) ** 2, axis=1, keepdims=True))

    # print('Error:', mse_test)
    # print('Bias^2:', bias)
    # print('Var:', var)
    # print('{} >= {} + {} = {}'.format(mse_test, bias, var, bias + var))

    # plt.figure(1, figsize=(11, 7))
    # plt.subplot(121)
    # plt.plot(x, z, label='f')
    # plt.scatter(x_test, z_test, label='Data points')
    # plt.scatter(x_test, np.mean(z_pred, axis=1), label='Pred')
    # plt.legend()
    # plt.xlabel('x')
    # plt.ylabel('z')
    #
    # plt.subplot(122)
    # plt.plot(y, z, label='f')
    # plt.scatter(y_test, z_test, label='Data points')
    # plt.scatter(y_test, np.mean(z_pred, axis=1), label='Pred')
    # plt.legend()
    # plt.xlabel('y')
    # plt.ylabel('z')
    # plt.show()

    # Confidence intervals for the bootstrapped coefficients
    ci_beta = np.empty((2, beta_.shape[0]))
    poly = []
    for p in range(beta_.shape[0]):
        ci_beta[:, p] = np.array(ci(beta_[p, :])).T
        poly.append(p)

    # plt.plot(poly, ci_beta[0, :], label='Upper CI (95%)')  # --> show in a table
    # plt.plot(poly, np.mean(beta, axis=1), label='Beta')
    # plt.plot(poly, ci_beta[1, :], label='Lower CI (95%)')
    # plt.legend()
    # plt.show()

    return z_test, z_pred_test, bias, var, beta, mse_test, mse_train, ci_beta
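# A plausible `ci` helper for the confidence-interval loop above: a 95%
# percentile interval over the bootstrapped coefficients, returned as
# (upper, lower) to match the row labels in the commented plot. The real
# helper may differ; only the name and call shape are taken from the code.
def ci(samples, level=95):
    """Return (upper, lower) percentile bounds for a 1-D sample array."""
    alpha = (100 - level) / 2
    return np.percentile(samples, 100 - alpha), np.percentile(samples, alpha)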
import pickle
import sys
from functools import partial

from utils import breakout_strategy, bootstrap, m_proc

sys.path.append('/home/tomek/ib_tools/')
from datastore_pytables import Store  # noqa

store = Store()
contract = store.read('/cont/min/NQ_20191220_GLOBEX_USD').sort_index()
table = bootstrap(contract, start='20180701', end='20181231', paths=100)

func = partial(
    breakout_strategy,
    time_int=30,
    periods=[5, 10, 20, 40, 80, 160],
    ema_fast=10,
    ema_slow=120,
    atr_periods=80,
    sl_atr=1,
)

results = m_proc(table, func)