import pickle

# wrenconfig, invokers, and Executor come from a pywren-style serverless
# execution library; util and featurize_multiple_mols are project-local.
def featurize_start(
        infile,
        outfile,
        exp_config,
        exp_name,
):
    config = wrenconfig.default()
    invoker = invokers.AWSBatchInvoker()
    job_max_runtime = 3000

    wrenexec = Executor(invoker, config, job_max_runtime)

    CHUNK_SIZE = exp_config['chunk_size']
    MOL_LIMIT = exp_config['mol_limit']

    df = pickle.load(open(infile, 'rb'))['df']
    print(df.head())

    # Build one argument tuple per chunk of the molecule dataframe.
    arg_list = []
    for df_chunk_i, df_chunk in enumerate(
            util.split_df(df.iloc[:MOL_LIMIT], CHUNK_SIZE)):
        arg_list.append((df_chunk.molecule_id.values,
                         df_chunk.rdmol,
                         df_chunk.conf_idx.values,
                         exp_config['featurizer'],
                         exp_config['args']))

    print("There are", len(arg_list), "chunks")

    # Fan the chunks out to the serverless executor; fs holds the futures.
    # For local debugging one could instead run:
    #   res = list(map(featurize_multiple_mols, arg_list))
    fs = wrenexec.map(featurize_multiple_mols, arg_list)

    pickle.dump({
        'futures': fs,
        'exp_config': exp_config,
        'infile': infile
    }, open(outfile, 'wb'))
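# A minimal sketch of the two helpers featurize_start assumes; the real
# util.split_df and featurize_multiple_mols live elsewhere in the project,
# and the FEATURIZERS registry below is hypothetical, for illustration only.
FEATURIZERS = {}  # hypothetical name -> featurizer-function registry


def split_df(df, chunk_size):
    """Yield successive chunk_size-row slices of a dataframe."""
    for start in range(0, len(df), chunk_size):
        yield df.iloc[start:start + chunk_size]


def featurize_multiple_mols_sketch(arg):
    """Worker mapped over arg_list: featurize every conformer in one chunk."""
    molecule_ids, rdmols, conf_idxs, featurizer_name, featurizer_args = arg
    featurize = FEATURIZERS[featurizer_name]
    return [{'molecule_id': mol_id,
             'conf_idx': conf_idx,
             'feat': featurize(rdmol, conf_idx, **featurizer_args)}
            for mol_id, rdmol, conf_idx
            in zip(molecule_ids, rdmols, conf_idxs)]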
csv_file = CSVFile(
    csv_directory + "rmse.csv",
    headers=["Company Name", "Regressor Name", "RMSE", "MAE"])

# For each company in the DJIA
for company_name, stock_ticker in company_info.items():
    print("Company: " + company_name)

    stock_details_df = pd.read_csv(input_directory +
                                   "{}_20.csv".format(stock_ticker))
    stock_details_df = stock_details_df.set_index("date")
    no_of_features = len(stock_details_df.columns)

    stock_details_df = normalize_every_columns(stock_details_df)
    stock_details_df = stock_details_df.dropna()

    train_df, predict_df = split_df(stock_details_df, 0.2)

    X_train, y_train = convert_to_nn_input(train_df,
                                           look_back=look_back,
                                           look_forward=look_forward)
    X_test, y_test = convert_to_nn_input(predict_df,
                                         look_back=look_back,
                                         look_forward=look_forward)

    model = Sequential()
    model.add(LSTM(10, input_shape=(look_back * no_of_features, 1)))
    model.add(Dense(1, activation='relu'))
    model.compile(optimizer='adam', loss='mse')

    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    history = model.fit(X_train, y_train)  # assumed minimal completion;
                                           # the source call is truncated here
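# A minimal sketch of convert_to_nn_input as the LSTM pipeline above assumes
# it: each sample flattens look_back consecutive rows of features into one
# vector, and the target is one column's value look_forward rows past the
# window. Which column is predicted (target_col) is an assumption here.
import numpy as np


def convert_to_nn_input_sketch(df, look_back, look_forward, target_col=0):
    values = df.values
    X, y = [], []
    for i in range(len(values) - look_back - look_forward + 1):
        X.append(values[i:i + look_back].flatten())  # (look_back * n_features,)
        y.append(values[i + look_back + look_forward - 1, target_col])
    return np.array(X), np.array(y)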
avg_train = []
avg_test = []

for company_name, stock_ticker in company_info.items():
    # print("Company: " + company_name)
    stock_details_df = pd.read_csv(input_directory +
                                   "{}_20.csv".format(stock_ticker))
    stock_details_df = stock_details_df.set_index("date")
    stock_details_df["label"] = stock_details_df[label_target].\
        shift(-look_forward)
    stock_details_df.dropna(inplace=True)

    # Split the time series data with an 80-20 split
    stock_details_df_train, stock_details_df_test = \
        split_df(stock_details_df, 0.8)

    # Fetch training and testing data for the model. No need for
    # validation data
    X_train, y_train = fetch_X_y(stock_details_df_train)
    X_test, y_test = fetch_X_y(stock_details_df_test)

    # Train the model
    training_start_time = time.time()
    regressor.fit(X_train, y_train)
    training_end_time = time.time()

    # Predict values
    testing_start_time = time.time()
    y_test_predictions = regressor.predict(X_test)
    testing_end_time = time.time()
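# A minimal sketch of the pieces the benchmark loop above leaves implicit:
# fetch_X_y (assuming the features are every column except the shifted
# "label") and the RMSE/MAE computation presumably logged per company via
# the csv_file defined earlier. The real helpers may differ.
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error


def fetch_X_y_sketch(df):
    X = df.drop(columns=["label"]).values
    y = df["label"].values
    return X, y


def evaluate_predictions(y_true, y_pred):
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mae = float(mean_absolute_error(y_true, y_pred))
    return rmse, mae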
def run_stuff():
    data_train_loader = torch.utils.data.DataLoader(my_dataset,
                                                    batch_size=BATCH_SIZE,
                                                    pin_memory=True,
                                                    shuffle=True,
                                                    num_workers=16,
                                                    drop_last=True)

    print("BATCH_SIZE=", BATCH_SIZE)
    print("An epoch is", len(my_dataset), "images")

    for epoch in range(5000):  # loop over the dataset multiple times
        total_points = 0
        t1 = time.time()
        running_loss = 0.0
        time_in_inner = 0
        for X_batch, y_batch in tqdm(data_train_loader):
            # move the inputs to the GPU
            t1_inner = time.time()
            X_batch = X_batch.cuda(non_blocking=True)
            y_batch = y_batch.cuda(non_blocking=True)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(X_batch.float())
            loss = criterion(outputs, y_batch.unsqueeze(1))
            loss.backward()
            optimizer.step()

            # accumulate statistics
            running_loss += loss.item() * len(y_batch)
            total_points += X_batch.shape[0]
            time_in_inner += (time.time() - t1_inner)

        t2 = time.time()
        writer.add_scalar("train_loss", running_loss / len(train_idx), epoch)

        if epoch % 1 == 0:  # print every epoch; raise the modulus to print less
            print("epoch {:3d} took {:3.1f}s, time_in_inner={:3.1f}s, "
                  "{:3.1f} img/sec, loss={:3.1f}".format(
                      epoch, t2 - t1, time_in_inner,
                      total_points / (t2 - t1),
                      running_loss / len(train_idx)))

        if epoch % 5 == 0:
            # DEBUG: evaluate on the held-out indices in batch-sized chunks
            test_idx_chunks = util.split_df(test_idx, BATCH_SIZE)
            test_res = []
            for idx in test_idx_chunks:
                X = np.stack([row_feat_to_img(X_data[i]) for i in idx])
                test_est = net(torch.Tensor(
                    proc_x(X)).cuda()).detach().cpu().numpy().flatten()
                test_res.append(test_est)
            test_est = np.concatenate(test_res)
            delta = test_est - Y[test_idx]
            writer.add_scalar("test_std_err", np.std(delta), epoch)
            print("std(delta)={:3.2f}".format(np.std(delta)))

        if epoch % 20 == 0:
            torch.save(net.state_dict(),
                       "network_bench.model.{:08d}".format(epoch))
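# run_stuff relies on module-level state defined elsewhere (my_dataset, net,
# criterion, optimizer, writer, train_idx/test_idx, X_data, Y, proc_x,
# row_feat_to_img). A minimal sketch of that setup, with a hypothetical
# dataset and network purely for illustration:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

BATCH_SIZE = 64
X_fake = torch.randn(1024, 1, 28, 28)       # hypothetical image features
y_fake = torch.randn(1024)                  # hypothetical regression targets
my_dataset = torch.utils.data.TensorDataset(X_fake, y_fake)

net = nn.Sequential(nn.Flatten(),
                    nn.Linear(28 * 28, 64),
                    nn.ReLU(),
                    nn.Linear(64, 1)).cuda()  # output matches y_batch.unsqueeze(1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
writer = SummaryWriter()                    # receives train_loss / test_std_err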