import pickle

# wrenconfig, invokers, and Executor come from a pywren-style serverless
# execution library; util and featurize_multiple_mols are project-local.
def featurize_start(
        infile,
        outfile,
        exp_config,
        exp_name,
):
    config = wrenconfig.default()
    invoker = invokers.AWSBatchInvoker()
    job_max_runtime = 3000

    wrenexec = Executor(invoker, config, job_max_runtime)

    CHUNK_SIZE = exp_config['chunk_size']
    MOL_LIMIT = exp_config['mol_limit']

    df = pickle.load(open(infile, 'rb'))['df']
    print(df.head())

    # Build one argument tuple per chunk of the molecule dataframe.
    arg_list = []
    for df_chunk_i, df_chunk in enumerate(
            util.split_df(df.iloc[:MOL_LIMIT], CHUNK_SIZE)):
        arg_list.append((df_chunk.molecule_id.values,
                         df_chunk.rdmol,
                         df_chunk.conf_idx.values,
                         exp_config['featurizer'],
                         exp_config['args']))

    print("There are", len(arg_list), "chunks")

    # Fan the chunks out to the serverless executor; fs holds the futures.
    # For local debugging one could instead run:
    #   res = list(map(featurize_multiple_mols, arg_list))
    fs = wrenexec.map(featurize_multiple_mols, arg_list)

    pickle.dump({
        'futures': fs,
        'exp_config': exp_config,
        'infile': infile
    }, open(outfile, 'wb'))
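# A minimal sketch of the two helpers featurize_start assumes; the real
# util.split_df and featurize_multiple_mols live elsewhere in the project,
# and the FEATURIZERS registry below is hypothetical, for illustration only.
FEATURIZERS = {}  # hypothetical name -> featurizer-function registry


def split_df(df, chunk_size):
    """Yield successive chunk_size-row slices of a dataframe."""
    for start in range(0, len(df), chunk_size):
        yield df.iloc[start:start + chunk_size]


def featurize_multiple_mols_sketch(arg):
    """Worker mapped over arg_list: featurize every conformer in one chunk."""
    molecule_ids, rdmols, conf_idxs, featurizer_name, featurizer_args = arg
    featurize = FEATURIZERS[featurizer_name]
    return [{'molecule_id': mol_id,
             'conf_idx': conf_idx,
             'feat': featurize(rdmol, conf_idx, **featurizer_args)}
            for mol_id, rdmol, conf_idx
            in zip(molecule_ids, rdmols, conf_idxs)]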
csv_file = CSVFile(
    csv_directory + "rmse.csv",
    headers=["Company Name", "Regressor Name", "RMSE", "MAE"])

# For each company in the DJIA
for company_name, stock_ticker in company_info.items():
    print("Company: " + company_name)

    stock_details_df = pd.read_csv(input_directory +
                                   "{}_20.csv".format(stock_ticker))
    stock_details_df = stock_details_df.set_index("date")
    no_of_features = len(stock_details_df.columns)

    stock_details_df = normalize_every_columns(stock_details_df)
    stock_details_df = stock_details_df.dropna()

    train_df, predict_df = split_df(stock_details_df, 0.2)

    X_train, y_train = convert_to_nn_input(train_df,
                                           look_back=look_back,
                                           look_forward=look_forward)
    X_test, y_test = convert_to_nn_input(predict_df,
                                         look_back=look_back,
                                         look_forward=look_forward)

    model = Sequential()
    model.add(LSTM(10, input_shape=(look_back * no_of_features, 1)))
    model.add(Dense(1, activation='relu'))
    model.compile(optimizer='adam', loss='mse')

    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    history = model.fit(X_train, y_train)  # assumed minimal completion;
                                           # the source call is truncated here
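# A minimal sketch of convert_to_nn_input as the LSTM pipeline above assumes
# it: each sample flattens look_back consecutive rows of features into one
# vector, and the target is one column's value look_forward rows past the
# window. Which column is predicted (target_col) is an assumption here.
import numpy as np


def convert_to_nn_input_sketch(df, look_back, look_forward, target_col=0):
    values = df.values
    X, y = [], []
    for i in range(len(values) - look_back - look_forward + 1):
        X.append(values[i:i + look_back].flatten())  # (look_back * n_features,)
        y.append(values[i + look_back + look_forward - 1, target_col])
    return np.array(X), np.array(y)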
avg_train = []
avg_test = []

for company_name, stock_ticker in company_info.items():
    # print("Company: " + company_name)
    stock_details_df = pd.read_csv(input_directory +
                                   "{}_20.csv".format(stock_ticker))
    stock_details_df = stock_details_df.set_index("date")
    stock_details_df["label"] = stock_details_df[label_target].\
        shift(-look_forward)
    stock_details_df.dropna(inplace=True)

    # Split the time series data with an 80-20 split
    stock_details_df_train, stock_details_df_test = \
        split_df(stock_details_df, 0.8)

    # Fetch training and testing data for the model. No need for
    # validation data
    X_train, y_train = fetch_X_y(stock_details_df_train)
    X_test, y_test = fetch_X_y(stock_details_df_test)

    # Train the model
    training_start_time = time.time()
    regressor.fit(X_train, y_train)
    training_end_time = time.time()

    # Predict values
    testing_start_time = time.time()
    y_test_predictions = regressor.predict(X_test)
    testing_end_time = time.time()
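# A minimal sketch of the pieces the benchmark loop above leaves implicit:
# fetch_X_y (assuming the features are every column except the shifted
# "label") and the RMSE/MAE computation presumably logged per company via
# the csv_file defined earlier. The real helpers may differ.
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error


def fetch_X_y_sketch(df):
    X = df.drop(columns=["label"]).values
    y = df["label"].values
    return X, y


def evaluate_predictions(y_true, y_pred):
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mae = float(mean_absolute_error(y_true, y_pred))
    return rmse, mae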
def run_stuff():
    data_train_loader = torch.utils.data.DataLoader(my_dataset,
                                                    batch_size=BATCH_SIZE,
                                                    pin_memory=True,
                                                    shuffle=True,
                                                    num_workers=16,
                                                    drop_last=True)

    print("BATCH_SIZE=", BATCH_SIZE)
    print("An epoch is", len(my_dataset), "images")

    for epoch in range(5000):  # loop over the dataset multiple times
        total_points = 0
        t1 = time.time()
        running_loss = 0.0
        time_in_inner = 0
        for X_batch, y_batch in tqdm(data_train_loader):
            # move the inputs to the GPU
            t1_inner = time.time()
            X_batch = X_batch.cuda(non_blocking=True)
            y_batch = y_batch.cuda(non_blocking=True)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(X_batch.float())
            loss = criterion(outputs, y_batch.unsqueeze(1))
            loss.backward()
            optimizer.step()

            # accumulate statistics
            running_loss += loss.item() * len(y_batch)
            total_points += X_batch.shape[0]
            time_in_inner += (time.time() - t1_inner)

        t2 = time.time()
        writer.add_scalar("train_loss", running_loss / len(train_idx), epoch)

        if epoch % 1 == 0:  # print every epoch; raise the modulus to print less
            print("epoch {:3d} took {:3.1f}s, time_in_inner={:3.1f}s, "
                  "{:3.1f} img/sec, loss={:3.1f}".format(
                      epoch, t2 - t1, time_in_inner,
                      total_points / (t2 - t1),
                      running_loss / len(train_idx)))

        if epoch % 5 == 0:
            # DEBUG: evaluate on the held-out indices in batch-sized chunks
            test_idx_chunks = util.split_df(test_idx, BATCH_SIZE)
            test_res = []
            for idx in test_idx_chunks:
                X = np.stack([row_feat_to_img(X_data[i]) for i in idx])
                test_est = net(torch.Tensor(
                    proc_x(X)).cuda()).detach().cpu().numpy().flatten()
                test_res.append(test_est)
            test_est = np.concatenate(test_res)
            delta = test_est - Y[test_idx]
            writer.add_scalar("test_std_err", np.std(delta), epoch)
            print("std(delta)={:3.2f}".format(np.std(delta)))

        if epoch % 20 == 0:
            torch.save(net.state_dict(),
                       "network_bench.model.{:08d}".format(epoch))
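# run_stuff relies on module-level state defined elsewhere (my_dataset, net,
# criterion, optimizer, writer, train_idx/test_idx, X_data, Y, proc_x,
# row_feat_to_img). A minimal sketch of that setup, with a hypothetical
# dataset and network purely for illustration:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

BATCH_SIZE = 64
X_fake = torch.randn(1024, 1, 28, 28)       # hypothetical image features
y_fake = torch.randn(1024)                  # hypothetical regression targets
my_dataset = torch.utils.data.TensorDataset(X_fake, y_fake)

net = nn.Sequential(nn.Flatten(),
                    nn.Linear(28 * 28, 64),
                    nn.ReLU(),
                    nn.Linear(64, 1)).cuda()  # output matches y_batch.unsqueeze(1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
writer = SummaryWriter()                    # receives train_loss / test_std_err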