Example 1
    def prepare_data(self):
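        # choose the saved scaler artifact that matches the training configuration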
        if self.hparams.combined:
            standardize = "./saved_models/scaler_combined.pkl"
        else:
            standardize = "./saved_models/scaler.pkl"

        self.train_data = CSVDataset("../data/expanded/",
                                     standardize=standardize)
        self.validation_data = CSVDataset(
            "../data/expanded/",
            csv_features="dev_features.csv",
            csv_labels="dev_labels.csv",
            standardize=standardize,
        )

        self.combined_data = torch.utils.data.ConcatDataset(
            [self.train_data, self.validation_data])
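These attributes pair with Lightning's dataloader hooks. A minimal sketch, assuming this snippet lives in a LightningModule or LightningDataModule and that the batch size is carried in hparams (that attribute name is an assumption, not shown in the original):

    def train_dataloader(self):
        # self.hparams.batch_size is assumed; substitute the real config value
        return torch.utils.data.DataLoader(self.train_data,
                                           batch_size=self.hparams.batch_size,
                                           shuffle=True)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(self.validation_data,
                                           batch_size=self.hparams.batch_size)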
Example 2
import numpy as np
from torch.utils.data import DataLoader, SubsetRandomSampler


def data_loaders(dev_per=0.2, batch_size=8):
    csv_dataset = CSVDataset("x.csv", "y.csv")
    # train/dev split: hold out dev_per of the rows for validation
    indices = list(range(len(csv_dataset)))
    split = int(dev_per * len(csv_dataset))
    validation_idx = np.random.choice(indices, size=split, replace=False)
    train_idx = list(set(indices) - set(validation_idx))
    train_sampler = SubsetRandomSampler(train_idx)
    validation_sampler = SubsetRandomSampler(validation_idx)
    train_loader = DataLoader(dataset=csv_dataset,
                              batch_size=batch_size,
                              sampler=train_sampler)
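    # the entire dev split is served as a single batch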
    validation_loader = DataLoader(dataset=csv_dataset,
                                   batch_size=len(validation_idx),
                                   sampler=validation_sampler)
    return train_loader, validation_loader
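A quick usage sketch, assuming the dataset yields (features, labels) pairs as the other examples suggest:

train_loader, validation_loader = data_loaders(dev_per=0.2, batch_size=8)
for features, labels in train_loader:
    print(features.shape, labels.shape)  # one mini-batch of training data
    break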
Example 3
from torch.utils.tensorboard import SummaryWriter


def main():
    args = get_args()

    dataset = CSVDataset(args.csv_file, args.root_dir, args.cols,
                         args.preprocess_fn, args.state, args.next_state,
                         args.reward, args.info, args.done)

    agent = ValueAgentDataset(args.network, dataset, args.batch_size,
                              args.gamma, args.device, args.optimizer, args.lr,
                              args.hidden_dim, args.criterion, args.update)

    summary = SummaryWriter(args.summary_dir)
    # summary.add_hparams()

    num_iter = 0
    for epoch in range(args.num_epochs):

        value_loss, value_mean, value_std = agent.train()
        summary.add_scalar('Loss/Value', value_loss, num_iter)
        summary.add_scalar('Stats/Value Mean', value_mean, num_iter)
        summary.add_scalar('Stats/Value Std', value_std, num_iter)
        num_iter += 1
    summary.close()
Example 4
        op.append((pv.item(), o1[2].item(), o2[2].item()))
    elapsed = time.time() - start

    # replay the loader to pair each prediction with its true label
    n = 0
    for data in loader:
        n += 1
        label = data[1].item()
        ac = op.pop(0)
        px = ac[0]
        print("{:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.4f}".format(
            label, px, ac[1], ac[2], abs(px - label)))

    print('Done Testing in', int(1000000 * elapsed / n), "µs per px")



if __name__ == "__main__":

    model = Net().to(device)
    model.load_state_dict(torch.load("model.pt"))
    model.eval()

    df_test = pd.read_csv('pricing-test.csv')
    test_labels = df_test.iloc[:, 0]  # first column is the label
    test_data = df_test.iloc[:, 1:]   # remaining columns are the features

    test_dataset = CSVDataset(test_data, labels=test_labels, device=device)
    test_loader = DataLoader(test_dataset, batch_size=1)

    test(model, test_loader)
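The class itself is not shown in these snippets. A minimal sketch that would satisfy the CSVDataset(data, labels=..., device=...) call above (the tensor dtypes and eager device transfer are assumptions, not the original implementation):

import torch
from torch.utils.data import Dataset


class CSVDataset(Dataset):
    """Sketch: wraps a DataFrame of features and a Series of labels."""

    def __init__(self, data, labels, device="cpu"):
        # convert eagerly to tensors and move everything to the target device
        self.features = torch.tensor(data.values, dtype=torch.float32).to(device)
        self.labels = torch.tensor(labels.values, dtype=torch.float32).to(device)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # yields (features, label), matching the data[1] access in the test loop
        return self.features[idx], self.labels[idx]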

Example 5
    parser.add_argument("--do_train", action="store_true", help="Do training")
    parser.add_argument("--do_eval", action="store_true", help="Eval test")

    args = parser.parse_args()
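    # force both phases on, overriding whatever was passed on the command line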
    args.do_train = True
    args.do_eval = True

    train_path = f"data/{args.dataset_name}/train.csv"
    test_path = f"data/{args.dataset_name}/test.csv"
    save_path = f"tmp/mlp_{args.dataset_name}"

    numeric_feature_names = get_numeric_feature_names(train_path)

    train_data = CSVDataset(train_path,
                            numeric_feature_names=numeric_feature_names,
                            label_name="target",
                            is_reg=args.task == "reg")
    dev_data = CSVDataset(test_path,
                          numeric_feature_names=numeric_feature_names,
                          label_name="target",
                          is_reg=args.task == "reg")

    if args.do_train:
        model = MLP(len(numeric_feature_names),
                    train_data.num_label if args.task == "cls" else 1,
                    task=args.task,
                    hidden_units=[128, 64, 32],
                    device=args.device)
        trainer = Trainer(train_data,
                          model,
                          dev_data=dev_data,
Example 6
from mlp import MLP
from trainer import Trainer
from csv_dataset import CSVDataset  # module path assumed; adjust to the project layout

# here we use a test set that was sampled as 10% of the original iris dataset.
train_path = "./data/iris/train.csv"
test_path = "./data/iris/test.csv"

save_path = "tmp/mlp_iris"  # where the model's checkpoints are saved to

numeric_feature_names = [
    'sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
    'petal width (cm)'
]
# load train data and dev data
train_data = CSVDataset(train_path,
                        numeric_feature_names=numeric_feature_names,
                        label_name="target")
dev_data = CSVDataset(test_path,
                      numeric_feature_names=numeric_feature_names,
                      label_name="target")
# initialize a model
model = MLP(len(numeric_feature_names),
            train_data.num_label,
            hidden_units=[64, 32, 16],
            device="cpu")
# initialize a trainer
trainer = Trainer(train_data,
                  model,
                  dev_data=dev_data,
                  eval_on="accuracy",
                  loss_fn="ce",
Example 7
            # set the parameter gradients to zero
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            # propagate the loss backward
            loss.backward()
            # update the parameters using the computed gradients
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()
        print('Epoch', e + 1, 'loss:', running_loss / len(loader))

    print('Done Training')


if __name__ == "__main__":
    model = Net().to(device)

    df_train = pd.read_csv('pricing.csv', dtype=np.float32)
    train_labels = df_train.iloc[:, 0]
    train_data = df_train.iloc[:, 1:]

    train_dataset = CSVDataset(train_data, labels=train_labels, device=device)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    train(model, train_loader)

    torch.save(model.state_dict(), "model.pt")
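
The train function above starts mid-loop; criterion, optimizer, scheduler, and running_loss are set up outside the excerpt. A plausible preamble, as a sketch only (the loss, optimizer, and scheduler choices below are assumptions, not the original code):

def train(net, loader, num_epochs=20):
    criterion = torch.nn.MSELoss()  # price regression, so a squared-error loss
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    for e in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in loader:
            ...  # the loop body shown in the excerpt above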