Example #1
def _reading_data():
    print(config.USER)

    # Step 2: load the data.
    # load_data covers:
    #   - reading the raw data
    #   - preprocessing it
    #   - applying any special data-cleaning steps
    trainFilepath = os.path.join(os.getcwd(), "data", config.FILENAME)
    trainDataLoader = DataLoader(trainFilepath)
    train_data = trainDataLoader.load_data(useSpark=False, interactive=False)

    train_data.save_data(os.getcwd())
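The DataLoader in Example #1 is a project-specific class, not torch.utils.data.DataLoader: it takes a file path, exposes load_data(useSpark=..., interactive=...), and returns an object with save_data(). A minimal pandas-based sketch of that shape, with purely illustrative names and behaviour:

import os
import pandas as pd

class CleanedData:
    """Illustrative result object exposing save_data()."""
    def __init__(self, frame):
        self.frame = frame

    def save_data(self, out_dir):
        self.frame.to_csv(os.path.join(out_dir, 'train_clean.csv'), index=False)

class SimpleDataLoader:
    """Illustrative stand-in for Example #1's DataLoader."""
    def __init__(self, filepath):
        self.filepath = filepath

    def load_data(self, useSpark=False, interactive=False):
        # Only the plain-pandas path is sketched; the Spark and interactive
        # branches of the real loader are not reproduced here.
        frame = pd.read_csv(self.filepath)
        return CleanedData(frame)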
Example #2
            }, os.path.join(args.exp_dir, 'unfinished_model.pt'))
        epoch += 1

    cost_time = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(cost_time // 60, cost_time % 60))
    print('Best Train Acc is {:.4f}'.format(best_train_acc))
    print('Best Val Acc is {:.4f}'.format(best_acc))
    model.load_state_dict(best_model)
    return model, cost_time, best_acc, best_train_acc


if __name__ == '__main__':
    print('DataSets: ' + args.dataset)
    print('ResNet Depth: ' + str(args.depth))
    loader = DataLoader(args.dataset, batch_size=args.batch_size)
    dataloaders,dataset_sizes = loader.load_data()
    num_classes = 10
    if args.dataset == 'cifar-10':
        num_classes = 10
    elif args.dataset == 'cifar-100':
        num_classes = 100

    model = resnet_cifar(depth=args.depth, num_classes=num_classes)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                momentum=0.9, nesterov=True, weight_decay=1e-4)

    # define the loss and the learning-rate schedule
    criterion = nn.CrossEntropyLoss()
    scheduler = MultiStepLR(optimizer,
                            milestones=[int(args.epoch * m) for m in (0.4, 0.6, 0.8)],
                            gamma=0.1)

    use_gpu = torch.cuda.is_available()
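For concreteness, the MultiStepLR above cuts the learning rate by a factor of 10 at 40%, 60%, and 80% of the epoch budget. A minimal, self-contained sketch of that schedule, assuming a 200-epoch run and a toy model in place of the script's resnet_cifar:

import torch
from torch import nn
from torch.optim.lr_scheduler import MultiStepLR

epochs = 200                      # assumed budget, stands in for args.epoch
model = nn.Linear(10, 10)         # toy model, stands in for resnet_cifar
optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                            momentum=0.9, nesterov=True, weight_decay=1e-4)
scheduler = MultiStepLR(optimizer,
                        milestones=[int(epochs * m) for m in (0.4, 0.6, 0.8)],
                        gamma=0.1)

for epoch in range(epochs):
    # ... the per-batch training of one epoch would go here ...
    optimizer.step()              # stand-in for the per-batch updates
    scheduler.step()

# Effective schedule: lr = 0.1 for epochs 0-79, 0.01 for 80-119,
# 0.001 for 120-159, and 0.0001 for 160-199.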
Example #3
        epoch += 1

    cost_time = time.time() - since
    print('Training complete in {:.0f}h{:.0f}m{:.0f}s'.format(
        (cost_time // 60) // 60, (cost_time // 60) % 60, cost_time % 60))

    return model, cost_time, best_acc, best_train_acc


if __name__ == '__main__':

    loader = DataLoader(args.dataset,
                        batch_size=args.batch_size,
                        seed=args.seed)
    dataloaders, dataset_sizes = loader.load_data(args.img_size)

    num_classes = 10
    if args.dataset == 'cifar-10':
        num_classes = 10
    elif args.dataset == 'cifar-100':
        num_classes = 100
    elif args.dataset == 'VOCpart':
        num_classes = len(dataloaders['train'].dataset.classes)

    assert args.img_size == 128, 'only supports --img_size 128'
    model = resnet_std(depth=args.depth,
                       num_classes=num_classes,
                       ifmask=args.ifmask,
                       pretrained=True)
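The per-dataset class counts in Examples #2 and #3 can also be written as a lookup with a dataset-derived fallback. A small sketch; only the dataset names are taken from the examples:

NUM_CLASSES = {'cifar-10': 10, 'cifar-100': 100}

def infer_num_classes(dataset_name, dataloaders, default=10):
    """Prefer a known count, otherwise read it from the training split."""
    if dataset_name in NUM_CLASSES:
        return NUM_CLASSES[dataset_name]
    train_set = dataloaders['train'].dataset
    return len(getattr(train_set, 'classes', [])) or default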
Example #4
def Run_SRNN_NormalCase(args, no_dataset):

    data_path, graph_path = Data_path(no_dataset)
    log_path = Log_path(no_dataset)

    # Construct the DataLoader object that loads data
    dataloader = DataLoader(args)
    dataloader.load_data(data_path)

    # Construct the ST-graph object that reads graph
    stgraph = ST_GRAPH(args)
    stgraph.readGraph(dataloader.num_sensor, graph_path)

    # Initialize net
    net = SRNN(args)
    net.setStgraph(stgraph)

    print('- Number of trainable parameters:',
          sum(p.numel() for p in net.parameters() if p.requires_grad))

    # optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    # optimizer = torch.optim.RMSprop(net.parameters(), lr=args.learning_rate, momentum=0.0001, centered=True)
    optimizer = torch.optim.Adagrad(net.parameters())

    best_eval_loss = float('inf')
    best_epoch = 0

    print('')
    print('---- Train and Evaluation ----')

    eval_loss_res = np.zeros((args.num_epochs + 1, 2))
    for e in range(args.num_epochs):
        epoch = e + 1

        ####  Training ####
        print('-- Training, epoch {}/{}'.format(epoch, args.num_epochs))
        loss_epoch = 0

        # For each batch
        for b in range(dataloader.num_batches_train):
            batch = b + 1
            start = time.time()

            # Get batch data
            x = dataloader.next_batch_train()

            # Loss for this batch
            loss_batch = 0

            # For each sequence in the batch
            for sequence in range(dataloader.batch_size):

                # load the sequence's node and edge features into the graph
                stgraph.putSequenceData(x[sequence])

                # pull out the tensors to feed to the network
                data_nodes, data_temporalEdges, data_spatialEdges = stgraph.getSequenceData()

                # run the sequence through the network
                loss_output, data_nodes, outputs = forward(
                    net, optimizer, args, stgraph, data_nodes,
                    data_temporalEdges, data_spatialEdges)
                loss_output.backward()
                loss_batch += loss_RMSE(data_nodes[-1], outputs[-1],
                                        dataloader.scaler)

                # Clip gradients
                torch.nn.utils.clip_grad_norm_(net.parameters(),
                                               args.grad_clip)

                # Update parameters
                optimizer.step()

            end = time.time()
            loss_batch = loss_batch / dataloader.batch_size
            loss_epoch += loss_batch

            print('Train: {}/{}, train_loss = {:.3f}, time/batch = {:.3f}'.
                  format(e * dataloader.num_batches_train + batch,
                         args.num_epochs * dataloader.num_batches_train,
                         loss_batch, end - start))
        # Compute loss for the entire epoch
        loss_epoch /= dataloader.num_batches_train
        print('(epoch {}), train_loss = {:.3f}'.format(epoch, loss_epoch))

        # Save the model after each epoch
        save_path = Save_path(no_dataset, epoch)
        print('Saving model to ' + save_path)
        torch.save(
            {
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, save_path)

        #### Evaluation ####
        print('-- Evaluation, epoch {}/{}'.format(epoch, args.num_epochs))
        loss_epoch = 0
        for b in range(dataloader.num_batches_eval):
            batch = b + 1
            start = time.time()

            # Get batch data
            x = dataloader.next_batch_eval()

            # Loss for this batch
            loss_batch = 0

            for sequence in range(dataloader.batch_size):

                # load the sequence's node and edge features into the graph
                stgraph.putSequenceData(x[sequence])

                # pull out the tensors to feed to the network
                data_nodes, data_temporalEdges, data_spatialEdges = stgraph.getSequenceData()

                # run the sequence through the network (no backward pass in evaluation)
                _, data_nodes, outputs = forward(net, optimizer, args, stgraph,
                                                 data_nodes,
                                                 data_temporalEdges,
                                                 data_spatialEdges)

                loss_batch += loss_RMSE(data_nodes[-1], outputs[-1],
                                        dataloader.scaler)

            end = time.time()
            loss_batch = loss_batch / dataloader.batch_size
            loss_epoch += loss_batch

            print(
                'Eval: {}/{}, eval_loss = {:.3f}, time/batch = {:.3f}'.format(
                    e * dataloader.num_batches_eval + batch,
                    args.num_epochs * dataloader.num_batches_eval, loss_batch,
                    end - start))
        loss_epoch /= dataloader.num_batches_eval
        eval_loss_res[e] = (epoch, loss_epoch)

        # Update best validation loss until now
        if loss_epoch < best_eval_loss:
            best_eval_loss = loss_epoch
            best_epoch = epoch

        print('(epoch {}), eval_loss = {:.3f}'.format(epoch, loss_epoch))

    # Record the best epoch and best validation loss overall
    print('Best epoch: {}, Best evaluation loss {:.3f}'.format(
        best_epoch, best_eval_loss))
    eval_loss_res[-1] = (best_epoch, best_eval_loss)
    np.savetxt(log_path, eval_loss_res, fmt='%d, %.3f')
    print('- Eval results have been saved to', log_path)
    print('')
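The checkpoints written in Example #4 store the epoch, model weights, and optimizer state under the keys shown above. A minimal resume sketch built only on those keys and standard PyTorch calls (net, optimizer, and Save_path come from the example):

import torch

def resume(net, optimizer, checkpoint_path):
    """Restore a run from a checkpoint produced by the loop above."""
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    net.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch']    # last completed epoch

# e.g. start_epoch = resume(net, optimizer, Save_path(no_dataset, 10)) + 1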
Example #5
if __name__ == "__main__":
    # Ensure exactly 3 arguments
    if len(sys.argv) != 3:
        print(
            'USAGE: python inspection.py TRAIN_INPUT_FILE INSPECTION_OUT_FILE')
        sys.exit(1)

    TRAIN_INPUT_FILE = sys.argv[1]
    INSPECTION_OUT_FILE = sys.argv[2]

    # Check the input file type
    if not TRAIN_INPUT_FILE.endswith('.tsv'):
        print('Error: TRAIN_INPUT_FILE must be a .tsv file')
        sys.exit(1)

    if not INSPECTION_OUT_FILE.endswith('.txt'):
        print('Error: INSPECTION_OUT_FILE must be a .txt file')
        sys.exit(1)

    # Load the input file
    data_loader = DataLoader()
    data_loader.load_data(TRAIN_INPUT_FILE)

    inspection = Inspection(data_loader)
    entropy, error_rate, _ = inspection.evaluate()

    # Output the result
    with open(INSPECTION_OUT_FILE, mode='w+') as f:
        f.write('entropy: ' + str(entropy) + '\n')
        f.write('error: ' + str(error_rate))
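Example #5's Inspection step reports the entropy of the training labels and the error rate of a majority-vote classifier. A minimal sketch of those two quantities, assuming the label sits in the last column of the .tsv and the first row is a header (neither detail is stated in the snippet):

import csv
import math
from collections import Counter

def label_stats(tsv_path):
    """Entropy and majority-vote error rate of the label column."""
    with open(tsv_path) as f:
        rows = list(csv.reader(f, delimiter='\t'))
    labels = [row[-1] for row in rows[1:]]          # skip the header row
    counts = Counter(labels)
    total = len(labels)
    entropy = -sum((c / total) * math.log2(c / total) for c in counts.values())
    error_rate = 1 - max(counts.values()) / total   # majority-vote classifier
    return entropy, error_rate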
    # for num in random_list[:int(len(random_list) / 2)]:
    #     bloom_two.add(chr(num))
    #
    # estimate_num_of_elem_A = bloom_one.estimate_num_of_elem()
    # estimate_num_of_elem_B = bloom_two.estimate_num_of_elem()
    # print("estimate_num_of_elem_A: " + str(estimate_num_of_elem_A))
    # print("estimate_num_of_elem_B: " + str(estimate_num_of_elem_B))
    #
    # estimate_size_of_union = bloom_one.estimate_size_of_union(bloom_two)
    # print("estimate_size_of_union: " + str(estimate_size_of_union))
    #
    # estimate_size_of_intersection = bloom_one.estimate_size_of_intersection(bloom_two)
    # print("estimate_size_of_intersection: " + str(estimate_size_of_intersection))

    loader = DataLoader('columns.txt')
    cols = loader.load_data()

    block_cnt = 20
    block_len = 30
    n = block_cnt * block_len  # filter capacity (expected elements); set to the max column size for now
    p = 0.01  # false positive probability

    # build bloom filter for all cols
    bloom_filter_list = []
    for col in cols:
        bloom_filter = BloomFilter(n, p)
        for num in col:
            bloom_filter.add(chr(num))
        bloom_filter_list.append(bloom_filter)

    # write each bloom filter to file
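The BloomFilter(n, p) built above is parameterized by an expected element count and a target false-positive rate. The textbook sizing arithmetic such a constructor typically applies, as a small standalone helper (the snippet's BloomFilter class itself is not reproduced):

import math

def bloom_parameters(n, p):
    """Bit-array size m and hash count k for n elements at false-positive rate p."""
    m = math.ceil(-n * math.log(p) / (math.log(2) ** 2))
    k = max(1, round((m / n) * math.log(2)))
    return m, k

# For the snippet's values, n = 20 * 30 = 600 and p = 0.01:
# bloom_parameters(600, 0.01) -> roughly 5750 bits and 7 hash functions.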