Example #1
import logging
import os

import torch
import torch.utils.data
from torchvision import datasets, transforms

# get_data_path() is assumed to be provided by the surrounding project.
def get_data_loaders(batch_size):
    data_dir = os.path.join(get_data_path(), 'pytorch', 'mnist')

    train_dataset = datasets.MNIST(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transforms.ToTensor())
    # .data / .targets replace the deprecated .train_data / .train_labels.
    x_train_mnist = train_dataset.data.float()
    y_train_mnist = train_dataset.targets

    test_dataset = datasets.MNIST(root=data_dir,
                                  train=False,
                                  download=True,
                                  transform=transforms.ToTensor())
    x_test_mnist = test_dataset.data.float()
    y_test_mnist = test_dataset.targets

    # logging uses %-style lazy formatting; passing extra positional
    # arguments print-style raises a TypeError at log time.
    logging.info('Training Data Size: %s - %s',
                 x_train_mnist.size(), y_train_mnist.size())
    logging.info('Testing Data Size: %s - %s',
                 x_test_mnist.size(), y_test_mnist.size())

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    return train_loader, train_dataset, test_loader, test_dataset
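
A quick usage sketch (the batch size of 64 is illustrative):

train_loader, train_dataset, test_loader, test_dataset = get_data_loaders(64)
images, labels = next(iter(train_loader))  # images: [64, 1, 28, 28], labels: [64]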
Example #2
import os

import h5py
from keras.layers import Embedding

# get_data_path() is assumed to be provided by the surrounding project.
def pretrained_embedding_layer(max_len, trainable=True):
    """
    Creates a Keras Embedding() layer and loads pre-trained GloVe vectors
    stored as an embedding matrix in an HDF5 file.

    Arguments:
    max_len -- maximum sequence length, used as the layer's input_length
    trainable -- whether the embedding weights are updated during training

    Returns:
    embedding_layer -- pretrained Keras Embedding layer instance
    """

    data_path = get_data_path()
    if data_path is None:
        data_path = "./data"
    path = os.path.join(data_path, "Reviews.h5")
    print("Loading glove from", path)

    with h5py.File(path, "r") as fp:
        # h5py removed the .value accessor in 3.x; index with [()] instead.
        embedding_matrix = fp["embedding_matrix"][()]

    # Define the Keras Embedding layer; vocabulary size and embedding
    # dimension come from the loaded matrix.
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                input_length=max_len,
                                trainable=trainable)

    return embedding_layer
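
A brief usage sketch in a Sequential model (the max_len of 100 and the layers after the embedding are illustrative):

from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(pretrained_embedding_layer(max_len=100, trainable=False))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))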
Example #3
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    model = Network()
    if args.cuda:
        model.cuda()

    data_dir = os.path.join(get_data_path(), 'pytorch', 'mnist')

    logging.info('Downloading data ...')
    train_loader = get_train_loader(data_dir, args.batch_size, args.cuda)
    test_loader = get_test_loader(data_dir, args.test_batch_size, args.cuda)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)
    logging.info('Start training ...')
    for epoch in range(1, args.epochs + 1):
        train(model=model,
              train_loader=train_loader,
              epoch=epoch,
              cuda=args.cuda,
              optimizer=optimizer,
              log_interval=args.log_interval)
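
The script assumes Network, get_train_loader, get_test_loader, and train are defined elsewhere in the same file. A minimal sketch of what train might look like, assuming the model's forward returns log-probabilities (hence F.nll_loss):

import logging
import torch.nn.functional as F

def train(model, train_loader, epoch, cuda, optimizer, log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            logging.info('Epoch %d [%d/%d] loss: %.4f', epoch,
                         batch_idx * len(data), len(train_loader.dataset),
                         loss.item())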
Example #4
    parser.add_argument('--batch-norm-decay',
                        type=float,
                        default=0.997,
                        help='Decay for batch norm.')
    parser.add_argument('--batch-norm-epsilon',
                        type=float,
                        default=1e-5,
                        help='Epsilon for batch norm.')
    args = parser.parse_args()

    if args.num_gpus < 0:
        raise ValueError(
            'Invalid GPU count: "--num-gpus" must be 0 or a positive integer.')
    if args.num_gpus == 0 and args.variable_strategy == 'GPU':
        raise ValueError(
            'num-gpus=0, CPU must be used as parameter server. Set '
            '--variable-strategy=CPU.')
    if (args.num_layers - 2) % 6 != 0:
        raise ValueError(
            'Invalid --num-layers parameter: depth must be 6n + 2 '
            '(e.g. 20, 32, 44).')
    if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
        raise ValueError('--train-batch-size must be multiple of --num-gpus.')
    if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
        raise ValueError('--eval-batch-size must be multiple of --num-gpus.')

    data_dir = os.path.join(get_data_path(), 'cifar-10-data')
    # Generate the dataset for the project if it does not exist yet.
    if not os.path.exists(os.path.join(data_dir, 'train.tfrecords')):
        generate_data(data_dir)

    train(job_dir=get_outputs_path(), data_dir=data_dir, **vars(args))
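
Note the final call: vars(args) converts the parsed argparse Namespace into a dict, so every flag is forwarded to train as a keyword argument. A toy illustration of the pattern (the train signature here is hypothetical):

import argparse

def train(job_dir, data_dir, num_gpus, train_batch_size, **kwargs):
    print(job_dir, data_dir, num_gpus, train_batch_size)

parser = argparse.ArgumentParser()
parser.add_argument('--num-gpus', type=int, default=1)
parser.add_argument('--train-batch-size', type=int, default=128)
args = parser.parse_args([])  # argparse maps --num-gpus to args.num_gpus
train(job_dir='/outputs', data_dir='/data', **vars(args))  # flags become kwargs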