import logging
import os

import torch
from torchvision import datasets, transforms

# `get_data_path` is supplied by the surrounding project.


def get_data_loaders(batch_size):
    data_dir = os.path.join(get_data_path(), 'pytorch', 'mnist')
    train_dataset = datasets.MNIST(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transforms.ToTensor())
    # `.data` / `.targets` replace the deprecated `train_data` / `train_labels`
    # attributes of torchvision datasets.
    x_train_mnist = train_dataset.data.type(torch.FloatTensor)
    y_train_mnist = train_dataset.targets
    test_dataset = datasets.MNIST(root=data_dir,
                                  train=False,
                                  download=True,
                                  transform=transforms.ToTensor())
    x_test_mnist = test_dataset.data.type(torch.FloatTensor)
    y_test_mnist = test_dataset.targets
    # logging uses %-style formatting; passing extra print-style arguments
    # would raise a formatting error at log time.
    logging.info('Training Data Size: %s - %s',
                 x_train_mnist.size(), y_train_mnist.size())
    logging.info('Testing Data Size: %s - %s',
                 x_test_mnist.size(), y_test_mnist.size())
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)
    return train_loader, train_dataset, test_loader, test_dataset
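# Hypothetical usage sketch (not in the original source): pull one batch from
# the loaders returned above; the batch size of 64 is an arbitrary choice.
train_loader, train_dataset, test_loader, test_dataset = get_data_loaders(batch_size=64)
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # ToTensor() yields [64, 1, 28, 28] and [64]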
import os

import h5py
from keras.layers import Embedding

# `get_data_path` is supplied by the surrounding project.


def pretrained_embedding_layer(max_len, trainable=True):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe
    50-dimensional vectors.

    Arguments:
    max_len -- maximum sequence length the layer will accept
    trainable -- whether the pre-trained weights are updated during training

    Returns:
    embedding_layer -- pretrained Keras Embedding instance
    """
    data_path = get_data_path()
    if data_path is None:
        data_path = "./data"
    path = os.path.join(data_path, "Reviews.h5")
    print("Loading GloVe from", path)
    with h5py.File(path, "r") as fp:
        # `Dataset.value` was removed in h5py 3.0; `[()]` reads the full array.
        embedding_matrix = fp["embedding_matrix"][()]
    # Define the Keras embedding layer with the correct output/input sizes.
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                input_length=max_len,
                                trainable=trainable)
    return embedding_layer
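# Hypothetical usage sketch (assumed, not from the original source): wire the
# pretrained layer into a small Keras classifier. `max_len = 100` and the
# LSTM/Dense head are illustrative choices only.
from keras.layers import Dense, Input, LSTM
from keras.models import Model

max_len = 100
inputs = Input(shape=(max_len,), dtype='int32')
x = pretrained_embedding_layer(max_len, trainable=False)(inputs)
x = LSTM(64)(x)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])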
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()

args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

model = Network()
if args.cuda:
    model.cuda()

data_dir = os.path.join(get_data_path(), 'pytorch', 'mnist')
logging.info('Downloading data ...')
train_loader = get_train_loader(data_dir, args.batch_size, args.cuda)
test_loader = get_test_loader(data_dir, args.test_batch_size, args.cuda)

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

logging.info('Start training ...')
for epoch in range(1, args.epochs + 1):
    train(model=model,
          train_loader=train_loader,
          epoch=epoch,
          cuda=args.cuda,
          optimizer=optimizer,
          log_interval=args.log_interval)
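# A minimal sketch of the `train` function the loop above calls; the real
# implementation is not shown here, so this is an assumption inferred from the
# keyword arguments it receives. `F.nll_loss` assumes the model ends in
# log_softmax, as the standard PyTorch MNIST examples do.
import torch.nn.functional as F

def train(model, train_loader, epoch, cuda, optimizer, log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            logging.info('Train Epoch: %d [%d/%d]\tLoss: %.6f',
                         epoch, batch_idx * len(data),
                         len(train_loader.dataset), loss.item())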
parser.add_argument('--batch-norm-decay',
                    type=float,
                    default=0.997,
                    help='Decay for batch norm.')
parser.add_argument('--batch-norm-epsilon',
                    type=float,
                    default=1e-5,
                    help='Epsilon for batch norm.')
args = parser.parse_args()

if args.num_gpus < 0:
    raise ValueError(
        'Invalid GPU count: "--num-gpus" must be 0 or a positive integer.')
if args.num_gpus == 0 and args.variable_strategy == 'GPU':
    raise ValueError(
        'num-gpus=0, CPU must be used as parameter server. Set '
        '--variable-strategy=CPU.')
if (args.num_layers - 2) % 6 != 0:
    raise ValueError('Invalid --num-layers parameter.')
if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
    raise ValueError('--train-batch-size must be multiple of --num-gpus.')
if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
    raise ValueError('--eval-batch-size must be multiple of --num-gpus.')

data_dir = os.path.join(get_data_path(), 'cifar-10-data')

# Create the data for the project if it does not exist yet.
if not os.path.exists(os.path.join(data_dir, 'train.tfrecords')):
    generate_data(data_dir)

train(job_dir=get_outputs_path(), data_dir=data_dir, **vars(args))
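# Hypothetical sketch of the `generate_data` helper called above; the real
# implementation is not shown. Assumption: it fetches CIFAR-10 and serializes
# it to the train.tfrecords file the script checks for (only the training
# split is sketched here; the writer API is TF 1.x).
import os
import tensorflow as tf

def generate_data(data_dir):
    (x_train, y_train), _ = tf.keras.datasets.cifar10.load_data()
    os.makedirs(data_dir, exist_ok=True)
    path = os.path.join(data_dir, 'train.tfrecords')
    with tf.python_io.TFRecordWriter(path) as writer:
        for image, label in zip(x_train, y_train):
            example = tf.train.Example(features=tf.train.Features(feature={
                'image': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label[0])])),
            }))
            writer.write(example.SerializeToString())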