def run(args): """ Distributed Synchronous SGD Example """ device = torch.device( 'cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu') torch.manual_seed(random_seed) np.random.seed(random_seed) feature_file_name = "{}/features_{}_{}.npy".format(args.root, args.rank, args.world_size) label_file_name = "{}/labels_{}_{}.npy".format(args.root, args.rank, args.world_size) print("read feature file {}".format(feature_file_name)) print("read label file {}".format(label_file_name)) train_loader, test_loader = partition_vgg16_cifar100_fc( args.batch_size, feature_file_name, label_file_name, validation_ratio, args.shuffle) num_batches = ceil(len(train_loader.dataset) / float(args.batch_size)) model = LogisticRegression(args.features, args.classes).float() optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate) criterion = torch.nn.CrossEntropyLoss() trainer = Trainer(model, optimizer, train_loader, test_loader, device) trainer.fit(args.epochs, is_dist=dist_is_initialized())
def run(args): """ Distributed Synchronous SGD Example """ device = torch.device('cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu') torch.manual_seed(1234) train_loader, bsz, test_loader = partition_mnist(args.batch_size, args.root, download=False) num_batches = ceil(len(train_loader.dataset) / float(bsz)) model = LogisticRegression() optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9) trainer = Trainer(model, optimizer, train_loader, test_loader, device) trainer.fit(args.epochs, is_dist=dist_is_initialized())
def run(args): """ Distributed Synchronous SGD Example """ device = torch.device('cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu') torch.manual_seed(1234) train_loader, bsz, test_loader = partition_cifar10(args.batch_size, args.root, download=True) num_batches = ceil(len(train_loader.dataset) / float(bsz)) print("[{}] {} steps per epoch, local batch size:{}, num_batches:{}".format(args.rank, len(train_loader), bsz, num_batches)) model = ResNet50() optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9) trainer = Trainer(model, optimizer, train_loader, test_loader, device) trainer.fit(args.epochs, is_dist=dist_is_initialized())
def run(args): """ Distributed Synchronous SGD Example """ device = torch.device( 'cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu') torch.manual_seed(1234) file_name = "{}/{}_{}".format(args.root, args.rank, args.world_size) print("read file {}".format(file_name)) train_loader, test_loader = partition_higgs(args.batch_size, file_name, validation_ratio) num_batches = ceil(len(train_loader.dataset) / float(args.batch_size)) model = LogisticRegression() optimizer = optim.SGD(model.parameters(), lr=args.learning_rate) trainer = Trainer(model, optimizer, train_loader, test_loader, device) trainer.fit(args.epochs, is_dist=dist_is_initialized())
def run(args): """ Distributed Synchronous SGD Example """ device = torch.device('cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu') torch.manual_seed(1234) f_id_start = args.rank * args.num_files f_id_end = f_id_start + args.num_files f_path_list = ["{}/{}".format(args.root, i) for i in range(f_id_start, f_id_end)] print("read file {}".format(f_path_list)) train_loader, test_loader = partition_yfcc100m(f_path_list, args.features, args.pos_tag, args.batch_size, validation_ratio) num_batches = ceil(len(train_loader.dataset) / float(args.batch_size)) model = LogisticRegression(args.features, args.classes) optimizer = optim.SGD(model.parameters(), lr=args.learning_rate) trainer = Trainer(model, optimizer, train_loader, test_loader, device) trainer.fit(args.epochs, is_dist=dist_is_initialized())