Example #1
def run(args):
    """ Distributed Synchronous SGD Example """
    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu')
    # NOTE: random_seed and validation_ratio are module-level constants in
    # the source file; neither is defined inside this snippet.
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

    # Each worker reads only its own data shard, addressed by rank and world size.
    feature_file_name = "{}/features_{}_{}.npy".format(args.root, args.rank,
                                                       args.world_size)
    label_file_name = "{}/labels_{}_{}.npy".format(args.root, args.rank,
                                                   args.world_size)

    print("read feature file {}".format(feature_file_name))
    print("read label file {}".format(label_file_name))
    train_loader, test_loader = partition_vgg16_cifar100_fc(
        args.batch_size, feature_file_name, label_file_name, validation_ratio,
        args.shuffle)
    num_batches = ceil(len(train_loader.dataset) / float(args.batch_size))

    model = LogisticRegression(args.features, args.classes).float()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)
    criterion = torch.nn.CrossEntropyLoss()

    trainer = Trainer(model, optimizer, train_loader, test_loader, device)

    # Gradient synchronization is active only when the torch.distributed
    # process group has been initialized.
    trainer.fit(args.epochs, is_dist=dist_is_initialized())
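All of these examples gate distributed behavior on dist_is_initialized(). That helper is not shown on this page; a minimal sketch of what it plausibly looks like, using only the public torch.distributed API:

import torch.distributed as dist

def dist_is_initialized():
    # True only when torch.distributed is compiled in AND a process group
    # has been set up, so the same run() works both as a plain
    # single-process script and as one rank of a distributed job.
    return dist.is_available() and dist.is_initialized()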
Example #2
def run(args):
    """ Distributed Synchronous SGD example: logistic regression on MNIST """
    device = torch.device('cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu')
    torch.manual_seed(1234)

    # bsz is the local (per-worker) batch size for this rank's partition.
    train_loader, bsz, test_loader = partition_mnist(args.batch_size, args.root, download=False)
    num_batches = ceil(len(train_loader.dataset) / float(bsz))

    model = LogisticRegression()
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9)

    trainer = Trainer(model, optimizer, train_loader, test_loader, device)

    trainer.fit(args.epochs, is_dist=dist_is_initialized())
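The Trainer used by every example is also not shown. In synchronous SGD, its distinguishing step is averaging gradients across ranks after backward() and before optimizer.step(). A sketch of that step, following the standard torch.distributed pattern rather than this codebase's confirmed internals:

import torch.distributed as dist

def average_gradients(model):
    # Sum each parameter's gradient across all ranks, then divide by the
    # world size, so every replica applies an identical averaged update.
    world_size = float(dist.get_world_size())
    for param in model.parameters():
        if param.grad is not None:
            dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
            param.grad.data /= world_size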
Example #3
def run(args):
    """ Distributed Synchronous SGD example: ResNet-50 on CIFAR-10 """
    device = torch.device('cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu')
    torch.manual_seed(1234)

    train_loader, bsz, test_loader = partition_cifar10(args.batch_size, args.root, download=True)
    num_batches = ceil(len(train_loader.dataset) / float(bsz))
    print("[{}] {} steps per epoch, local batch size:{}, num_batches:{}".format(args.rank, len(train_loader), bsz, num_batches))

    model = ResNet50()
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9)

    trainer = Trainer(model, optimizer, train_loader, test_loader, device)

    trainer.fit(args.epochs, is_dist=dist_is_initialized())
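Putting the pieces together, a minimal sketch of what Trainer.fit might look like, reusing the hypothetical average_gradients helper above (the real class presumably also evaluates on test_loader, omitted here):

import torch

class Trainer:
    def __init__(self, model, optimizer, train_loader, test_loader, device):
        self.model = model.to(device)
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device
        self.criterion = torch.nn.CrossEntropyLoss()

    def fit(self, epochs, is_dist=False):
        for epoch in range(epochs):
            self.model.train()
            for data, target in self.train_loader:
                data, target = data.to(self.device), target.to(self.device)
                self.optimizer.zero_grad()
                loss = self.criterion(self.model(data), target)
                loss.backward()
                if is_dist:
                    # Synchronous SGD: average gradients across ranks
                    # before the update so all replicas stay in lockstep.
                    average_gradients(self.model)
                self.optimizer.step()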
Example #4
def run(args):
    """ Distributed Synchronous SGD Example """
    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu')
    torch.manual_seed(1234)

    # Each worker reads its own shard of the HIGGS data; validation_ratio
    # is a module-level constant in the source file.
    file_name = "{}/{}_{}".format(args.root, args.rank, args.world_size)
    print("read file {}".format(file_name))
    train_loader, test_loader = partition_higgs(args.batch_size, file_name,
                                                validation_ratio)
    num_batches = ceil(len(train_loader.dataset) / float(args.batch_size))

    model = LogisticRegression()
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)

    trainer = Trainer(model, optimizer, train_loader, test_loader, device)

    trainer.fit(args.epochs, is_dist=dist_is_initialized())
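The examples construct LogisticRegression sometimes with explicit dimensions (Examples #1 and #5) and sometimes with none. A sketch consistent with both call sites; the default dimensions are assumptions (e.g. MNIST's 28*28 inputs and 10 classes), not values confirmed by the source:

import torch.nn as nn

class LogisticRegression(nn.Module):
    # Multinomial logistic regression is a single linear layer. Raw logits
    # are returned because CrossEntropyLoss applies log-softmax internally.
    def __init__(self, in_features=28 * 28, num_classes=10):
        super().__init__()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        # Flatten trailing dimensions so image batches also work.
        return self.linear(x.view(x.size(0), -1))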
Example #5
def run(args):
    """ Distributed Synchronous SGD Example """
    device = torch.device('cuda' if torch.cuda.is_available() and not args.no_cuda else 'cpu')
    torch.manual_seed(1234)

    # Each worker reads a contiguous range of num_files input files, so the
    # dataset is sharded across ranks with no duplication.
    f_id_start = args.rank * args.num_files
    f_id_end = f_id_start + args.num_files
    f_path_list = ["{}/{}".format(args.root, i) for i in range(f_id_start, f_id_end)]
    print("read files {}".format(f_path_list))
    train_loader, test_loader = partition_yfcc100m(f_path_list, args.features, args.pos_tag,
                                                   args.batch_size, validation_ratio)
    num_batches = ceil(len(train_loader.dataset) / float(args.batch_size))

    model = LogisticRegression(args.features, args.classes)
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)

    trainer = Trainer(model, optimizer, train_loader, test_loader, device)

    trainer.fit(args.epochs, is_dist=dist_is_initialized())
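None of the examples show how the process group comes up, yet dist_is_initialized() can only return True after that step. A hypothetical companion setup, where args.backend and args.init_method are assumed CLI flags:

import torch.distributed as dist

def init_distributed(args):
    # Must run once per worker before any collective call. 'gloo' works
    # on CPU; 'nccl' is the usual choice for multi-GPU training.
    dist.init_process_group(backend=args.backend,
                            init_method=args.init_method,
                            rank=args.rank,
                            world_size=args.world_size)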