Example #1
def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id):
    global _use_shared_memory
    _use_shared_memory = True

    # Initialize C side signal handlers for SIGBUS and SIGSEGV. Python signal
    # module's handlers are executed after Python returns from C low-level
    # handlers, likely when the same fatal signal happened again already.
    # https://docs.python.org/3/library/signal.html Sec. 18.8.1.1
    _set_worker_signal_handlers()

    torch.set_num_threads(1)
    torch.manual_seed(seed)
    np.random.seed(seed)

    if init_fn is not None:
        init_fn(worker_id)

    while True:
        r = index_queue.get()
        if r is None:
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
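The worker above seeds torch and NumPy with the per-worker seed it is handed and then calls the user-supplied init_fn(worker_id). A minimal sketch of the same idea from the user side, assuming a standard DataLoader and a hypothetical seed_worker helper that folds torch.initial_seed() into NumPy's RNG:

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

def seed_worker(worker_id):
    # torch.initial_seed() already differs per worker; reuse it for NumPy as well
    np.random.seed(torch.initial_seed() % 2**32)

dataset = TensorDataset(torch.arange(100, dtype=torch.float32))
loader = DataLoader(dataset, batch_size=10, num_workers=2, worker_init_fn=seed_worker)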
Example #2
def test_word_embed_lookup_d2_1():
    """ 1 point(s) """

    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    embedder = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    embeds = embedder(test_sent)
    assert len(embeds) == len(test_sent)
    assert isinstance(embeds, list)
    assert isinstance(embeds[0], ag.Variable)
    assert embeds[0].size() == (1, TEST_EMBEDDING_DIM)

    embeds_list = make_list(embeds)

    true = ([-1.8661,  1.4146, -1.8781, -0.4674],
    [-0.9596,  0.5489, -0.9901, -0.3826],
    [0.5237,  0.0004, -1.2039,  3.5283],
    [0.3056,  1.0386,  0.5206, -0.5006],
    [0.4434,  0.5848,  0.8407,  0.5510],
    [-0.7576,  0.4215, -0.4827, -1.1198],
    [0.3056,  1.0386,  0.5206, -0.5006],
    [-2.9718,  1.7070, -0.4305, -2.2820],
    [0.3863,  0.9124, -0.8410,  1.2282] )
    pairs = zip(embeds_list, true)
    check_tensor_correctness(pairs)
Example #3
def test_bilstm_word_embeds_d4_1():
    """ 1 point(s) / 0.5 point(s) (section dependent) """

    global test_sent, word_to_ix, vocab
    torch.manual_seed(1)

    embedder = BiLSTMWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM, TEST_EMBEDDING_DIM, 1, 0.0)
    embeds = embedder(test_sent)
    assert len(embeds) == len(test_sent)
    assert isinstance(embeds, list)
    assert isinstance(embeds[0], ag.Variable)
    assert embeds[0].size() == (1, TEST_EMBEDDING_DIM)

    embeds_list = make_list(embeds)
    true = ( 
        [ .4916, -.0168, .1719, .6615 ],
        [ .3756, -.0610, .1851, .2604 ],
        [ -.2655, -.1289, .1009, -.0016 ],
        [ -.1070, -.3971, .2414, -.2588 ],
        [ -.1717, -.4475, .2739, -.0465 ], 
        [ 0.0684, -0.2586,  0.2123, -0.1832 ], 
        [ -0.0775, -0.4308,  0.1844, -0.1146 ], 
        [ 0.4366, -0.0507,  0.1018,  0.4015 ], 
        [ -0.1265, -0.2192,  0.0481,  0.1551 ])

    pairs = zip(embeds_list, true)
    check_tensor_correctness(pairs)
Example #4
    def setUp(self, size=(2, 5), batch=3, dtype=torch.float64, device=None,
              seed=None, mu=None, cov=None, A=None, b=None):
        '''Test the correctness of the batch implementation of mean().

        This function stacks `[1 * mu, 2 * mu, ..., batch * mu]` and then
        checks whether the batched output is accurate.

        Args:
            size: Tuple size of matrix A.
            batch: The batch size > 0.
            dtype: The data type.
            device: The device on which to allocate tensors.
            seed: Seed for the random number generator.
            mu: To test a specific mean mu.
            cov: To test a specific covariance matrix.
            A: To test a specific A matrix.
            b: To test a specific bias b.
        '''
        if seed is not None:
            torch.manual_seed(seed)
        if A is None:
            A = torch.rand(size, dtype=dtype, device=device)
        if b is None:
            b = torch.rand(size[0], dtype=dtype, device=device)
        if mu is None:
            mu = torch.rand(size[1], dtype=dtype, device=device)
        if cov is None:
            cov = rand.definite(size[1], dtype=dtype, device=device,
                                positive=True, semi=False, norm=10**2)
        self.A = A
        self.b = b
        var = torch.diag(cov)
        self.batch_mean = torch.stack([(i + 1) * mu for i in range(batch)])
        self.batch_cov = torch.stack([(i + 1) * cov for i in range(batch)])
        self.batch_var = torch.stack([(i + 1) * var for i in range(batch)])
Example #5
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
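torch.multiprocessing.spawn calls the target function once per process and passes the process index as the first argument, which is why main_worker receives the GPU index when spawned but args.gpu when called directly. A minimal sketch of a worker with that calling convention, using a placeholder body and hypothetical arguments:

import torch.multiprocessing as mp

def main_worker(gpu, ngpus_per_node, args):
    # `gpu` is the process index supplied by mp.spawn (or args.gpu in the direct call)
    print('worker %d of %d' % (gpu, ngpus_per_node))

if __name__ == '__main__':
    mp.spawn(main_worker, nprocs=2, args=(2, None))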
Example #6
def set_seed(seed):
    """Sets random seed everywhere."""
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
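A quick sanity check for a helper like this is to seed twice and compare the draws; the sketch below assumes the set_seed defined above is in scope:

set_seed(0)
a = torch.randn(3)
set_seed(0)
b = torch.randn(3)
assert torch.equal(a, b)  # identical draws after re-seeding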
Example #7
    def setUp(self, length=3, factor=10, count=1000000,
              seed=None, dtype=torch.float64, device=None):
        '''Set up the test values.

        Args:
            length: Size of the vector.
            factor: Multiplier applied to the mean and standard deviation.
            count: Number of samples for Monte-Carlo estimation.
            seed: Seed for the random number generator.
            dtype: The data type.
            device: The device on which to allocate tensors.
        '''
        if seed is not None:
            torch.manual_seed(seed)

        # variables
        self.A = torch.randn(length, length, dtype=dtype, device=device)
        self.b = torch.randn(length, dtype=dtype, device=device)

        # input mean and covariance
        self.mu = torch.randn(length, dtype=dtype, device=device) * factor
        self.cov = rand.definite(length, dtype=dtype, device=device,
                                 positive=True, semi=False, norm=factor**2)

        # Monte-Carlo estimation of the output mean and variance
        normal = torch.distributions.MultivariateNormal(self.mu, self.cov)
        samples = normal.sample((count,))
        out_samples = samples.matmul(self.A.t()) + self.b
        self.mc_mu = torch.mean(out_samples, dim=0)
        self.mc_var = torch.var(out_samples, dim=0)
        self.mc_cov = cov(out_samples)
Example #8
    def __init__(self, input_size, output_size, seed=1):
        super(NN, self).__init__()

        torch.manual_seed(seed)

        self.input_size = input_size
        self.output_size = output_size
        h_size = 50

        # self.net = nn.Sequential(
        #   nn.Linear(self.input_size,h_size),
        #   nn.ReLU(),
        #   nn.Linear(h_size,self.output_size)
        # )

        # self.net = nn.Sequential(
        #   nn.Linear(self.input_size,h_size),
        #   # nn.Tanh(),
        #   # nn.Linear(h_size,h_size),
        #   nn.Tanh(),
        #   nn.Linear(h_size,self.output_size),
        #   # nn.Tanh(),
        #   # nn.Linear(h_size,self.output_size)
        # )

        self.net = nn.Sequential(
          nn.Linear(self.input_size,h_size),
          # nn.Tanh(),
          # nn.Linear(h_size,h_size),
          nn.LeakyReLU(),
          nn.Linear(h_size,h_size),
          nn.LeakyReLU(),
          nn.Linear(h_size,self.output_size)
        )
Example #9
def prepare_environment(params: Params):
    """
    Sets random seeds for reproducible experiments. This may not work as expected
    if you use this from within a Python project in which you have already imported PyTorch.
    If you use the scripts/run_model.py entry point to train models with this library,
    your experiments should be reasonably reproducible. If you are using this from your own
    project, you will want to call this function before importing PyTorch. Complete determinism
    is very difficult to achieve with libraries doing optimized linear algebra, due to massively
    parallel execution, which is exacerbated by using GPUs.

    Parameters
    ----------
    params: Params object or dict, required.
        A ``Params`` object or dict holding the json parameters.
    """
    seed = params.pop_int("random_seed", 13370)
    numpy_seed = params.pop_int("numpy_seed", 1337)
    torch_seed = params.pop_int("pytorch_seed", 133)

    if seed is not None:
        random.seed(seed)
    if numpy_seed is not None:
        numpy.random.seed(numpy_seed)
    if torch_seed is not None:
        torch.manual_seed(torch_seed)
        # Seed all GPUs with the same seed if available.
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(torch_seed)

    log_pytorch_version_info()
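As the docstring notes, each library gets its own seed pulled from the parameters. A hedged usage sketch, assuming the AllenNLP-style Params object the function expects:

prepare_environment(Params({
    "random_seed": 13370,   # seeds Python's `random`
    "numpy_seed": 1337,     # seeds numpy
    "pytorch_seed": 133,    # seeds torch and, if available, all GPUs
}))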
Example #10
def init_platform():
    config_file = cfg_from_file('config.yml')
    default_file = cfg_from_file('default.yml')
    logger.info(pprint.pformat(default_file))
    logger.info(pprint.pformat(config_file))

    merge_a_into_b(config_file, config)
    merge_a_into_b(default_file, default)
    default.best_model_path = ''

    if default.gpu == '':
        default.gpu = None
    if default.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = default.gpu

    default.distributed = default.world_size > 1
    if default.distributed:
        dist.init_process_group(backend=default.dist_backend, init_method=default.dist_url,
                                world_size=default.world_size)

    default.lr_epoch = [int(ep) for ep in default.lr_step.split(',')]

    if default.seed is not None:
        seed = default.seed
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
Example #11
def train_model(args):
    """Load the data, train the model, test the model, export / save the model
    """
    torch.manual_seed(args.seed)

    # Open our dataset
    train_loader, test_loader = data_utils.load_data(args.test_split,
                                                     args.batch_size)

    # Create the model
    net = model.SonarDNN().double()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, nesterov=False)

    # Train / Test the model
    for epoch in range(1, args.epochs + 1):
        train(net, train_loader, optimizer, epoch)
        test(net, test_loader)

    # Export the trained model
    torch.save(net.state_dict(), args.model_name)

    if args.model_dir:
        # Save the model to GCS
        data_utils.save_model(args.model_dir, args.model_name)
Example #12
def main(argv):
    (opt, args) = parser.parse_args(argv)
    print(opt)
    config = get_config(opt.config)

    if opt.manualSeed is None:
        opt.manualSeed = random.randint(1, 10000)
    print('Random Seed: ', opt.manualSeed)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    if opt.cuda:
        torch.cuda.manual_seed_all(opt.manualSeed)
        torch.cuda.set_device(opt.gpu_ids)
    cudnn.benchmark = True

    # loading data set
    transform = transforms.Compose([transforms.Resize((config['fineSizeH'], config['fineSizeW'])),
                                    transforms.ToTensor()])
    dataset = Aligned_Dataset(config['dataPath'], direction='AtoB', transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=config['batchSize'],
                                             shuffle=True, num_workers=int(4))
    # setup model
    trainer = trainer_gan(config, train_loader, resume_epoch=opt.resume_epoch)
    if opt.cuda:
        trainer.cuda()
    if opt.resume_epoch:
        trainer.resume()
    # training
    for epoch in range(opt.resume_epoch, config['nepoch']):
        trainer.train(epoch)
        trainer.update_learning_rate(epoch)
        if epoch % 10 == 0:
            trainer.save(epoch)
Example #13
def seed_everything(seed=1029):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
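For stricter reproducibility a helper like this is often paired with disabling cuDNN benchmarking, since benchmark mode may select different kernels between runs. A minimal sketch, assuming the seed_everything defined above:

seed_everything(1029)
torch.backends.cudnn.benchmark = False  # keep kernel selection deterministic
x = torch.randn(2, 3)                   # same values on every run with this seed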
Example #14
    def __init__(self, hyper_config, seed=1):
        super(VAE, self).__init__()

        torch.manual_seed(seed)


        self.z_size = hyper_config['z_size']
        self.x_size = hyper_config['x_size']
        self.act_func = hyper_config['act_func']

        self.q_dist = hyper_config['q_dist'](self, hyper_config=hyper_config)

        # for aaa in self.q_dist.parameters():
        #     # print (aaa)
        #     print (aaa.size())

        # # fasdfs


        if torch.cuda.is_available():
            self.dtype = torch.cuda.FloatTensor
            self.q_dist.cuda()
        else:
            self.dtype = torch.FloatTensor
            

        #Decoder
        self.decoder_weights = []
        for i in range(len(hyper_config['decoder_arch'])):
            self.decoder_weights.append(nn.Linear(hyper_config['decoder_arch'][i][0], hyper_config['decoder_arch'][i][1]))

        count =1
        for i in range(len(self.decoder_weights)):
            self.add_module(str(count), self.decoder_weights[i])
            count+=1
Example #15
    def __init__(self, seed=1):
        super(NN_drop, self).__init__()

        torch.manual_seed(seed)

        self.input_size = 1
        self.output_size = 1
        h_size = 50


        # #this samples a mask for each datapoint in the batch
        # self.net = nn.Sequential(
        #   nn.Linear(self.input_size,h_size),
        #   nn.ReLU(),
        #   nn.Dropout(p=0.5),
        #   nn.Linear(h_size,self.output_size)
        # )

        #want to keep mask constant for batch

        self.l1 = nn.Linear(self.input_size,h_size)
        self.a1 = nn.ReLU()
        # nn.Dropout(p=0.5),
        self.l2 = nn.Linear(h_size,self.output_size)

        



        self.optimizer = optim.Adam(self.parameters(), lr=.01)
Example #16
def set_random_seed(seed=13370):
    if seed > 0:
        random.seed(seed)
        np.random.seed(int(seed / 10))
        torch.manual_seed(int(seed / 100))
        torch.cuda.manual_seed(int(seed / 100))
        torch.cuda.manual_seed_all(int(seed / 100))
Example #17
    def __init__(self, hyper_config, seed=1):
        super(VAE, self).__init__()

        torch.manual_seed(seed)


        self.z_size = hyper_config['z_size']
        self.x_size = hyper_config['x_size']
        self.act_func = hyper_config['act_func']
        self.flow_bool = hyper_config['flow_bool']

        self.q_dist = hyper_config['q_dist'](self, hyper_config=hyper_config)


        if torch.cuda.is_available():
            self.dtype = torch.cuda.FloatTensor
            self.q_dist.cuda()
        else:
            self.dtype = torch.FloatTensor
            

        #Decoder
        self.fc4 = nn.Linear(self.z_size, 200)
        self.fc5 = nn.Linear(200, 200)
        self.fc6 = nn.Linear(200, self.x_size)
Example #18
def test_bilstm_word_embeds_d4_1():
    global test_sent, word_to_ix, vocab
    torch.manual_seed(1)

    embedder = BiLSTMWordEmbedding(word_to_ix, TEST_EMBEDDING_DIM, TEST_EMBEDDING_DIM, 1, 0.0)
    embeds = embedder(test_sent)
    assert len(embeds) == len(test_sent)
    assert isinstance(embeds, list)
    assert isinstance(embeds[0], ag.Variable)
    assert embeds[0].size() == (1, TEST_EMBEDDING_DIM)

    embeds_list = make_list(embeds)
    true = (
            [0.09079286456108093, 0.06577987223863602, 0.26242679357528687, -0.004267544485628605],
            [0.16868481040000916, 0.2032647728919983, 0.23663431406021118, -0.11785736680030823],
            [0.35757705569267273, 0.3805052936077118, -0.006295515224337578, 0.0010524550452828407],
            [0.26692214608192444, 0.3241712749004364, 0.13473030924797058, -0.026079852133989334],
            [0.23157459497451782, 0.13698695600032806, 0.04000323265790939, 0.1107199415564537],
            [0.22783540189266205, -0.02211562544107437, 0.06239837780594826, 0.08553065359592438],
            [0.24633683264255524, 0.09283821284770966, 0.0987505242228508, -0.07646450400352478],
            [0.05530695244669914, -0.4060348570346832, -0.060150448232889175, -0.003920700401067734],
            [0.2099054455757141, -0.304738312959671, -0.01663055270910263, -0.05987118184566498]
            )

    pairs = zip(embeds_list, true)
    check_tensor_correctness(pairs)
Example #19
def test_suff_word_embeds_d4_2():
    global test_sent, word_to_ix, vocab
    torch.manual_seed(1)
    test_suff_to_ix = build_suff_to_ix(word_to_ix)

    suff_word_embedder = SuffixAndWordEmbedding(word_to_ix, test_suff_to_ix, TEST_EMBEDDING_DIM)
    embeds = suff_word_embedder(test_sent)
    assert len(embeds) == len(test_sent)
    assert isinstance(embeds, list)
    assert isinstance(embeds[0], ag.Variable)
    assert embeds[0].size() == (1, TEST_EMBEDDING_DIM)

    embeds_list = make_list(embeds)
    true = ([-0.45190597, -0.16613023,  1.37900829,  2.5285573 ],
            [-1.02760863, -0.56305277,  1.59870028, -1.27700698],
            [-0.89229053, -0.05825018,  0.32550153, -0.47914493],
            [ 0.42241532,  0.267317  ,  1.37900829,  2.5285573 ],
            [-1.5227685 ,  0.38168392,  0.41074166, -0.98800713],
            [-0.42119515, -0.51069999,  0.11025489, -2.2590096 ],
            [ 0.42241532,  0.267317  ,  1.37900829,  2.5285573 ],
            [-0.19550958, -0.96563596, -0.90807337,  0.54227364],
            [ 0.66135216,  0.26692411,  3.5869894 , -1.83129013])

    pairs = zip(embeds_list, true)
    check_tensor_correctness(pairs)
Example #20
def main():
    torch.manual_seed(1234)
    np.random.seed(1234)
    queryLen = 10
    docLen = 12
    embDim = 128
    encDim = 256

    print "Load Train Data"
    savePath="./model_lstm"
    trainFile = "./data/min_word/train"
    devFile = "./data/min_word/dev"
    vocFile = "./data/min_word/vocab"
    trainData = SimDataset(trainFile,vocFile,queryLen,docLen,2,10000)
    trainLoader = DataLoader(trainData, 100)
    
    print "Load Dev Data"
    devData = SimDataset(devFile,vocFile,queryLen,docLen,2)
    devLoader = DataLoader(devData, 10000)
    devData = None
    for batch in devLoader:
        devData = batch
        break

    print "Creaet Model"
    model,criterion,optimizer = SimLSTMPrj(trainData.getVocLen(),embDim,encDim,savePath)
    print "Train ... "
    train(model,trainLoader,criterion,optimizer,evalData=devData,epoch=50,savePath=savePath)
Example #21
def main(argv):
    (opt, args) = parser.parse_args(argv)
    print(opt)
    config = get_config(opt.config)

    if opt.manualSeed is None:
        opt.manualSeed = random.randint(1, 10000)
    print('Random Seed: ', opt.manualSeed)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    if opt.cuda:
        torch.cuda.manual_seed_all(opt.manualSeed)
        torch.cuda.set_device(opt.gpu_ids)
    cudnn.benchmark = True

    # loading data set
    transform = transforms.Compose([transforms.Resize((config['fineSizeH'], config['fineSizeW'])),
                                    transforms.ToTensor()])
    dataset = Aligned_Dataset(config['dataPath'], subfolder='test', direction='AtoB', transform=transform)
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=int(4))
    # setup model
    trainer = trainer_gan(config, test_loader, resume_epoch=opt.resume_epoch)
    # load a model
    trainer.netG.load_state_dict(torch.load(opt.modeldir))
    if opt.cuda:
        trainer.cuda()
    # testing
    trainer.test()
Example #22
    def __init__(self, input_size, output_size, seed=1, n_residual_blocks=3):
        super(NN3, self).__init__()

        torch.manual_seed(seed)

        self.input_size = input_size
        self.output_size = output_size
        h_size = 50

        # self.net = nn.Sequential(
        #   nn.Linear(self.input_size,h_size),
        #   nn.BatchNorm1d(h_size),
        #   # nn.Tanh(),
        #   # nn.Linear(h_size,h_size),
        #   nn.LeakyReLU(),
        #   nn.Linear(h_size,h_size),
        #   nn.BatchNorm1d(h_size),
        #   # nn.Tanh(),
        #   nn.LeakyReLU(),
        #   nn.Linear(h_size,h_size),
        #   nn.BatchNorm1d(h_size),
        #   # nn.Tanh(),
        #   nn.LeakyReLU(),
        #   nn.Linear(h_size,self.output_size),
        # )

        self.first_layer = nn.Linear(self.input_size,h_size)
        self.last_layer = nn.Linear(h_size,self.output_size)

        # n_residual_blocks = 5
        model = []
        # Residual blocks
        for _ in range(n_residual_blocks):
            model += [ResidualBlock(h_size)]
        self.part3 = nn.Sequential(*model)
Example #23
    def test_horovod_allreduce_inplace(self):
        """Test that the allreduce correctly sums 1D, 2D, 3D tensors."""
        hvd.init()
        size = hvd.size()
        dtypes = [torch.IntTensor, torch.LongTensor,
                  torch.FloatTensor, torch.DoubleTensor]
        if torch.cuda.is_available():
            dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                       torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            torch.manual_seed(1234)
            tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
            tensor = tensor.type(dtype)
            multiplied = tensor * size
            hvd.allreduce_(tensor, average=False)
            max_difference = tensor.sub(multiplied).max()

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against precise multiplication.
            if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor,
                                      torch.cuda.IntTensor, torch.cuda.LongTensor]:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break

            assert max_difference <= threshold, 'hvd.allreduce produces incorrect results'
Example #24
    def test_horovod_allreduce_error(self):
        """Test that the allreduce raises an error if different ranks try to
        send tensors of different rank or dimension."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        # Same rank, different dimension
        torch.manual_seed(1234)
        dims = [17 + rank] * 3
        tensor = torch.FloatTensor(*dims).random_(-100, 100)
        try:
            hvd.allreduce(tensor)
            assert False, 'hvd.allreduce did not throw error'
        except torch.FatalError:
            pass

        # Same number of elements, different rank
        torch.manual_seed(1234)
        if rank == 0:
            dims = [17, 23 * 57]
        else:
            dims = [17, 23, 57]
        tensor = torch.FloatTensor(*dims).random_(-100, 100)
        try:
            hvd.allreduce(tensor)
            assert False, 'hvd.allreduce did not throw error'
        except torch.FatalError:
            pass
Example #25
    def test_horovod_allreduce_grad(self):
        """Test the correctness of the allreduce gradient."""
        hvd.init()
        size = hvd.size()
        dtypes = [torch.IntTensor, torch.LongTensor,
                  torch.FloatTensor, torch.DoubleTensor]
        if torch.cuda.is_available():
            dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                       torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            torch.manual_seed(1234)
            tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
            tensor = tensor.type(dtype)
            tensor = torch.autograd.Variable(tensor, requires_grad=True)
            summed = hvd.allreduce(tensor, average=False)

            summed.backward(torch.ones([17] * dim))
            grad_out = tensor.grad.data.numpy()

            expected = np.ones([17] * dim) * size
            err = np.linalg.norm(expected - grad_out)
            self.assertLess(err, 0.00000001,
                            "gradient %s differs from expected %s, "
                            "error: %s" % (grad_out, expected, str(err)))
Example #26
    def __init__(self, seed=1):
        super(NN, self).__init__()

        torch.manual_seed(seed)

        self.input_size = 1
        self.output_size = 1
        h_size = 50

        # self.net = nn.Sequential(
        #   nn.Linear(self.input_size,h_size),
        #   nn.ReLU(),
        #   nn.Linear(h_size,self.output_size)
        # )
        self.net = nn.Sequential(
          nn.Linear(self.input_size,h_size),
          # nn.Tanh(),
          # nn.Linear(h_size,h_size),
          nn.Tanh(),
          nn.Linear(h_size,self.output_size),
          # nn.Tanh(),
          # nn.Linear(h_size,self.output_size)
        )

        # self.optimizer = optim.Adam(self.parameters(), lr=.01)
        self.optimizer = optim.Adam(self.parameters(), lr=.0004)
Example #27
    def train_step(self, sample, update_params=True, dummy_batch=False):
        """Do forward, backward and parameter update."""
        # Set seed based on args.seed and the update number so that we get
        # reproducible results when resuming from checkpoints
        seed = self.args.seed + self.get_num_updates()
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        if not dummy_batch:
            self.meters['train_wall'].start()

        # forward and backward pass
        sample = self._prepare_sample(sample)
        loss, sample_size, logging_output, oom_fwd = self._forward(sample)
        oom_bwd = self._backward(loss)

        # buffer stats and logging outputs
        self._buffered_stats['sample_sizes'].append(sample_size)
        self._buffered_stats['logging_outputs'].append(logging_output)
        self._buffered_stats['ooms_fwd'].append(oom_fwd)
        self._buffered_stats['ooms_bwd'].append(oom_bwd)

        # update parameters
        if update_params:
            agg_logging_output = self._update_params()
        else:
            agg_logging_output = None  # buffering updates

        if not dummy_batch:
            self.meters['train_wall'].stop()

        return agg_logging_output
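The comment in train_step re-seeds every update with args.seed plus the update counter, so training resumed from a checkpoint replays the same random stream. A minimal sketch of that pattern in isolation, with base_seed and the loop counter as hypothetical stand-ins for args.seed and get_num_updates():

base_seed = 1
for num_updates in range(3):
    seed = base_seed + num_updates
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    # ... forward/backward/step for this update ...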
Example #28
def predict_fn(input_data, model):
    logger.info('Generating text based on input parameters.')
    corpus = model['corpus']
    model = model['model']

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info('Current device: {}'.format(device))
    torch.manual_seed(input_data['seed'])
    ntokens = len(corpus.dictionary)
    input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device)
    hidden = model.init_hidden(1)

    logger.info('Generating {} words.'.format(input_data['words']))
    result = []
    with torch.no_grad():  # no tracking history
        for i in range(input_data['words']):
            output, hidden = model(input, hidden)
            word_weights = output.squeeze().div(input_data['temperature']).exp().cpu()
            word_idx = torch.multinomial(word_weights, 1)[0]
            input.fill_(word_idx)
            word = corpus.dictionary.idx2word[word_idx]
            word = word if type(word) == str else word.decode()
            if word == '<eos>':
                word = '\n'
            elif i % 12 == 11:
                word = word + '\n'
            else:
                word = word + ' '
            result.append(word)
    return ''.join(result)
Example #29
def main(argv):

    (opt, args) = parser.parse_args(argv)
    config = get_config(opt.config)
    print(opt)
    if opt.manualSeed is None:
        opt.manualSeed = random.randint(1, 10000)
    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    if config['cuda']:
        torch.cuda.manual_seed_all(opt.manualSeed)
        torch.cuda.set_device(opt.gpu_ids)
    cudnn.benchmark = True

    transform = transforms.Compose([transforms.Resize((512, 512)),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
    dataset = Aligned_Dataset(config['datapath'], subfolder='test', direction='AtoB', transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=int(2))
    model_dir = '/media/scw4750/AIwalker/stackgan-like/checkpoints/generator_epoch_160.pkl'
    trainer = GAN_Trainer(config, dataloader)
    # load the model
    trainer.G.load_state_dict(torch.load(model_dir))
    trainer.test()

    return
Example #30
    def __init__(self, seed=1):
        super(NN, self).__init__()

        torch.manual_seed(seed)

        self.action_size = 2
        self.state_size = 4
        self.value_size = 1

        
        h_size = 50

        self.actor = nn.Sequential(
          nn.Linear(self.state_size,h_size),
          nn.ReLU(),
          nn.Linear(h_size,self.action_size),
          # nn.log_softmax(dim=1)
        )

        self.critic = nn.Sequential(
          nn.Linear(self.state_size,h_size),
          nn.ReLU(),
          nn.Linear(h_size,self.value_size)
        )

        self.Q_func = nn.Sequential(
          nn.Linear(self.state_size + self.action_size,h_size),
          nn.ReLU(),
          nn.Linear(h_size,self.value_size)
        )

        self.optimizer_actor = optim.Adam(self.actor.parameters(), lr=.0001)
        self.optimizer_critic = optim.Adam(self.critic.parameters(), lr=.0001)
        self.optimizer_qfunc = optim.Adam(self.Q_func.parameters(), lr=.0001)
Example #31
def set_seed(seed): # set random seeds
    np.random.seed(seed)
    #random.seed(seed)
    torch.manual_seed(seed) # cpu
    torch.cuda.manual_seed_all(seed)  # gpu
    torch.backends.cudnn.deterministic = True
Example #32
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from parser import parse
import torch
import numpy as np

if __name__ == '__main__':
    args = parse()
    """ Setting seed of the RNG in all packages."""
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    """ Import specified class with the experimental setup."""
    exp_args = args.experiment.split(".")
    exp_path = ".".join(exp_args[:-1])
    exp_name = exp_args[-1]
    runner_class = getattr(__import__(exp_path, fromlist=[exp_name]), exp_name)
    runner = runner_class(args)

    if not args.infer_only:
        runner.train()
    runner.infer()
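The getattr(__import__(...)) line above loads a runner class from a dotted path such as 'package.module.ClassName'. The same lookup reads more clearly with importlib; a sketch assuming a hypothetical experiment string:

import importlib

experiment = 'experiments.mnist.Runner'   # hypothetical dotted path
module_path, class_name = experiment.rsplit('.', 1)
runner_class = getattr(importlib.import_module(module_path), class_name)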
Example #33
        nn.init.xavier_normal_(self.lstm.weight_ih_l0)
        nn.init.orthogonal_(self.lstm.weight_hh_l0)
        nn.init.xavier_normal_(self.out.weight)

    def forward(self, x, states):
        x = self.embedding(x)
        h, states = self.lstm(x, states)
        y = self.out(h)

        return y, states


if __name__ == '__main__':
    np.random.seed(123)
    torch.manual_seed(123)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    '''
    1. Prepare the data
    '''
    data_dir = os.path.join(os.path.dirname(__file__), 'data')

    en_train_path = os.path.join(data_dir, 'train.en')
    en_val_path = os.path.join(data_dir, 'dev.en')
    en_test_path = os.path.join(data_dir, 'test.en')

    ja_train_path = os.path.join(data_dir, 'train.ja')
    ja_val_path = os.path.join(data_dir, 'dev.ja')
    ja_test_path = os.path.join(data_dir, 'test.ja')
Example #34
import math
import os
import warnings

import random; random.seed(1001)

import torch
try:
    torch.cuda.manual_seed(1001)
except:
    warnings.warn('no NVIDIA driver found')
    torch.manual_seed(1001)

from seqmod.hyper import Hyperband
from seqmod.hyper.utils import make_sampler

from seqmod.modules.lm import LM
from seqmod import utils as u
from seqmod.misc.trainer import Trainer
from seqmod.misc.loggers import StdLogger
from seqmod.misc.optimizer import Optimizer
from seqmod.misc.dataset import Dict, BlockDataset
from seqmod.misc.preprocess import text_processor
from seqmod.misc.early_stopping import EarlyStopping


# Load data
def load_lines(path, processor=text_processor()):
    lines = []
    if os.path.isfile(path):
Example #35
File: train.py Project: gjt9274/ML
def set_random_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
Example #36

def compute_MB_proba(rule_ls, ls_rule_idx):
    rule_idx_cnt = Counter(ls_rule_idx)
    numerator = 0
    for rule_idx in rule_idx_cnt:
        weight = rule_ls[rule_idx].weight
        cnt = rule_idx_cnt[rule_idx]
        numerator += math.exp(weight * cnt)
    return numerator / (numerator + 1.0)


if __name__ == '__main__':
    random.seed(cmd_args.seed)
    np.random.seed(cmd_args.seed)
    torch.manual_seed(cmd_args.seed)

    train(cmd_args)
    x1 = []
    x2 = []
    m = 0
    for i in loss_list:
        x1.append(m)
        m = m + 1
    m = 0
    for i in auc_list:
        x2.append(m)
        m = m + 1
    plt.figure(1)
    print(loss_list)
    plt.plot(x1, loss_list, '.-', color='red')
Example #37
def main_worker(rank, cfg):
  # Initialize the worker
  distributed = init_worker(rank, cfg)

  # Initialize the random seed
  if cfg.seed is not None:
    torch.manual_seed(cfg.seed)

  # Initialize the PyTorch device
  device_id = cfg.device_id + rank
  device = init_device(cfg, id=device_id)

  # Initialize the model
  model = get_model(cfg)
  model.to(device)
  if distributed:
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device_id])

  # Initialize the loss function
  criterion = get_loss_function(cfg)
  criterion.to(device)

  # Initialize the optimizer
  optimizer = optim.Adam(model.parameters(), lr=1)

  # Check whether the result already exists
  result_dir = get_result_dir(cfg)
  resume = os.path.isdir(result_dir)

  # Sync the workers (required due to the previous isdir check)
  if distributed:
    dist.barrier()

  # Start or resume training
  if resume:
    if rank == 0:
      print('Resuming result:', cfg.result)

    # Load and verify the config
    result_cfg = load_config(result_dir)
    if set(result_cfg.features) != set(cfg.features):
      error('input feature set mismatch')

    # Restore the latest checkpoint
    last_epoch = get_latest_checkpoint_epoch(result_dir)
    checkpoint = load_checkpoint(result_dir, device, last_epoch, model, optimizer)
    step = checkpoint['step']
  else:
    if rank == 0:
      print('Result:', cfg.result)
      os.makedirs(result_dir)

      # Save the config
      save_config(result_dir, cfg)

      # Save the source code
      src_filenames = glob(os.path.join(os.path.dirname(sys.argv[0]), '*.py'))
      src_zip_filename = os.path.join(result_dir, 'src.zip')
      save_zip(src_zip_filename, src_filenames)

    last_epoch = 0
    step = 0

  # Make sure all workers have loaded the checkpoint
  if distributed:
    dist.barrier()

  start_epoch = last_epoch + 1
  if start_epoch > cfg.num_epochs:
    exit() # nothing to do

  # Reset the random seed if resuming result
  if cfg.seed is not None and start_epoch > 1:
    seed = cfg.seed + start_epoch - 1
    torch.manual_seed(seed)

  # Initialize the training dataset
  train_data = TrainingDataset(cfg, cfg.train_data)
  if len(train_data) > 0:
    if rank == 0:
      print('Training images:', train_data.num_images)
  else:
    error('no training images')
  train_loader, train_sampler = get_data_loader(rank, cfg, train_data, shuffle=True)
  train_steps_per_epoch = len(train_loader)

  # Initialize the validation dataset
  valid_data = ValidationDataset(cfg, cfg.valid_data)
  if len(valid_data) > 0:
    if rank == 0:
      print('Validation images:', valid_data.num_images)
    valid_loader, valid_sampler = get_data_loader(rank, cfg, valid_data, shuffle=False)
    valid_steps_per_epoch = len(valid_loader)

  # Initialize the learning rate scheduler
  lr_scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=cfg.max_lr,
    total_steps=cfg.num_epochs,
    pct_start=cfg.lr_warmup,
    anneal_strategy='cos',
    div_factor=(25. if cfg.lr is None else cfg.max_lr / cfg.lr),
    final_div_factor=1e4,
    last_epoch=last_epoch-1)

  if lr_scheduler.last_epoch != last_epoch:
    error('failed to restore LR scheduler state')

  # Check whether AMP is enabled
  amp_enabled = cfg.precision == 'mixed'

  if amp_enabled:
    # Initialize the gradient scaler
    scaler = amp.GradScaler()

  # Initialize the summary writer
  log_dir = get_result_log_dir(result_dir)
  if rank == 0:
    summary_writer = SummaryWriter(log_dir)
    if step == 0:
      summary_writer.add_scalar('learning_rate', lr_scheduler.get_last_lr()[0], 0)

  # Training and evaluation loops
  if rank == 0:
    print()
    progress_format = '%-5s %' + str(len(str(cfg.num_epochs))) + 'd/%d:' % cfg.num_epochs
    total_start_time = time.time()

  for epoch in range(start_epoch, cfg.num_epochs+1):
    if rank == 0:
      start_time = time.time()
      progress = ProgressBar(train_steps_per_epoch, progress_format % ('Train', epoch))

    # Switch to training mode
    model.train()
    train_loss = 0.

    # Iterate over the batches
    if distributed:
      train_sampler.set_epoch(epoch)

    for i, batch in enumerate(train_loader, 0):
      # Get the batch
      input, target = batch
      input  = input.to(device,  non_blocking=True)
      target = target.to(device, non_blocking=True)
      if not amp_enabled:
        input  = input.float()
        target = target.float()

      # Run a training step
      optimizer.zero_grad()

      with amp.autocast(enabled=amp_enabled):
        output = model(input)
        loss = criterion(output, target)

      if amp_enabled:
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
      else:
        loss.backward()
        optimizer.step()

      # Next step
      step += 1
      train_loss += loss
      if rank == 0:
        progress.next()

    # Get and update the learning rate
    lr = lr_scheduler.get_last_lr()[0]
    lr_scheduler.step()

    # Compute the average training loss
    if distributed:
      dist.all_reduce(train_loss, op=dist.ReduceOp.SUM)
    train_loss = train_loss.item() / (train_steps_per_epoch * cfg.num_devices)

    # Write summary
    if rank == 0:
      summary_writer.add_scalar('learning_rate', lr, epoch)
      summary_writer.add_scalar('loss', train_loss, epoch)

    # Print stats
    if rank == 0:
      duration = time.time() - start_time
      total_duration = time.time() - total_start_time
      images_per_sec = len(train_data) / duration
      eta = ((cfg.num_epochs - epoch) * total_duration / (epoch + 1 - start_epoch))
      progress.finish('loss=%.6f, lr=%.6f (%.1f images/s, %s, eta %s)'
                      % (train_loss, lr, images_per_sec, format_time(duration), format_time(eta, precision=2)))

    if ((cfg.num_valid_epochs > 0 and epoch % cfg.num_valid_epochs == 0) or epoch == cfg.num_epochs) \
      and len(valid_data) > 0:
      # Validation
      if rank == 0:
        start_time = time.time()
        progress = ProgressBar(valid_steps_per_epoch, progress_format % ('Valid', epoch))

      # Switch to evaluation mode
      model.eval()
      valid_loss = 0.

      # Iterate over the batches
      with torch.no_grad():
        for _, batch in enumerate(valid_loader, 0):
          # Get the batch
          input, target = batch
          input  = input.to(device,  non_blocking=True).float()
          target = target.to(device, non_blocking=True).float()

          # Run a validation step
          loss = criterion(model(input), target)

          # Next step
          valid_loss += loss
          if rank == 0:
            progress.next()

      # Compute the average validation loss
      if distributed:
        dist.all_reduce(valid_loss, op=dist.ReduceOp.SUM)
      valid_loss = valid_loss.item() / (valid_steps_per_epoch * cfg.num_devices)

      # Write summary
      if rank == 0:
        summary_writer.add_scalar('valid_loss', valid_loss, epoch)

      # Print stats
      if rank == 0:
        duration = time.time() - start_time
        images_per_sec = len(valid_data) / duration
        progress.finish('valid_loss=%.6f (%.1f images/s, %.1fs)'
                        % (valid_loss, images_per_sec, duration))

    if (rank == 0) and ((cfg.num_save_epochs > 0 and epoch % cfg.num_save_epochs == 0) or epoch == cfg.num_epochs):
      # Save a checkpoint
      save_checkpoint(result_dir, epoch, step, model, optimizer)

  # Print final stats
  if rank == 0:
    total_duration = time.time() - total_start_time
    print('\nFinished (%s)' % format_time(total_duration))

  # Cleanup
  cleanup_worker(cfg)
Example #38
import argparse
import os
import time
from datetime import datetime
import shutil

args = utils.get_autoencoder_args()
print '%s_%s'%(args.dataset, args.model)
print datetime.now(), args, '\n============================'


use_cuda = th.cuda.is_available() and not args.use_cpu
dtype = th.cuda.FloatTensor if use_cuda else th.FloatTensor

th.manual_seed(args.seed)
gids = args.gpuid.split(',')
gids = [int(x) for x in gids]
print 'deploy on GPUs:', gids
if use_cuda:
    if len(gids) == 1:
        th.cuda.set_device(gids[0])
    else:
        th.cuda.set_device(gids[0])
        print 'use single GPU', gids[0]
    th.cuda.manual_seed(args.seed)


st_cfg = utils.get_dise_cfg(args.st_layers).split(',')
cnt_cfg = utils.get_dise_cfg(args.cnt_layers).split(',')
base_dep = utils.get_base_dep(args.base_mode)
Example #39
def main(args):
    # start experiment
    report_step = 100
    manualSeed = ID if args.seed == 0 else args.seed
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    np.random.seed(manualSeed)
    if args.cuda:
        torch.cuda.manual_seed_all(manualSeed)
    string = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
    start_log("./log/%d-"%ID + string + LOG, args.log)
    if args.resume != -1:
        resume(args.resume)
    myprint(args)
    for d in config:
        myprint("%s: %s" % (d, str(config[d])))
    args.batch_size = config["BATCH_SIZE"]

    # load data
    tokenizer = GPT2Tokenizer.from_pretrained("./%s/gpt"%DATASET)
    tokenizer.bos_token = '<BOS>'
    tokenizer.pad_token = "<PAD>"
    print(tokenizer.add_tokens(['<negative>']))
    print(tokenizer.add_tokens(['<positive>']))
    print(tokenizer.add_tokens(['<PAD>']))
    print(tokenizer.add_tokens(['<BOS>']))

    with open("./%s/%s-gpt.train.json"%(DATASET, STYLE_TYPE), "r") as f:
        data = json.load(f)
    dataloader = Dataloader.GPTLoader(data, tokenizer, args.batch_size, args.cuda, shuffle=True, input_maxlen=30)

    with open("./%s/%s-gpt.dev.json"%(DATASET, STYLE_TYPE), "r") as f:
        data = json.load(f)
    dev_data = Dataloader.GPTLoader(data, tokenizer, args.batch_size, args.cuda, shuffle=False)

    with open("./%s/%s-gpt.test.json"%(DATASET, STYLE_TYPE), "r") as f:
        data = json.load(f)
    if DATASET == "imdb":
        test_data = Dataloader.GPTLoader(data, tokenizer, args.batch_size, args.cuda)
    else:
        test_data = Dataloader.GPTRefLoader(data, tokenizer, args.batch_size, args.cuda)

    # build model
    generator = GPT2LMHeadModel.from_pretrained("./%s/gpt"%DATASET)
    generator.resize_token_embeddings(len(tokenizer))
    language_model = GPT2LMHeadModel.from_pretrained("./%s/gpt"%DATASET)
    language_model.resize_token_embeddings(len(tokenizer))
    language_model.load_state_dict(torch.load("./%s/result/language_model.pkl"%DATASET))
    language_model.eval()
    if config["g_dir"] is not None:
        generator.load_state_dict(torch.load(config["g_dir"]))
    discriminator_a = classifier.AdvDisNet(word_num=len(tokenizer))
    if config["a_dir"] is not None:
        discriminator_a.load_state_dict(torch.load(config["a_dir"]))

    discriminator_b = classifier.RNNDisNet(word_num=len(tokenizer), num_layers=1, dropout=0)
    sim_model = torch.load('sim/sim.pt', map_location='cpu')
    state_dict = sim_model['state_dict']
    vocab_words = sim_model['vocab_words']
    sim_args = sim_model['args']
    sim_args.gpu = args.gpuid
    sim_model = WordAveraging(sim_args, vocab_words)
    sim_model.load_state_dict(state_dict, strict=True)
    L = nn.CrossEntropyLoss()
    BL = nn.BCELoss()

    if args.cuda:
        generator = generator.cuda()
        discriminator_a = discriminator_a.cuda()
        discriminator_b = discriminator_b.cuda()
        sim_model = sim_model.cuda()
        L = L.cuda()
        BL = BL.cuda()
        language_model = language_model.cuda()
        if args.critic:
            critic = critic.cuda()

    goptimizer = optim.Adam(generator.parameters(), lr=config["generator lr"])
    if config["goptim_dir"] is not None:
        goptimizer.load_state_dict(torch.load(config["goptim_dir"], map_location=torch.device('cuda', args.gpuid)))
    for param_group in goptimizer.param_groups:
        param_group['lr'] = config["generator lr"]
    doptimizer_a = optim.Adam(discriminator_a.parameters(), lr=config["class lr"])
    doptimizer_b = optim.Adam(discriminator_b.parameters(), lr=config["discriminator lr"])
    if config["aoptim_dir"] is not None:
        doptimizer_a.load_state_dict(torch.load(config["aoptim_dir"], map_location=torch.device('cuda', args.gpuid)))
    for param_group in doptimizer_a.param_groups:
        param_group['lr'] = config["class lr"]

    
    EPOCH = config["EPOCH"]
    GBATCH = config["generator batch"]
    DBATCH = config["discriminator batch"]
    W_M = config["mle weight"]
    W_A = config["adv weight"]
    W_S = config["sim weight"]
    W_C = config["cycle weight"]
    W_L = config["language weight"]
    W_D = config["class weight"]
    GRAD_CLIP = config["grad clip"]
    PRETRAIN_BATCH = 0
    accumulation_step = config["accumulation_step"]

    gloss_all, gloss_mle, gloss_adv, gloss_cycle, gloss_sim, dloss_a, dloss_b, gcnt, dcnt, avg_language_loss, avg_language_score, avg_adv_score, avg_language_diff = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    avg_fake_loss, avg_real_loss, avg_sim_score, avg_critic_loss = 0, 0, 0, 0
    avg_cls_loss, avg_cls_score, gloss_class, avg_real_loss_cls, avg_fake_loss_cls = 0, 0, 0, 0, 0
    best_record = 1000
    if args.log:
        os.mkdir("./cache/%d"%(ID))
        os.mkdir("./cache/%d/best/"%(ID))
    best_gname = "./cache/%d/best/gen.dict" % ID
    best_a_dname = "./cache/%d/best/a_dis.dict" % ID
    best_b_dname = "./cache/%d/best/b_dis.dict" % ID
    best_goname = "./cache/%d/best/genopt.dict" % ID
    best_a_doname = "./cache/%d/best/a_disopt.dict" % ID
    best_b_doname = "./cache/%d/best/b_disopt.dict" % ID

    gscheduler = optim.lr_scheduler.StepLR(goptimizer, step_size=500, gamma=0.5)
    dscheduler = optim.lr_scheduler.StepLR(doptimizer_a, step_size=250, gamma=0.5)
    fine_tune_stage = args.reinforce
    language_loss_fct = nn.CrossEntropyLoss(reduce=False)
    prev_language_score = 0
    print(classifier.classifer_test(discriminator_a, tokenizer, dev_data, args.batch_size))
    one_tensor = torch.ones(1)
    if args.cuda:
        one_tensor = one_tensor.cuda() 
    # pretrain_language_model(language_model, dataloader)
    for i in range(EPOCH):
        # generator training
        generator.train()
        discriminator_a.eval()
        step_cnt = 0
        goptimizer.zero_grad()
        for j in range(GBATCH * accumulation_step):
            # print(gcnt)
            step_cnt += 1
            batch = dataloader.get()
            # reconstruction loss
            rec_text = torch.cat((batch["src_text"], batch["style_tokens"].unsqueeze(1), batch["src_text"]), dim=1)
            outputs = generator(rec_text, labels=rec_text)
            mleloss = outputs[0]
            mleloss_ = F.threshold(mleloss, config["mle_threshold"], 0)
            # classifier loss
            transfer_text = torch.cat((batch["src_text"], batch["transfer_tokens"].unsqueeze(1)), dim=1)
            cur_len = transfer_text.size(1)
            _, probs = generate(generator, transfer_text, cur_len=cur_len, max_length=int(cur_len * 2 - 1), pad_token_id=tokenizer.pad_token_id,
             eos_token_ids=tokenizer.eos_token_id, batch_size=args.batch_size)
            probs = F.softmax(probs, dim=2)
            idx_probs, words = torch.max(probs, dim=2)
            style_pred = discriminator_a.approximate(probs, 1 - batch["style"])
            style_pred = torch.squeeze(style_pred, 1)
            class_loss = - torch.log(style_pred + 0.0001).mean()
            # adv loss
            adv_pred = discriminator_b.approximate(probs)
            adv_pred = torch.squeeze(adv_pred, 1)
            advloss = - torch.log(adv_pred + 0.0001).mean()
            # sim loss
            if args.sim:
                wx1, wl1, wm1 = sim_model.torchify_batch([make_example(x, sim_model) for x in batch["tokens"]])
                words_ = words.cpu().data.numpy().tolist()
                generate_sents = [tokenizer.decode(evaluate.clean(sent, tokenizer), skip_special_tokens=True, clean_up_tokenization_spaces=False).replace("' ", "'").lstrip() for sent in words_]
                wx2, wl2, wm2 = sim_model.torchify_batch([make_example(x, sim_model) for x in generate_sents])
                with torch.no_grad():
                    sim_scores = sim_model.scoring_function(wx1, wm1, wl1, wx2, wm2, wl2)
                avg_sim_score += sim_scores.mean().item()
                if args.length_penalty:
                    length_penalty = compute_length_penalty(wl1, wl2, 0.25)
                else:
                    length_penalty = 1
                simloss = torch.mul(- torch.mul(sim_scores, length_penalty), torch.log(idx_probs).mean(dim=1)).mean()
            else:
                simloss = torch.zeros(1).cuda()

            # language fluency loss
            with torch.no_grad():
                outputs = language_model(words)
                true_outputs = language_model(batch["src_text"])
            lm_logits = outputs[0]
            shift_logits = lm_logits[..., :-1, :].contiguous()
            shift_labels = words[..., 1:].contiguous()
            language_loss = language_loss_fct(shift_logits.transpose(1, 2), shift_labels)
            lengths = torch.LongTensor([evaluate.get_len(x, tokenizer) for x in words_]) - 1
            lengths = lengths.cuda() if args.cuda else lengths
            mask = get_mask(lengths, language_loss.size(1))
            if config["sentence_level"]:
                language_loss = torch.mul(mask, language_loss).sum(1) / (lengths.float() + 0.001)
                true_lm_logits = true_outputs[0]
                true_shift_logits = true_lm_logits[..., :-1, :].contiguous()
                true_shift_labels = batch["src_text"][..., 1:].contiguous()
                true_language_loss = language_loss_fct(true_shift_logits.transpose(1, 2), true_shift_labels)
                true_lengths = batch["length"] - 1
                true_mask = get_mask(true_lengths, true_language_loss.size(1))
                true_language_loss = torch.mul(true_mask, true_language_loss).sum(1) / (true_lengths.float() + 0.001)
                avg_language_diff += (language_loss.mean() - true_language_loss.mean()).item()
            now_language_score = language_loss.mean().item()
            if config["sentence_level"]:
                language_loss = torch.mul(language_loss - true_language_loss, torch.mul(mask, torch.log(idx_probs[:, 1:])).sum(1) / (lengths.float() + 0.001)).mean()
            else:
                language_loss = (torch.mul(torch.mul(language_loss, torch.log(idx_probs[:, 1:])), mask).sum(1) / (lengths.float() + 0.001)).mean()
            avg_language_loss += language_loss.item()
            avg_language_score += now_language_score

            # compute loss
            if gcnt < PRETRAIN_BATCH:
                loss = W_M * mleloss_
            else:
                loss = W_M * mleloss_ + W_A * advloss + W_S * simloss + W_L * language_loss + W_D * class_loss
            gloss_all += loss.item() / accumulation_step
            gloss_mle += mleloss.item()
            gloss_adv += advloss.item()
            gloss_sim += simloss.item()
            gloss_class += class_loss.item()
            now_advloss = advloss.item()
            now_simloss = simloss.item()
            now_loss = loss.item()
            now_mleloss = mleloss.item()
            loss = loss / accumulation_step # normalizing
            loss.backward()
            if step_cnt % accumulation_step == 0:
                gcnt += 1
                step_cnt = 0
                nn.utils.clip_grad_norm_(generator.parameters(), GRAD_CLIP)
                goptimizer.step()
                goptimizer.zero_grad()
                if W_L < config["max_language_weight"]:
                    # adjusting weights
                    W_L += 1
                del advloss, mleloss, mleloss_, loss, simloss
                torch.cuda.empty_cache()
        # discriminator training
        discriminator_b.train()
        discriminator_a.train()
        generator.eval()
        doptimizer_a.zero_grad()
        doptimizer_b.zero_grad()
        for j in range(DBATCH):
            if gcnt < PRETRAIN_BATCH:
                break
            batch = dataloader.get()
            transfer_text = torch.cat((batch["src_text"], batch["transfer_tokens"].unsqueeze(1)), dim=1)
            cur_len = transfer_text.size(1)
            with torch.no_grad():
                _, probs = generate(generator, transfer_text, cur_len=cur_len, max_length=int(cur_len * 2 - 1), pad_token_id=tokenizer.pad_token_id,
                                    eos_token_ids=tokenizer.eos_token_id, batch_size=args.batch_size)
                probs = F.softmax(probs, dim=2)
                probs.detach_()
            # discriminator for naturalness
            if args.reinforce:
                probs, words = torch.max(probs, dim=2)
                style_pred = discriminator_b(words)
            else:
                style_pred = discriminator_b.approximate(probs)
            style_pred = torch.squeeze(style_pred, 1)
            real_style_pred_true = discriminator_b(batch["src_text"])
            real_style_pred_true = torch.squeeze(real_style_pred_true, 1)
            fake_loss_b = - torch.log(1 - style_pred).mean()
            real_loss_b = - torch.log(real_style_pred_true).mean()
            advloss_b = real_loss_b + fake_loss_b
            avg_fake_loss += fake_loss_b.item()
            avg_real_loss += real_loss_b.item()
            now_fake_loss = fake_loss_b.item()
            now_real_loss = real_loss_b.item()
            now_dis_loss = advloss_b.item()
            dloss_b += advloss_b.item()
            doptimizer_b.zero_grad()
            advloss_b.backward()
            nn.utils.clip_grad_norm_(discriminator_b.parameters(), GRAD_CLIP)
            doptimizer_b.step()
            # discriminator for style
            if args.update_style:
                if args.reinforce:
                    style_pred = discriminator_a(words, 1 - batch["style"])
                else:
                    style_pred = discriminator_a.approximate(probs, 1 - batch["style"])
                style_pred = torch.squeeze(style_pred, 1)
                real_style_pred_true = discriminator_a(batch["src_text"], batch["style"])
                real_style_pred_true = torch.squeeze(real_style_pred_true, 1)
                fake_loss_a = - torch.log(1 - style_pred).mean()
                real_loss_a = - torch.log(real_style_pred_true).mean()
                advloss_a = real_loss_a + fake_loss_a
                avg_fake_loss_cls += fake_loss_a.item()
                avg_real_loss_cls += real_loss_a.item()
                dloss_a += advloss_a.item()
                doptimizer_a.zero_grad()
                advloss_a.backward()
                nn.utils.clip_grad_norm_(discriminator_a.parameters(), GRAD_CLIP)
                doptimizer_a.step()
            else:
                real_loss_a = 0
                fake_loss_a = 0
                advloss_a = 0
            dcnt += 1
            del real_loss_b, fake_loss_b, advloss_b, real_loss_a, fake_loss_a, advloss_a
            torch.cuda.empty_cache()

        if gcnt % report_step == 0:
            myprint("task id: %d"%ID)
            myprint("generator training batch: %d"%gcnt)
            myprint("average loss: %.6f"%(gloss_all / report_step))
            myprint("average adv loss: %.6f"%(gloss_adv / (report_step * accumulation_step)))
            myprint("average mle loss: %.6f"%(gloss_mle / (report_step * accumulation_step)))
            myprint("average cycle loss: %.6f"%(gloss_cycle / (report_step * accumulation_step)))
            myprint("average sim loss: %.6f"%(gloss_sim / (report_step * accumulation_step)))
            myprint("average sim score: %.6f"%(avg_sim_score / (report_step * accumulation_step)))
            myprint("avg class loss: %.6f"%(gloss_class / (report_step * accumulation_step)))
            myprint("avg class score: %.6f"%(avg_cls_score / (report_step * accumulation_step)))
            myprint("avg language score: %.6f"%(avg_language_score  / (report_step * accumulation_step)))
            myprint("avg language loss: %.6f"%(avg_language_loss  / (report_step * accumulation_step)))
            if config["sentence_level"]:
                myprint("avg language diff: %.6f"%(avg_language_diff  / (report_step * accumulation_step)))
            myprint("avg adv score: %.6f"%(avg_adv_score / (report_step * accumulation_step)))
            avg_language_loss, avg_language_score, avg_adv_score, avg_language_diff = 0, 0, 0, 0
            myprint()
            gloss_all, gloss_mle, gloss_adv, gloss_cycle, gloss_sim, avg_sim_score, gloss_class, avg_cls_score = 0, 0, 0, 0, 0, 0, 0, 0

        if dcnt % report_step == 0 and dcnt != 0:
            myprint("discriminator training batch: %d"%dcnt)
            myprint("b average loss: %.6f"%(dloss_b / (report_step)))
            myprint("avg real loss: %.6f"%(avg_real_loss/(report_step)))
            myprint("avg fake loss: %.6f"%(avg_fake_loss/(report_step)))
            myprint("a average loss: %.6f"%(dloss_a / (report_step)))
            myprint("avg real cls loss: %.6f"%(avg_real_loss_cls/(report_step)))
            myprint("avg fake cls loss: %.6f"%(avg_fake_loss_cls/(report_step)))
            myprint()
            dloss_a, dloss_b, avg_real_loss, avg_fake_loss, avg_real_loss_cls, avg_fake_loss_cls = 0, 0, 0, 0, 0, 0

        gscheduler.step()
        dscheduler.step()
        string = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        gname = "./cache/%d/gen-%s.dict" % (ID, string)
        a_dname = "./cache/%d/a_dis-%s.dict" % (ID, string)
        b_dname = "./cache/%d/b_dis-%s.dict" % (ID, string)
        goname = "./cache/%d/genopt-%s.dict" % (ID, string)
        a_doname = "./cache/%d/a_disopt-%s.dict" % (ID, string)
        b_doname = "./cache/%d/b_disopt-%s.dict" % (ID, string)
        if gcnt % 1000 == 0 and args.log:
            generator.eval()
            result = test(generator, "dev")
            acc_transfer = result["acc"]
            self_bleu = result["self_bleu"]
            dev_acc = acc_transfer
            dev_bleu = self_bleu
            dev_ppl = result["ppl"]
            myprint(f"gcnt: {gcnt}")
            myprint("dev set:")
            myprint("acc transfer: %.6f"%acc_transfer)
            myprint("self_bleu: %.6f"%self_bleu)
            myprint("ppl: %.6f"%dev_ppl)
            result = test(generator, "test")
            acc_transfer = result["acc"]
            self_bleu = result["self_bleu"]
            ppl = result["ppl"]
            myprint("test set:")
            myprint("acc transfer: %.6f"%acc_transfer)
            myprint("self_bleu: %.6f"%self_bleu)
            myprint("ppl: %.6f"%ppl)
            if DATASET != "imdb":
                bleu = result["bleu"]
                myprint("bleu: %.6f"%bleu)
            generator.train()
            generator.cpu()
            discriminator_a.cpu()
            f_score = 2 * dev_acc * dev_bleu / (dev_acc + dev_bleu)
            if dev_ppl < best_record and dev_acc > config["acc_threshold"] and gcnt > PRETRAIN_BATCH:
                best_record = dev_ppl
                myprint("best")
                myprint("acc transfer: %.6f"%acc_transfer)
                myprint("self_bleu: %.6f"%self_bleu)
                myprint("ppl: %.6f"%ppl)
                if DATASET != "imdb":
                    myprint("bleu: %.6f"%bleu)
                myprint()
                torch.save(generator.state_dict(), best_gname)
                torch.save(discriminator_a.state_dict(), best_a_dname)
                torch.save(goptimizer.state_dict(), best_goname)
                torch.save(doptimizer_a.state_dict(), best_a_doname)
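            # Best-checkpoint selection above: lowest dev perplexity, but only once the
            # transfer accuracy exceeds acc_threshold and pretraining has finished.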
            if gcnt > PRETRAIN_BATCH:
                gname = "./cache/%d/gen-%d.dict" % (ID, gcnt)
                a_dname = "./cache/%d/a_dis-%d.dict" % (ID, gcnt)
                torch.save(generator.state_dict(), gname)
                torch.save(discriminator_a.state_dict(), a_dname)
            if args.cuda:
                generator.cuda()
                discriminator_a.cuda()
Example #40
0
import torch as t
import numpy as np
import pickle
import matplotlib.pyplot as plt
from torch import nn
from tqdm import tqdm

t.manual_seed(13)
device = t.device("cuda" if t.cuda.is_available() else "cpu")
t.cuda.set_device(2)
# device = t.device("cpu")

def l2_regularize(array):
    loss = t.sum(array ** 2.0)
    return loss

class MFModel(nn.Module):
    def __init__(self, n_users, n_items, n_factors = 10, dropout = 0, sparse = False):
        super(MFModel, self).__init__()
        self.n_users = n_users 
        self.n_items = n_items
        self.user_biases = nn.Embedding(n_users, 1, sparse=sparse).to(device)
        self.item_biases = nn.Embedding(n_items, 1, sparse=sparse).to(device)
        self.user_embedding = nn.Embedding(n_users, n_factors, sparse = sparse).to(device)
        self.item_embedding = nn.Embedding(n_items, n_factors, sparse = sparse).to(device)
#         self.fc = nn.Linear(2 * n_factors, 1).to(device)
        t.nn.init.xavier_uniform_(self.user_embedding.weight)
        t.nn.init.xavier_uniform_(self.item_embedding.weight)
        self.user_biases.weight.data.fill_(0.)
        self.item_biases.weight.data.fill_(0.)
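        # Note: forward() is not included in this snippet. A biased matrix-factorization
        # model like this one typically scores a (user, item) pair as
        # (user_embedding[u] * item_embedding[i]).sum() + user_biases[u] + item_biases[i].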
        
        threshold_init=-15,
        candidate_p=[50000, 30000, 20000],
    )

    opt = parser.parse_args(args=[])
    opt = vars(opt)

    # rename alias

    opt['alias'] = '{}_{}_BaseDim{}_bsz{}_lr_{}_optim_{}_thresholdType{}_thres_init{}_{}-{}_l2_penalty{}'.format(
        opt['model'].upper(),
        opt['alias'],
        opt['latent_dim'],
        opt['batch_size_train'],
        opt['fm_lr'],
        opt['fm_optimizer'],
        opt['threshold_type'].upper(),
        opt['threshold_init'],
        opt['g_type'],
        opt['gk'],
        opt['l2_penalty']
    )
    print(opt['alias'])
    random.seed(opt['seed'])
    # np.random.seed(opt['seed'])
    torch.manual_seed(opt['seed'])
    torch.cuda.manual_seed_all(opt['seed'])
    engine = Engine(opt)
    engine.train()
def main(args):
    if not os.path.isdir('CMDs'):
        os.mkdir('CMDs')
    with open('CMDs/train.cmd', 'a') as f:
        f.write(' '.join(sys.argv) + '\n')
        f.write('--------------------------------\n')

    # Set the seed value all over the place to make this reproducible.
    seed_val = args.seed
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)
    # Choose device
    device = get_default_device()

    with open(args.train_data_path) as f:
        train_data = json.load(f)

    if args.train_data_half != 0:
        # Seed has already been set earlier
        random.shuffle(train_data)
        mid = len(train_data) // 2

    if args.train_data_half == 1:
        train_data = train_data[:mid]
    elif args.train_data_half == 2:
        train_data = train_data[mid:]

    electra_base = "google/electra-base-discriminator"
    electra_large = "google/electra-large-discriminator"
    tokenizer = ElectraTokenizer.from_pretrained(electra_large, do_lower_case=True)

    labels = []
    input_ids = []
    token_type_ids = []
    count = 0

    for item in train_data:
        context = item["context"]
        question = item["question"]
        lab = item["label"]
        if lab == 3:
            # Remove unanswerable examples at training time
            continue
        labels.append(lab)
        three_inp_ids = []
        three_tok_type_ids = []
        three_answer_options = item["answers"][:3]
        for i, ans in enumerate(three_answer_options):
            combo = context + " [SEP] " + question + " " + ans
            inp_ids = tokenizer.encode(combo)
            if len(inp_ids)>512:
                inp_ids = [inp_ids[0]] + inp_ids[-511:]
            tok_type_ids = [0 if i <= inp_ids.index(102) else 1 for i in range(len(inp_ids))]  # 102 is the [SEP] token id in the BERT/ELECTRA uncased vocab
            three_inp_ids.append(inp_ids)
            three_tok_type_ids.append(tok_type_ids)
        three_inp_ids = pad_sequences(three_inp_ids, maxlen=MAXLEN, dtype="long", value=0, truncating="post", padding="post")
        three_tok_type_ids = pad_sequences(three_tok_type_ids, maxlen=MAXLEN, dtype="long", value=0, truncating="post", padding="post")
        input_ids.append(three_inp_ids)
        token_type_ids.append(three_tok_type_ids)
    # Create attention masks
    attention_masks = []
    for sen in input_ids:
        sen_attention_masks = []
        for opt in sen:
            att_mask = [int(token_id > 0) for token_id in opt]
            sen_attention_masks.append(att_mask)
        attention_masks.append(sen_attention_masks)
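    # Each training item now carries 3 encoded answer options, so input_ids /
    # token_type_ids / attention_masks end up shaped [num_examples, 3, MAXLEN];
    # the attention mask is 1 for real tokens and 0 for the padding added above.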
    # Convert to torch tensors
    labels = torch.tensor(labels)
    labels = labels.long().to(device)
    input_ids = torch.tensor(input_ids)
    input_ids = input_ids.long().to(device)
    token_type_ids = torch.tensor(token_type_ids)
    token_type_ids = token_type_ids.long().to(device)
    attention_masks = torch.tensor(attention_masks)
    attention_masks = attention_masks.long().to(device)

    # Create the DataLoader for training set.
    train_data = TensorDataset(input_ids, token_type_ids, attention_masks, labels)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.batch_size)

    if args.train_data_half != 2:
        model = ElectraForMultipleChoice.from_pretrained(electra_large).to(device)
    else:
        model = torch.load(args.model_path, map_location=device).to(device)

    optimizer = AdamW(model.parameters(),
                    lr = args.learning_rate,
                    eps = args.adam_epsilon
                    # weight_decay = 0.01
                    )

    loss_values = []

    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * args.n_epochs
    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps = 0.1*total_steps,
                                                num_training_steps = total_steps)


    for epoch in range(args.n_epochs):
        # Perform one full pass over the training set.
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch + 1, args.n_epochs))
        print('Training...')
        # Measure how long the training epoch takes.
        t0 = time.time()
        # Reset the total loss for this epoch.
        total_loss = 0
        model.train()
        model.zero_grad()
        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            # Progress update every 40 batches.
            if step % 40 == 0 and not step == 0:
                # Calculate elapsed time in minutes.
                elapsed = format_time(time.time() - t0)
                # Report progress.
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))
            b_input_ids = batch[0].to(device)
            b_tok_typ_ids = batch[1].to(device)
            b_att_msks = batch[2].to(device)
            b_labs = batch[3].to(device)
            model.zero_grad()
            outputs = model(input_ids=b_input_ids, attention_mask=b_att_msks, token_type_ids=b_tok_typ_ids, labels=b_labs)
            loss = outputs[0]
            total_loss += loss.item()
            print(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Update the learning rate.
            scheduler.step()
                # model.zero_grad()
        # Calculate the average loss over the training data.
        avg_train_loss = total_loss / len(train_dataloader)

        # Store the loss value for plotting the learning curve.
        loss_values.append(avg_train_loss)

        print("")
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

    # Save the model to a file
    file_path = args.save_path+'electra_QA_MC_seed'+str(args.seed)+'.pt'
    torch.save(model, file_path)
def main(num):
    # Generate configuration files depending on experiment being run
    utils.generate_config_files("conceptnet", num)

    # Loads the correct configuration file
    config_file = "config/conceptnet/config_{}.json".format(num)

    print(config_file)

    # Read config file to option
    config = cfg.read_config(cfg.load_config(config_file))
    opt, meta = cfg.get_parameters(config)

    # config.gpu_mode = torch.cuda.is_available()

    # Set the random seeds
    torch.manual_seed(opt.train.static.seed)
    random.seed(opt.train.static.seed)
    if config.gpu_mode:
        torch.cuda.manual_seed_all(opt.train.static.seed)

    # Load the data
    splits = ["train", "dev", "test"]

    opt.train.dynamic.epoch = 0

    print("Loading Data")

    # Initialize path to pre-set data loader
    path = "data/conceptnet/processed/{}/{}.pickle".format(
        opt.exp, utils.make_name_string(opt.data))

    # Make data loader
    data_loader = data.make_data_loader(opt)
    loaded = data_loader.load_data(path)
    print(data_loader.sequences["train"]["total"].size(0))
    data_loader.opt = opt
    data_loader.batch_size = opt.train.dynamic.bs

    print("Done.")

    text_encoder = TextEncoder(config.encoder_path, config.bpe_path)

    categories = data.conceptnet_data.conceptnet_relations

    special = [data.start_token, data.end_token]
    special += ["<{}>".format(cat) for cat in categories]

    if loaded:
        text_encoder.encoder = data_loader.vocab_encoder
        text_encoder.decoder = data_loader.vocab_decoder
    else:
        for special_token in special:
            text_encoder.decoder[len(text_encoder.encoder)] = special_token
            text_encoder.encoder[special_token] = len(text_encoder.encoder)
        data_loader.make_tensors(text_encoder, special)

    # Set max size of different parts of relation
    context_size_e1 = data_loader.max_e1
    context_size_e2 = data_loader.max_e2
    context_size_r = data_loader.max_r

    opt.data.maxr = context_size_r

    n_special = len(special)
    n_ctx = context_size_e1 + context_size_r + context_size_e2
    n_vocab = len(text_encoder.encoder) + n_ctx

    print(data_loader.__dict__.keys())
    opt.net.vSize = n_vocab

    # Build Model
    print("Building Model")

    model = models.make_model(
        opt, n_vocab, n_ctx, n_special,
        load=(opt.net.init=="pt"))

    print("Done.")

    print("Files will be logged at: {}".format(
        utils.make_name(opt, prefix="results/losses/",
                        is_dir=True, eval_=True)))

    data_loader.reset_offsets("train", keys=["total"])

    data.set_max_sizes(data_loader)

    # Push to GPU
    if config.gpu_mode:
        print("Pushing to GPU: {}".format(config.gpu_index))
        cfg.device = config.gpu_index
        cfg.do_gpu = True
        torch.cuda.set_device(cfg.device)
        model.cuda(cfg.device)
        print("Done.")

    print("Training")

    optimizer = OpenAIAdam(model.parameters(),
                           lr=opt.train.dynamic.lr,
                           schedule=opt.train.static.lrsched,
                           warmup=opt.train.static.lrwarm,
                           t_total=meta.iterations,
                           b1=opt.train.static.b1,
                           b2=opt.train.static.b2,
                           e=opt.train.static.e,
                           l2=opt.train.static.l2,
                           vector_l2=opt.train.static.vl2,
                           max_grad_norm=opt.train.static.clip)

    trainer = train.make_trainer(
        opt, meta, data_loader, model, optimizer)
    print(data_loader.sequences["dev"]["total"].max())
    trainer.set_generator(opt, model, data_loader)
    trainer.set_evaluator(opt, model, data_loader)

    trainer.run()
        #     fig = plt.figure(figsize=(32, 16))
        #     plt.suptitle(batched['sentence'][i], fontsize=30)
        
        #     for j in range(min(len(sequence), 14)):
        #         plt.subplot(3, 5, j+1)
        #         partially_completed_img = clamp_array(sequence[j][:,:,-3:], 0, 255).astype(np.uint8)
        #         partially_completed_img = partially_completed_img[:,:,::-1]
        #         plt.imshow(partially_completed_img)
        #         plt.axis('off')
        
        #     plt.subplot(3, 5, 15)
        #     plt.imshow(color[:,:,::-1])
        #     plt.axis('off')
        
        #     fig.savefig(out_path, bbox_inches='tight')
        #     plt.close(fig)

        break


if __name__ == '__main__':
    config, unparsed = get_config()
    np.random.seed(config.seed)
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    if(config.cuda):
        torch.cuda.manual_seed_all(config.seed)
    prepare_directories(config)

    test_puzzle_model(config)
Example #45
0
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--dataset', type=str, default="mnist", choices=["mnist", "cifar10"],
                        metavar='D', help='training dataset (mnist or cifar10)')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--percent', type=float, nargs='+', default=[0.8, 0.92, 0.991, 0.93],
                        metavar='P', help='per-layer pruning percentages (default: 0.8 0.92 0.991 0.93)')
    parser.add_argument('--alpha', type=float, default=5e-4, metavar='L',
                        help='l2 norm weight (default: 5e-4)')
    parser.add_argument('--rho', type=float, default=1e-2, metavar='R',
                        help='cardinality weight (default: 1e-2)')
    parser.add_argument('--l1', default=False, action='store_true',
                        help='prune weights with l1 regularization instead of cardinality')
    parser.add_argument('--l2', default=False, action='store_true',
                        help='apply l2 regularization')
    parser.add_argument('--num_pre_epochs', type=int, default=3, metavar='P',
                        help='number of epochs to pretrain (default: 3)')
    parser.add_argument('--num_epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--num_re_epochs', type=int, default=3, metavar='R',
                        help='number of epochs to retrain (default: 3)')
    parser.add_argument('--lr', type=float, default=1e-3, metavar='LR',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--adam_epsilon', type=float, default=1e-8, metavar='E',
                        help='adam epsilon (default: 1e-8)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    if args.dataset == "mnist":
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('data', train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('data', train=False, transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=args.test_batch_size, shuffle=True, **kwargs)

    else:
        args.percent = [0.8, 0.92, 0.93, 0.94, 0.95, 0.99, 0.99, 0.93]
        args.num_pre_epochs = 5
        args.num_epochs = 20
        args.num_re_epochs = 5
        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('data', train=True, download=True,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.49139968, 0.48215827, 0.44653124),
                                                      (0.24703233, 0.24348505, 0.26158768))
                             ])), shuffle=True, batch_size=args.batch_size, **kwargs)

        test_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('data', train=False, download=True,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.49139968, 0.48215827, 0.44653124),
                                                      (0.24703233, 0.24348505, 0.26158768))
                             ])), shuffle=True, batch_size=args.test_batch_size, **kwargs)

    model = LeNet().to(device) if args.dataset == "mnist" else AlexNet().to(device)
    optimizer = PruneAdam(model.named_parameters(), lr=args.lr, eps=args.adam_epsilon)

    train(args, model, device, train_loader, test_loader, optimizer)
    mask = apply_l1_prune(model, device, args) if args.l1 else apply_prune(model, device, args)
    print_prune(model)
    test(args, model, device, test_loader)
    retrain(args, model, mask, device, train_loader, test_loader, optimizer)
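    # End-to-end flow of this main(): train with the chosen regularizer, derive a
    # pruning mask (apply_l1_prune if --l1 is set, otherwise apply_prune), report
    # and evaluate the pruned model, then retrain with the mask held fixed.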
def train(model, train_set, test_set, save, n_epochs=300, valid_size=5000,
          batch_size=64, lr=0.1, wd=0.0001, momentum=0.9, seed=None, info=''):
    
    save += info
    
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)
    
    
    best_error = 1
    
    if seed is not None:
        torch.manual_seed(seed)


    # Data loaders
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False,
                                              pin_memory=(torch.cuda.is_available()), num_workers=0)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True,
                                                   pin_memory=(torch.cuda.is_available()), num_workers=0)

    # Model on cuda
    if torch.cuda.is_available():
        model = model.cuda()

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    
    
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()

    # Optimizer
    optimizer = torch.optim.SGD(model_wrapper.parameters(), lr=lr, momentum=momentum, nesterov=True, weight_decay=wd)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[0.5 * n_epochs, 0.75 * n_epochs],
                                                     gamma=0.1)
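    # MultiStepLR decays the learning rate by 10x at 50% and 75% of n_epochs.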

    # Start log
    with open(os.path.join(save, 'results.csv'), 'w') as f:
        f.write('epoch,train_loss,train_error,valid_loss,valid_error,test_error\n')

    # Train model
    
    for epoch in range(n_epochs):
        scheduler.step()  # note: stepping the scheduler before optimizer.step() follows the pre-1.1 PyTorch convention
        _, train_loss, train_error = train_epoch(
            model=model_wrapper,
            loader=train_loader,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            info=info
        )
        _, valid_loss, valid_error = test_epoch(
            model=model,
            loader=test_loader,
            is_test=True
        )

        # Determine if model is the best
        if valid_error < best_error:
            best_error = valid_error
            print('New best error: %.4f' % best_error)
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))


        # Log results
        with open(os.path.join(save, 'results.csv'), 'a') as f:
            f.write('%03d,%0.6f,%0.6f,%0.5f,%0.5f,\n' % (
                (epoch + 1),
                train_loss,
                train_error,
                valid_loss,
                valid_error,
            ))

    torch.save(model.state_dict(), os.path.join(save, 'current.dat'))
    # Final test of model on test set
    model.load_state_dict(torch.load(os.path.join(save, 'model.dat')))


    test_results = test_epoch(
        model=model,
        loader=test_loader,
        is_test=True
    )
    _, _, test_error = test_results
    with open(os.path.join(save, 'results.csv'), 'a') as f:
        f.write(',,,,,%0.5f\n' % (test_error))
    print('Final test error: %.4f' % test_error)
    model.load_state_dict(torch.load(os.path.join(save, 'current.dat')))
Example #47
0
def parse_option():
    parser = argparse.ArgumentParser('S3DIS scene-segmentation training')
    parser.add_argument('--cfg', type=str, required=True, help='config file')
    parser.add_argument('--data_root',
                        type=str,
                        default='data',
                        help='root director of dataset')
    parser.add_argument('--num_workers',
                        type=int,
                        default=4,
                        help='num of workers to use')
    parser.add_argument('--batch_size', type=int, help='batch_size')
    parser.add_argument('--num_points', type=int, help='num_points')
    parser.add_argument('--num_steps', type=int, help='num_steps')
    parser.add_argument('--base_learning_rate',
                        type=float,
                        help='base learning rate')
    parser.add_argument('--epochs', type=int, help='number of training epochs')
    parser.add_argument('--start_epoch', type=int, help='used for resume')

    # io
    parser.add_argument('--load_path',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--print_freq',
                        type=int,
                        default=10,
                        help='print frequency')
    parser.add_argument('--save_freq',
                        type=int,
                        default=10,
                        help='save frequency')
    parser.add_argument('--val_freq',
                        type=int,
                        default=10,
                        help='val frequency')
    parser.add_argument('--log_dir',
                        type=str,
                        default='log',
                        help='log dir [default: log]')

    # misc
    parser.add_argument("--local_rank",
                        type=int,
                        help='local rank for DistributedDataParallel')
    parser.add_argument("--rng_seed", type=int, default=0, help='manual seed')

    args, unparsed = parser.parse_known_args()

    update_config(args.cfg)

    config.data_root = args.data_root
    config.num_workers = args.num_workers
    config.load_path = args.load_path
    config.print_freq = args.print_freq
    config.save_freq = args.save_freq
    config.val_freq = args.val_freq
    config.rng_seed = args.rng_seed

    config.local_rank = args.local_rank

    ddir_name = args.cfg.split('.')[-2].split('/')[-1]
    config.log_dir = os.path.join(args.log_dir, 's3dis', ddir_name)

    if args.batch_size:
        config.batch_size = args.batch_size
    if args.num_points:
        config.num_points = args.num_points
    if args.num_steps:
        config.num_steps = args.num_steps
    if args.base_learning_rate:
        config.base_learning_rate = args.base_learning_rate
    if args.epochs:
        config.epochs = args.epochs
    if args.start_epoch:
        config.start_epoch = args.start_epoch

    print(args)
    print(config)

    torch.manual_seed(args.rng_seed)
    torch.cuda.manual_seed_all(args.rng_seed)
    random.seed(args.rng_seed)
    np.random.seed(args.rng_seed)

    return args, config
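# Typical usage (assumption, not shown in this snippet):
#   args, config = parse_option()
# after which config carries the merged CLI/YAML settings and all RNGs
# (torch, torch.cuda, random, numpy) have been seeded with args.rng_seed.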
Example #48
0
File: main.py Project: hsack6/AGATE
)
parser.add_argument(
    '--cls_feats_max',
    type=int,
    default=
    repeat1_attribute_prediction_exist_Tm_binary_utilize_new_attribute_link_egcnh_parameters_cls_feats_max
)

opt = parser.parse_args()
print(opt)

if opt.manualSeed is None:
    opt.manualSeed = random.randint(1, 10000)
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

opt.dataroot = InputDir

if opt.cuda:
    torch.cuda.manual_seed_all(opt.manualSeed)

opt.L = opt.init_L

gcn_parameters = [
    'feats_per_node', 'feats_per_node_min', 'feats_per_node_max',
    'layer_1_feats', 'layer_1_feats_min', 'layer_1_feats_max', 'layer_2_feats',
    'layer_2_feats_same_as_l1', 'k_top_grcu', 'num_layers', 'lstm_l1_layers',
    'lstm_l1_feats', 'lstm_l1_feats_min', 'lstm_l1_feats_max',
    'lstm_l2_layers', 'lstm_l2_feats', 'lstm_l2_feats_same_as_l1', 'cls_feats',
    'cls_feats_min', 'cls_feats_max', 'output_dim'
Example #49
0
def main(args):
    SEED = 1234

    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)

    train_data = p.load(open(args.train_data, 'rb'))
    dev_data = p.load(open(args.tune_data, 'rb'))
    test_data = p.load(open(args.test_data, 'rb'))
    word_embeddings = p.load(open(args.word_embeddings, 'rb'))
    vocab = p.load(open(args.vocab, 'rb'))

    BATCH_SIZE = 64
    INPUT_DIM = len(vocab)
    EMBEDDING_DIM = len(word_embeddings[0])
    HIDDEN_DIM = 100
    OUTPUT_DIM = 1
    N_LAYERS = 1
    BIDIRECTIONAL = True
    DROPOUT = 0.5

    context_model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, HIDDEN_DIM,
                        N_LAYERS, BIDIRECTIONAL, DROPOUT)
    question_model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, HIDDEN_DIM,
                         N_LAYERS, BIDIRECTIONAL, DROPOUT)
    answer_model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, HIDDEN_DIM,
                       N_LAYERS, BIDIRECTIONAL, DROPOUT)

    utility_model = FeedForward(HIDDEN_DIM * 3, HIDDEN_DIM, OUTPUT_DIM)

    criterion = nn.BCEWithLogitsLoss()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utility_model = utility_model.to(device)
    context_model = context_model.to(device)
    question_model = question_model.to(device)
    answer_model = answer_model.to(device)
    criterion = criterion.to(device)

    word_embeddings = autograd.Variable(
        torch.FloatTensor(word_embeddings).cuda())
    context_model.embedding.weight.data.copy_(word_embeddings)
    question_model.embedding.weight.data.copy_(word_embeddings)
    answer_model.embedding.weight.data.copy_(word_embeddings)

    # Fix word embeddings
    context_model.embedding.weight.requires_grad = False
    question_model.embedding.weight.requires_grad = False
    answer_model.embedding.weight.requires_grad = False

    optimizer = optim.Adam(list([par for par in context_model.parameters() if par.requires_grad]) + \
          list([par for par in question_model.parameters() if par.requires_grad]) + \
          list([par for par in answer_model.parameters() if par.requires_grad]) + \
          list([par for par in utility_model.parameters() if par.requires_grad]))

    N_EPOCHS = 300
    train_data = prepare_data(train_data, vocab, 'train', args.cuda)
    dev_data = prepare_data(dev_data, vocab, 'dev', args.cuda)
    test_data = prepare_data(test_data, vocab, 'test', args.cuda)

    for epoch in range(N_EPOCHS):
        train_loss, train_acc = train_fn(context_model, question_model, answer_model, utility_model, \
                 train_data, optimizer, criterion, BATCH_SIZE)
        valid_loss, valid_acc = evaluate(context_model, question_model, answer_model, utility_model, \
                                                  dev_data, criterion, BATCH_SIZE)
        #valid_loss, valid_acc = evaluate(context_model, question_model, answer_model, utility_model, \
        #                                    test_data, criterion, BATCH_SIZE)
        print('Epoch %d: Train Loss: %.3f, Train Acc: %.3f, Val Loss: %.3f, Val Acc: %.3f' % (
            epoch, train_loss, train_acc, valid_loss, valid_acc))
def main():
    logger.info("Running %s" % ' '.join(sys.argv))

    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--data_dir",
        default="data/",
        type=str,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument(
        "--output_dir",
        default="checkpoints/predictor/",
        type=str,
        help=
        "The output directory where the model predictions and checkpoints will be written."
    )
    parser.add_argument(
        "--load_dir",
        type=str,
        help=
        "The output directory where the model checkpoints will be loaded during evaluation"
    )
    parser.add_argument('--load_step',
                        type=int,
                        default=0,
                        help="The checkpoint step to be loaded")
    parser.add_argument("--fact",
                        default="first",
                        choices=["first", "second"],
                        type=str,
                        help="Whether to put fact in front.")
    parser.add_argument("--test_set",
                        default="dev",
                        choices=[
                            "train", "dev", "test", "simple_test",
                            "complex_test", "small_test"
                        ],
                        help="Which test set is used for evaluation",
                        type=str)
    parser.add_argument("--train_batch_size",
                        default=18,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=18,
                        type=int,
                        help="Total batch size for eval.")
    ## Other parameters
    parser.add_argument(
        "--bert_model",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--task_name",
                        default="QQP",
                        type=str,
                        help="The name of the task to train.")
    parser.add_argument('--period', type=int, default=500)
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument(
        "--max_seq_length",
        default=256,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=20.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    args = parser.parse_args()
    pprint(vars(args))
    sys.stdout.flush()

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    processors = {
        "qqp": QqpProcessor,
    }

    output_modes = {
        "qqp": "classification",
    }

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')

    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)

    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    logger.info(
        "Datasets are loaded from {}\n Outputs will be saved to {}".format(
            args.data_dir, args.output_dir))
    # if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train:
    #     raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    output_mode = output_modes[task_name]

    label_list = processor.get_labels()
    num_labels = len(label_list)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_optimization_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        # train_examples=processor.get_dev_examples(args.data_dir,'test')
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size /
            args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size(
            )

    cache_dir = args.cache_dir if args.cache_dir else os.path.join(
        str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(
            args.local_rank))
    if args.load_dir:
        load_dir = args.load_dir
    else:
        load_dir = args.bert_model

    model = BertForSequenceClassification.from_pretrained(
        load_dir, cache_dir=cache_dir, num_labels=Constants.act_len)

    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )

        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    if args.do_train:
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]
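        # Weight decay of 0.01 is applied to all parameters except biases and
        # LayerNorm weights, the standard exclusions for BERT fine-tuning.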
        if args.fp16:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=args.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if args.loss_scale == 0:
                optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                optimizer = FP16_Optimizer(optimizer,
                                           static_loss_scale=args.loss_scale)
            warmup_linear = WarmupLinearSchedule(
                warmup=args.warmup_proportion,
                t_total=num_train_optimization_steps)

        else:
            optimizer = BertAdam(optimizer_grouped_parameters,
                                 lr=args.learning_rate,
                                 warmup=args.warmup_proportion,
                                 t_total=num_train_optimization_steps)

    global_step = 0
    tr_loss = 0
    best_F1 = 0
    output_dir = None  # ensures the eval branch below has a defined value even if no checkpoint was saved
    if args.do_train:
        train_features = convert_examples_to_features(train_examples,
                                                      label_list,
                                                      args.max_seq_length,
                                                      tokenizer, output_mode)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)

        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.float)

        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.train_batch_size)

        model.train()
        for epoch in range(int(args.num_train_epochs)):
            logger.info("Training epoch {} ...".format(epoch))
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(train_dataloader):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                # define a new function to compute loss values for both output_modes
                logits = model(input_ids, segment_ids, input_mask, labels=None)

                loss_fct = BCEWithLogitsLoss()

                loss = loss_fct(logits.view(-1, 1), label_ids.view(-1, 1))

                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                tr_loss += loss.item()

                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        # modify learning rate with special warm up BERT uses
                        # if args.fp16 is False, BertAdam is used that handles this automatically
                        lr_this_step = args.learning_rate * warmup_linear.get_lr(
                            global_step, args.warmup_proportion)
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr_this_step
                    optimizer.step()
                    optimizer.zero_grad()
                    model.zero_grad()
                    global_step += 1

                if (step + 1) % args.period == 0:
                    # Save a trained model, configuration and tokenizer
                    model_to_save = model.module if hasattr(
                        model, 'module') else model

                    # If we save using the predefined names, we can load using `from_pretrained`
                    model.eval()
                    torch.set_grad_enabled(False)  # turn off gradient tracking
                    precision, recall, F1 = evaluate(args, model, device,
                                                     processor, label_list,
                                                     num_labels, tokenizer,
                                                     output_mode)

                    if F1 > best_F1:
                        output_dir = os.path.join(
                            args.output_dir, 'pre_{}_recall_{}_F1_{}'.format(
                                precision, recall, F1))
                        if not os.path.exists(output_dir):
                            os.makedirs(output_dir)

                        output_model_file = os.path.join(
                            output_dir,
                            WEIGHTS_NAME,
                        )
                        output_config_file = os.path.join(
                            output_dir, CONFIG_NAME)

                        torch.save(model_to_save.state_dict(),
                                   output_model_file)
                        model_to_save.config.to_json_file(output_config_file)
                        tokenizer.save_vocabulary(output_dir)

                        best_F1 = F1

                    model.train()  # turn on train mode
                    torch.set_grad_enabled(True)  # start gradient tracking
                    tr_loss = 0

    # do eval before exit
    if args.do_eval:
        if not args.do_train:
            global_step = 0
            output_dir = None
        save_dir = output_dir if output_dir is not None else args.load_dir
        load_step = args.load_step

        if args.load_dir is not None:
            load_step = os.path.split(
                args.load_dir)[1].replace('save_step_', '')
            print("load_step = {}".format(load_step))

        F1 = evaluate(args, model, device, processor, label_list, num_labels,
                      tokenizer, output_mode)

        with open("test_result.txt", 'a') as f:
            print("load step: {} F1: {}".format(str(load_step), str(F1)),
                  file=f)
Example #51
0
def main():
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except:
        print("conf file not exists")
        sys.exit(1)
    try:
        seed = cf.get('Training', 'seed')
        seed = int(seed)
    except:
        seed = torch.cuda.initial_seed()
        cf.set('Training', 'seed', str(seed))
        cf.write(open(args.conf, 'w'))

    USE_CUDA = cf.getboolean("Training", "use_cuda")
    torch.manual_seed(seed)
    if USE_CUDA:
        torch.cuda.manual_seed(seed)

    logger = init_logger(os.path.join(args.log_dir, 'train_ctc_model.log'))

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = supported_rnn[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    rnn_param = {
        "rnn_input_size": rnn_input_size,
        "rnn_hidden_size": rnn_hidden_size,
        "rnn_layers": rnn_layers,
        "rnn_type": rnn_type,
        "bidirectional": bidirectional,
        "batch_norm": batch_norm
    }

    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    add_cnn = cf.getboolean('Model', 'add_cnn')

    cnn_param = {}
    layers = cf.getint('CNN', 'layers')
    channel = eval(cf.get('CNN', 'channel'))
    kernel_size = eval(cf.get('CNN', 'kernel_size'))
    stride = eval(cf.get('CNN', 'stride'))
    padding = eval(cf.get('CNN', 'padding'))
    pooling = eval(cf.get('CNN', 'pooling'))
    batch_norm = cf.getboolean('CNN', 'batch_norm')
    activation_function = supported_activate[cf.get('CNN',
                                                    'activation_function')]

    cnn_param['batch_norm'] = batch_norm
    cnn_param['activate_function'] = activation_function
    cnn_param["layer"] = []
    for layer in range(layers):
        layer_param = [
            channel[layer], kernel_size[layer], stride[layer], padding[layer]
        ]
        if pooling is not None:
            layer_param.append(pooling[layer])
        else:
            layer_param.append(None)
        cnn_param["layer"].append(layer_param)

    model = CTC_Model(rnn_param=rnn_param,
                      add_cnn=add_cnn,
                      cnn_param=cnn_param,
                      num_class=num_class,
                      drop_out=drop_out)

    for idx, m in enumerate(model.children()):
        print(idx, m)
        logger.info(str(idx) + "->" + str(m))

    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    mel = cf.getboolean('Data', 'mel')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = SpeechDataset(data_dir,
                                  data_set='train',
                                  feature_type=feature_type,
                                  out_type=out_type,
                                  n_feats=n_feats,
                                  mel=mel)
    dev_dataset = SpeechDataset(data_dir,
                                data_set="dev",
                                feature_type=feature_type,
                                out_type=out_type,
                                n_feats=n_feats,
                                mel=mel)

    if add_cnn:
        train_loader = SpeechCNNDataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4,
                                           pin_memory=False)
        dev_loader = SpeechCNNDataLoader(dev_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=4,
                                         pin_memory=False)
    else:
        train_loader = SpeechDataLoader(train_dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=4,
                                        pin_memory=False)
        dev_loader = SpeechDataLoader(dev_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=4,
                                      pin_memory=False)
    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2class, space_idx=-1, blank_index=0)

    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')

    params = {
        'num_epoches': num_epoches,
        'end_adjust_acc': end_adjust_acc,
        'mel': mel,
        'seed': seed,
        'decay': decay,
        'learning_rate': init_lr,
        'weight_decay': weight_decay,
        'batch_size': batch_size,
        'feature_type': feature_type,
        'n_feats': n_feats,
        'out_type': out_type
    }
    print(params)

    if USE_CUDA:
        model = model.cuda()

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=init_lr,
                                 weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom()
    if add_cnn:
        title = dataset + ' ' + feature_type + str(n_feats) + ' CNN_LSTM_CTC'
    else:
        title = dataset + ' ' + feature_type + str(n_feats) + ' LSTM_CTC'

    opts = [
        dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'),
        dict(title=title + " Loss on Dev", ylabel='DEV Loss', xlabel='Epoch'),
        dict(title=title + ' CER on DEV', ylabel='DEV CER', xlabel='Epoch')
    ]
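    # one visdom window handle per curve; created on the first epoch, then
    # updated in place on later epochs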
    viz_window = [None, None, None]

    count = 0
    learning_rate = init_lr
    loss_best = 1000
    loss_best_true = 1000
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    adjust_rate_count = 0
    acc_best = 0
    # initial snapshots so the rollback / best-model paths below never hit an
    # unbound variable, even if the first epochs fail to improve
    model_state = copy.deepcopy(model.state_dict())
    op_state = copy.deepcopy(optimizer.state_dict())
    best_model_state = copy.deepcopy(model.state_dict())
    best_op_state = copy.deepcopy(optimizer.state_dict())
    start_time = time.time()
    loss_results = []
    dev_loss_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1

        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay

        print("Start training epoch: %d, learning_rate: %.5f" %
              (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" %
                    (count, learning_rate))

        loss = train(model,
                     train_loader,
                     loss_fn,
                     optimizer,
                     logger,
                     add_cnn=add_cnn,
                     print_every=20,
                     USE_CUDA=USE_CUDA)
        loss_results.append(loss)
        acc, dev_loss = dev(model,
                            dev_loader,
                            loss_fn,
                            decoder,
                            logger,
                            add_cnn=add_cnn,
                            USE_CUDA=USE_CUDA)
        print("loss on dev set is %.4f" % dev_loss)
        logger.info("loss on dev set is %.4f" % dev_loss)
        dev_loss_results.append(dev_loss)
        dev_cer_results.append(acc)

        #adjust learning rate by dev_loss
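        # Scheme: a dev-loss improvement larger than end_adjust_acc resets the
        # patience counter and snapshots the weights; a dev loss within
        # end_adjust_acc of the best only increments the counter (still
        # snapshotting on a strict improvement); anything worse forces the
        # counter to 10. Hitting 10 decays the learning rate and rolls back to
        # the snapshot; after 8 decays training stops.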
        if dev_loss < (loss_best - end_adjust_acc):
            loss_best = dev_loss
            loss_best_true = dev_loss
            #acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (dev_loss < loss_best + end_adjust_acc):
            adjust_rate_count += 1
            if dev_loss < loss_best and dev_loss < loss_best_true:
                loss_best_true = dev_loss
                #acc_best = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 10

        if acc > acc_best:
            acc_best = acc
            best_model_state = copy.deepcopy(model.state_dict())
            best_op_state = copy.deepcopy(optimizer.state_dict())

        print("adjust_rate_count:" + str(adjust_rate_count))
        print('adjust_time:' + str(adjust_time))
        logger.info("adjust_rate_count:" + str(adjust_rate_count))
        logger.info('adjust_time:' + str(adjust_time))

        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            if loss_best > loss_best_true:
                loss_best = loss_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)

        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
              (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
                    (count, acc, time_used))

        x_axis = range(count)
        y_axis = [
            loss_results[0:count], dev_loss_results[0:count],
            dev_cer_results[0:count]
        ]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(
                    X=np.array(x_axis),
                    Y=np.array(y_axis[x]),
                    opts=opts[x],
                )
            else:
                viz.line(
                    X=np.array(x_axis),
                    Y=np.array(y_axis[x]),
                    win=viz_window[x],
                    update='replace',
                )

    print("End training, best dev loss is: %.4f, acc is: %.4f" %
          (loss_best, acc_best))
    logger.info("End training, best dev loss acc is: %.4f, acc is: %.4f" %
                (loss_best, acc_best))
    model.load_state_dict(best_model_state)
    optimizer.load_state_dict(best_op_state)
    best_path = os.path.join(args.log_dir,
                             'best_model' + '_dev' + str(acc_best) + '.pkl')
    cf.set('Model', 'model_file', best_path)
    cf.write(open(args.conf, 'w'))
    params['epoch'] = count
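    # the checkpoint below bundles the best weights, the optimizer state and
    # the training curves into a single file at best_path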

    torch.save(
        CTC_Model.save_package(model,
                               optimizer=optimizer,
                               epoch=params,
                               loss_results=loss_results,
                               dev_loss_results=dev_loss_results,
                               dev_cer_results=dev_cer_results), best_path)
#
# First, the needed imports.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from distutils.version import LooseVersion as LV
import os
from datetime import datetime

from transformers import ViTFeatureExtractor, ViTForImageClassification
from transformers import __version__ as transformers_version

torch.manual_seed(42)
import numpy as np

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('Using PyTorch version:', torch.__version__,
      'Transformers version:', transformers_version,
      'Device:', device)
assert(LV(torch.__version__) >= LV("1.0.0"))


# TensorBoard is a tool for visualizing progress during training. Although
# TensorBoard was created for TensorFlow, it can also be used with PyTorch.
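# A minimal sketch (an assumption added here, not part of the original
# notebook) of how TensorBoard logging is commonly wired up in PyTorch via
# torch.utils.tensorboard; the log-directory naming below is hypothetical.
from torch.utils.tensorboard import SummaryWriter

logdir = os.path.join(os.getcwd(), 'logs',
                      'vit-' + datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
print('TensorBoard log directory:', logdir)
writer = SummaryWriter(logdir)
writer.add_scalar('example/metric', 0.0, 0)  # add_scalar(tag, value, global_step), typically once per epoch
writer.close()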
Example #53
0
def _test_integration_multiclass(device, output_dict):

    rank = idist.get_rank()
    torch.manual_seed(12)

    def _test(metric_device, n_classes, labels=None):

        classification_report = ClassificationReport(device=metric_device,
                                                     output_dict=output_dict,
                                                     labels=labels)
        n_iters = 80
        s = 16
        offset = n_iters * s
        y_true = torch.randint(0,
                               n_classes,
                               size=(offset *
                                     idist.get_world_size(), )).to(device)
        y_preds = torch.rand(offset * idist.get_world_size(),
                             n_classes).to(device)

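        # update() returns this rank's contiguous slice of the shared tensors,
        # so every distributed process sees a disjoint chunk of the data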
        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, :],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset],
            )

        engine = Engine(update)

        classification_report.attach(engine, "cr")

        data = list(range(n_iters))
        engine.run(data=data)

        assert "cr" in engine.state.metrics
        res = engine.state.metrics["cr"]
        res2 = classification_report.compute()
        assert res == res2

        assert isinstance(res, dict if output_dict else str)
        if not output_dict:
            res = json.loads(res)

        from sklearn.metrics import classification_report as sklearn_classification_report

        sklearn_result = sklearn_classification_report(
            y_true.cpu().numpy(),
            torch.argmax(y_preds, dim=1).cpu().numpy(),
            output_dict=True)

        for i in range(n_classes):
            label_i = labels[i] if labels else str(i)
            assert pytest.approx(res[label_i]["precision"] == sklearn_result[
                str(i)]["precision"])
            assert pytest.approx(
                res[label_i]["f1-score"] == sklearn_result[str(i)]["f1-score"])
            assert pytest.approx(
                res[label_i]["recall"] == sklearn_result[str(i)]["recall"])
        assert pytest.approx(res["macro avg"]["precision"] ==
                             sklearn_result["macro avg"]["precision"])
        assert pytest.approx(res["macro avg"]["recall"] ==
                             sklearn_result["macro avg"]["recall"])
        assert pytest.approx(res["macro avg"]["f1-score"] ==
                             sklearn_result["macro avg"]["f1-score"])

    for _ in range(5):
        # check multiple random inputs, since exact random matches are rare
        metric_devices = ["cpu"]
        if device.type != "xla":
            metric_devices.append(idist.device())
        for metric_device in metric_devices:
            _test(metric_device, 2, ["label0", "label1"])
            _test(metric_device, 2)
            _test(metric_device, 3, ["label0", "label1", "label2"])
            _test(metric_device, 3)
            _test(metric_device, 4, ["label0", "label1", "label2", "label3"])
            _test(metric_device, 4)
Example #54
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--yaml-config', type=str, default='simg_bmi_regression_3.6.6.2_nfs.yaml')
    parser.add_argument('--run-train', type=str, default='True')
    parser.add_argument('--run-test', type=str, default='False')
    parser.add_argument('--run-grad-cam', type=str, default='False')
    parser.add_argument('--train-fold', type=int, default=0)
    args = parser.parse_args()

    SRC_ROOT = os.path.dirname(os.path.realpath(__file__)) + '/..'
    yaml_config = os.path.join(SRC_ROOT, f'src/yaml/{args.yaml_config}')
    logger.info(f'Read yaml file {yaml_config}')
    with open(yaml_config, 'r') as f:
        config = yaml.safe_load(f)

    out_folder = config['exp_dir']
    learning_rate = config['learning_rate']
    batch_size = config['batch_size']
    epoch_num = config['epoch_num']
    fold_num = config['fold_num']

    mkdir_p(out_folder)

    # load CUDA
    cuda = torch.cuda.is_available()
    print(f'cuda: {cuda}')
    # cuda = False
    torch.manual_seed(1)

    # Create data loader
    train_loader_list, valid_loader_list, test_loader_list = get_data_loader_cv(config)

    # Create trainer list
    performance_array = []
    for idx_fold in range(fold_num):
        # If train only one fold
        if args.train_fold != -1:
            # Only train on specified fold.
            if args.train_fold != idx_fold:
                continue

        # Create model
        model = create_model(config)
        if cuda:
            torch.cuda.manual_seed(1)
            model = model.cuda()

        # load optimizer
        optim = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))

        # Create trainer
        fold_out_folder = os.path.join(out_folder, f'fold_{idx_fold}')
        train_loader = train_loader_list[idx_fold]
        validate_loader = valid_loader_list[idx_fold]
        test_loader = test_loader_list[idx_fold]
        trainer_obj = Trainer(
            cuda,
            model,
            optimizer=optim,
            train_loader=train_loader,
            validate_loader=validate_loader,
            test_loader=test_loader,
            out=fold_out_folder,
            max_epoch=epoch_num,
            batch_size=batch_size,
            config=config
        )

        # Train
        trainer_obj.epoch = config['start_epoch']
        if args.run_train == 'True':
            trainer_obj.train_epoch()

        # Test
        if args.run_test == 'True':
            trainer_obj.run_test()
            performance_array.append(trainer_obj.test_performance)

        if args.run_grad_cam == 'True':
            trainer_obj.run_grad_cam()

    if args.run_test == 'True':
        mse_array = np.array([statics_dict['loss'] for statics_dict in performance_array])
        rmse_array = np.sqrt(mse_array)
        rmse_mean = np.mean(rmse_array)
        rmse_std = np.std(rmse_array)
        perf_str = f'RMSE {rmse_mean:.5f} ({rmse_std:.5f})\n'
        print('Performance of cross-validation:')
        print(perf_str)
        perf_file = os.path.join(out_folder, 'perf')
        with open(perf_file, 'w') as fv:
            fv.write(perf_str)
Example #55
0
def main():
    """
    Main function.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--img_size',
                        type=int,
                        default=28,
                        help="size of image (default: 28)")
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=14,
                        metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr',
                        type=float,
                        default=1.0,
                        metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run',
                        action='store_true',
                        default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument('--save_dir', default="experiments")
    parser.add_argument('--log_file', default="log.o")
    parser.add_argument('--ckpt_path')  # for compatibility
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1, 'pin_memory': True, 'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose([
        transforms.Resize(args.img_size),
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    dataset1 = datasets.MNIST('./data',
                              train=True,
                              download=True,
                              transform=transform)
    dataset2 = datasets.MNIST('./data', train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net(args.img_size).to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(
            model.state_dict(),
            os.path.join(args.save_dir, "mnist_cnn_%d.pt" % args.img_size))
Example #56
0
def _test_integration_multilabel(device, output_dict):

    rank = idist.get_rank()
    torch.manual_seed(12)

    def _test(metric_device, n_epochs, labels=None):

        classification_report = ClassificationReport(device=metric_device,
                                                     output_dict=output_dict,
                                                     is_multilabel=True)

        n_iters = 10
        s = 16
        n_classes = 7

        offset = n_iters * s
        y_true = torch.randint(0,
                               2,
                               size=(offset * idist.get_world_size(),
                                     n_classes, 6, 8)).to(device)
        y_preds = torch.randint(0,
                                2,
                                size=(offset * idist.get_world_size(),
                                      n_classes, 6, 8)).to(device)

        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset,
                        ...],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
            )

        engine = Engine(update)

        classification_report.attach(engine, "cr")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "cr" in engine.state.metrics
        res = engine.state.metrics["cr"]
        res2 = classification_report.compute()
        assert res == res2

        assert isinstance(res, dict if output_dict else str)
        if not output_dict:
            res = json.loads(res)

        np_y_preds = to_numpy_multilabel(y_preds)
        np_y_true = to_numpy_multilabel(y_true)

        from sklearn.metrics import classification_report as sklearn_classification_report

        sklearn_result = sklearn_classification_report(np_y_true,
                                                       np_y_preds,
                                                       output_dict=True)

        for i in range(n_classes):
            label_i = labels[i] if labels else str(i)
            assert pytest.approx(res[label_i]["precision"] == sklearn_result[
                str(i)]["precision"])
            assert pytest.approx(
                res[label_i]["f1-score"] == sklearn_result[str(i)]["f1-score"])
            assert pytest.approx(
                res[label_i]["recall"] == sklearn_result[str(i)]["recall"])
        assert pytest.approx(res["macro avg"]["precision"] ==
                             sklearn_result["macro avg"]["precision"])
        assert pytest.approx(res["macro avg"]["recall"] ==
                             sklearn_result["macro avg"]["recall"])
        assert pytest.approx(res["macro avg"]["f1-score"] ==
                             sklearn_result["macro avg"]["f1-score"])

    for _ in range(3):
        # check multiple random inputs, since exact random matches are rare
        metric_devices = ["cpu"]
        if device.type != "xla":
            metric_devices.append(idist.device())
        for metric_device in metric_devices:
            _test(metric_device, 1)
            _test(metric_device, 2)
            _test(metric_device, 1, ["0", "1", "2", "3", "4", "5", "6"])
            _test(metric_device, 2, ["0", "1", "2", "3", "4", "5", "6"])

from Agents.DQN.DQNAsynER import DQNAsynERMaster, SharedAdam
from Agents.Core.MLPNet import MultiLayerNetRegression
import json
import gym
from torch import optim
from copy import deepcopy
import torch
from Env.CustomEnv.MountainCarEnv import MountainCarEnvCustom
torch.manual_seed(1)
# first construct the neural network

config = dict()

config['trainStep'] = 2000
config['epsThreshold'] = 0.3
config['epsilon_start'] = 0.3
config['epsilon_final'] = 0.05
config['epsilon_decay'] = 200
config['targetNetUpdateStep'] = 100
config['memoryCapacity'] = 200000
config['trainBatchSize'] = 32
config['gamma'] = 0.9
config['learningRate'] = 0.0001
config['netGradClip'] = 1
config['logFlag'] = False
config['logFileName'] = ''
config['logFrequency'] = 50
config['netUpdateOption'] = 'doubleQ'
config['nStepForward'] = 3
Example #58
0
# @File : train_ALL_LSTM.py
# @Last Modify Time : 2018/07/19 22:35
# @Contact : bamtercelboo@{gmail.com, 163.com}

import os
import sys
import torch
import torch.autograd as autograd
import torch.nn.functional as F
import torch.nn.utils as utils
import torch.optim.lr_scheduler as lr_scheduler
import shutil
import random
import numpy as np
seed_num = 233
torch.manual_seed(seed_num)
random.seed(seed_num)


def train(train_iter, dev_iter, test_iter, model, args):
    if args.cuda:
        model.cuda()

    if args.Adam is True:
        print("Adam Training......")
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay)
    elif args.SGD is True:
        print("SGD Training.......")
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay,
                                    momentum=args.momentum_value)
    elif args.Adadelta is True:
Example #59
0
print(x.type())
print(x.item())
print(x.size())

print("\n<<< Vector >>>")
x = torch.FloatTensor([23, 24, 24.5, 26, 27.2, 23.0])
print(x.type())
print(x.data)
print(x.size())

print("\n<<< Matrix >>>")
np.random.seed(0)
data = np.random.randn(3, 3)
x = torch.from_numpy(data)
print(x.type())
print(x.data)
print(x.size())

print("\n<<< Some Operations >>>")
torch.manual_seed(0)
a = torch.rand(1, 2)
b = torch.rand(1, 2)
print("a : ", a.data)
print("b : ", b)

c = torch.add(a, b)
print("a+b : ", c)
d = torch.add(a, -b)  # subtraction via adding the negation
print("a-b : ", d)
e = torch.mul(a, b)
print("axb : ", e)
Example #60
0
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--workers', type=int, help='number of data loading workers', default=1)

	parser.add_argument('--batchSize', type=int, default=32, help='input batch size')
	parser.add_argument('--nz', type=int, default=8, help='size of the latent z vector')
	parser.add_argument('--ngf', type=int, default=64, help='number of generator feature maps')
	parser.add_argument('--ndf', type=int, default=64, help='number of discriminator feature maps')
	parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
	parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
	parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
	parser.add_argument('--outf', default='./out/', help='folder to output images and model checkpoints')
	parser.add_argument('--manualSeed', type=int, help='manual seed')

	INFO = 'X'

	opt = parser.parse_args()
	print(opt)

	ngpu = 1


	if opt.manualSeed is None:
		opt.manualSeed = random.randint(1, 10000)
	print("Random Seed: ", opt.manualSeed)
	random.seed(opt.manualSeed)
	torch.manual_seed(opt.manualSeed)

	if (torch.cuda.is_available() and ngpu > 0):
		torch.cuda.manual_seed(opt.manualSeed)

	filename = '_'.join(
		[INFO, str(opt.manualSeed), str(opt.batchSize), str(opt.niter), str(opt.lr), str(opt.nz), str(opt.ngf),  str(opt.ndf)])

	try:
		os.makedirs(filename)
	except OSError:
		pass

	dataset = ds.MimicData()
	assert dataset
	dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize, shuffle=True, num_workers=int(opt.workers))

	device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
	print(device)

	nz = int(opt.nz)
	ngf = opt.ngf
	ndf = opt.ndf
	nc = 1

	# custom weights initialization called on netG and netD
	def weights_init(m):
		classname = m.__class__.__name__
		if classname.find('Conv') != -1:
			nn.init.normal_(m.weight.data, 0, 0.02)
		elif classname.find('BatchNorm') != -1:
			nn.init.normal_(m.weight.data, 1, 0.02)
			nn.init.constant_(m.bias.data, 0)

	class Generator(nn.Module):
		def __init__(self):
			super(Generator, self).__init__()
			self.main = nn.Sequential(
				# input is Z, going into a convolution
				nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
				nn.BatchNorm2d(ngf * 8),
				nn.ReLU(True),
				# state size. (ngf*8) x 4 x 4
				nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
				nn.BatchNorm2d(ngf * 4),
				nn.ReLU(True),
				# state size. (ngf*4) x 8 x 8
				nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
				nn.BatchNorm2d(ngf * 2),
				nn.ReLU(True),
				# state size. (ngf*2) x 16 x 16
				nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
				nn.BatchNorm2d(ngf),
				nn.ReLU(True),
				# state size. (ngf) x 32 x 32
				nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
				nn.Tanh()
				# state size. (nc) x 64 x 64
			)

		def forward(self, input):
			output = self.main(input)
			return output

	netG = Generator().to(device)

	# Handle multi-gpu if desired
	if (device.type == 'cuda') and (ngpu > 1):
		netG = nn.DataParallel(netG, list(range(ngpu)))

	netG.apply(weights_init)

	class Discriminator(nn.Module):
		def __init__(self):
			super(Discriminator, self).__init__()
			self.main = nn.Sequential(
				# input is (nc) x 64 x 64
				nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
				nn.LeakyReLU(0.2, inplace=True),
				# state size. (ndf) x 32 x 32
				nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
				nn.BatchNorm2d(ndf * 2),
				nn.LeakyReLU(0.2, inplace=True),
				# state size. (ndf*2) x 16 x 16
				nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
				nn.BatchNorm2d(ndf * 4),
				nn.LeakyReLU(0.2, inplace=True),
				# state size. (ndf*4) x 8 x 8
				nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
				nn.BatchNorm2d(ndf * 8),
				nn.LeakyReLU(0.2, inplace=True),
				# state size. (ndf*8) x 4 x 4
				nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
				nn.Sigmoid()
			)

		def forward(self, input):
			output = self.main(input)
			return output.view(-1, 1).squeeze(1)

	netD = Discriminator().to(device)

	if (device.type == 'cuda') and (ngpu > 1):
		netD = nn.DataParallel(netD, list(range(ngpu)))

	netD.apply(weights_init)

	criterion = nn.BCELoss()

	fixed_noise = torch.randn(opt.batchSize, nz, 1, 1, device=device)
	real_label = 1
	fake_label = 0

	# setup optimizer
	optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
	optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

	ldl = []
	lgl = []
	dxl = []
	dgz1l = []
	dgz2l = []

	for epoch in range(opt.niter):

		eldl = []
		elgl = []
		edxl = []
		edgz1l = []
		edgz2l = []
		for i, data in enumerate(dataloader, 0):

			data = data[2]
			data.unsqueeze_(1)
			data.add_(-0.5).mul_(2)
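			# in-place shift/scale: x -> 2 * (x - 0.5), i.e. [0, 1] maps onto the
			# generator's Tanh output range [-1, 1]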

			# train with real
			netD.zero_grad()
			real_cpu = data.to(device)
			batch_size = real_cpu.size(0)
			label = torch.full((batch_size,), real_label, dtype=torch.float, device=device)

			output = netD(real_cpu)
			errD_real = criterion(output, label)
			errD_real.backward()
			D_x = output.mean().item()

			# train with fake
			noise = torch.randn(batch_size, nz, 1, 1, device=device)
			fake = netG(noise)

			label.fill_(fake_label)
			output = netD(fake.detach())
			errD_fake = criterion(output, label)
			errD_fake.backward()
			D_G_z1 = output.mean().item()
			errD = errD_real + errD_fake
			optimizerD.step()

			# Update G network: maximize log(D(G(z)))
			netG.zero_grad()
			label.fill_(real_label)  # fake labels are real for generator cost
			output = netD(fake)
			errG = criterion(output, label)
			errG.backward()
			D_G_z2 = output.mean().item()
			optimizerG.step()

			eldl.append(errD.item())
			elgl.append(errG.item())
			edxl.append(D_x)
			edgz1l.append(D_G_z1)
			edgz2l.append(D_G_z2)


			if i % 100 == 0:
				print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
					  % (epoch, opt.niter, i, len(dataloader),
						 errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
				vutils.save_image(real_cpu,
						'%s/real_samples.png' % filename,
						normalize=True,pad_value=0.5)
				fake = netG(fixed_noise)
				fake = fake.detach().gt(0)
				vutils.save_image(fake,
						'%s/fake_samples_epoch_%03d.png' % (filename, epoch),
						normalize=True,pad_value=0.5)
		ldl.append(np.mean(eldl))
		lgl.append(np.mean(elgl))
		dxl.append(np.mean(edxl))
		dgz1l.append(np.mean(edgz1l))
		dgz2l.append(np.mean(edgz2l))
		plot_gan(ldl, lgl, dxl, dgz1l, dgz2l, filename)