def nmt_test(model_path_pruned, DATA_PATH, GPU_ID, translate_param1_path, translate_param2_path, group_dict):
    cuda.set_device(GPU_ID)
    valid_data = torch.load(DATA_PATH + 'len50_pywmt14.valid.pt')
    fields = onmt.IO.load_fields(torch.load(DATA_PATH + 'len50_pywmt14.vocab.pt'))
    valid_data.fields = fields
    checkpoint = torch.load(model_path_pruned, map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']
    with cuda.device(GPU_ID):
        ref_model = onmt.ModelConstructor.make_base_model(model_opt, fields, True, checkpoint)
        ref_model.eval()
        ref_model.generator.eval()
        masked_model = MaskedModel(ref_model, group_dict, cuda.current_device(), cuda.current_device())  # ref_model is at current_device, no copy will happen
    translate_opt, translate_dummy_opt = translate_opt_initialize(translate_param1_path, translate_param2_path, DATA_PATH, model_path_pruned, GPU_ID)
    translator = init_translate_model(translate_opt, translate_dummy_opt)
    del translator.model
    translator.model = ref_model
    tt = open(translate_opt.tgt, 'r')
    references = [[t] for t in tt]

    translate_data = onmt.IO.ONMTDataset(
        translate_opt.src, translate_opt.tgt, fields,
        use_filter_pred=False)
    prune_data = onmt.IO.OrderedIterator(
        dataset=translate_data, device=GPU_ID,
        batch_size=1, train=False, sort=False,
        shuffle=False)

    sparsity = masked_model.get_sparsity()
    total_param = masked_model.total_parameters_of_pretrain()

    tmp_fit1 = evaluate(masked_model, valid_data, fields)
    tmp_fit2 = evaluate_trans(translator, references, prune_data, translate_data)
    return total_param, sparsity, tmp_fit1, tmp_fit2
Example #2
def main():

    valid_data = torch.load(TRAIN_DATA + '.valid.pt')
    fields = onmt.IO.load_fields(torch.load(TRAIN_DATA + '.vocab.pt'))
    valid_data.fields = fields  # we need to clear this assignment relation if we want to transfer valid among threads

    if GPU_ID == 0 or GPU_ID == 1:
        cuda.set_device(GPU_ID)
        with cuda.device(GPU_ID):
            # '/fl/deepModels/tmp/rnnsearch_tmp.pt','/fl/deepModels/tmp/loungnet_tmp.pt'
            checkpoint_path = '/fl/deepModels/tmp/rnnsearch_tmp.pt'
            checkpoint = torch.load(checkpoint_path,
                                    map_location=lambda storage, loc: storage)
            model_opt = checkpoint['opt']
            ref_model = onmt.ModelConstructor.make_base_model(
                model_opt, fields, True, checkpoint)
            ref_model.eval()
            ref_model.generator.eval()
            masked_model = MaskedModel(
                ref_model, group_dict, cuda.current_device(),
                cuda.current_device(
                ))  # ref_model is at current_device, no copy will happen

        # train data loading

        translate_opt, translate_dummy_opt = translate_opt_initialize(
            '/fl/NMTSWPO/workspace/opennmt_translate_opt.pt',
            '/fl/NMTSWPO/workspace/opennmt_translate_dummy_opt.pt')
        translator = init_translate_model(translate_opt, translate_dummy_opt)
        del translator.model
        translator.model = masked_model
        tt = open(translate_opt.tgt, 'r', encoding='utf-8')
        references = [[t] for t in tt]

        p = 0.3
        translate_data = onmt.IO.ONMTDataset(translate_opt.src,
                                             translate_opt.tgt,
                                             fields,
                                             use_filter_pred=False)
        prune_data = onmt.IO.OrderedIterator(dataset=translate_data,
                                             device=GPU_ID,
                                             batch_size=1,
                                             train=False,
                                             sort=False,
                                             shuffle=False)
        tmp_crate = len(masked_model.group_name_list) * [p]

        masked_model.change_mask(tmp_crate, apply_MP_on_mask)
        masked_model.apply_mask()

        tmp_fit = evaluate_trans(translator, references, prune_data,
                                 translate_data)
        logger.scalar_summary('test_bleu', tmp_fit[1] * 100, int(p * 100))

        logger.scalar_summary('test_ppl', tmp_fit[0], int(p * 100))

        print('percentage %s => bleu (%.4f), ppl (%.4f)' %
              (p * 100, tmp_fit[1] * 100, tmp_fit[0]))
Example #3
def train(net, loader, ep, scheduler=None, writer=None):
    global n_iter
    if scheduler:
        scheduler.step()

    net.train()
    loss_all, norm_all = [], []
    train_iter = tqdm(loader)
    for images, labels in train_iter:
        n_iter += 1

        images, labels = images.cuda(), labels.cuda()
        embedding = net(images)
        loss = criterion(embedding, labels)
        loss_all.append(loss.item())

        if writer:
            writer.add_scalar('loss/train', loss.item(), n_iter)
        print(cuda.memory_allocated(cuda.current_device()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_iter.set_description("[Train][Epoch %d] Loss: %.5f" % (ep, loss.item()))
    print('[Epoch %d] Loss: %.5f\n' % (ep, torch.Tensor(loss_all).mean()))
Example #4
def check_devices():
    for i in range(device_count()):
        print("Found device {}:".format(i), get_device_name(i))
    if device_count() == 0:
        print("No GPU device found")
    else:
        print("Current cuda device is", get_device_name(current_device()))
Example #5
def eval(net, loader, ep):
    K = [1, 10, 100, 1000]
    net.eval()
    test_iter = tqdm(loader)
    embeddings_all, labels_all = [], []

    test_iter.set_description("[Eval][Epoch %d]" % ep)
    with torch.no_grad():
        for images, labels in test_iter:
            images, labels = images.cuda(), labels.cuda()
            embedding = net(images)
            embeddings_all.append(embedding.data)
            labels_all.append(labels.data)
            print(cuda.memory_allocated(cuda.current_device()))

        embeddings_all = torch.cat(embeddings_all).cpu()
        labels_all = torch.cat(labels_all).cpu()
        rec = recall(embeddings_all, labels_all, K=K)

        print("Embedding Size: %d" % len(embeddings_all))
        print(labels_all.sum())

        for k, r in zip(K, rec):
            print('[Epoch %d] Recall@%d: [%.4f]\n' % (ep, k, 100 * r))

    return rec[0]
Example #6
def which_processor():
    """Check if fastai/torch is using GPU or CPU"""
    if is_available():
        device_nr = current_device()
        print(f"Fast.ai (Torch) is using GPU: {get_device_name(device_nr)}")
    else:
        print("Cuda is not available. Fast.ai/Torch is using CPU")
Example #7
    def test_sequential_move_to_cuda_via_to(self):
        """Test moving AnalogSequential to cuda (from CPU), using ``.to()``."""
        if not cuda.is_compiled():
            raise SkipTest('not compiled with CUDA support')

        # Map the original tile classes to the expected ones after `cuda()`.
        tile_classes = {
            tiles.AnalogTile: tiles.CudaAnalogTile,
            tiles.CudaAnalogTile: tiles.CudaAnalogTile
        }

        layer = self.get_layer()
        expected_class = tile_classes[layer.analog_tile.tile.__class__]
        expected_device = device('cuda', current_device())

        # Create a container and move to cuda.
        model = AnalogSequential(layer)
        model.to(device('cuda'))

        analog_tile = layer.analog_tile
        self.assertEqual(analog_tile.device, expected_device)
        self.assertEqual(analog_tile.get_analog_ctx().data.device,
                         expected_device)
        if analog_tile.shared_weights is not None:
            self.assertEqual(analog_tile.shared_weights.data.device,
                             expected_device)
            self.assertEqual(analog_tile.shared_weights.data.size()[0],
                             analog_tile.tile.get_x_size())
            self.assertEqual(analog_tile.shared_weights.data.size()[1],
                             analog_tile.tile.get_d_size())

        # Assert the tile has been moved to cuda.
        self.assertIsInstance(layer.analog_tile.tile, expected_class)
Example #8
def get_device_id(self) -> Union[str, torch.device]:
    if cuda.is_available():
        device_id = self.non_default_device_to_use or cuda.current_device()
        return torch.device('cuda', device_id)
    elif self.fallback_to_cpu:
        return "cpu"
    else:
        raise CudaNotAvailable("Cuda not available")
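
# Usage sketch (not part of the original example): a standalone variant of the
# pattern above. `pick_device`, its `preferred_id` parameter and the plain
# RuntimeError are illustrative assumptions; the original relies on instance
# attributes and a custom CudaNotAvailable exception.
from typing import Optional, Union

import torch
from torch import cuda

def pick_device(preferred_id: Optional[int] = None,
                fallback_to_cpu: bool = True) -> Union[str, torch.device]:
    # Prefer an explicitly requested GPU index, else whatever CUDA reports as current.
    if cuda.is_available():
        device_id = preferred_id if preferred_id is not None else cuda.current_device()
        return torch.device('cuda', device_id)
    if fallback_to_cpu:
        return "cpu"
    raise RuntimeError("CUDA not available")

x = torch.zeros(2, 3, device=pick_device())  # accepts both 'cpu' and torch.device('cuda', i)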
Example #9
def get_gpu_statistics(self):
    id = cuda.current_device()
    print("Max memory allocated on GPU %d: %d bytes" %
          (id, cuda.max_memory_allocated(id)))
    print("Max memory cached on GPU %d: %d bytes" %
          (id, cuda.max_memory_cached(id)))
    print("Current memory allocated on GPU %d: %d bytes" %
          (id, cuda.memory_allocated(id)))
    print("Current memory cached on GPU %d: %d bytes" %
          (id, cuda.memory_cached(id)))
def get_device(force_cpu: bool) \
        -> Tuple[str, str]:
    """Gets the available device.

    :param force_cpu: Force CPU usage?
    :type force_cpu: bool
    :return: Device and device name.
    :rtype: str, str
    """
    return ('cuda', cuda.get_device_name(cuda.current_device())) \
        if cuda.is_available() and not force_cpu else \
        ('cpu', processor())
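
# Usage sketch (not part of the original): how the tuple returned by get_device
# might be consumed. Assumes `torch` is importable alongside the `cuda` and
# `processor` names the function above relies on; the tensor is illustrative.
import torch

device, device_name = get_device(force_cpu=False)
print(f'Using {device} ({device_name})')
x = torch.randn(4, 4, device=device)  # the returned string works directly as a torch device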
    def enable_GPU(self):
        # Get the GPU device name.
        device_name = tf.test.gpu_device_name()

        # The device name should look like the following:
        if device_name == '/device:GPU:0':
            print('Found GPU at: {}'.format(device_name))
        else:
            raise SystemError('GPU device not found')
        # From torch:
        device = cuda.current_device()
        return device
Example #12
def device_info(the_device: str) \
        -> None:
    """Prints an informative message about the device that we are using.

    :param the_device: The device.
    :type the_device: str
    """
    from torch.cuda import get_device_name, current_device
    from platform import processor
    actual_device = get_device_name(current_device()) \
        if the_device.startswith('cuda') else processor()
    cmd_msg(f'Using device: `{actual_device}`.')
Example #13
    def __init__(self, source_tile: AnalogTile):
        if not cuda.is_compiled():
            raise CudaError('aihwkit has not been compiled with CUDA support')

        # Create a new instance of the rpu config.
        new_rpu_config = deepcopy(source_tile.rpu_config)

        # Create the tile, replacing the simulator tile.
        super().__init__(source_tile.out_size, source_tile.in_size,
                         new_rpu_config, source_tile.bias,
                         source_tile.in_trans, source_tile.out_trans)

        self.cuda(current_device())
Example #14
    def __init__(self, source_tile: FloatingPointTile):
        if not cuda.is_compiled():
            raise CudaError('aihwkit has not been compiled with CUDA support')

        # Create a new instance of the rpu config.
        new_rpu_config = deepcopy(source_tile.rpu_config)

        # Create the tile, replacing the simulator tile.
        super().__init__(source_tile.out_size, source_tile.in_size, new_rpu_config,
                         source_tile.bias, source_tile.in_trans, source_tile.out_trans)
        self.tile = tiles.CudaFloatingPointTile(source_tile.tile)

        # Set the cuda properties
        self.stream = current_stream()
        self.device = torch_device(current_device())
Example #15
    def __init__(self,
                 out_size: int,
                 in_size: int,
                 resistive_device: Optional[BaseResistiveDevice] = None,
                 bias: bool = False,
                 in_trans: bool = False,
                 out_trans: bool = False):
        if not cuda.is_compiled():
            raise RuntimeError(
                'aihwkit has not been compiled with CUDA support')
        super().__init__(out_size, in_size, resistive_device, bias, in_trans,
                         out_trans)

        self.tile = tiles.CudaAnalogTile(self.tile)
        self.stream = current_stream()
        self.device = torch_device(current_device())
Example #16
def make_loss_compute(model, tgt_vocab, dataset, gpu_id=None, copy_attn=False, copy_attn_force=False):
    """
    This returns user-defined LossCompute object, which is used to
    compute loss in train/validate process. You can implement your
    own *LossCompute class, by subclassing LossComputeBase.
    """
    if copy_attn:
        compute = onmt.modules.CopyGeneratorLossCompute(
            model.generator, tgt_vocab, dataset, copy_attn_force)
    else:
        compute = onmt.Loss.NMTLossCompute(model.generator, tgt_vocab)

    if gpu_id is None:
        gpu_id = cuda.current_device()
    compute.cuda(gpu_id)

    return compute
Example #17
    def __init__(self, source_tile: AnalogTile):
        if not cuda.is_compiled():
            raise RuntimeError(
                'aihwkit has not been compiled with CUDA support')

        # Create a new instance of the resistive device.
        new_resistive_device = deepcopy(source_tile.resistive_device)

        # Create the tile, replacing the simulator tile.
        super().__init__(source_tile.out_size, source_tile.in_size,
                         new_resistive_device, source_tile.bias,
                         source_tile.in_trans, source_tile.out_trans)
        self.tile = tiles.CudaAnalogTile(source_tile.tile)

        # Set the cuda properties
        self.stream = current_stream()
        self.device = torch_device(current_device())
def config_cuda(use_cuda):
    if not use_cuda:
        print('Using cpu')
        torch.device('cpu')
        return 'cpu'
    elif not cuda.is_available():
        print('Cuda not found, using cpu')
        torch.device('cpu')
        return 'cpu'
    print('Configuring cuda...')
    torch.device('cuda')
    cuda.set_device(0)
    current_dev = cuda.current_device()
    current_dev_name = cuda.get_device_name(current_dev)
    current_dev_specs = cuda.get_device_properties(current_dev)

    print(f'Current Device: {current_dev}')
    print(f'Current Device Name: {current_dev_name}')
    print(f'Current Device Specs: {current_dev_specs}')
    print()

    return 'cuda'
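
# Usage sketch (not part of the original): one possible way to consume config_cuda.
# The linear layer and batch below are hypothetical placeholders.
import torch

device = torch.device(config_cuda(use_cuda=True))
model = torch.nn.Linear(8, 2).to(device)   # move a (hypothetical) model to the selected device
batch = torch.randn(16, 8, device=device)
output = model(batch)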
Example #19
    def test_save_with_cuda(self):
        """Whether model is correctly reconstructed after saving"""
        if not cuda.is_compiled():
            raise SkipTest('not compiled with CUDA support')

        # Map the original tile classes to the expected ones after `cuda()`.
        tile_classes = {
            tiles.AnalogTile: tiles.CudaAnalogTile,
            tiles.CudaAnalogTile: tiles.CudaAnalogTile
        }

        layer = self.get_layer()
        model = AnalogSequential(layer)
        model.cuda()
        with TemporaryFile() as file:
            save(model.state_dict(), file)
            # Create a new model and load its state dict.
            file.seek(0)
            checkpoint = load(file)
        model.load_state_dict(checkpoint)

        expected_device = device('cuda', current_device())
        expected_class = tile_classes[layer.analog_tile.tile.__class__]

        analog_tile = model[0].analog_tile
        self.assertEqual(analog_tile.device, expected_device)
        self.assertEqual(analog_tile.get_analog_ctx().data.device,
                         expected_device)
        if analog_tile.shared_weights is not None:
            self.assertEqual(analog_tile.shared_weights.data.device,
                             expected_device)
            self.assertEqual(analog_tile.shared_weights.data.size()[0],
                             analog_tile.tile.get_x_size())
            self.assertEqual(analog_tile.shared_weights.data.size()[1],
                             analog_tile.tile.get_d_size())

        # Assert the tile has been moved to cuda.
        self.assertIsInstance(layer.analog_tile.tile, expected_class)
Example #20
    def __init__(self, model, use_cuda=None, model_name='nnTrainer_model'):

        # Basics
        super(nnTrainer, self).__init__()
        self.model = model
        self.model_name = model_name.split('.')[0]
        self.results_path = 'results'
        if not os.path.exists(self.results_path):
            os.makedirs(self.results_path)

        # Use CUDA?
        self.use_cuda = use_cuda if (
            use_cuda is not None and cuda.is_available()) else cuda.is_available()
        self.device = 'cpu' if (not self.use_cuda) else (
            'cuda:' + str(cuda.current_device()))
        self.device = torch.device(self.device)
        clog('Model CUDA:', self.use_cuda, '| Device:', self.device)

        # Current loss and loss history
        self.train_loss = 0
        self.valid_loss = 0
        self.train_loss_hist = []
        self.valid_loss_hist = []
Example #21
def make_sequence_video(cfg):
    with open(cfg) as fd:
        data_specs = json.load(fd)

    temp_size = data_specs['temp_size']
    num_of_temp_features = data_specs['temp_features']
    m = load_model(data_specs['model_path'],
                   DOTNetCNN.name(),
                   # ToNameNet.name(),
                   "model_params.json")
    use_gpu = torch_cuda.is_available()
    device = torch_device(torch_cuda.current_device()) if use_gpu else torch_device("cpu")
    m.to(device)

    r = get_images_classes(
        data_specs['images_path'],
        data_specs['info_dict'],
        data_specs['class_of_interest']
    )
    train_d, val_d, test_d = split_data(
        data_specs['positive_ev_path'],
        r,
        window_size=temp_size,
        future_size=0,
        shuffle=False
    )
    full_df = pd.concat([train_d, val_d, test_d], ignore_index=True)

    info_path = data_specs["info_path"]
    ds = PixelLevelDs(
        full_df,
        info_path=info_path,
        add_polarity="neg" if num_of_temp_features > 1 else "",
    )
    # make_video(m, ds)
    make_non_repeating_video(m, ds)
def main():

    total_times = 100
    run_times = 0
    init_threshold = ...
    start_t = time.time()

    valid_data = torch.load(TRAIN_DATA + '.valid.pt')
    fields = onmt.IO.load_fields(torch.load(TRAIN_DATA + '.vocab.pt'))
    # fields = onmt.IO.load_fields_from_vocab(torch.load(TRAIN_DATA + '.vocab.pt'))
    valid_data.fields = fields  # we need to clear this assignment relation if we want to transfer valid among threads

    checkpoint = torch.load(weights, map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']
    # masked_models = []
    with cuda.device(GPU_ID):
        ref_model = onmt.ModelConstructor.make_base_model(
            model_opt, fields, True, checkpoint)
        ref_model.eval()
        ref_model.generator.eval()
        masked_model = MaskedModel(
            ref_model, group_dict, cuda.current_device(), cuda.current_device(
            ))  # ref_model is at current_device, no copy will happen
        # masked_models.append(masked_model)
    train_opt, _, _ = opt_initialize(checkpoint, 'opennmt_translate_opt.pt',
                                     'opennmt_translate_dummy_opt.pt')
    if GPU_ID:
        cuda.set_device(GPU_ID)

    # we only need the original (unpruned) accuracy
    acc_of_no_prune = 0
    get_acc_of_no_prune = False
    print(time_now(), "start while")
    while run_times < total_times:
        print("-----------------------------------------")
        print("-----------------------------------------")
        print("start Iteration ", run_times)

        print("----------------test model for masked_model------")
        masked_model.make_evaluable()
        tmp_fit = evaluate(masked_model, valid_data, fields)
        print('acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))
        model_sparsity = masked_model.get_sparsity()
        print('Sparsity: {}'.format(model_sparsity))

        # init threshold
        best_threshold = 0
        itr_time = time.time()

        xxx = np.arange(0., 1, 0.01)
        print(time_now(), "start testing pruning")
        masked_model.make_evaluable()
        for i in range(len(xxx)):
            # best_threshold = 0.2
            # break
            tmp_crate = len(masked_model.group_name_list) * [xxx[i]]
            masked_model.change_mask(tmp_crate, apply_MP_on_mask)
            masked_model.apply_mask()
            tmp_fit = evaluate(masked_model, valid_data, fields)

            print('percentage %s => acc (%.4f), ppl (%.4f)' %
                  (xxx[i] * 100, tmp_fit[1], tmp_fit[0]))
            if i == 0 and not get_acc_of_no_prune:
                acc_of_no_prune = tmp_fit[1]
                acc_of_no_prune = int(acc_of_no_prune * 10) / 10
                get_acc_of_no_prune = True
            elif acc_of_no_prune - tmp_fit[1] > acc_percent_prune:
                best_threshold = xxx[i] - 0.01
                break
        # -------------------------------------------------
        # Start writing
        # prune again
        print(time_now(), " init accuracy of model:", acc_of_no_prune)
        print("accuracy constraint:", acc_percent_prune)
        print("-------test------------:", get_acc_of_no_prune)
        print(time_now(), " apply pruning with threshold:", best_threshold)
        tmp_crate = len(masked_model.group_name_list) * [best_threshold]
        masked_model.change_mask(tmp_crate, apply_MP_on_mask)
        masked_model.apply_mask()

        # print information
        tmp_fit = evaluate(masked_model, valid_data, fields)
        print('percentage %s => acc (%.4f), ppl (%.4f)' %
              (best_threshold * 100, tmp_fit[1], tmp_fit[0]))
        model_sparsity = masked_model.get_sparsity()
        print('Sparsity: {}'.format(model_sparsity))

        #--------------- start retraining --------------
        # first store model
        print(time_now(), "start saving model")
        _, saved_model = update_checkpoint(checkpoint, masked_model, run_times,
                                           acc_percent_prune)
        print(time_now(), "finish saving model:", saved_model)

        model_for_train = masked_model
        pretrained_leaf_dict = model_for_train.make_trainable()
        optim = build_optim(model_for_train.masked_model, checkpoint,
                            train_opt, pretrained_leaf_dict)
        print("finish building optim")

        print(time_now(), "start loading data for retraining")
        train = torch.load(train_opt.data + '.train.pt')
        valid = torch.load(train_opt.data + '.valid.pt')
        train_fields = load_fields(train, valid, checkpoint, train_opt)
        print(time_now(), "finish data loading")

        recovered = train_model(model_for_train, train, valid, train_fields,
                                optim, train_opt, run_times, acc_of_no_prune)
        print(time_now(), "finish retraining ")
        if not recovered:
            exit()
        else:
            print("------------Accuracy recorverd!--------------------")
            print("recovered accuracy:", acc_of_no_prune)

        masked_model = MaskedModel(model_for_train.masked_model, group_dict,
                                   cuda.current_device(),
                                   cuda.current_device())

        #-------------------------------------------------
        print('------------- save checkpoint ---------------')
        _, saved_model = update_checkpoint(checkpoint,
                                           model_for_train,
                                           run_times,
                                           acc_percent_prune,
                                           t=True)
        print(time_now(), ' saving model:', saved_model)
        print("-------------print evaluation info ---------------")
        model_for_train.make_evaluable()
        tmp_fit = evaluate(model_for_train, valid_data, fields)
        print('acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))
        model_sparsity = model_for_train.get_sparsity()
        print('Sparsity: {}'.format(model_sparsity))
        print("----------------test model for masked_model------")
        masked_model.make_evaluable()
        tmp_fit = evaluate(masked_model, valid_data, fields)
        print('acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))
        model_sparsity = masked_model.get_sparsity()
        print('Sparsity: {}'.format(model_sparsity))
        #--------------------------------------------------
        print("BLEU evaluation:")
        translate_opt, translate_dummy_opt = translate_opt_initialize(
            'opennmt_translate_opt.pt', 'opennmt_translate_dummy_opt.pt')
        translator = init_translate_model(translate_opt, translate_dummy_opt)
        del translator.model
        translator.model = model_for_train.masked_model
        tt = open(translate_opt.tgt, 'r')
        references = [[t] for t in tt]
        translate_data = onmt.IO.ONMTDataset(translate_opt.src,
                                             translate_opt.tgt,
                                             fields,
                                             use_filter_pred=False)
        prune_data = onmt.IO.OrderedIterator(dataset=translate_data,
                                             device=GPU_ID,
                                             batch_size=1,
                                             train=False,
                                             sort=False,
                                             shuffle=False)
        tmp_fit2 = evaluate_trans(translator, references, prune_data,
                                  translate_data)
        print('Finished => bleu (%.4f), ppl (%.4f)' %
              (tmp_fit2[1] * 100, tmp_fit2[0]))
        #--------------------------------------------------

        run_times += 1
Example #23
import torch
from torch import cuda

import onmt
import onmt.io
import onmt.Models
import onmt.ModelConstructor
import onmt.modules
from onmt.Utils import use_gpu
import opts

import argparse
import glob

print(torch.cuda.is_available())
print(cuda.device_count())
print(cuda.current_device())

parser = argparse.ArgumentParser(
    description='train.py',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

# opts.py
opts.add_md_help_argument(parser)
opts.model_opts(parser)
opts.train_opts(parser)

opt = parser.parse_args()
if opt.word_vec_size != -1:
    opt.src_word_vec_size = opt.word_vec_size
    opt.tgt_word_vec_size = opt.word_vec_size
Example #24
import tensorflow as tf
hello = tf.constant("hello TensorFlow!")
sess = tf.Session()
sess.run(hello)

# find PyTorch packages
import pkg_resources
l = [d for d in pkg_resources.working_set  if 'pytorch' in str(d)]
print(l)

# confirm PyTorch sees the GPU
from torch import cuda
import torch
print('PyTorch version', torch.__version__)
print('PyTorch cuda available', cuda.is_available())
print('PyTorch device count', cuda.device_count())
print('PyTorch device', cuda.get_device_name(cuda.current_device()))

# confirm Keras sees the GPU
from keras import backend
print('keras GPUs:', backend.tensorflow_backend._get_available_gpus())

import os
os.system('nvidia-smi')
os.system('nvcc --version')

import ray
ray.init(num_gpus=1)
print('ray GPU IDs', ray.get_gpu_ids())
def main():

    total_times = 100
    run_times = 0
    init_threshold = ...
    start_t = time.time()

    valid_data = torch.load(TRAIN_DATA + '.valid.pt')
    fields = onmt.IO.load_fields(torch.load(TRAIN_DATA + '.vocab.pt'))
    # fields = onmt.IO.load_fields_from_vocab(torch.load(TRAIN_DATA + '.vocab.pt'))
    valid_data.fields = fields  # we need to clear this assignment relation if we want to transfer valid among threads

    checkpoint = torch.load(weights, map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']
    masked_models = []
    with cuda.device(GPU_ID):
        ref_model = onmt.ModelConstructor.make_base_model(
            model_opt, fields, True, checkpoint)
        ref_model.eval()
        ref_model.generator.eval()
        masked_model = MaskedModel(
            ref_model, group_dict, cuda.current_device(), cuda.current_device(
            ))  # ref_model is at current_device, no copy will happen
        masked_models.append(masked_model)

    if GPU_ID:
        cuda.set_device(GPU_ID)

    # 1 means 1% acc
    acc_percent_prune = 1
    # we only need the original (unpruned) accuracy
    acc_of_no_prune = 0
    get_acc_of_no_prune = False
    print(time_now(), "start while")
    while run_times < total_times:
        print("-----------------------------------------")
        print("start Iteration ", run_times)
        # init threshold
        best_threshold = 0
        itr_time = time.time()
        '''
            display all the names of parameters
        '''
        '''
            aa=ref_model.named_parameters
            aa_namelist = [ak[0] for ak in aa]
        '''
        '''
            test MP
        '''
        translate_opt, translate_dummy_opt = translate_opt_initialize(
            'opennmt_translate_opt.pt', 'opennmt_translate_dummy_opt.pt')
        translator = init_translate_model(translate_opt, translate_dummy_opt)
        del translator.model
        translator.model = masked_model
        tt = open(translate_opt.tgt, 'r')
        references = [[t] for t in tt]

        xxx = np.arange(0., 1, 0.01)
        #for i in range(len(masked_model.group_name_list)):
        #   tmp_crate = len(masked_model.group_name_list)*[0.]
        print(time_now(), "start testing pruning")
        masked_model.make_evaluable()
        for i in range(len(xxx)):
            # best_threshold = 0.55
            # break
            translate_data = onmt.IO.ONMTDataset(translate_opt.src,
                                                 translate_opt.tgt,
                                                 fields,
                                                 use_filter_pred=False)
            prune_data = onmt.IO.OrderedIterator(dataset=translate_data,
                                                 device=GPU_ID,
                                                 batch_size=1,
                                                 train=False,
                                                 sort=False,
                                                 shuffle=False)
            tmp_crate = len(masked_model.group_name_list) * [xxx[i]]
            #tmp_crate[i] = 0.01
            masked_model.change_mask(tmp_crate, apply_MP_on_mask)
            masked_model.apply_mask()
            tmp_fit = evaluate(masked_model, valid_data, fields)
            #tmp_fit = evaluate_trans(translator, references, prune_data, translate_data)
            #logger.scalar_summary('test_bleu', tmp_fit[1]*100, int(xxx[i]*100))
            #logger.scalar_summary('acc', tmp_fit[1], int(xxx[i]*100))
            #logger.scalar_summary('ppl', tmp_fit[0], int(xxx[i]*100))
            #logger.scalar_summary('test_ppl', tmp_fit[0], int(xxx[i]*100))
            #print('group %s => acc (%.4f), ppl (%.4f)' % (masked_model.group_name_list[i], tmp_fit[1], tmp_fit[0]))
            #print('percentage %s => bleu (%.4f), ppl (%.4f)' % (xxx[i]*100, tmp_fit[1]*100, tmp_fit[0]))
            # print('percentage %s => acc (%.4f), ppl (%.4f)' % (xxx[i]*100, tmp_fit[1], tmp_fit[0]))
            if i == 0 and not get_acc_of_no_prune:
                acc_of_no_prune = tmp_fit[1]
                acc_of_no_prune = int(acc_of_no_prune * 100) / 100
                get_acc_of_no_prune = True
            elif acc_of_no_prune - tmp_fit[1] > acc_percent_prune:
                best_threshold = xxx[i] - 0.01
                break
        # -------------------------------------------------
        # Start writing
        # prune again
        print(time_now(), " start accuracy:", acc_of_no_prune)
        print("-------test------------:", get_acc_of_no_prune)
        print(time_now(), " apply pruning with threshold:", best_threshold)
        tmp_crate = len(masked_model.group_name_list) * [best_threshold]
        masked_model.change_mask(tmp_crate, apply_MP_on_mask)
        masked_model.apply_mask()

        # print information
        tmp_fit = evaluate(masked_model, valid_data, fields)
        print('percentage %s => acc (%.4f), ppl (%.4f)' %
              (best_threshold * 100, tmp_fit[1], tmp_fit[0]))
        model_sparsity = masked_model.get_sparsity()
        print('Sparsity: {}'.format(model_sparsity))

        #--------------- start retraining --------------
        # first store model
        print(time_now(), "start saving model")
        _, saved_model = update_checkpoint(checkpoint, masked_model, run_times)
        print(time_now(), "finish saving model")
        print(time_now(), "start loading model")
        checkpoint = torch.load(SAVE_MODEL_TMP_FOLDER + saved_model,
                                map_location=lambda storage, loc: storage)
        train_opt, _, _ = opt_initialize(checkpoint,
                                         'opennmt_translate_opt.pt',
                                         'opennmt_translate_dummy_opt.pt')

        # train data loading
        print(time_now(), "start loading data for retraining")
        train = torch.load(train_opt.data + '.train.pt')
        valid = torch.load(train_opt.data + '.valid.pt')
        print(time_now(), "finish data loading")

        train_fields = load_fields(train, valid, checkpoint, train_opt)
        model_for_train = init_train_model(checkpoint, train_opt, train_fields)
        masked_model = MaskedModel(model_for_train, group_dict,
                                   cuda.current_device(),
                                   cuda.current_device())

        masked_model.make_trainable()

        print(time_now(), "building optm")
        optim = build_optim(model_for_train, checkpoint, train_opt)

        print(time_now(), "start restraining")
        recovered = train_model(model_for_train, train, valid, train_fields,
                                optim, train_opt, run_times, acc_of_no_prune)
        print(time_now(), "finish retraining ")
        if not recovered:
            exit()
        else:
            print("------------Accuracy recorverd!--------------------")
            print("recovered accuracy:", acc_of_no_prune)
        run_times += 1

        masked_model.make_evaluable()
        tmp_fit = evaluate(masked_model, valid_data, fields)
        print("------------------for test-------------------")
        print('percentage %s => acc (%.4f), ppl (%.4f)' %
              (best_threshold * 100, tmp_fit[1], tmp_fit[0]))
def main():

    data_path = "{}/data/penn".format(cfg.PROJECT_ROOT)
    model_path = "{}/model/original_model/language_model/{}".format(
        cfg.PROJECT_ROOT, 'lm_model_orignal.pt')
    total_times = 20
    run_times = 0
    original_acc = 0
    init_threshold = ...
    start_t = time.time()

    # get data
    corpus = data.Corpus(data_path)
    ntokens = len(corpus.dictionary)
    eval_batch_size = TEST_BATCH_SIZE
    train_data = batchify(corpus.train, TRAIN_BATCH_SIZE)
    val_data = batchify(corpus.valid, TEST_BATCH_SIZE)
    valid_data = val_data
    test_data = batchify(corpus.test, TEST_BATCH_SIZE)

    ref_model = None

    # Load the best saved model.

    with cuda.device(GPU_ID):
        ff = open(model_path, 'rb')
        ref_model = torch.load(ff)
        ref_model.eval()
        masked_model = MaskedModel(
            ref_model, group_dict, cuda.current_device(), cuda.current_device(
            ))  # ref_model is at current_device, no copy will happen
        #pdb.set_trace()
        ff.close()
    if GPU_ID:
        cuda.set_device(GPU_ID)

    print(time_now(), "get accuray of no pruning model")
    masked_model.make_evaluable()
    tmp_crate = len(masked_model.group_name_list) * [0]
    masked_model.change_mask(tmp_crate, apply_MP_on_mask)
    masked_model.apply_mask()
    tmp_fit = evaluate_lm(masked_model.masked_model, valid_data, corpus,
                          TEST_BATCH_SIZE)
    # we only need the original (unpruned) accuracy
    acc_of_no_prune = tmp_fit[1]
    fit_of_no_prune = tmp_fit
    original_acc = acc_of_no_prune
    pruning_arr = []
    ppl_arr = []
    #acc_of_no_prune = int(acc_of_no_prune*10)/10
    print("init accuracy of model:", acc_of_no_prune)
    print("accuracy constraint:", acc_percent_prune)
    init_threshold = [0]
    while run_times < total_times:
        print("-----------------------------------------")
        print("-----------------------------------------")
        print("-----------------------------------------")
        print("start Iteration ", run_times)

        print("test model---------------")
        LR = LR_INIT
        previous_pr = None
        previous_fit = None
        best_pr = None
        best_fit = None
        for prune_rate in range(1, 100):
            tmp_crate = len(masked_model.group_name_list) * [0.01 * prune_rate]
            masked_model.change_mask(tmp_crate, apply_MP_on_mask)
            masked_model.apply_mask()
            tmp_fit = evaluate_lm(masked_model.masked_model, valid_data,
                                  corpus, TEST_BATCH_SIZE)
            print(
                "each layer {} \% | {} % in total => validation acc {}\%, validation ppl {}"
                .format(prune_rate,
                        masked_model.get_sparsity() * 100, tmp_fit[1] * 100.,
                        tmp_fit[0]))

            if (not best_pr) and (tmp_fit[1] +
                                  acc_percent_prune) < original_acc:
                best_pr = previous_pr
                best_fit = previous_fit

            previous_pr = tmp_crate
            previous_fit = tmp_fit
        print('==============================')
        print("The best pruning rates are: {}".format(best_pr))
        if (not best_pr) or (best_pr[0] == init_threshold[0]):
            print(
                "Not better than last iteration of pruning, stop the process.")
            exit()
        masked_model.change_mask(best_pr, apply_MP_on_mask)
        masked_model.apply_mask()
        test_fit = evaluate_lm(masked_model.masked_model, test_data, corpus,
                               TEST_BATCH_SIZE)
        print("{} \% => validation acc {}\%, validation ppl {}".format(
            best_pr[0], best_fit[1] * 100., best_fit[0]))
        print("{} \% => test acc {}\%, test ppl {}".format(
            best_pr[0], test_fit[1] * 100., test_fit[0]))
        print('==============================')

        init_threshold = best_pr
        saved_model_name = 'ncs_pruned_model_%s_iteration%s_%s_%s_acc_cons_%s.pt' % (
            name_mark, run_times, Model_type, layer_group_type,
            str(acc_percent_prune))
        torch.save(masked_model.masked_model,
                   cfg.LM_MODEL_TMP_FOLDER + saved_model_name)

        #--------------- start retraining --------------
        model_for_train = masked_model

        with open(cfg.LM_MODEL_TMP_FOLDER + saved_model_name, 'rb') as f:
            model_tmp_load = torch.load(f)
            model_for_train.masked_model = model_tmp_load

        model_for_train.change_mask(init_threshold, apply_MP_on_mask)
        model_for_train.apply_mask()
        model_for_train.make_trainable()
        recovered = False
        best_val_loss = None

        try:
            for epoch in range(1, RETRAIN_EPOCHS + 1):
                epoch_start_time = time.time()
                train(model_for_train, ntokens, train_data, TRAIN_BATCH_SIZE,
                      SEQ_LEN, corpus, GRAD_CLIP, TRAIN_LOG_INTERVAL, epoch)
                val_eval = evaluate_lm(model_for_train.masked_model, val_data,
                                       corpus, TEST_BATCH_SIZE)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid acc {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_eval[1],
                        val_eval[0]))
                val_loss = val_eval[2]
                print('-' * 89)
                # Save the model if the validation loss is the best we've seen so far.
                if not best_val_loss or val_loss < best_val_loss:
                    with open(
                            "{}/{}{}_iterative_retrain_model_runtime{}_epoch_{}.pt"
                            .format(cfg.LM_MODEL_PATH, name_mark,
                                    acc_percent_prune, run_times, epoch),
                            'wb') as f:
                        torch.save(model_for_train, f)
                    best_val_loss = val_loss
                else:
                    # Anneal the learning rate if no improvement has been seen in the validation dataset.
                    LR /= 4.0

                if val_eval[1] >= original_acc:
                    recovered = True
                    break
        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')

        print(time_now(), "finish retraining ")
        if not recovered:
            exit()
        else:
            print("------------Accuracy recorverd!--------------------")
            print("recovered accuracy (>= {})".format(acc_of_no_prune))
        model_for_train.make_evaluable()
        model_for_train.apply_mask()

        ref_model = model_for_train.masked_model

        print("validate acc of the model---------------")
        tmp_fit = evaluate_lm(ref_model, valid_data, corpus, TEST_BATCH_SIZE)
        print('ref_model', 'acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))

        print("-------------print TEST  evaluation info ---------------")
        tmp_fit = evaluate_lm(ref_model, test_data, corpus, TEST_BATCH_SIZE)
        print('percentage %s => acc (%.4f), ppl (%.4f)' %
              (init_threshold[0] * 100, tmp_fit[1], tmp_fit[0]))
        masked_model = model_for_train
        run_times += 1
def main():

    total_times = 100
    run_times = 0
    init_threshold = ...
    start_t = time.time()

    valid_data = torch.load(TRAIN_DATA + '.valid.pt')
    fields = onmt.IO.load_fields(torch.load(TRAIN_DATA + '.vocab.pt'))
    # fields = onmt.IO.load_fields_from_vocab(torch.load(TRAIN_DATA + '.vocab.pt'))
    valid_data.fields = fields  # we need to clear this assignment relation if we want to transfer valid among threads

    checkpoint = torch.load(weights, map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']
    masked_models = []
    with cuda.device(GPU_ID):
        ref_model = onmt.ModelConstructor.make_base_model(model_opt, fields, True, checkpoint)
        ref_model.eval()
        ref_model.generator.eval()
        masked_model = MaskedModel(ref_model, group_dict, cuda.current_device(), cuda.current_device()) # ref_model is at current_device, no copy will happen
        masked_models.append(masked_model)
    train_opt, _, _ = opt_initialize(checkpoint, 'opennmt_translate_opt.pt', 'opennmt_translate_dummy_opt.pt')
    if GPU_ID:
        cuda.set_device(GPU_ID)

    print("BLEU evaluation:")
    translate_opt, translate_dummy_opt = translate_opt_initialize('opennmt_translate_opt.pt', 'opennmt_translate_dummy_opt.pt')
    translator = init_translate_model(translate_opt, translate_dummy_opt)
    del translator.model
    translator.model = masked_model.masked_model
    tt = open(translate_opt.tgt, 'r')
    references = [[t] for t in tt]
    translate_data = onmt.IO.ONMTDataset(translate_opt.src, translate_opt.tgt, fields, use_filter_pred=False)
    prune_data = onmt.IO.OrderedIterator(dataset=translate_data, device=GPU_ID, batch_size=1, train=False, sort=False, shuffle=False)
    tmp_fit2 = evaluate_trans(translator, references, prune_data, translate_data)
    print('Finished => bleu (%.4f), ppl (%.4f)' % (tmp_fit2[1]*100, tmp_fit2[0]))
    exit()
    # print(time_now(), "get accuray of no pruning model")
    # masked_model.make_evaluable()
    # tmp_crate = len(masked_model.group_name_list)*[0]
    # masked_model.change_mask(tmp_crate, apply_MP_on_mask)
    # masked_model.apply_mask()
    # tmp_fit = evaluate(masked_model, valid_data, fields)
    # # we only need the original (unpruned) accuracy
    # acc_of_no_prune = tmp_fit[1]
    # acc_of_no_prune = int(acc_of_no_prune*10)/10
    print("init accuracy of model:", acc_of_no_prune)
    print("accuracy constraint:", acc_percent_prune)
    while run_times < total_times:
        print("-----------------------------------------")
        print("-----------------------------------------")
        print("-----------------------------------------")
        print(time_now(), "start Iteration ", run_times)

        print("test model---------------")
        ref_model.eval()
        ref_model.generator.eval()
        tmp_fit = evaluate(ref_model, valid_data, fields)
        print('ref_model','acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))

        print("test model---------------")
        masked_models[0].make_evaluable()
        tmp_fit = evaluate(masked_models[0], valid_data, fields)
        print('masked_models[0]','acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))
        model_sparsity = masked_models[0].get_sparsity()
        print('masked_models[0] Sparsity: {}'.format(model_sparsity))


        itr_time = time.time()
        for gpu_candidate in other_GPU_IDs:
            with cuda.device(gpu_candidate):
                masked_models.append(MaskedModel(ref_model, group_dict, GPU_ID, gpu_candidate)) # if the gpu_candidate is the same as ref_model, it will return the ref_model
        
        #------------- Here -------------------------
        # del ref_model

        # do pruning
        ncs_start = time.time()
        print('Iteration %d, model loading: %d sec' % (run_times, ncs_start - itr_time))
        if run_times == 0:
            if START_THRESHOLD is not None:
                init_threshold = START_THRESHOLD
            else:
                init_threshold = len(masked_models[0].group_name_list)*[0.25]
        # if run_times == 0:
        #     init_threshold = len(masked_models[0].group_name_list)*[0.25]
        print("init threshold:", init_threshold)
        prune_acc_now = acc_percent_prune+tmp_fit[1]-acc_of_no_prune
        print('pruning acc now:', prune_acc_now)
        best_found, saved_model, best_masked_model = NCS_MP(init_threshold, 0.05, fields, masked_models, valid_data, prune_acc_now, run_times, checkpoint)
        init_threshold = best_found
        #best_found, saved_model, best_masked_model = NCS_MP(init_threshold, 0.05, fields, masked_models, valid_data, 0.01, run_times, checkpoint)

        end_t = time.time()
        print('NCS Time: {} min'.format((end_t - itr_time)/60.))
        print('Best found thresholds:')
        for i in range(len(masked_models[0].group_name_list)):
            print("layer {}: {}%".format(masked_models[0].group_name_list[i], 100*best_found[i]))

        print("BLEU evaluation:")
        translate_opt, translate_dummy_opt = translate_opt_initialize('opennmt_translate_opt.pt', 'opennmt_translate_dummy_opt.pt')
        translator = init_translate_model(translate_opt, translate_dummy_opt)
        del translator.model
        translator.model = best_masked_model
        tt = open(translate_opt.tgt, 'r')
        references = [[t] for t in tt]
        translate_data = onmt.IO.ONMTDataset(translate_opt.src, translate_opt.tgt, fields, use_filter_pred=False)
        prune_data = onmt.IO.OrderedIterator(dataset=translate_data, device=GPU_ID, batch_size=1, train=False, sort=False, shuffle=False)
        tmp_fit = evaluate_trans(translator, references, prune_data, translate_data)
        print('Finished => bleu (%.4f), ppl (%.4f)' % (tmp_fit[1]*100, tmp_fit[0]))

        # clear no used models
        for gpu_model in masked_models:
            del gpu_model

        #--------------- start retraining --------------
        model_for_train = best_masked_model
        pretrained_leaf_dict = model_for_train.make_trainable()
        optim = build_optim(model_for_train.masked_model, checkpoint, train_opt, pretrained_leaf_dict)

        print(time_now(), "start loading data for retraining")
        train = torch.load(train_opt.data+ '.train.pt')
        valid = torch.load(train_opt.data + '.valid.pt')
        train_fields = load_fields(train, valid, checkpoint, train_opt)
        print(time_now(), "finish data loading")
        model_for_train.change_mask(init_threshold, apply_MP_on_mask)
        model_for_train.apply_mask()
        model_for_train.make_trainable()

        recovered = train_model(model_for_train, train, valid, train_fields, optim, train_opt, run_times, acc_of_no_prune)
        print(time_now(), "finish retraining ")
        if not recovered:
            exit()
        else:
            print("------------Accuracy recorverd!--------------------")
            print("recovered accuracy:", acc_of_no_prune)
        model_for_train.make_evaluable()

        ref_model = model_for_train.masked_model
        masked_models = [MaskedModel(ref_model, group_dict, cuda.current_device(), cuda.current_device())]

        print("test model---------------")
        tmp_fit = evaluate(ref_model, valid_data, fields)
        print('ref_model','acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))

        print("test model---------------")
        tmp_fit = evaluate(masked_models[0], valid_data, fields)
        print('masked_models[0]','acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))
        model_sparsity = masked_models[0].get_sparsity()
        print('masked_models[0] Sparsity: {}'.format(model_sparsity))
        
        print('------------- save checkpoint ---------------')
        _, saved_model = update_checkpoint(checkpoint, model_for_train, run_times, acc_percent_prune, t=True)
        print(time_now(), ' saving model:', saved_model)
        print("-------------print evaluation info ---------------")
        tmp_fit = evaluate(model_for_train, valid_data, fields)
        print('percentage %s => acc (%.4f), ppl (%.4f)' % (best_found*100, tmp_fit[1], tmp_fit[0]))
        #--------------------------------------------------
        print("BLEU evaluation:")
        translate_opt, translate_dummy_opt = translate_opt_initialize('opennmt_translate_opt.pt', 'opennmt_translate_dummy_opt.pt')
        translator = init_translate_model(translate_opt, translate_dummy_opt)
        del translator.model
        translator.model = model_for_train.masked_model
        tt = open(translate_opt.tgt, 'r')
        references = [[t] for t in tt]
        translate_data = onmt.IO.ONMTDataset(translate_opt.src, translate_opt.tgt, fields, use_filter_pred=False)
        prune_data = onmt.IO.OrderedIterator(dataset=translate_data, device=GPU_ID, batch_size=1, train=False, sort=False, shuffle=False)
        tmp_fit2 = evaluate_trans(translator, references, prune_data, translate_data)
        print('Finished => bleu (%.4f), ppl (%.4f)' % (tmp_fit2[1]*100, tmp_fit2[0]))
        #--------------------------------------------------
        run_times += 1
Example #28
def main():

    data_path = "{}/data/penn".format(DATA_PATH)
    model_path = "{}/deepModels/torch_models/language-model/{}".format(MODEL_PATH, 'model.pt')
    #model_path = "{}/deepModels/torch_models/language-model/{}".format(MODEL_PATH, 'lstm_3layer.pt')
    total_times = 1
    run_times = 0
    original_acc = 0
    init_threshold = ...
    start_t = time.time()

    # get data
    corpus = data.Corpus(data_path)
    ntokens = len(corpus.dictionary)
    eval_batch_size = TEST_BATCH_SIZE
    train_data = batchify(corpus.train, TRAIN_BATCH_SIZE)
    val_data = batchify(corpus.valid, TEST_BATCH_SIZE)
    valid_data = val_data
    test_data = batchify(corpus.test, TEST_BATCH_SIZE)

    ref_model = None

    # Load the best saved model.

    masked_models = []
    with cuda.device(GPU_ID):
        ff = open(model_path, 'rb')
        ref_model = torch.load(ff)
        ref_model.eval()
        masked_model = MaskedModel(ref_model, group_dict, cuda.current_device(), cuda.current_device()) # ref_model is at current_device, no copy will happen
        #pdb.set_trace()
        masked_models.append(masked_model)
        ff.close()
    if GPU_ID:
        cuda.set_device(GPU_ID)

    print(time_now(), "get accuray of no pruning model")
    masked_model.make_evaluable()
    tmp_crate = len(masked_model.group_name_list)*[0]
    masked_model.change_mask(tmp_crate, apply_MP_on_mask)
    masked_model.apply_mask()
    tmp_fit = evaluate_lm(masked_model.masked_model, valid_data, corpus, TEST_BATCH_SIZE)
    # we only need the original (unpruned) accuracy
    acc_of_no_prune = tmp_fit[1]
    fit_of_no_prune = tmp_fit
    original_acc = acc_of_no_prune
    #acc_of_no_prune = int(acc_of_no_prune*10)/10
    print("=============TiPO start========================")
    print("init accuracy of model:", acc_of_no_prune)
    print("accuracy constraint:", acc_percent_prune)
    previous_pr = None
    best_pr = None
    ncs_std = 0.05
    while run_times < total_times:
        print("-----------------------------------------")
        print("-----------------------------------------")
        print("-----------------------------------------")
        print("start Iteration ", run_times)

        print("test model---------------")
        LR = LR_INIT
        #ref_model.generator.eval()
        print("test model---------------")
        masked_models[0].make_evaluable()
        tmp_fit = evaluate_lm(masked_models[0].masked_model, valid_data, corpus, TEST_BATCH_SIZE)
        print('masked_models[0]','acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))

        if run_times == 0:
            init_threshold = len(masked_models[0].group_name_list) * [0.6]

        itr_time = time.time()
        for gpu_candidate in other_GPU_IDs:
            with cuda.device(gpu_candidate):
                masked_models.append(MaskedModel(ref_model, group_dict, GPU_ID, gpu_candidate)) # if the gpu_candidate is the same as ref_model, it will return the ref_model
        
        #------------- Here -------------------------
        # del ref_model

        # do pruning
        ncs_start = time.time()
        print('Iteration %d, model loading: %d sec' % (run_times, ncs_start - itr_time))

        print("init threshold:", init_threshold)
        best_found, saved_model, best_masked_model = NCS_MP(init_threshold, ncs_std, masked_models, valid_data, corpus, acc_percent_prune, fit_of_no_prune, run_times)
        #best_found, saved_model, best_masked_model = init_threshold, '/raid/lab_tk/liguiying/deepModels/torch_models/language-model/prune_tmp/ncs_pruned_model_test_iteration0_LM_time_acc_cons_0.01.pt', masked_models[0]
        init_threshold = best_found
        #best_found, saved_model, best_masked_model = NCS_MP(init_threshold, 0.05, fields, masked_models, valid_data, 0.01, run_times, checkpoint)

        end_t = time.time()
        print('NCS Time: {} min'.format((end_t - itr_time)/60.))
        print('Best found thresholds:')
        for i in range(len(masked_models[0].group_name_list)):
            print("layer {}: {}%".format(masked_models[0].group_name_list[i], 100*best_found[i]))

        print("TEST PPL evaluation:")
        tmp_fit = evaluate_lm(best_masked_model.masked_model, test_data, corpus, TEST_BATCH_SIZE)
        print('Finished => acc (%.4f percent), ppl (%.4f)' % (tmp_fit[1]*100, tmp_fit[0]))

        # clear no used models
        for gpu_model in masked_models:
            del gpu_model
        
        if not best_pr:
            best_pr = best_masked_model.get_sparsity()
        else:
            tmp_pr = best_masked_model.get_sparsity()
            if best_pr > tmp_pr:
                print("No improvement! Stop the PROCESS.")
                exit()
            elif best_pr == tmp_pr:
                if tmp_fit[1] < fit_of_no_prune[1]:
                    ncs_std /= 10
                else:
                    ncs_std *= 10
            else:
                best_pr = tmp_pr
        #if run_times % 5 == 0:
        #   ncs_std /= 10


        #--------------- start retraining --------------
        model_for_train = best_masked_model
        #pretrained_leaf_dict = model_for_train.make_trainable()
        #print(model_for_train.map_dict.keys())
        #pdb.set_trace()
        #fix_no_leaf(model_for_train, pretrained_leaf_dict)
        #pdb.set_trace()
        
        with open(SAVE_MODEL_TMP_FOLDER + saved_model, 'rb') as f:
            model_tmp_load = torch.load(f)
            model_for_train.masked_model = model_tmp_load.masked_model

        model_for_train.change_mask(init_threshold, apply_MP_on_mask)
        model_for_train.apply_mask()
        model_for_train.make_trainable()
        recovered = False
        best_val_loss = None

        try:
            for epoch in range(1, RETRAIN_EPOCHS + 1):
                epoch_start_time = time.time()
                train(model_for_train, ntokens, train_data, TRAIN_BATCH_SIZE, SEQ_LEN, corpus, GRAD_CLIP, TRAIN_LOG_INTERVAL, epoch)
                val_eval = evaluate_lm(model_for_train.masked_model, val_data, corpus, TEST_BATCH_SIZE)
                print('-' * 89)
                print('| end of epoch {:3d} | time: {:5.2f}s | valid acc {:5.2f} | '
                        'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                                   val_eval[1], val_eval[0]))
                val_loss = val_eval[2]
                print('-' * 89)
                # Save the model if the validation loss is the best we've seen so far.
                if not best_val_loss or val_loss < best_val_loss:
                    with open("{}/{}{}_iterative_retrain_model_runtime{}_epoch_{}.pt".format(SAVE_MODEL_FOLDER, name_mark, acc_percent_prune,  run_times, epoch), 'wb') as f:
                        torch.save(model_for_train, f)
                    best_val_loss = val_loss
                else:
                    # Anneal the learning rate if no improvement has been seen in the validation dataset.
                    LR /= 4.0

                if val_eval[1] >= original_acc:
                    recovered = True
        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')

        print(time_now(), "finish retraining ")
        if not recovered:
            print("NOT RECORVER!")
            exit()
        else:
            print("------------Accuracy recorverd!--------------------")
            print("recovered accuracy (>= {})".format(acc_of_no_prune))
        model_for_train.make_evaluable()
        model_for_train.apply_mask()

        ref_model = model_for_train.masked_model
        masked_models = [MaskedModel(ref_model, group_dict, cuda.current_device(), cuda.current_device())]

        print("validate acc of the model---------------")
        tmp_fit = evaluate_lm(ref_model, valid_data, corpus, TEST_BATCH_SIZE)
        print('ref_model','acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))

        tmp_fit = evaluate_lm(masked_models[0].masked_model, valid_data, corpus, TEST_BATCH_SIZE)
        print('masked_models[0]','acc (%.4f), ppl (%.4f)' % (tmp_fit[1], tmp_fit[0]))
        
        print('------------- save checkpoint ---------------')
        saved_model = update_checkpoint(model_for_train, run_times, acc_percent_prune, t=True)
        print(time_now(), ' saving model:', saved_model)
        print("-------------print TEST  evaluation info ---------------")
        tmp_fit = evaluate_lm(model_for_train.masked_model, test_data, corpus, TEST_BATCH_SIZE)
        print('percentage %s => acc (%.4f), ppl (%.4f)' % (model_for_train.get_sparsity()*100, tmp_fit[1], tmp_fit[0]))
        run_times += 1
Example #29
def get_memory_use():
    device = cuda.current_device()
    message = cuda.get_device_name(device) + ':\n'
    message += 'allocated:' + str(cuda.memory_allocated(device)) + '/' + str(cuda.max_memory_allocated()) + '\n'
    message += 'cached:' + str(cuda.memory_cached(device)) + '/' + str(cuda.max_memory_cached()) + '\n'
    return message
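
# Usage sketch (not part of the original): log memory before and after a forward
# pass. The linear layer and batch are illustrative assumptions; note that
# memory_cached/max_memory_cached are deprecated aliases in newer PyTorch releases.
import torch
from torch import cuda

if cuda.is_available():
    model = torch.nn.Linear(1024, 1024).cuda()       # hypothetical model
    batch = torch.randn(64, 1024, device='cuda')
    print(get_memory_use())                          # before the forward pass
    out = model(batch)
    print(get_memory_use())                          # after the forward pass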
import tensorflow as tf

#%% Check that gpu is available

from tensorflow.python.client import device_lib
assert 'GPU' in str(device_lib.list_local_devices())

# confirm Keras sees the GPU
from keras import backend
assert len(backend.tensorflow_backend._get_available_gpus()) > 0

# confirm PyTorch sees the GPU
from torch import cuda
assert cuda.is_available()
assert cuda.device_count() > 0
print(cuda.get_device_name(cuda.current_device()))


#%%

from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import CuDNNGRU, Dense

cb = [ModelCheckpoint("model.hdf5", save_best_only=True, period=3)]

model = Sequential()
model.add(CuDNNGRU(48, input_shape=(None, n_features)))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))

model.summary()
#%%

# Compile and fit model