Example #1
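# (fragment) part of a progress callback that plots the class loss; the x value
# epoch * 984 + step suggests 984 optimizer steps per epoch in the original run.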
        self.plot('loss', 'fold-%d-rep-%d-epoch-%d' % (fold, rep, epoch),
                  'Class loss', epoch * 984 + step, loss)


############################################################################

############################################################################
# Change the default progress callback to the Visdom plotter.
openml.extensions.mxnet.config.active = VisdomLinePlotter()

############################################################################

############################################################################
# A sequential network used for classification on the MNIST dataset.
with mxnet.Context(mxnet.gpu(0)):
    model = mxnet.gluon.nn.HybridSequential()
    with model.name_scope():
        model.add(
            mxnet.gluon.nn.HybridLambda(
                lambda F, x: F.reshape(x, shape=(-1, 1, 28, 28))),
            mxnet.gluon.nn.BatchNorm(),
            mxnet.gluon.nn.Conv2D(channels=32, kernel_size=5),
            mxnet.gluon.nn.LeakyReLU(alpha=1e-2), mxnet.gluon.nn.MaxPool2D(),
            mxnet.gluon.nn.Conv2D(channels=64, kernel_size=5),
            mxnet.gluon.nn.LeakyReLU(alpha=1e-2), mxnet.gluon.nn.MaxPool2D(),
            mxnet.gluon.nn.Flatten(), mxnet.gluon.nn.Dense(units=256),
            mxnet.gluon.nn.LeakyReLU(alpha=1e-2),
            mxnet.gluon.nn.Dropout(rate=0.2), mxnet.gluon.nn.Dense(units=10))
    ############################################################################
Example #2
    def train(self,
              train_file: List[str],
              dev_file: List[str],
              save_dir,
              pretrained_embeddings_file=None,
              min_occur_count=2,
              lstm_layers=3,
              word_dims=100,
              tag_dims=100,
              dropout_emb=0.33,
              lstm_hiddens=400,
              dropout_lstm_input=0.33,
              dropout_lstm_hidden=0.33,
              mlp_arc_size=500,
              mlp_rel_size=100,
              dropout_mlp=0.33,
              learning_rate=1e-3,
              decay=.75,
              decay_steps=5000,
              beta_1=.9,
              beta_2=.9,
              epsilon=1e-12,
              num_buckets_train=40,
              num_buckets_valid=10,
              train_iters=50000,
              train_batch_size=5000,
              dev_batch_size=5000,
              validate_every=100,
              save_after=5000,
              root='root',
              transfer=None,
              bert_path=None,
              debug=False):
        """Train a deep biaffine dependency parser

        Parameters
        ----------
        train_file : list of str
            paths to the training sets, one per dataset
        dev_file : list of str
            paths to the dev sets, one per dataset
        save_dir : str
            a directory for saving model and related meta-data
        pretrained_embeddings_file : str
            pre-trained embeddings file, plain text format
        min_occur_count : int
            words occurring fewer than this many times are replaced with UNK
        lstm_layers : int
            number of LSTM layers
        word_dims : int
            dimension of word embedding
        tag_dims : int
            dimension of tag embedding
        dropout_emb : float
            word dropout
        lstm_hiddens : int
            size of lstm hidden states
        dropout_lstm_input : float
            dropout on x in variational RNN
        dropout_lstm_hidden : float
            dropout on h in variational RNN
        mlp_arc_size : int
            output size of MLP for arc feature extraction
        mlp_rel_size : int
            output size of MLP for rel feature extraction
        dropout_mlp : float
            dropout on the output of LSTM
        learning_rate : float
            learning rate
        decay : float
            see ExponentialScheduler
        decay_steps : int
            see ExponentialScheduler
        beta_1 : float
            first moment decay rate of the Adam optimizer
        beta_2 : float
            second moment decay rate of the Adam optimizer
        epsilon : float
            numerical-stability constant of the Adam optimizer
        num_buckets_train : int
            number of buckets for training data set
        num_buckets_valid : int
            number of buckets for dev data set
        train_iters : int
            training iterations
        train_batch_size : int
            training batch size
        dev_batch_size : int
            batch size on the dev set
        validate_every : int
            validate on dev set every such number of batches
        save_after : int
            do not save the model until this many training steps have passed
        root : str
            token for ROOT
        transfer : str
            directory of a previously trained model whose vocabulary and parameters are transferred
        bert_path : list of str
            paths to pre-computed BERT features for the training and dev sets
        debug : bool
            debug mode

        Returns
        -------
        DepParser
            parser itself
        """
        logger = init_logger(save_dir)
        config = _Config(train_file, dev_file, None, save_dir,
                         pretrained_embeddings_file, min_occur_count,
                         lstm_layers, word_dims, tag_dims, dropout_emb,
                         lstm_hiddens, dropout_lstm_input, dropout_lstm_hidden,
                         mlp_arc_size, mlp_rel_size, dropout_mlp,
                         learning_rate, decay, decay_steps, beta_1, beta_2,
                         epsilon, num_buckets_train, num_buckets_valid, None,
                         train_iters, train_batch_size, 0, debug)
        if transfer:
            with open(os.path.join(transfer, 'vocab.pkl'), 'rb') as f:
                self._vocab = pickle.load(f)
            self._vocab.append(
                ParserVocabulary(
                    train_file[-1],
                    pretrained_embeddings_file,
                    min_occur_count,
                    root=root,
                    shared_vocab=self._vocab[0],
                ))
        else:
            for t, d in zip(train_file, dev_file):
                self._vocab.append(
                    ParserVocabulary(
                        t,
                        pretrained_embeddings_file,
                        min_occur_count,
                        root=root,
                        shared_vocab=None
                        if len(self._vocab) == 0 else self._vocab[0],
                    ))
        with open(config.save_vocab_path, 'wb') as f:
            pickle.dump(self._vocab, f)
        for voc in self._vocab:
            voc.log_info(logger)

        with mx.Context(mxnet_prefer_gpu()):
            data_loaders = [
                DataLoader(t,
                           num_buckets_train,
                           vocab,
                           bert=bert_path[0] if bert_path else None)
                for t, vocab in zip(train_file, self._vocab)
            ]
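            # one DataLoader per dataset; num_buckets_train presumably controls how
            # sentences are grouped into length buckets for batching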
            config.bert_dim = data_loaders[0].bert_dim
            config.save()
            self._parser = parser = self.cls_parser(
                self._vocab,
                word_dims,
                tag_dims,
                dropout_emb,
                lstm_layers,
                lstm_hiddens,
                dropout_lstm_input,
                dropout_lstm_hidden,
                mlp_arc_size,
                mlp_rel_size,
                dropout_mlp,
                bert=data_loaders[0].bert_dim,
                debug=debug)
            if transfer:
                parser.transfer = True
                parser.fill(transfer)
            parser.initialize()
            scheduler = ExponentialScheduler(learning_rate, decay, decay_steps)
            optimizer = mx.optimizer.Adam(learning_rate,
                                          beta_1,
                                          beta_2,
                                          epsilon,
                                          lr_scheduler=scheduler)
            trainer = gluon.Trainer(parser.collect_params(),
                                    optimizer=optimizer)
            global_step = 0
            best_LF = 0.
            batch_id = 0
            epoch = 1
            total_epoch = math.ceil(train_iters / validate_every)
            logger.info("Epoch {} out of {}".format(epoch, total_epoch))
            bar = Progbar(target=min(validate_every, train_iters))
            gs = [
                dl.get_batches(batch_size=train_batch_size, shuffle=False)
                for dl in data_loaders
            ]
            while global_step < train_iters:
                arcs_tasks = []
                rels_tasks = []
                bert_tasks = []
                for g in gs:
                    words, bert, tags, arcs, rels = next(
                        g, (None, None, None, None, None))
                    if words is None:
                        break
                    arcs_tasks.append(arcs)
                    rels_tasks.append(rels)
                    bert_tasks.append(bert)

                if words is None:
                    gs = [
                        dl.get_batches(batch_size=train_batch_size,
                                       shuffle=False) for dl in data_loaders
                    ]
                    continue

                with autograd.record():
                    arc_accuracy, rel_accuracy, loss = parser.forward(
                        words, bert, tags, arcs_tasks, rels_tasks)
                    loss_value = loss.asscalar()
                loss.backward()
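                # Trainer.step(batch_size) rescales the accumulated gradients by
                # 1/batch_size before applying the Adam update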
                trainer.step(train_batch_size)
                batch_id += 1
                try:
                    bar.update(batch_id,
                               exact=[("LR", rel_accuracy, 2),
                                      ("loss", loss_value)])
                except OverflowError:
                    pass  # sometimes loss can be 0 or infinity, crashes the bar

                global_step += 1
                if global_step % validate_every == 0:
                    batch_id = 0
                    UF, LF, speed = evaluate_joint_official_script(
                        parser,
                        self._vocab,
                        num_buckets_valid,
                        dev_batch_size,
                        dev_file,
                        os.path.join(save_dir, 'dev.predict.conllu'),
                        bert=None if bert_path is None else bert_path[1])
                    score_str = ''
                    for dataset, lf in zip(dev_file, LF):
                        dataset = os.path.basename(dataset).replace(
                            '.conllu', '')
                        lf = lf * 100
                        score_str += '{}={:0.1f} '.format(dataset, lf)
                    if transfer:
                        LF = LF[-1] * 100
                    else:
                        LF = sum(LF) / len(LF) * 100
                    score_str += '{}={:0.1f} '.format('avg', LF)
                    logger.info(score_str + '%d sents/s' % (speed))
                    epoch += 1
                    bar = Progbar(target=min(validate_every, train_iters -
                                             global_step))
                    if global_step > save_after and LF > best_LF:
                        logger.info('- new best score!')
                        best_LF = LF
                        parser.save(config.save_model_path)
                    if global_step < train_iters:
                        logger.info("Epoch {} out of {}".format(
                            epoch, total_epoch))

        # Ensure a model is saved even when validate_every is too large for any
        # checkpoint to have been written during training
        if not os.path.isfile(config.save_model_path) or best_LF == 0:
            parser.save(config.save_model_path)

        return self
Example #3
model_path = 'data/model/wsj-pos-bebu-ge-fe4'
columns = {0: 'text', 1: 'pos'}
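# 1. get the corpus (training, dev and test splits)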
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/wsj-pos',
                                                columns,
                                                train_file='train.tsv',
                                                test_file='test.tsv',
                                                dev_file='dev.tsv')
# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        BERTEmbeddings([
            'data/embedding/bert_base_sum/wsj.train.bert',
            'data/embedding/bert_base_sum/wsj.dev.bert',
            'data/embedding/bert_base_sum/wsj.test.bert'
        ]),
        CharLMEmbeddings('data/model/lm-news-forward'),
        CharLMEmbeddings('data/model/lm-news-backward'),
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    tagger = SequenceTagger(hidden_size=256,
Example #4
def main():
    parser = argparse.ArgumentParser(
        description='Script to test the trained network on a game.')
    parser.add_argument('-r',
                        '--rom',
                        required=False,
                        type=str,
                        default=os.path.join('arena', 'games', 'roms',
                                             'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v',
                        '--visualization',
                        required=False,
                        type=int,
                        default=0,
                        help='Visualize the runs.')
    parser.add_argument('--lr',
                        required=False,
                        type=float,
                        default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps',
                        required=False,
                        type=float,
                        default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient',
                        required=False,
                        type=float,
                        default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q',
                        required=False,
                        type=bool,
                        default=False,
                        help='Use Double DQN')
    parser.add_argument('--wd',
                        required=False,
                        type=float,
                        default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument(
        '-c',
        '--ctx',
        required=False,
        type=str,
        default='gpu',
        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d',
                        '--dir-path',
                        required=False,
                        type=str,
                        default='',
                        help='Saving directory of model files.')
    parser.add_argument(
        '--start-eps',
        required=False,
        type=float,
        default=1.0,
        help='Eps of the epsilon-greedy policy at the beginning')
    parser.add_argument('--replay-start-size',
                        required=False,
                        type=int,
                        default=50000,
                        help='The step that the training starts')
    parser.add_argument(
        '--kvstore-update-period',
        required=False,
        type=int,
        default=1,
        help='The period that the worker updates the parameters from the sever'
    )
    parser.add_argument(
        '--kv-type',
        required=False,
        type=str,
        default=None,
        help=
        'type of kvstore, default will not use kvstore, could also be dist_async'
    )
    parser.add_argument('--optimizer',
                        required=False,
                        type=str,
                        default="adagrad",
                        help='type of optimizer')
    parser.add_argument('--momentum',
                        required=False,
                        type=float,
                        default=None,
                        help='momentum value')
    args, unknown = parser.parse_known_args()
    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s-%de_5' % (rom_name, int(args.lr * 10**5))
    ctx = re.findall(r'([a-z]+)(\d*)', args.ctx)
    ctx = [(device, int(num)) if len(num) > 0 else (device, 0)
           for device, num in ctx]
    replay_start_size = args.replay_start_size
    max_start_nullops = 30
    replay_memory_size = 1000000
    history_length = 4
    rows = 84
    cols = 84
    q_ctx = mx.Context(*ctx[0])

    game = AtariGame(rom_path=args.rom,
                     resize_mode='scale',
                     replay_start_size=replay_start_size,
                     resized_rows=rows,
                     resized_cols=cols,
                     max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size,
                     display_screen=args.visualization,
                     history_length=history_length)

    ## Nature DQN hyper-parameters
    freeze_interval = 10000
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4
    discount = 0.99

    eps_start = args.start_eps
    eps_min = 0.1
    eps_decay = (eps_start - 0.1) / 1000000
    eps_curr = eps_start
    freeze_interval /= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)

    data_shapes = {
        'data': (minibatch_size, history_length) + (rows, cols),
        'dqn_action': (minibatch_size, ),
        'dqn_reward': (minibatch_size, )
    }

    dqn_output_op = DQNOutputNpyOp()
    dqn_sym = dqn_sym_nature(action_num, dqn_output_op)
    qnet = Base(data_shapes=data_shapes,
                sym=dqn_sym,
                name='QNet',
                initializer=DQNInitializer(factor_type="in"),
                ctx=q_ctx)
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)

    use_easgd = False
    if args.optimizer != "easgd":
        optimizer = mx.optimizer.create(name='adagrad',
                                        learning_rate=args.lr,
                                        eps=args.eps,
                                        clip_gradient=args.clip_gradient,
                                        rescale_grad=1.0,
                                        wd=args.wd)
    else:
        use_easgd = True
        easgd_beta = 0.9
        easgd_p = 4
        easgd_alpha = easgd_beta / (args.kvstore_update_period * easgd_p)
        optimizer = mx.optimizer.Easgd(learning_rate=easgd_alpha)
        easgd_eta = 0.00025
        local_optimizer = mx.optimizer.create(name='adagrad',
                                              learning_rate=args.lr,
                                              eps=args.eps,
                                              clip_gradient=args.clip_gradient,
                                              rescale_grad=1.0,
                                              wd=args.wd)
        central_weight = OrderedDict([(n, nd.zeros(v.shape, ctx=q_ctx))
                                      for n, v in qnet.params.items()])
        if args.momentum is not None:
            easgd_delta = 0.99
            velocity = OrderedDict([(n, nd.zeros(v.shape, ctx=q_ctx))
                                    for n, v in qnet.params.items()])
            paramsBackup = OrderedDict([(n, nd.zeros(v.shape, ctx=q_ctx))
                                        for n, v in qnet.params.items()])
    # Create kvstore
    if args.kv_type is not None:
        kvType = args.kv_type
        kvStore = kvstore.create(kvType)
        #Initialize kvstore
        for idx, v in enumerate(qnet.params.values()):
            kvStore.init(idx, v)
        if not use_easgd:
            # Set optimizer on kvstore
            kvStore.set_optimizer(optimizer)
        else:
            # kvStore.send_updater_to_server(easgd_server_update)
            kvStore.set_optimizer(optimizer)
            local_updater = mx.optimizer.get_updater(local_optimizer)
        kvstore_update_period = args.kvstore_update_period
        args.dir_path = args.dir_path + "-" + str(kvStore.rank)
    else:
        updater = mx.optimizer.get_updater(optimizer)

    qnet.print_stat()
    target_qnet.print_stat()
    # Begin Playing Game
    training_steps = 0
    total_steps = 0
    for epoch in range(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        state = nd.array(
                            current_state.reshape((1, ) + current_state.shape),
                            ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.forward(batch_size=1,
                                                data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    action = npy_rng.randint(action_num)

                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1

                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states,
                                           ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)

                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
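                    #     Standard DQN:  y = r + gamma * max_a' Q_target(s', a') * (1 - done)
                    #     Double DQN:    y = r + gamma * Q_target(s', argmax_a' Q_online(s', a')) * (1 - done)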
                    if not args.double_q:
                        target_qval = target_qnet.forward(
                            batch_size=minibatch_size, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(target_qval))\
                                           * (1.0 - terminate_flags) * discount
                    else:
                        target_qval = target_qnet.forward(
                            batch_size=minibatch_size, data=next_states)[0]
                        qval = qnet.forward(batch_size=minibatch_size,
                                            data=next_states)[0]

                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(qval))\
                                           * (1.0 - terminate_flags) * discount
                    if args.momentum is None:
                        outputs = qnet.forward(batch_size=minibatch_size,
                                               is_train=True,
                                               data=states,
                                               dqn_action=actions,
                                               dqn_reward=target_rewards)
                        qnet.backward(batch_size=minibatch_size)

                    if args.kv_type is not None:
                        if total_steps % kvstore_update_period == 0:
                            if not use_easgd:
                                update_to_kvstore(kvStore, qnet.params,
                                                  qnet.params_grad)
                            else:
                                for paramIndex, k in enumerate(qnet.params.keys()):
                                    kvStore.pull(paramIndex,
                                                 central_weight[k],
                                                 priority=-paramIndex)
                                    qnet.params[k][:] -= easgd_alpha * (
                                        qnet.params[k] - central_weight[k])
                                    kvStore.push(paramIndex,
                                                 qnet.params[k],
                                                 priority=-paramIndex)
                        if use_easgd:
                            if args.momentum is None:
                                for paramIndex, k in enumerate(qnet.params.keys()):
                                    '''qnet.params[k][:] += -easgd_eta*nd.clip(qnet.params_grad[k],
                                                                    -args.clip_gradient,
                                                                    args.clip_gradient)'''
                                    local_updater(index=paramIndex,
                                                  grad=qnet.params_grad[k],
                                                  weight=qnet.params[k])
                            else:
                                for i, k in enumerate(qnet.params.keys()):
                                    paramsBackup[k][:] = qnet.params[k]
                                    qnet.params[
                                        k][:] += easgd_delta * velocity[k]
                                outputs = qnet.forward(
                                    batch_size=minibatch_size,
                                    is_train=True,
                                    data=states,
                                    dqn_action=actions,
                                    dqn_reward=target_rewards)
                                qnet.backward(batch_size=minibatch_size)
                                for i, k in enumerate(qnet.params.keys()):
                                    velocity[k][:] = easgd_delta * velocity[
                                        k] - args.lr * qnet.params_grad[k]
                                    qnet.params[
                                        k][:] = paramsBackup[k] + velocity[
                                            k] - args.wd * qnet.params[k][:]
                    else:
                        qnet.update(updater=updater)

                    # 3.3 Calculate Loss
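                    #     (a Huber-style loss: quadratic for errors <= 1, linear beyond,
                    #      summed over the minibatch; computed here only for logging)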
                    diff = nd.abs(
                        nd.choose_element_0index(outputs[0], actions) -
                        target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = (0.5 * nd.sum(nd.square(quadratic_part)) +
                            nd.sum(diff - quadratic_part)).asscalar()
                    episode_loss += loss

                    # 3.3 Update the target network every freeze_interval
                    # (We can do annealing instead of hard copy)
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            if args.kv_type is not None:
                info_str = "Node[%d]: " % kvStore.rank
            else:
                info_str = ""
            info_str += "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                        % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                           game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (
                    episode_loss / episode_update_step, episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (
                    episode_q_value / episode_action_step, episode_action_step)
            logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d" %
                     (epoch, fps, epoch_reward / float(episode), episode))
Example #5
def parse_ctx(ctx_args):
    ctx = re.findall(r'([a-z]+)(\d*)', ctx_args)
    ctx = [(device, int(num)) if len(num) > 0 else (device, 0)
           for device, num in ctx]
    ctx = [mx.Context(*ele) for ele in ctx]
    return ctx
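
A quick usage sketch for the helper above (hypothetical calls; the regex turns a
device string into (type, index) pairs):

print(parse_ctx('gpu0,gpu1'))  # -> [gpu(0), gpu(1)]
print(parse_ctx('cpu'))        # -> [cpu(0)]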
Example #6
def train_semimyo_pose_smooth(args):
    import re
    if re.search(r'20161223.2.image-latest.trial-\d+.1', args.root):
        click.echo('deprecated')
        return

    if args.root:
        if args.log:
            args.log = os.path.join(args.root, args.log)
        if args.snapshot:
            args.snapshot = os.path.join(args.root, args.snapshot)

    with Context(args.log, parallel=False, mxnet_context=mx.Context(mx.gpu(args.gpu[0]))):
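        # Note: `Context` here is the project's own context manager (logging setup etc.);
        # the actual MXNet device is passed separately via the `mxnet_context` argument.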
        logger.info('Args:\n{}', pformat(args))
        for i in range(args.num_epoch):
            path = args.snapshot + '-%04d.params' % (i + 1)
            if os.path.exists(path):
                logger.info('Found snapshot {}, exit', path)
                return

        dataset = get_dataset(args.dataset, **args.dataset_args)
        get_crossval_data = getattr(dataset, 'get_%s_data' % args.crossval_type.replace('-', '_'))
        train, val = get_crossval_data(
            batch_size=args.batch_size,
            fold=args.fold,
            preprocess=args.preprocess,
            num_mini_batch=args.num_mini_batch,
            balance_gesture=args.balance_gesture,
            window=args.window
        )
        logger.info('Train samples: {}', train.num_data)
        logger.info('Val samples: {}', val.num_data)
        mod = get_module(
            args.module,
            network=args.symbol,
            adabn=args.adabn,
            num_adabn_epoch=args.num_adabn_epoch,
            for_training=True,
            num_eval_epoch=args.num_eval_epoch,
            snapshot_period=args.snapshot_period,
            symbol_kargs=dict(
                stochastic_input=args.stochastic_input,
                stochastic_net=args.stochastic_net,
                window=args.window,
                batch_size=args.batch_size,
                num_pose=dataset.num_pose,
                shared_net=args.shared_net,
                gesture_net=args.gesture_net,
                pose_net=args.pose_net,
                pose_head_net=args.pose_head_net,
                pose_tail_net=args.pose_tail_net,
                num_gesture=dataset.num_gesture,
                num_semg_channel=1,
                num_semg_row=dataset.num_semg_row,
                num_semg_col=dataset.num_semg_col,
                dropout=args.dropout,
                num_mini_batch=args.num_mini_batch,
                gesture_loss_weight=args.gesture_loss_weight,
                pose_loss_weight=args.pose_loss_weight,
                smooth_loss_weight=args.smooth_loss_weight,
                cudnn_tune=args.cudnn_tune,
                num_stochastic_sample=args.num_stochastic_sample,
            ),
            context=[mx.gpu(i) for i in args.gpu]
        )
        mod.fit(
            monitor_pattern=args.monitor_pattern,
            monitor_interval=args.monitor_interval,
            train_data=train,
            eval_data=val,
            num_epoch=args.num_epoch,
            num_train=train.num_data,
            batch_size=args.batch_size,
            lr_step=args.lr_step,
            lr_factor=args.lr_factor,
            lr=args.lr,
            wd=args.wd,
            snapshot=args.snapshot,
            params=args.params,
            ignore_params=args.ignore_params,
            fix_params=args.fix_params,
            decay_all=args.decay_all
        )
Example #7
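# (fragment) tail of the `input_param(name, default)` prompt helper used below; it
# returns the entered value, converted to an int when it has no fractional part.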
        if math.floor(ans) == ans:
            ans = int(ans)
        return ans
    else:
        return default


context = input('Choose a device(c for cpu, number for gpu(n)):')
if context == 'c':
    context = mxnet.cpu(0)
else:
    context = mxnet.gpu(int(context))


# get trainer
with mxnet.Context(context):
    model_num = input('Which model do you want to train?\n' +
                      '1. timeSVD++\t2. v1\t3. v2\t 4. v3\n')
    bin_cnt = input_param('bin_cnt', 30)
    beta = input_param('beta', .4)
    factor_cnt = input_param('factor_cnt', 10)
    batch_size = input_param('batch_size', 40)
    # timeSVD++
    if model_num == '1':
        model = timeSVDpp_batch.TimeSVDpp(nItems, nUsers, nDays, average_rating,
                                          factor_cnt, bin_cnt, beta, batch_size)
    # # neuralTimeSVD++ v1
    # if model_num == '2':
    #     trainer = v1. \
    #         Trainer(userItems, rating_cnt, test_userItems, test_rating_cnt,
    #                 user_meanday, nItems, nUsers, nDays, average_rating,
Example #8
    def check_quantized_conv(data_shape, kernel, num_filter, pad, stride,
                             no_bias):
        with mx.Context('gpu', 0):
            # run fp32 conv
            data = mx.sym.Variable(name='data',
                                   shape=data_shape,
                                   dtype='float32')
            conv2d = mx.sym.Convolution(data=data,
                                        kernel=kernel,
                                        num_filter=num_filter,
                                        pad=pad,
                                        stride=stride,
                                        no_bias=no_bias,
                                        cudnn_off=False,
                                        name='conv2d')
            arg_shapes, _, _ = conv2d.infer_shape(data=data_shape)
            arg_names = conv2d.list_arguments()
            conv_exe_fp32 = conv2d.simple_bind(ctx=mx.current_context(),
                                               grad_req='null')
            conv_exe_fp32.arg_dict[arg_names[0]][:] = mx.nd.random.uniform(
                low=-127.0, high=127.0, shape=data_shape).astype('int32')
            conv_exe_fp32.arg_dict[arg_names[1]][:] = mx.nd.random.uniform(
                low=-127.0, high=127.0, shape=arg_shapes[1]).astype('int32')
            if not no_bias:
                conv_exe_fp32.arg_dict[arg_names[2]][:] = mx.nd.random.uniform(
                    low=-127.0, high=127.0,
                    shape=arg_shapes[2]).astype('int32')
            output = conv_exe_fp32.forward()[0]

            # run quantized conv
            qdata = mx.sym.Variable(name='qdata',
                                    shape=data_shape,
                                    dtype='int8')
            qweight = mx.sym.Variable(name='qweight', dtype='int8')
            min_data = mx.sym.Variable(name='min_data')
            max_data = mx.sym.Variable(name='max_data')
            min_weight = mx.sym.Variable(name='min_weight')
            max_weight = mx.sym.Variable(name='max_weight')
            quantized_conv2d = mx.sym.contrib.quantized_conv(
                data=qdata,
                weight=qweight,
                min_data=min_data,
                max_data=max_data,
                min_weight=min_weight,
                max_weight=max_weight,
                kernel=kernel,
                num_filter=num_filter,
                pad=pad,
                stride=stride,
                no_bias=no_bias)
            qarg_names = quantized_conv2d.list_arguments()
            type_dict = None
            if not no_bias:
                type_dict = {qarg_names[2]: 'int8'}
            conv_exe_int8 = quantized_conv2d.simple_bind(
                ctx=mx.current_context(), type_dict=type_dict, grad_req='null')
            conv_exe_int8.arg_dict[qarg_names[0]][:] = conv_exe_fp32.arg_dict[
                arg_names[0]].astype('int8')
            conv_exe_int8.arg_dict[qarg_names[1]][:] = conv_exe_fp32.arg_dict[
                arg_names[1]].astype('int8')
            quantized_range = 127.0
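            # the min/max arguments describe the real-valued range that the int8
            # tensors represent; the full symmetric range [-127, 127] is used here,
            # so quantized and float values coincide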
            if no_bias:
                conv_exe_int8.arg_dict[qarg_names[2]][:] = -quantized_range
                conv_exe_int8.arg_dict[qarg_names[3]][:] = quantized_range
                conv_exe_int8.arg_dict[qarg_names[4]][:] = -quantized_range
                conv_exe_int8.arg_dict[qarg_names[5]][:] = quantized_range
            else:
                conv_exe_int8.arg_dict[
                    qarg_names[2]][:] = conv_exe_fp32.arg_dict[
                        arg_names[2]].astype('int8')
                conv_exe_int8.arg_dict[qarg_names[3]][:] = -quantized_range
                conv_exe_int8.arg_dict[qarg_names[4]][:] = quantized_range
                conv_exe_int8.arg_dict[qarg_names[5]][:] = -quantized_range
                conv_exe_int8.arg_dict[qarg_names[6]][:] = quantized_range
                conv_exe_int8.arg_dict[qarg_names[7]][:] = -quantized_range
                conv_exe_int8.arg_dict[qarg_names[8]][:] = quantized_range
            qoutput, min_range, max_range = conv_exe_int8.forward()

            if no_bias:
                assert_almost_equal(output.asnumpy(), qoutput.asnumpy())
            else:
                # with a bias added, allow a small quantization error (at most 2 per element)
                diff = mx.nd.abs(output - qoutput.astype(output.dtype))
                cond = mx.nd.lesser(2, diff).sum().asscalar()
                assert cond == 0
Example #9
    def check_quantized_fc(data_shape, num_hidden, no_bias, flatten=True):
        with mx.Context('gpu', 0):
            data = mx.sym.Variable(name='data',
                                   shape=data_shape,
                                   dtype='float32')
            fc_fp32 = mx.sym.FullyConnected(data=data,
                                            num_hidden=num_hidden,
                                            no_bias=no_bias,
                                            flatten=flatten)
            arg_shapes, _, _ = fc_fp32.infer_shape(data=data_shape)
            arg_names = fc_fp32.list_arguments()
            fc_fp32_exe = fc_fp32.simple_bind(ctx=mx.current_context(),
                                              grad_req='null')
            fc_fp32_exe.arg_dict[arg_names[0]][:] = mx.nd.random.uniform(
                low=-127.0, high=127.0, shape=data_shape).astype('int32')
            fc_fp32_exe.arg_dict[arg_names[1]][:] = mx.nd.random.uniform(
                low=-127.0, high=127.0, shape=arg_shapes[1]).astype('int32')
            if not no_bias:
                fc_fp32_exe.arg_dict[arg_names[2]][:] = mx.nd.random.uniform(
                    low=-127.0, high=127.0,
                    shape=arg_shapes[2]).astype('int32')
            output = fc_fp32_exe.forward()[0]

            qdata = mx.sym.Variable(name='qdata',
                                    shape=data_shape,
                                    dtype='int8')
            fc_int8 = mx.sym.contrib.quantized_fully_connected(
                data=qdata,
                num_hidden=num_hidden,
                no_bias=no_bias,
                flatten=flatten)
            qarg_names = fc_int8.list_arguments()
            type_dict = {qarg_names[1]: 'int8'}
            if not no_bias:
                type_dict.update({qarg_names[2]: 'int8'})
            fc_int8_exe = fc_int8.simple_bind(ctx=mx.current_context(),
                                              type_dict=type_dict,
                                              grad_req='null')
            fc_int8_exe.arg_dict[qarg_names[0]][:] = fc_fp32_exe.arg_dict[
                arg_names[0]].astype('int8')
            fc_int8_exe.arg_dict[qarg_names[1]][:] = fc_fp32_exe.arg_dict[
                arg_names[1]].astype('int8')
            quantized_range = 127.0
            if no_bias:
                fc_int8_exe.arg_dict[qarg_names[2]][:] = -quantized_range
                fc_int8_exe.arg_dict[qarg_names[3]][:] = quantized_range
                fc_int8_exe.arg_dict[qarg_names[4]][:] = -quantized_range
                fc_int8_exe.arg_dict[qarg_names[5]][:] = quantized_range
            else:
                fc_int8_exe.arg_dict[qarg_names[2]][:] = fc_fp32_exe.arg_dict[
                    arg_names[2]].astype('int8')
                fc_int8_exe.arg_dict[qarg_names[3]][:] = -quantized_range
                fc_int8_exe.arg_dict[qarg_names[4]][:] = quantized_range
                fc_int8_exe.arg_dict[qarg_names[5]][:] = -quantized_range
                fc_int8_exe.arg_dict[qarg_names[6]][:] = quantized_range
                fc_int8_exe.arg_dict[qarg_names[7]][:] = -quantized_range
                fc_int8_exe.arg_dict[qarg_names[8]][:] = quantized_range
            qoutput, min_range, max_range = fc_int8_exe.forward()

            if no_bias:
                assert_almost_equal(output.asnumpy(), qoutput.asnumpy())
            else:
                # with a bias added, allow a small quantization error (at most 2 per element)
                diff = mx.nd.abs(output - qoutput.astype(output.dtype))
                cond = mx.nd.lesser(2, diff).sum().asscalar()
                assert cond == 0
Example #10

# Since FancyMLP and Sequential are both subclasses of Block, we can call them in a nested fashion.
class NestMLP(nn.Block):
    def __init__(self, **kwargs):
        super(NestMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation='relu'),
                     nn.Dense(32, activation='relu'))
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        return self.dense(self.net(x))

if __name__ == '__main__':
    with mx.Context(mx.gpu()):
        x = nd.random.uniform(shape=(2, 20))
        net1 = MySequential()
        net1.add(nn.Dense(256, activation='relu'))
        net1.add(nn.Dense(10))
        net1.initialize()
        print(net1(x))

        net2 = FancyMLP()
        net2.initialize()
        print(net2(x))

        # FancyMLP and Sequential are both subclasses of Block, so they can be nested here as well.
        net3 = nn.Sequential()
        net3.add(NestMLP(), nn.Dense(20), FancyMLP())
        net3.initialize()
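        # for illustration: the nested model is callable like any other Block
        print(net3(x))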
Example #11
File: context.py Project: robingong/minpy
    def as_mxnet_context(self):
        _logger.debug("typeid:{}, id:{}".format(self.device_typeid,
                                                self.device_id))
        return mxnet.Context(self.devtype2str[self.device_typeid],
                             self.device_id)
Example #12
def generate_poisoning_fun(index=0, total_round=5):
    # load dataset, model structure, parameter
    dev = mx.gpu(0)
    batch_size = 1
    # data_shape = (batch_size, 2352)
    data_shape = (batch_size, 3, 28, 28)
    train_iter = mx.io.ImageRecordIter(
        path_imgrec="data/cifar10/cifar10_train.rec",
        data_shape=(3, 28, 28),
        batch_size=batch_size,
        # mean_img="data/cifar10/mean.bin",
        rand_crop=True,
        rand_mirror=True,
        round_batch=True)

    val_iter = mx.io.ImageRecordIter(
        path_imgrec="data/cifar10/cifar10_val.rec",
        data_shape=(3, 28, 28),
        batch_size=batch_size,
        # mean_img="data/cifar10/mean.bin",
        rand_crop=True,
        rand_mirror=True,
        round_batch=True)

    all_data = []
    all_label = []
    poisoning_d = []
    poisoning_l = []
    poisoning_data_list = []
    poisoning_label_list = []

    for i in range(index):
        batch_data = copy.deepcopy(train_iter.next())
        poisoning_d.append(batch_data.data[0])
        poisoning_l.append(batch_data.label[0] + 1)

    for j in range(index):
        with mx.Context(dev):
            # load original model
            softmax, arg_params, aux_params = mx.model.load_checkpoint(
                'model/cifar10_model', 300)
            model = mx.mod.Module(softmax, context=dev)
            model.bind(data_shapes=train_iter.provide_data,
                       label_shapes=train_iter.provide_label,
                       inputs_need_grad=True)
            model.set_params(arg_params, aux_params)

            # define autoencoder
            de_out = mx.symbol.load('model/cifar10_model-symbol.json')
            ae_arg_arrays_load = mx.nd.load('model/cifar10_model-0300.params')

            ae_arg_shapes, ae_output_shapes, ae_aux_shapes = de_out.infer_shape(
                data=data_shape)
            ae_grad_arrays = [
                mx.nd.zeros(shape, ctx=dev) for shape in ae_arg_shapes
            ]
            ae_arg_arrays = [
                mx.nd.zeros(shape, ctx=dev) for shape in ae_arg_shapes
            ]

            ae_model = de_out.simple_bind(ctx=dev,
                                          data=(1, batch_size, 28, 28),
                                          grad_req='write')

            # load pre-trained weight
            print(len(ae_arg_arrays_load))
            for i in range(1, len(ae_arg_arrays_load)):
                ae_arg_arrays_load[i].copyto(ae_model.arg_arrays[i])

            train_iter.reset()
            dataBatchP = copy.deepcopy(train_iter.next())
            poisoning_d[j].copyto(dataBatchP.data[0])
            poisoning_l[j].copyto(dataBatchP.label[0])
            data = dataBatchP.data[0]

            num_normal = 1000
            attacked_model_lr = 0.005
            generative_model_lr = 0.001
            model.init_optimizer(kvstore='local',
                                 optimizer='sgd',
                                 optimizer_params=(('learning_rate',
                                                    attacked_model_lr), ))
            # get normal data loss and accuracy
            loss = 0
            for num in range(num_normal):
                dataBatch = copy.deepcopy(train_iter.next())
                label = dataBatch.label[0]
                model.forward(dataBatch)
                output = model.get_outputs()[0].asnumpy()
                loss += CalLogLoss(output, label.asnumpy())
            print('normal data loss: %.4f' % loss)

            val_iter.reset()
            metric = mx.metric.create('acc')
            for batch in val_iter:
                model.forward(batch, is_train=False)
                model.update_metric(metric, batch.label)
            print(metric.get())
            # val_iter.reset()
            # val_acc = model.score(val_iter, 'acc')
            # print 'Val Acc: %.4f' % val_acc[0][1]

            #attack with initial data, get normal data loss and accuracy
            print('after initial attack')
            model.forward(dataBatchP, is_train=True)
            model.backward()
            model.update()
            val_iter.reset()
            metric.reset()
            for batch in val_iter:
                model.forward(batch, is_train=False)
                model.update_metric(metric, batch.label)
            print(metric.get())
            # val_iter.reset()
            # val_acc = model.score(val_iter, 'acc')
            # print 'Val Acc: %.4f' % val_acc[0][1]
            # re-evaluate normal data loss
            loss = 0
            train_iter.reset()
            dataBatch = copy.deepcopy(train_iter.next())
            for num in range(num_normal):
                dataBatch = copy.deepcopy(train_iter.next())
                label = dataBatch.label[0]
                model.forward(dataBatch)
                loss += CalLogLoss(model.get_outputs()[0].asnumpy(),
                                   label.asnumpy())
            print('normal data loss: %.4f' % loss)
            plt.figure('poisoned data')
            plt.subplot(1, 5, 1)
            plt.imshow(
                (dataBatchP.data[0]).asnumpy()[0].astype(np.uint8).transpose(
                    1, 2, 0))
            # generate poisoned data
            ae_de_grad = mx.nd.zeros(ae_model.outputs[0].shape, ctx=dev)
            pre_loss = 0
            for round in range(total_round):
                start = time.time()
                print('round %d' % round)
                # update original model
                train_iter.reset()
                dataBatch = copy.deepcopy(train_iter.next())

                ae_model.arg_dict['data'][:] = dataBatchP.data[0].reshape(
                    (1, 2352)) / 255
                ae_model.forward()
                ae_output = ae_model.outputs[0].asnumpy()
                label_tmp = copy.deepcopy(dataBatchP.label)
                dataBatch_tmp = copy.deepcopy(
                    mx.io.DataBatch(
                        [mx.nd.array(ae_output.reshape(data_shape))],
                        label_tmp))
                # load pre-trained weight
                model.set_params(arg_params, aux_params)
                model.forward(dataBatch_tmp, is_train=True)
                model.backward()
                # update attacked model
                model.update()
                print('poisoned network')

                val_iter.reset()
                metric.reset()
                for batch in val_iter:
                    model.forward(batch, is_train=False)
                    model.update_metric(metric, batch.label)
                print(metric.get())
                # val_iter.reset()
                # val_acc = model.score(val_iter, 'acc')
                # print 'Val Acc: %.4f' % val_acc[0][1]

                if round % 2 == 0:
                    plt.subplot(1, 5, round // 2 + 2)
                    plt.imshow((ae_output.reshape(3, 28, 28) * 255).astype(
                        np.uint8).transpose(1, 2, 0))

                # get normal data loss
                loss = 0
                tmpGrad = np.zeros(data.shape)
                dataBatch = copy.deepcopy(train_iter.next())
                for num in range(num_normal):
                    dataBatch = copy.deepcopy(train_iter.next())
                    label = dataBatch.label[0]
                    model.forward(dataBatch, is_train=True)
                    model.backward()
                    output = model.get_outputs()[0].asnumpy()
                    loss += CalLogLoss(output, label.asnumpy())
                    tmpGrad += model.get_input_grads()[0].asnumpy()
                # ae_de_grad[:] = -np.sign(tmpGrad.reshape(1,2352))*1
                ae_de_grad[:] = -np.sign(tmpGrad.reshape(
                    1, 2352)) * np.sign(loss - pre_loss)
                ae_model.backward([ae_de_grad])
                for key in ae_model.arg_dict.keys():
                    SGD(key, ae_model.arg_dict[key], ae_model.grad_dict[key],
                        generative_model_lr, batch_size)
                end = time.time()
                print('time: %.4f' % (end - start))
                print('Update autoencoder')
                print('normal data loss: %.4f' % loss)
                pre_loss = loss
                poisoning_data_list.append(ae_output)
                poisoning_label_list.append(dataBatchP.label[0].asnumpy())
            all_data.append(poisoning_data_list)
            all_label.append(poisoning_label_list)
    return all_data, all_label
Example #13
    LATENT_SIZE = 32
    BATCH_SIZE = 64
    PRINT_EVERY = 100
    MAX_ITERATIONS = 1000000
    OUT_DIR = pathlib.Path(pathlib.os.environ['LOG']) / 'debug'

    dataset = mx.gluon.data.vision.MNIST(
        train=True,
        transform=lambda data, label:
        (np.round(data.astype(np.float32) / 255), label))
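    # the transform binarizes each pixel: scale to [0, 1], then round to 0 or 1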
    train_data = mx.gluon.data.DataLoader(dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True)

    ctx = [mx.gpu(0)] if USE_GPU else [mx.cpu()]
    with mx.Context(ctx[0]):
        variational = AmortizedGammaVariational(LATENT_SIZE, BATCH_SIZE)
        model = DeepLatentGammaModel()
        elbo = ELBO(model, variational)

        variational.hybridize()
        model.hybridize()
        elbo.hybridize()

        variational.initialize(mx.init.Xavier())
        model.initialize(mx.init.Xavier())

        params = model.collect_params()
        params.update(variational.collect_params())
        trainer = gluon.Trainer(params, 'rmsprop', {
            'learning_rate': 0.00001,
Example #14
import math
import cv2
from multiprocessing import Pool
from itertools import repeat
try:
    from itertools import izip  # Python 2
except ImportError:  # Python 3: izip was removed, the built-in zip is lazy
    izip = zip
from symbols import get_PNet, get_RNet, get_ONet, get_gender_attractive_Net, get_smile_Net, get_QNet, get_attractive_Net, get_attractive_small_Net, get_rotation_Net, get_glass_Net, get_true_Net, get_clear_Net
from time import time
from helper import nms, adjust_input, generate_bbox, detect_first_stage, detect_first_stage_warpper, init_executor
import threading
from nms.gpu_nms import *
from config import GPU_ID

first_has_reg = True
has_reg = True
has_landmark = True
mx.Context(mx.gpu(GPU_ID))
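# Note: this only constructs a Context object; the default device is not changed
# unless the Context is used as a context manager (`with mx.Context(...):`) or
# passed explicitly to the operations that need it.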


class MyThread(threading.Thread):
    def __init__(self, arg):
        super(MyThread, self).__init__()
        self.arg = arg
        self.return_boxes = []

    def run(self):
        self.return_boxes = detect_first_stage_warpper(self.arg)


class MtcnnDetector(object):
    """
        Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
Example #15
#!/usr/bin/python
import mxnet as mx

def ab():
    a = mx.nd.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    b = mx.nd.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    c = mx.nd.dot(a,b)
    print (c.asnumpy())

print ("<===============")
print ("Dot product (gpu):")

gpu_device=mx.gpu(0) # Change this to mx.cpu() in absence of GPUs.
with mx.Context(gpu_device):
    ab()
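
Inside the `with` block, the arrays created in ab() are allocated on gpu_device,
because mx.nd.array falls back to the current default context when no ctx is given.
A minimal check (assuming a GPU-enabled MXNet build):

with mx.Context(gpu_device):
    print(mx.nd.zeros((1,)).context)  # gpu(0)
print(mx.nd.zeros((1,)).context)      # cpu(0) again outside the block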
Example #16
def main():
    parser = argparse.ArgumentParser(
        description='Script to test the trained network on a game.')
    parser.add_argument('-r',
                        '--rom',
                        required=False,
                        type=str,
                        default=os.path.join('roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-d',
                        '--dir-path',
                        required=False,
                        type=str,
                        default='',
                        help='Directory path of the model files.')
    parser.add_argument('-m',
                        '--model_prefix',
                        required=True,
                        type=str,
                        default='QNet',
                        help='Prefix of the saved model file.')
    parser.add_argument('-t',
                        '--test-steps',
                        required=False,
                        type=int,
                        default=125000,
                        help='Test steps.')
    parser.add_argument(
        '-c',
        '--ctx',
        required=False,
        type=str,
        default='gpu',
        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument(
        '-e',
        '--epoch-range',
        required=False,
        type=str,
        default='22',
        help='Epochs to run testing. E.g `-e 0,80`, `-e 0,80,2`')
    parser.add_argument('-v',
                        '--visualization',
                        required=False,
                        type=int,
                        default=0,
                        help='Visualize the runs.')
    parser.add_argument('--symbol',
                        required=False,
                        type=str,
                        default="nature",
                        help='type of network, nature or nips')
    args, unknown = parser.parse_known_args()
    max_start_nullops = 30
    holdout_size = 3200
    replay_memory_size = 1000000
    exploration = 0.05
    history_length = 4
    rows = 84
    cols = 84
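    # Parse the context string ('cpu', 'gpu', 'gpu1', ...) into (device_type, device_id) pairs;
    # a missing device id defaults to 0.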
    ctx = re.findall(r'([a-z]+)(\d*)', args.ctx)
    ctx = [(device, int(num)) if len(num) > 0 else (device, 0)
           for device, num in ctx]
    q_ctx = mx.Context(*ctx[0])
    minibatch_size = 32
    epoch_range = [int(n) for n in args.epoch_range.split(',')]
    epochs = range(*epoch_range)

    game = AtariGame(rom_path=args.rom,
                     history_length=history_length,
                     resize_mode='scale',
                     resized_rows=rows,
                     replay_start_size=4,
                     resized_cols=cols,
                     max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size,
                     death_end_episode=False,
                     display_screen=args.visualization)

    if not args.visualization:
        holdout_samples = collect_holdout_samples(game,
                                                  sample_num=holdout_size)
    action_num = len(game.action_set)
    data_shapes = {
        'data': (minibatch_size, history_length) + (rows, cols),
        'dqn_action': (minibatch_size, ),
        'dqn_reward': (minibatch_size, )
    }
    if args.symbol == "nature":
        dqn_sym = dqn_sym_nature(action_num)
    elif args.symbol == "nips":
        dqn_sym = dqn_sym_nips(action_num)
    else:
        raise NotImplementedError
    qnet = Base(data_shapes=data_shapes,
                sym_gen=dqn_sym,
                name=args.model_prefix,
                ctx=q_ctx)

    for epoch in epochs:
        qnet.load_params(name=args.model_prefix,
                         dir_path=args.dir_path,
                         epoch=epoch)
        if not args.visualization:
            avg_q_score = calculate_avg_q(holdout_samples, qnet)
            avg_reward = calculate_avg_reward(game, qnet, args.test_steps,
                                              exploration)
            print("Epoch:%d Avg Reward: %f, Avg Q Score:%f" %
                  (epoch, avg_reward, avg_q_score))
        else:
            avg_reward = calculate_avg_reward(game, qnet, args.test_steps,
                                              exploration)
            print("Epoch:%d Avg Reward: %f" % (epoch, avg_reward))
Example #17
 def __init__(
     self,
     sigma: Tensor,
     kernel: Kernel,
     prediction_length: Optional[int] = None,
     context_length: Optional[int] = None,
     num_samples: Optional[int] = None,
     ctx: mx.Context = mx.Context('cpu'),
     float_type: DType = np.float64,
     jitter_method: str = 'iter',
     max_iter_jitter: int = 10,
     neg_tol: float = -1e-8,
     diag_weight: float = 1e-6,
     increase_jitter: int = 10,
     sample_noise: bool = True,
     F=None,
 ) -> None:
     """
     Parameters
     ----------
     sigma
         Noise parameter of shape (batch_size, num_data_points, 1),
         where num_data_points is the number of rows in the Cholesky matrix.
     kernel
         Kernel object.
     prediction_length
         Prediction length.
     context_length
         Training length.
     num_samples
         The number of samples to be drawn.
     ctx
         Determines whether to compute on the cpu or gpu.
     float_type
         Determines whether to use single or double precision.
      jitter_method
          Whether to use the iterative jitter method or an eigenvalue decomposition, depending on problem size.
      max_iter_jitter
          Maximum number of iterations for jitter to iteratively make the matrix positive definite.
      neg_tol
          Tolerance used by the jitter methods to reject matrices whose diagonal elements are smaller than this
          value when checking whether a matrix is positive definite.
      diag_weight
          Multiple of mean of diagonal entries to initialize the jitter.
      increase_jitter
          Factor by which the jitter is multiplied at each iteration.
     sample_noise
         Boolean to determine whether to add :math:`\sigma^2I` to the predictive covariance matrix.
     F
         A module that can either refer to the Symbol API or the NDArray
         API in MXNet.
     """
     assert (prediction_length is None or prediction_length > 0
             ), "The value of `prediction_length` should be > 0"
     assert (context_length is None or context_length > 0
             ), "The value of `context_length` should be > 0"
     assert (num_samples is None
             or num_samples > 0), "The value of `num_samples` should be > 0"
     self.sigma = sigma
     self.kernel = kernel
     self.prediction_length = prediction_length
     self.context_length = (context_length if context_length is not None
                            else prediction_length)
     self.num_samples = num_samples
     self.F = F if F else getF(sigma)
     self.ctx = ctx
     self.float_type = float_type
     self.jitter_method = jitter_method
     self.max_iter_jitter = max_iter_jitter
     self.neg_tol = neg_tol
     self.diag_weight = diag_weight
     self.increase_jitter = increase_jitter
     self.sample_noise = sample_noise
Example #18
    def train(self,
              base_path: str,
              learning_rate: float = 0.1,
              mini_batch_size: int = 32,
              max_epochs: int = 100,
              anneal_factor: float = 0.5,
              patience: int = 2,
              save_model: bool = True,
              embeddings_in_memory: bool = True,
              train_with_dev: bool = False,
              context: mx.Context = None,
              show_test=False,
              cn=False) -> float:
        """

        :param base_path: a folder to store model, log etc.
        :param learning_rate: initial learning rate
        :param mini_batch_size: mini-batch size
        :param max_epochs: maximum number of training epochs
        :param anneal_factor: factor by which the learning rate is reduced on plateau
        :param patience: number of epochs without improvement before the learning rate is annealed
        :param save_model: whether to save the best model to base_path
        :param embeddings_in_memory: keep embeddings of the whole corpus in memory
        :param train_with_dev: if True, the dev set is added to the training data
        :return: best dev F1
        """
        evaluation_method = 'F1'
        if self.model.tag_type in ['ner', 'np', 'srl']:
            evaluation_method = 'span-F1'
        if self.model.tag_type in ['pos', 'upos']:
            evaluation_method = 'accuracy'
        print(evaluation_method)

        os.makedirs(base_path, exist_ok=True)

        loss_txt = os.path.join(base_path, "loss.txt")
        open(loss_txt, "w", encoding='utf-8').close()

        anneal_mode = 'min' if train_with_dev else 'max'
        train_data = self.corpus.train

        # if training also uses dev data, include in training set
        if train_with_dev:
            train_data.extend(self.corpus.dev)

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            with mx.Context(context if context else mxnet_prefer_gpu()):
                self.model.initialize()
                scheduler = ReduceLROnPlateau(lr=learning_rate,
                                              verbose=True,
                                              factor=anneal_factor,
                                              patience=patience,
                                              mode=anneal_mode)
                optimizer = mx.optimizer.SGD(learning_rate=learning_rate,
                                             lr_scheduler=scheduler,
                                             clip_gradient=5.0)
                trainer = gluon.Trainer(self.model.collect_params(),
                                        optimizer=optimizer)
                for epoch in range(0, max_epochs):
                    current_loss = 0
                    if not self.test_mode:
                        random.shuffle(train_data)

                    batches = [
                        train_data[x:x + mini_batch_size]
                        for x in range(0, len(train_data), mini_batch_size)
                    ]

                    batch_no = 0

                    for batch in batches:
                        batch_no += 1

                        # if batch_no % 100 == 0:
                        #     print("%d of %d (%f)" % (batch_no, len(batches), float(batch_no / len(batches))))

                        # Step 4. Compute the loss, gradients, and update the parameters by calling optimizer.step()
                        batch.sort(key=lambda x: len(x), reverse=True)
                        with autograd.record():
                            self.model.embeddings.embed(batch)
                            loss = self.model.neg_log_likelihood(
                                batch, self.model.tag_type)

                        current_loss += loss.sum().asscalar()

                        loss.backward()

                        # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0)

                        # optimizer.step()
                        trainer.step(len(batch))

                        sys.stdout.write(
                            "\r%.2f%%" %
                            (batch_no / float(len(batches)) * 100))
                        sys.stdout.flush()

                        if not embeddings_in_memory:
                            self.clear_embeddings_in_batch(batch)

                    current_loss /= len(train_data)

                    if not train_with_dev:
                        print('.. evaluating... dev... ')
                        dev_score, dev_fp, dev_result = self.evaluate(
                            self.corpus.dev,
                            base_path,
                            evaluation_method=evaluation_method,
                            embeddings_in_memory=embeddings_in_memory,
                            cn=cn)
                    else:
                        dev_fp = 0
                        dev_result = '_'

                    # anneal against train loss if training with dev, otherwise anneal against dev score
                    if train_with_dev:
                        scheduler.step(current_loss)
                    else:
                        scheduler.step(dev_score)

                    # save if model is current best and we use dev data for model selection
                    if save_model and not train_with_dev and dev_score == scheduler.best:
                        self.model.save(base_path)
                    summary = '%d' % epoch + '\t({:%H:%M:%S})'.format(datetime.datetime.now()) \
                              + '\t%f\t%d\t%f\tDEV   %d\t' % (
                                  current_loss, scheduler.num_bad_epochs, learning_rate, dev_fp) + dev_result
                    summary = summary.replace('\n', '')
                    if self.corpus.test and len(
                            self.corpus.test) and show_test:
                        print('test... ')
                        test_score, test_fp, test_result = self.evaluate(
                            self.corpus.test,
                            base_path,
                            evaluation_method=evaluation_method,
                            embeddings_in_memory=embeddings_in_memory,
                            cn=cn)
                        summary += '\tTEST   \t%d\t' % test_fp + test_result
                    with open(loss_txt, "a") as loss_file:
                        loss_file.write('%s\n' % summary)
                    print(summary)

            # if we do not use dev data for model selection, save final model
            if save_model and train_with_dev:
                self.model.save(base_path)

            return scheduler.best  # return maximum dev f1

        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')
            print('saving model')
            self.model.save(base_path + "/final-model")
            print('done')
Example #19
File: dqn_demo.py  Project: flyers/Arena
def main():
    parser = argparse.ArgumentParser(description='Script to test the trained network on a game.')
    parser.add_argument('-r', '--rom', required=False, type=str,
                        default=os.path.join('arena', 'games', 'roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v', '--visualization', required=False, type=int, default=0,
                        help='Visualize the runs.')
    parser.add_argument('--lr', required=False, type=float, default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps', required=False, type=float, default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient', required=False, type=float, default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q', required=False, type=bool, default=False,
                        help='Use Double DQN')
    parser.add_argument('--wd', required=False, type=float, default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument('-c', '--ctx', required=False, type=str, default='gpu',
                        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d', '--dir-path', required=False, type=str, default='',
                        help='Saving directory of model files.')
    args = parser.parse_args()
    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s' % rom_name
    ctx = re.findall(r'([a-z]+)(\d*)', args.ctx)
    ctx = [(device, int(num)) if len(num) >0 else (device, 0) for device, num in ctx]
    replay_start_size = 50000
    max_start_nullops = 30
    replay_memory_size = 1000000
    history_length = 4
    rows = 84
    cols = 84
    q_ctx = mx.Context(*ctx[0])

    game = AtariGame(rom_path=args.rom, resize_mode='scale', replay_start_size=replay_start_size,
                     resized_rows=rows, resized_cols=cols, max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size, display_screen=args.visualization,
                     history_length=history_length)

    ##RUN NATURE
    freeze_interval = 10000
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4
    discount = 0.99

    eps_start = 1.0
    eps_min = 0.1
    eps_decay = (1.0 - 0.1) / 1000000
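    # i.e. epsilon is annealed linearly from 1.0 down to 0.1 over the first 1,000,000 steps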
    eps_curr = eps_start
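    # freeze_interval is given in environment steps; convert it to parameter-update steps,
    # since the target-network sync below is checked against training_steps.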
    freeze_interval /= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)

    data_shapes = {'data': (minibatch_size, history_length) + (rows, cols),
                   'dqn_action': (minibatch_size,), 'dqn_reward': (minibatch_size,)}
    optimizer_params = {'name': 'adagrad', 'learning_rate': args.lr, 'eps': args.eps,
                        'clip_gradient': args.clip_gradient,
                        'rescale_grad': 1.0,
                        'wd': args.wd}
    dqn_output_op = DQNOutputNpyOp()
    dqn_sym = dqn_sym_nature(action_num, dqn_output_op)
    qnet = Critic(data_shapes=data_shapes, sym=dqn_sym, optimizer_params=optimizer_params, name='QNet',
                  initializer=DQNInitializer(factor_type="in"),
                  ctx=q_ctx)
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)

    qnet.print_stat()
    target_qnet.print_stat()
    # Begin Playing Game
    training_steps = 0
    total_steps = 0
    for epoch in xrange(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
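                    # Epsilon-greedy action selection: explore with probability eps_curr,
                    # otherwise act greedily with respect to the current Q network.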
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        state = nd.array(current_state.reshape((1,) + current_state.shape),
                                         ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.calc_score(batch_size=1, data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    action = npy_rng.randint(action_num)

                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1

                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states, ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)

                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
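                    #     (standard DQN target: r + discount * max_a' Q_target(s', a') * (1 - terminal);
                    #      with --double-q, the action a' is chosen by the online network and evaluated
                    #      by the target network)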
                    if not args.double_q:
                        target_qval = target_qnet.calc_score(batch_size=minibatch_size,
                                                         data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(target_qval))\
                                           * (1.0 - terminate_flags) * discount
                    else:
                        target_qval = target_qnet.calc_score(batch_size=minibatch_size,
                                                         data=next_states)[0]
                        qval = qnet.calc_score(batch_size=minibatch_size, data=next_states)[0]

                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(qval))\
                                           * (1.0 - terminate_flags) * discount
                    outputs = qnet.fit_target(batch_size=minibatch_size, data=states,
                                              dqn_action=actions,
                                              dqn_reward=target_rewards)

                    # 3.3 Calculate Loss
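                    #     Huber (smooth L1) loss on the TD error: quadratic for |diff| <= 1,
                    #     linear beyond, which keeps the gradients bounded.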
                    diff = nd.abs(nd.choose_element_0index(outputs[0], actions) - target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = (0.5 * nd.sum(nd.square(quadratic_part)) + nd.sum(diff - quadratic_part)).asscalar()
                    episode_loss += loss

                    # 3.3 Update the target network every freeze_interval
                    # (We can do annealing instead of hard copy)
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            info_str = "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                        % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                           game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (episode_loss / episode_update_step,
                                                  episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (episode_q_value / episode_action_step,
                                                  episode_action_step)
            logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d"
                     % (epoch, fps, epoch_reward / float(episode), episode))
Example #20
from gluonts.model.gp_forecaster.gaussian_process import GaussianProcess

from gluonts.mx.context import check_gpu_support
from gluonts.mx.kernels import RBFKernel
from gluonts.mx.linalg_util import jitter_cholesky, jitter_cholesky_eig


# This test verifies that both eigenvalue decomposition and iterative jitter method
# make a non-positive definite matrix positive definite to be able to compute the cholesky.
# Both gpu and cpu as well as single and double precision are tested.
@pytest.mark.skipif(
    sys.platform == "linux",
    reason=
    f"skipping since potrf crashes on mxnet 1.6.0 on linux when matrix is not spd",
)
@pytest.mark.parametrize("ctx", [mx.Context("gpu"), mx.Context("cpu")])
@pytest.mark.parametrize("jitter_method", ["iter", "eig"])
@pytest.mark.parametrize("float_type", [np.float32, np.float64])
def test_jitter_unit(jitter_method, float_type, ctx) -> None:
    # TODO: Enable GPU tests on Jenkins
    if ctx == mx.Context("gpu") and not check_gpu_support():
        return
    matrix = nd.array([[[1, 2], [3, 4]], [[10, 100], [-21.5, 41]]],
                      ctx=ctx,
                      dtype=float_type)
    F = mx.nd
    num_data_points = matrix.shape[1]
    if jitter_method == "eig":
        L = jitter_cholesky_eig(F, matrix, num_data_points, ctx, float_type)
    elif jitter_method == "iter":
        L = jitter_cholesky(F, matrix, num_data_points, ctx, float_type)
Example #21
def jitter_cholesky(
    F,
    matrix: Tensor,
    num_data_points: Optional[int] = None,
    ctx: mx.Context = mx.Context("cpu"),
    float_type: DType = np.float64,
    max_iter_jitter: int = 10,
    neg_tol: float = -1e-8,
    diag_weight: float = 1e-6,
    increase_jitter: int = 10,
) -> Optional[Tensor]:
    """
    This function applies the jitter method.  It iteratively tries to compute the Cholesky decomposition and
    adds a positive tolerance to the diagonal that increases at each iteration until the matrix is positive definite
    or the maximum number of iterations has been reached.

    Parameters
    ----------
    matrix
        Kernel matrix of shape (batch_size, num_data_points, num_data_points).
    num_data_points
        Number of rows in the kernel_matrix.
    ctx
        Determines whether to compute on the cpu or gpu.
    float_type
        Determines whether to use single or double precision.
    max_iter_jitter
        Maximum number of iterations for jitter to iteratively make the matrix positive definite.
    neg_tol
        Tolerance used by the jitter methods to reject matrices whose diagonal elements are smaller than this
        value when checking whether a matrix is positive definite.
    diag_weight
        Multiple of mean of diagonal entries to initialize the jitter.
    increase_jitter
        Factor by which the jitter is multiplied at each iteration.
    Returns
    -------
    Optional[Tensor]
        The method either raises an error if it fails to make the matrix positive definite within the
        maximum number of iterations, or succeeds and returns the lower triangular Cholesky factor `L`
        of shape (batch_size, num_data_points, num_data_points)
    """
    num_iter = 0
    diag = batch_diagonal(
        F, matrix, num_data_points, ctx, float_type
    )  # shape (batch_size, num_data_points, 1)
    diag_mean = diag.mean(axis=1).expand_dims(
        axis=2
    )  # shape (batch_size, 1, 1)
    jitter = F.zeros_like(diag)  # shape (batch_size, num_data_points, 1)
    # Ensure that diagonal entries are numerically non-negative, as defined by neg_tol
    # TODO: Add support for symbolic case: Cannot use < operator with symbolic variables
    if F.sum(diag <= neg_tol) > 0:
        raise mx.base.MXNetError(
            " Matrix is not positive definite: negative diagonal elements"
        )
    while num_iter <= max_iter_jitter:
        try:
            L = F.linalg.potrf(
                F.broadcast_add(
                    matrix,
                    F.broadcast_mul(
                        F.eye(num_data_points, ctx=ctx, dtype=float_type),
                        jitter,
                    ),
                )
            )
            # gpu will not throw error but will store nans. If nan, L.sum() = nan
            # so the error tolerance can be large.
            # TODO: Add support for symbolic case: Cannot use <= operator with symbolic variables
            assert F.max(F.abs(L.nansum() - L.sum()) <= 1e-1)
            return L
        except:
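            # The bare except deliberately catches both the MXNetError raised by potrf on CPU
            # and the AssertionError from the NaN check above on GPU.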
            if num_iter == 0:
                # Initialize the jitter: constant jitter per each batch
                jitter = (
                    F.broadcast_mul(diag_mean, F.ones_like(jitter))
                    * diag_weight
                )
            else:
                jitter = jitter * increase_jitter
        finally:
            num_iter += 1
    raise mx.base.MXNetError(
        f" Matrix is not positive definite after the maximum number of iterations = {max_iter_jitter} "
        f"with a maximum jitter = {F.max(jitter)}"
    )
Example #22
model = zoo.load_pretrained_resnext_to_unext101_64_4d(ctx=_ctx,
                                                      migrate_input_norm=False,
                                                      fine_tune=False)
# model=zoo.resume_training_unext101_64_4d(freeze_input_norm = True,
# fine_tune = True,
# ctx=_ctx,
# symb = "unext_resize_ver03_101_64_4_px_global_weight_highest_inv_weight_enp-symbol.json",
# parame = "unext_resize_ver03_101_64_4_px_global_weight_highest_inv_weight_enp-0000.params")
# model=zoo.resume_training_unext101_64_4d_beyond_word(freeze_input_norm = True,
# fine_tune = True,
# ctx=_ctx,
# symb = "unext101_64_4d_deconv_enp_72000-symbol.json",
# parame = "unext101_64_4d_deconv_enp_72000-0000.params")

with mx.Context(_ctx):
    model.hybridize()
    # model.collect_params().initialize()
    # sx = mx.sym.var('data')
    # sym = model(sx)
    # graph = mx.viz.plot_network(sym)
    # graph.format = 'tif'
    # graph.render('model')
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model.collect_params().initialize()

num_epochs = 80
num_steps = len(my_train)
test_num_steps = len(my_test)
# print(num_steps)
Example #23
    def train(self,
              base_path: str,
              sequence_length: int,
              learning_rate: float = 20,
              mini_batch_size: int = 100,
              anneal_factor: float = 0.25,
              patience: int = 10,
              clip=0.25,
              max_epochs: int = 10000):

        number_of_splits = len(self.corpus.train_files)
        val_data = self._batchify(self.corpus.valid, mini_batch_size)

        os.makedirs(base_path, exist_ok=True)
        loss_txt = os.path.join(base_path, 'loss.txt')
        savefile = os.path.join(base_path, 'best-lm.pt')

        try:
            with mx.Context(mxnet_prefer_gpu()):
                self.model.initialize()
                best_val_loss = 100000000
                scheduler = ReduceLROnPlateau(lr=learning_rate,
                                              verbose=True,
                                              factor=anneal_factor,
                                              patience=patience)
                optimizer = mx.optimizer.SGD(learning_rate=learning_rate,
                                             lr_scheduler=scheduler)
                trainer = gluon.Trainer(self.model.collect_params(),
                                        optimizer=optimizer)

                for epoch in range(1, max_epochs + 1):

                    print('Split %d' % epoch +
                          '\t - ({:%H:%M:%S})'.format(datetime.datetime.now()))

                    # for group in optimizer.param_groups:
                    #     learning_rate = group['lr']

                    train_slice = self.corpus.get_next_train_slice()

                    train_data = self._batchify(train_slice, mini_batch_size)
                    print('\t({:%H:%M:%S})'.format(datetime.datetime.now()))

                    # go into train mode
                    # self.model.train()

                    # reset variables
                    epoch_start_time = time.time()
                    total_loss = 0
                    start_time = time.time()

                    hidden = self.model.init_hidden(mini_batch_size)
                    cell = hidden.copy()

                    # vocabulary size (number of tokens in the dictionary)
                    ntokens = len(self.corpus.dictionary)

                    # do batches
                    for batch, i in enumerate(
                            range(0,
                                  len(train_data) - 1, sequence_length)):

                        data, targets = self._get_batch(
                            train_data, i, sequence_length)

                        # Starting each batch, we detach the hidden state from how it was previously produced.
                        # If we didn't, the model would try backpropagating all the way to start of the dataset.
                        hidden = self._repackage_hidden(hidden)
                        cell = self._repackage_hidden(cell)

                        # self.model.zero_grad()
                        # optimizer.zero_grad()

                        # do the forward pass in the model
                        with autograd.record():
                            output, rnn_output, hidden, cell = self.model.forward(
                                data, hidden, cell)
                            # try to predict the targets
                            loss = self.loss_function(
                                output.reshape(-1, ntokens), targets).mean()
                            loss.backward()

                        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                        # torch.nn.utils.clip_grad_norm_(self.model.parameters(), clip)
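                        # In Gluon, a similar effect can be achieved via the optimizer's clip_gradient
                        # argument or with gluon.utils.clip_global_norm on the parameter gradients.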

                        trainer.step(mini_batch_size)

                        total_loss += loss.asscalar()

                        if batch % self.log_interval == 0 and batch > 0:
                            cur_loss = total_loss.item() / self.log_interval
                            elapsed = time.time() - start_time
                            print(
                                '| split {:3d} /{:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                                'loss {:5.2f} | ppl {:8.2f}'.format(
                                    epoch, number_of_splits, batch,
                                    len(train_data) // sequence_length,
                                    elapsed * 1000 / self.log_interval,
                                    cur_loss, self._safe_exp(cur_loss)))
                            total_loss = 0
                            start_time = time.time()

                    print('epoch {} done! \t({:%H:%M:%S})'.format(
                        epoch, datetime.datetime.now()))
                    scheduler.step(cur_loss)

                    ###############################################################################
                    # TEST
                    ###############################################################################
                    # skip evaluation
                    # val_loss = self.evaluate(val_data, mini_batch_size, sequence_length)
                    # scheduler.step(val_loss)
                    #
                    # # Save the model if the validation loss is the best we've seen so far.
                    # if val_loss < best_val_loss:
                    #     self.model.save(savefile)
                    #     best_val_loss = val_loss
                    #     print('best loss so far {:5.2f}'.format(best_val_loss))
                    val_loss = cur_loss
                    if (self.corpus.current_train_file_index +
                            1) % 100 == 0 or self.corpus.is_last_slice:
                        self.model.save(savefile)

                    ###############################################################################
                    # print info
                    ###############################################################################
                    print('-' * 89)

                    local_split_number = epoch % number_of_splits
                    if local_split_number == 0:
                        local_split_number = number_of_splits

                    summary = '| end of split {:3d} /{:3d} | epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' \
                              'valid ppl {:8.2f} | learning rate {:3.2f}'.format(local_split_number,
                                                                                 number_of_splits,
                                                                                 epoch,
                                                                                 (time.time() - epoch_start_time),
                                                                                 val_loss,
                                                                                 self._safe_exp(val_loss),
                                                                                 learning_rate)

                    with open(loss_txt, "a") as myfile:
                        myfile.write('%s\n' % summary)

                    print(summary)
                    print('-' * 89)

        except KeyboardInterrupt:
            print('-' * 89)
            print('Exiting from training early')
Example #24
    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))
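# The definition of the `CBR` block is cut off above this snippet. Judging from its `forward` method and
# from how it is used below (a channel count plus an optional kernel size), it is presumably a 1-D
# convolution followed by batch normalization and a ReLU. A minimal sketch under that assumption follows;
# the parameter names, default kernel size and padding are guesses, not taken from the source
# (assumes `from mxnet.gluon import nn`, as the rest of the snippet does):
class CBR(nn.Block):
    def __init__(self, channels, kernel_size=3):
        super(CBR, self).__init__()
        self.conv = nn.Conv1D(channels, kernel_size, padding=kernel_size // 2)
        self.bn = nn.BatchNorm()
        self.relu = nn.Activation('relu')

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))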


class SwapAxes(nn.Block):
    def __init__(self, dim1, dim2):
        super(SwapAxes, self).__init__()
        self.dim1 = dim1
        self.dim2 = dim2

    def forward(self, x):
        return nd.swapaxes(x, self.dim1, self.dim2)


with mx.Context(mx.cpu(0)):
    model = nn.Sequential()
    model.add(
        SwapAxes(1, 2),
        CBR(40, 1),
        CBR(40),
        CBR(40),
        nn.MaxPool1D(2),
        CBR(80, 1),
        CBR(80),
        CBR(80),
        nn.MaxPool1D(2),
        CBR(160, 1),
        CBR(160),
        CBR(160),
        CBR(160),
Example #25
def test_amp_conversion():
    def check_amp_convert_symbol():
        x = mx.sym.var("x")
        y = mx.sym.var("y")
        z = mx.sym.FullyConnected(x, y, num_hidden=10, no_bias=True)
        siny = mx.sym.sin(y)
        res = z + siny
        # Compare symbols with similar computation graphs created using convert_symbol and manually.
        res_converted = amp.convert_symbol(res, target_dtype="float16",
                                           target_dtype_ops=["FullyConnected"],
                                           fp32_ops=["sin"])

        x_fp16 = mx.sym.amp_cast(x, dtype="float16")
        y_fp16 = mx.sym.amp_cast(y, dtype="float16")
        amp_casted_siny = mx.sym.sin(mx.sym.amp_cast(y, dtype="float32"))
        z = mx.sym.FullyConnected(x_fp16, y_fp16, num_hidden=10, no_bias=True)
        outs = mx.sym.amp_multicast(z, amp_casted_siny, num_outputs=2)
        res_expected = outs[0] + outs[1]
        assert same_symbol_structure(res_converted, res_expected), \
            "convert_symbol generating wrong computation graph"

        # convert_symbol called with incorrect inputs
        assert_raises(AssertionError, amp.convert_symbol, res,
                      target_dtype="float16", target_dtype_ops=["FullyConnected"],
                      fp32_ops=["elemwise_add"])
        assert_raises(AssertionError, amp.convert_symbol, res,
                      target_dtype="float16", target_dtype_ops=["FullyConnected"],
                      fp32_ops=["Activation"],
                      conditional_fp32_ops=[('Activation', 'act_type', ['selu'])])
        assert_raises(AssertionError, amp.convert_symbol, res,
                      target_dtype="float16", target_dtype_ops=["Activation"],
                      fp32_ops=["Activation"],
                      conditional_fp32_ops=[('Activation', 'act_type', ['selu'])])
        assert_raises(AssertionError, amp.convert_symbol, res,
                      target_dtype="float16", target_dtype_ops=["FullyConnected"],
                      fp32_ops=["FullyConnected"])

        # Test for op in conditional ops with condition not satisfied
        x = mx.sym.var("x")
        y = mx.sym.var("y")
        fc_cond = mx.sym.FullyConnected(x, y, num_hidden=10, no_bias=True)
        res_converted = amp.convert_symbol(fc_cond, target_dtype="float16",
                                           target_dtype_ops=[],
                                           fp32_ops=["sin"],
                                           conditional_fp32_ops=[("FullyConnected", "no_bias", ["False"])])

        res_expected = mx.sym.FullyConnected(x, y, num_hidden=10, no_bias=True)
        assert same_symbol_structure(res_converted, res_expected), \
            "convert_symbol generating wrong computation graph when conditional ops is used"

        # Test for op in conditional ops with condition satisfied
        res_converted = amp.convert_symbol(fc_cond, target_dtype="float16", target_dtype_ops=[],
                                           fp32_ops=["sin"],
                                           conditional_fp32_ops=[("FullyConnected", "no_bias", ["True"])])
        x_fp32 = mx.sym.amp_cast(x, dtype="float32")
        y_fp32 = mx.sym.amp_cast(y, dtype="float32")
        res_expected = mx.sym.FullyConnected(x_fp32, y_fp32, num_hidden=10, no_bias=True)
        assert same_symbol_structure(res_converted, res_expected), \
            "convert_symbol generating wrong computation graph when conditional ops used with satisfying condition"

        # Test with a real world model, default inputs for convert_symbol
        dir_path = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_path, 'model')
        if not os.path.isdir(model_path):
            os.mkdir(model_path)

        prefix, epoch = download_model("imagenet1k-resnet-18", dst_dir=model_path)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        inputs = {}
        inputs['data'] = mx.nd.ones((1, 3, 224, 224))
        inputs.update(arg_params)
        converted_sym = amp.convert_symbol(sym)
        exe = converted_sym.simple_bind(mx.gpu(0), data=(1, 3, 224, 224), grad_req='null')
        exe.forward(is_train=False, **inputs)
        exe.outputs[0].asnumpy()

        inputs2 = {}
        inputs2['data'] = mx.nd.ones((1, 3, 224, 224))
        inputs2['fc1_weight'] = inputs['fc1_weight'].astype(np.float16)
        inputs2['fc1_bias'] = inputs['fc1_bias'].astype(np.float16)

        # Test with a real world model, tweak inputs for convert_symbol
        converted_sym = amp.convert_symbol(sym, target_dtype="float16",
                                           target_dtype_ops=["Convolution"], data_names=["data"],
                                           cast_optional_params=True)
        converted_sym2 = amp.convert_symbol(sym, target_dtype="float16",
                                            target_dtype_ops=["Convolution"], data_names=["data"],
                                            cast_optional_params=False)

        exe = converted_sym.simple_bind(mx.gpu(0), data=(1, 3, 224, 224), grad_req='null')
        exe2 = converted_sym2.simple_bind(mx.gpu(), data=(1, 3, 224, 224), grad_req='null')

        converted_args = converted_sym.list_arguments()
        converted_auxs = converted_sym.list_auxiliary_states()
        for i, key in enumerate(exe.arg_arrays):
            if converted_args[i] in arg_params:
                arg_params[converted_args[i]] = arg_params[converted_args[i]].astype(exe.arg_arrays[i].dtype)
        for i, key in enumerate(exe.aux_arrays):
            if converted_auxs[i] in aux_params:
                aux_params[converted_auxs[i]] = aux_params[converted_auxs[i]].astype(exe.aux_arrays[i].dtype)

        inputs2.update(arg_params)
        exe.forward(is_train=False, **inputs2)
        exe.outputs[0].wait_to_read()

        inputs['fc1_weight'] = inputs['fc1_weight'].astype(np.float16)
        inputs['fc1_bias'] = inputs['fc1_bias'].astype(np.float16)
        exe2.forward(is_train=False, **inputs)
        exe2.outputs[0].wait_to_read()


    def check_amp_convert_model():
        # Test with real world model, default inputs for convert_model
        dir_path = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_path, 'model')
        if not os.path.isdir(model_path):
            os.mkdir(model_path)
        prefix, epoch = download_model("imagenet1k-resnet-18", dst_dir=model_path)

        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)

        # Test with real world model, tweak inputs for convert_model
        result_sym, result_arg_params, result_aux_params = amp.convert_model(sym,
                                                                             arg_params,
                                                                             aux_params,
                                                                             target_dtype="float16",
                                                                             target_dtype_ops=["Convolution"])
        mod = mx.mod.Module(result_sym, data_names=["data"], label_names=["softmax_label"], context=mx.gpu())
        mod.bind(data_shapes=[['data', (1, 3, 224, 224)]], label_shapes=[['softmax_label', (1,)]])

        mod.set_params(result_arg_params, result_aux_params)
        mod.forward(mx.io.DataBatch(data=[mx.nd.ones((1, 3, 224, 224))],
                                    label=[mx.nd.ones((1,))]))
        mod.get_outputs()[0].asnumpy()
        assert mod._arg_params["stage2_unit1_conv2_weight"].dtype == np.float32

        # Call convert_model with cast_optional_params set to True
        result_sym, result_arg_params, result_aux_params = amp.convert_model(sym,
                                                                             arg_params,
                                                                             aux_params,
                                                                             target_dtype="float16",
                                                                             target_dtype_ops=["Convolution"], cast_optional_params=True)
        mod = mx.mod.Module(result_sym, data_names=["data"], label_names=["softmax_label"], context=mx.gpu())
        mod.bind(data_shapes=[['data', (1, 3, 224, 224)]], label_shapes=[['softmax_label', (1,)]])
        mod.set_params(result_arg_params, result_aux_params)
        mod.forward(mx.io.DataBatch(data=[mx.nd.ones((1, 3, 224, 224))],
                                    label=[mx.nd.ones((1,))]))
        mod.get_outputs()[0].asnumpy()
        assert mod._arg_params["stage2_unit1_conv2_weight"].dtype == np.float16


    def check_amp_convert_hybrid_block():
        # Test conversion for hybrid block on CPU
        model_cpu = get_model("resnet50_v1")
        model_cpu.collect_params().initialize(ctx=mx.cpu())
        model_cpu.hybridize()
        model_cpu(mx.nd.random.uniform(0, 1, shape=(1, 3, 224, 224), ctx=mx.cpu()))
        converted_model_cpu = amp.convert_hybrid_block(model_cpu)

        # Test with real world model, default inputs for convert_hybrid_block
        model = get_model("resnet50_v1")
        model.collect_params().initialize(ctx=mx.gpu())
        model.hybridize()
        model(mx.nd.zeros((1, 3, 224, 224)))
        converted_model = amp.convert_hybrid_block(model)
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224),
                                                     dtype=np.float32))
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224),
                                                     dtype=np.float32))

        # Test with real world model, tweak inputs for convert_hybrid_block
        converted_model = amp.convert_hybrid_block(model, target_dtype="float16",
                                                   target_dtype_ops=["Convolution"])
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224),
                                                      dtype=np.float32))
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224),
                                                     dtype=np.float32))

        # Check symbolic block
        dir_path = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_path, 'model')
        if not os.path.isdir(model_path):
            os.mkdir(model_path)
        prefix, epoch = download_model("imagenet1k-resnet-18", dst_dir=model_path)
        net = SymbolBlock.imports(os.path.join(model_path, "imagenet1k-resnet-18-symbol.json"),
                                  input_names=["data", "softmax_label"],
                                  param_file=os.path.join(model_path, "imagenet1k-resnet-18-0000.params"))
        net.collect_params().reset_ctx(ctx=mx.gpu())
        net.hybridize()
        net(mx.nd.zeros((1, 3, 224, 224)), mx.nd.zeros((1,)))
        converted_model = amp.convert_hybrid_block(net)
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)), mx.nd.zeros((1,)))
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)), mx.nd.zeros((1,)))

        # Check symbolic block, tweaked inputs
        converted_model = amp.convert_hybrid_block(net, target_dtype="float16", target_dtype_ops=["Convolution"])
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)), mx.nd.zeros((1, )))
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)), mx.nd.zeros((1, )))
        params = converted_model.collect_params()
        assert params["stage2_unit1_conv2_weight"].dtype == np.float32

        # Pass cast_optional_params as True to convert_hybrid_block
        converted_model = amp.convert_hybrid_block(net, target_dtype="float16", target_dtype_ops=["Convolution"],
                                                   cast_optional_params=True)
        params = converted_model.collect_params()
        assert params["stage2_unit1_conv2_weight"].dtype == np.float16

    with mx.Context(mx.gpu(0)):
        check_amp_convert_symbol()
        check_amp_convert_model()
        check_amp_convert_hybrid_block()
Example #26
    def train(self, train_file, dev_file, test_file, save_dir,
              pretrained_embeddings=None, min_occur_count=2,
              lstm_layers=3, word_dims=100, tag_dims=100, dropout_emb=0.33, lstm_hiddens=400,
              dropout_lstm_input=0.33, dropout_lstm_hidden=0.33,
              mlp_arc_size=500, mlp_rel_size=100,
              dropout_mlp=0.33, learning_rate=2e-3, decay=.75, decay_steps=5000,
              beta_1=.9, beta_2=.9, epsilon=1e-12,
              num_buckets_train=40,
              num_buckets_valid=10, num_buckets_test=10, train_iters=50000, train_batch_size=5000,
              test_batch_size=5000, validate_every=100, save_after=5000, debug=False):
        """Train a deep biaffine dependency parser.

        Parameters
        ----------
        train_file : str
            path to training set
        dev_file : str
            path to dev set
        test_file : str
            path to test set
        save_dir : str
            a directory for saving model and related meta-data
        pretrained_embeddings : tuple
            (embedding_name, source), used for gluonnlp.embedding.create(embedding_name, source)
        min_occur_count : int
            threshold of rare words, which will be replaced with UNKs,
        lstm_layers : int
            layers of lstm
        word_dims : int
            dimension of word embedding
        tag_dims : int
            dimension of tag embedding
        dropout_emb : float
            word dropout
        lstm_hiddens : int
            size of lstm hidden states
        dropout_lstm_input : int
            dropout on x in variational RNN
        dropout_lstm_hidden : int
            dropout on h in variational RNN
        mlp_arc_size : int
            output size of MLP for arc feature extraction
        mlp_rel_size : int
            output size of MLP for rel feature extraction
        dropout_mlp : float
            dropout on the output of LSTM
        learning_rate : float
            learning rate
        decay : float
            see ExponentialScheduler
        decay_steps : int
            see ExponentialScheduler
        beta_1 : float
            see ExponentialScheduler
        beta_2 : float
            see ExponentialScheduler
        epsilon : float
            see ExponentialScheduler
        num_buckets_train : int
            number of buckets for training data set
        num_buckets_valid : int
            number of buckets for dev data set
        num_buckets_test : int
            number of buckets for testing data set
        train_iters : int
            training iterations
        train_batch_size : int
            training batch size
        test_batch_size : int
            test batch size
        validate_every : int
            validate on dev set every such number of batches
        save_after : int
            skip saving model in early epochs
        debug : bool
            debug mode

        Returns
        -------
        DepParser
            parser itself
        """
        logger = init_logger(save_dir)
        config = _Config(train_file, dev_file, test_file, save_dir, pretrained_embeddings,
                         min_occur_count,
                         lstm_layers, word_dims, tag_dims, dropout_emb, lstm_hiddens,
                         dropout_lstm_input, dropout_lstm_hidden, mlp_arc_size, mlp_rel_size,
                         dropout_mlp, learning_rate, decay, decay_steps,
                         beta_1, beta_2, epsilon, num_buckets_train, num_buckets_valid,
                         num_buckets_test, train_iters,
                         train_batch_size, debug)
        config.save()
        self._vocab = vocab = ParserVocabulary(train_file,
                                               pretrained_embeddings,
                                               min_occur_count)
        vocab.save(config.save_vocab_path)
        vocab.log_info(logger)

        with mx.Context(mxnet_prefer_gpu()):
            self._parser = parser = BiaffineParser(vocab, word_dims, tag_dims,
                                                   dropout_emb,
                                                   lstm_layers,
                                                   lstm_hiddens, dropout_lstm_input,
                                                   dropout_lstm_hidden,
                                                   mlp_arc_size,
                                                   mlp_rel_size, dropout_mlp, debug)
            parser.initialize()
            scheduler = ExponentialScheduler(learning_rate, decay, decay_steps)
            optimizer = mx.optimizer.Adam(learning_rate, beta_1, beta_2, epsilon,
                                          lr_scheduler=scheduler)
            trainer = gluon.Trainer(parser.collect_params(), optimizer=optimizer)
            data_loader = DataLoader(train_file, num_buckets_train, vocab)
            global_step = 0
            best_UAS = 0.
            batch_id = 0
            epoch = 1
            total_epoch = math.ceil(train_iters / validate_every)
            logger.info('Epoch %d out of %d', epoch, total_epoch)
            bar = Progbar(target=min(validate_every, data_loader.samples))
            while global_step < train_iters:
                for words, tags, arcs, rels in data_loader.get_batches(batch_size=train_batch_size,
                                                                       shuffle=True):
                    with autograd.record():
                        arc_accuracy, _, _, loss = parser.forward(words, tags, arcs, rels)
                        loss_value = loss.asscalar()
                    loss.backward()
                    trainer.step(train_batch_size)
                    batch_id += 1
                    try:
                        bar.update(batch_id,
                                   exact=[('UAS', arc_accuracy, 2),
                                          ('loss', loss_value)])
                    except OverflowError:
                        pass  # sometimes loss can be 0 or infinity, crashes the bar

                    global_step += 1
                    if global_step % validate_every == 0:
                        bar = Progbar(target=min(validate_every, train_iters - global_step))
                        batch_id = 0
                        UAS, LAS, speed = evaluate_official_script(parser, vocab,
                                                                   num_buckets_valid,
                                                                   test_batch_size,
                                                                   dev_file,
                                                                   os.path.join(save_dir,
                                                                                'valid_tmp'))
                        logger.info('Dev: UAS %.2f%% LAS %.2f%% %d sents/s', UAS, LAS, speed)
                        epoch += 1
                        if global_step < train_iters:
                            logger.info('Epoch %d out of %d', epoch, total_epoch)
                        if global_step > save_after and UAS > best_UAS:
                            logger.info('- new best score!')
                            best_UAS = UAS
                            parser.save(config.save_model_path)

        # When validate_every is too big
        if not os.path.isfile(config.save_model_path) or best_UAS != UAS:
            parser.save(config.save_model_path)

        return self
Example #27
def test_amp_conversion():
    def check_amp_convert_symbol():
        x = mx.sym.var("x")
        y = mx.sym.var("y")
        z = mx.sym.FullyConnected(x, y, num_hidden=10, no_bias=True)
        siny = mx.sym.sin(y)
        res = z + siny
        # Compare symbols with similar computation graphs created using convert_symbol and manually.
        res_converted = amp.convert_symbol(res,
                                           target_dtype="float16",
                                           target_dtype_ops=["FullyConnected"],
                                           fp32_ops=["sin"])

        x_fp16 = mx.sym.amp_cast(x, dtype="float16")
        y_fp16 = mx.sym.amp_cast(y, dtype="float16")
        siny = mx.sym.sin(y)
        z = mx.sym.FullyConnected(x_fp16, y_fp16, num_hidden=10, no_bias=True)
        amp_casted_z = mx.sym.amp_cast(z, dtype="float32")
        res_expected = amp_casted_z + siny
        assert same_symbol_structure(res_converted, res_expected), \
            "convert_symbol generating wrong computation graph"

        # convert_symbol called with incorrect inputs
        assert_raises(AssertionError,
                      amp.convert_symbol,
                      res,
                      target_dtype="float16",
                      target_dtype_ops=["FullyConnected"],
                      fp32_ops=["elemwise_add"])
        assert_raises(AssertionError,
                      amp.convert_symbol,
                      res,
                      target_dtype="float16",
                      target_dtype_ops=["FullyConnected"],
                      fp32_ops=["Activation"],
                      conditional_fp32_ops=[('Activation', 'act_type',
                                             ['selu'])])
        assert_raises(AssertionError,
                      amp.convert_symbol,
                      res,
                      target_dtype="float16",
                      target_dtype_ops=["Activation"],
                      fp32_ops=["Activation"],
                      conditional_fp32_ops=[('Activation', 'act_type',
                                             ['selu'])])
        assert_raises(AssertionError,
                      amp.convert_symbol,
                      res,
                      target_dtype="float16",
                      target_dtype_ops=["FullyConnected"],
                      fp32_ops=["FullyConnected"])

        # Test for op in conditional ops with condition not satisfied
        x = mx.sym.var("x")
        y = mx.sym.var("y")
        fc_cond = mx.sym.FullyConnected(x, y, num_hidden=10, no_bias=True)
        res_converted = amp.convert_symbol(fc_cond,
                                           target_dtype="float16",
                                           target_dtype_ops=[],
                                           fp32_ops=["sin"],
                                           conditional_fp32_ops=[
                                               ("FullyConnected", "no_bias",
                                                ["False"])
                                           ])

        res_expected = mx.sym.FullyConnected(x, y, num_hidden=10, no_bias=True)
        assert same_symbol_structure(res_converted, res_expected), \
            "convert_symbol generating wrong computation graph when conditional ops is used"

        # Test for op in conditional ops with condition satisfied
        res_converted = amp.convert_symbol(fc_cond,
                                           target_dtype="float16",
                                           target_dtype_ops=[],
                                           fp32_ops=["sin"],
                                           conditional_fp32_ops=[
                                               ("FullyConnected", "no_bias",
                                                ["True"])
                                           ])
        x_fp32 = mx.sym.amp_cast(x, dtype="float32")
        y_fp32 = mx.sym.amp_cast(y, dtype="float32")
        res_expected = mx.sym.FullyConnected(x_fp32,
                                             y_fp32,
                                             num_hidden=10,
                                             no_bias=True)
        assert same_symbol_structure(res_converted, res_expected), \
            "convert_symbol generating wrong computation graph when conditional ops used with satisfying condition"

        # Test with a real world model, default inputs for convert_symbol
        dir_path = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_path, 'model')
        if not os.path.isdir(model_path):
            os.mkdir(model_path)

        prefix, epoch = download_model("imagenet1k-resnet-18",
                                       dst_dir=model_path)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        inputs = {}
        inputs['data'] = mx.nd.ones((1, 3, 224, 224))
        inputs.update(arg_params)
        converted_sym = amp.convert_symbol(sym)
        exe = converted_sym.simple_bind(mx.gpu(0),
                                        data=(1, 3, 224, 224),
                                        grad_req='null')
        exe.forward(is_train=False, **inputs)
        exe.outputs[0].asnumpy()

        inputs2 = {}
        inputs2['data'] = mx.nd.ones((1, 3, 224, 224))
        inputs2['fc1_weight'] = inputs['fc1_weight'].astype(np.float16)
        inputs2['fc1_bias'] = inputs['fc1_bias'].astype(np.float16)

        # Test with a real world model, tweak inputs for convert_symbol
        converted_sym = amp.convert_symbol(sym,
                                           target_dtype="float16",
                                           target_dtype_ops=["Convolution"],
                                           data_names=["data"],
                                           cast_optional_params=True)
        converted_sym2 = amp.convert_symbol(sym,
                                            target_dtype="float16",
                                            target_dtype_ops=["Convolution"],
                                            data_names=["data"],
                                            cast_optional_params=False)

        exe = converted_sym.simple_bind(mx.gpu(0),
                                        data=(1, 3, 224, 224),
                                        grad_req='null')
        exe2 = converted_sym2.simple_bind(mx.gpu(),
                                          data=(1, 3, 224, 224),
                                          grad_req='null')

        converted_args = converted_sym.list_arguments()
        converted_auxs = converted_sym.list_auxiliary_states()
        for i, key in enumerate(exe.arg_arrays):
            if converted_args[i] in arg_params:
                arg_params[converted_args[i]] = arg_params[
                    converted_args[i]].astype(exe.arg_arrays[i].dtype)
        for i, key in enumerate(exe.aux_arrays):
            if converted_auxs[i] in aux_params:
                aux_params[converted_auxs[i]] = aux_params[
                    converted_auxs[i]].astype(exe.aux_arrays[i].dtype)

        inputs2.update(arg_params)
        exe.forward(is_train=False, **inputs2)
        exe.outputs[0].wait_to_read()

        inputs['fc1_weight'] = inputs['fc1_weight'].astype(np.float16)
        inputs['fc1_bias'] = inputs['fc1_bias'].astype(np.float16)
        exe2.forward(is_train=False, **inputs)
        exe2.outputs[0].wait_to_read()

    def check_amp_convert_model():
        # Test with real world model, default inputs for convert_model
        dir_path = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_path, 'model')
        if not os.path.isdir(model_path):
            os.mkdir(model_path)
        prefix, epoch = download_model("imagenet1k-resnet-18",
                                       dst_dir=model_path)

        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)

        # Test with real world model, tweak inputs for convert_model
        result_sym, result_arg_params, result_aux_params = amp.convert_model(
            sym,
            arg_params,
            aux_params,
            target_dtype="float16",
            target_dtype_ops=["Convolution"])
        mod = mx.mod.Module(result_sym,
                            data_names=["data"],
                            label_names=["softmax_label"],
                            context=mx.gpu())
        mod.bind(data_shapes=[['data', (1, 3, 224, 224)]],
                 label_shapes=[['softmax_label', (1, )]])

        mod.set_params(result_arg_params, result_aux_params)
        mod.forward(
            mx.io.DataBatch(data=[mx.nd.ones((1, 3, 224, 224))],
                            label=[mx.nd.ones((1, ))]))
        mod.get_outputs()[0].asnumpy()
        assert mod._arg_params["stage2_unit1_conv2_weight"].dtype == np.float32

        # Call convert_model with cast_optional_params set to True
        result_sym, result_arg_params, result_aux_params = amp.convert_model(
            sym,
            arg_params,
            aux_params,
            target_dtype="float16",
            target_dtype_ops=["Convolution"],
            cast_optional_params=True)
        mod = mx.mod.Module(result_sym,
                            data_names=["data"],
                            label_names=["softmax_label"],
                            context=mx.gpu())
        mod.bind(data_shapes=[['data', (1, 3, 224, 224)]],
                 label_shapes=[['softmax_label', (1, )]])
        mod.set_params(result_arg_params, result_aux_params)
        mod.forward(
            mx.io.DataBatch(data=[mx.nd.ones((1, 3, 224, 224))],
                            label=[mx.nd.ones((1, ))]))
        mod.get_outputs()[0].asnumpy()
        assert mod._arg_params["stage2_unit1_conv2_weight"].dtype == np.float16

    def check_amp_convert_hybrid_block():
        # Test conversion for hybrid block on CPU
        model_cpu = get_model("resnet50_v1")
        model_cpu.collect_params().initialize(ctx=mx.cpu())
        model_cpu.hybridize()
        model_cpu(
            mx.nd.random.uniform(0, 1, shape=(1, 3, 224, 224), ctx=mx.cpu()))
        converted_model_cpu = amp.convert_hybrid_block(model_cpu)

        # Test with real world model, default inputs for convert_hybrid_block
        model = get_model("resnet50_v1")
        model.collect_params().initialize(ctx=mx.gpu())
        model.hybridize()
        model(mx.nd.zeros((1, 3, 224, 224)))
        converted_model = amp.convert_hybrid_block(model)
        result = converted_model.forward(
            mx.nd.zeros((1, 3, 224, 224), dtype=np.float32))
        result = converted_model.forward(
            mx.nd.zeros((1, 3, 224, 224), dtype=np.float32))

        # Test with real world model, tweak inputs for convert_hybrid_block
        converted_model = amp.convert_hybrid_block(
            model, target_dtype="float16", target_dtype_ops=["Convolution"])
        result = converted_model.forward(
            mx.nd.zeros((1, 3, 224, 224), dtype=np.float32))
        result = converted_model.forward(
            mx.nd.zeros((1, 3, 224, 224), dtype=np.float32))

        # Check symbolic block
        dir_path = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_path, 'model')
        if not os.path.isdir(model_path):
            os.mkdir(model_path)
        prefix, epoch = download_model("imagenet1k-resnet-18",
                                       dst_dir=model_path)
        net = SymbolBlock.imports(
            os.path.join(model_path, "imagenet1k-resnet-18-symbol.json"),
            input_names=["data", "softmax_label"],
            param_file=os.path.join(model_path,
                                    "imagenet1k-resnet-18-0000.params"))
        net.collect_params().reset_ctx(ctx=mx.gpu())
        net.hybridize()
        net(mx.nd.zeros((1, 3, 224, 224)), mx.nd.zeros((1, )))
        converted_model = amp.convert_hybrid_block(net)
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)),
                                         mx.nd.zeros((1, )))
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)),
                                         mx.nd.zeros((1, )))

        # Check symbolic block, tweaked inputs
        converted_model = amp.convert_hybrid_block(
            net, target_dtype="float16", target_dtype_ops=["Convolution"])
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)),
                                         mx.nd.zeros((1, )))
        result = converted_model.forward(mx.nd.zeros((1, 3, 224, 224)),
                                         mx.nd.zeros((1, )))
        params = converted_model.collect_params()
        assert params["stage2_unit1_conv2_weight"].dtype == np.float32

        # Pass cast_optional_params as True to convert_hybrid_block
        converted_model = amp.convert_hybrid_block(
            net,
            target_dtype="float16",
            target_dtype_ops=["Convolution"],
            cast_optional_params=True)
        params = converted_model.collect_params()
        assert params["stage2_unit1_conv2_weight"].dtype == np.float16

    def check_amp_convert_bucketing_module():
        model = train_model(context=mx.current_context())
        result_model = amp.convert_bucketing_module(model)
        val_sent = []
        batch_size = 128
        invalid_label = -1
        num_sentence = 1000
        buckets = [5, 10, 20, 30, 40]
        len_vocab = 50

        for _ in range(num_sentence):
            len_sentence = randint(6, max(buckets) - 1)  # keep sentence lengths below the largest bucket
            val_sentence = []
            for _ in range(len_sentence):
                val_sentence.append(randint(1, len_vocab))
            val_sent.append(val_sentence)

        data_val = mx.rnn.BucketSentenceIter(val_sent,
                                             batch_size,
                                             buckets=buckets,
                                             invalid_label=invalid_label)
        result_model.bind(data_val.provide_data,
                          data_val.provide_label,
                          for_training=False)
        result_model.score(data_val,
                           mx.metric.Perplexity(invalid_label),
                           batch_end_callback=mx.callback.Speedometer(
                               batch_size, 1))

        # AMP conversion with cast_optional_params set to true
        # Flaky test when cast_optional_params set to True : https://github.com/apache/incubator-mxnet/issues/16030
        '''
        result_model = amp.convert_bucketing_module(model, cast_optional_params=True)
        result_model.bind(data_val.provide_data, data_val.provide_label, for_training=False)
        result_model.score(data_val, mx.metric.Perplexity(invalid_label),
                           batch_end_callback=mx.callback.Speedometer(batch_size, 1))
        '''

    with mx.Context(mx.gpu(0)):
        check_amp_convert_symbol()
        check_amp_convert_model()
        check_amp_convert_hybrid_block()
        check_amp_convert_bucketing_module()
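A minimal stand-alone sketch of the convert_hybrid_block workflow exercised above, assuming MXNet 1.5+ with mxnet.contrib.amp available and a visible GPU; the model name and input shape are illustrative only.

import mxnet as mx
from mxnet.contrib import amp
from mxnet.gluon.model_zoo.vision import get_model

with mx.Context(mx.gpu(0)):
    net = get_model("resnet18_v1", pretrained=False)
    net.collect_params().initialize(ctx=mx.gpu(0))
    net.hybridize()
    # Run a forward pass once so the symbolic graph is cached before conversion.
    net(mx.nd.zeros((1, 3, 224, 224), ctx=mx.gpu(0)))
    # Cast eligible operators to float16; unlisted ops keep their original dtype.
    fp16_net = amp.convert_hybrid_block(net)
    out = fp16_net(mx.nd.zeros((1, 3, 224, 224), ctx=mx.gpu(0)))
    out.wait_to_read()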
Example #28
0
def main():
    # Initialize problem parameters
    batch_size = 1
    prediction_length = 50
    context_length = 5
    axis = [-5, 5, -3, 3]
    float_type = np.float64
    ctx = mx.Context("gpu")

    num_samples = 3
    ts_idx = 0

    # Initialize test data to generate Gaussian Process from
    lb = -5
    ub = 5
    dx = (ub - lb) / (prediction_length - 1)
    x_test = nd.arange(lb, ub + dx, dx, ctx=ctx,
                       dtype=float_type).reshape(-1, 1)
    x_test = nd.tile(x_test, reps=(batch_size, 1, 1))

    # Define the GP hyper parameters
    amplitude = nd.ones((batch_size, 1, 1), ctx=ctx, dtype=float_type)
    length_scale = math.sqrt(0.4) * nd.ones_like(amplitude)
    sigma = math.sqrt(1e-5) * nd.ones_like(amplitude)

    # Instantiate desired kernel object and compute kernel matrix
    rbf_kernel = RBFKernel(amplitude, length_scale)

    # Generate samples from 0 mean Gaussian process with RBF Kernel and plot it
    gp = GaussianProcess(
        sigma=sigma,
        kernel=rbf_kernel,
        prediction_length=prediction_length,
        context_length=context_length,
        num_samples=num_samples,
        ctx=ctx,
        float_type=float_type,
        sample_noise=False,  # Returns sample without noise
    )
    mean = nd.zeros((batch_size, prediction_length), ctx=ctx, dtype=float_type)
    covariance = rbf_kernel.kernel_matrix(x_test, x_test)
    gp.plot(x_test=x_test, samples=gp.sample(mean, covariance), ts_idx=ts_idx)

    # Generate training set on subset of interval using the sine function
    x_train = nd.array([-4, -3, -2, -1, 1], ctx=ctx,
                       dtype=float_type).reshape(context_length, 1)
    x_train = nd.tile(x_train, reps=(batch_size, 1, 1))
    y_train = nd.sin(x_train.squeeze(axis=2))

    # Predict exact GP using the GP predictive mean and covariance using the same fixed hyper-parameters
    samples, predictive_mean, predictive_std = gp.exact_inference(
        x_train, y_train, x_test)

    assert (np.sum(np.isnan(
        samples.asnumpy())) == 0), "NaNs in predictive samples!"

    gp.plot(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        ts_idx=ts_idx,
        mean=predictive_mean,
        std=predictive_std,
        samples=samples,
        axis=axis,
    )
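For reference, the kernel the example relies on is the standard squared-exponential (RBF) form, k(x, x') = amplitude**2 * exp(-||x - x'||**2 / (2 * length_scale**2)). A small numpy sketch of the kernel-matrix computation follows; it is an illustration only, not the RBFKernel class imported by the example, with defaults mirroring the hyper-parameters used above.

import numpy as np

def rbf_kernel_matrix(x1, x2, amplitude=1.0, length_scale=np.sqrt(0.4)):
    # x1: (n, d), x2: (m, d) -> (n, m) covariance matrix
    sq_dist = (np.sum(x1 ** 2, axis=1)[:, None]
               + np.sum(x2 ** 2, axis=1)[None, :]
               - 2.0 * x1 @ x2.T)
    return amplitude ** 2 * np.exp(-sq_dist / (2.0 * length_scale ** 2))

x = np.linspace(-5, 5, 50).reshape(-1, 1)
K = rbf_kernel_matrix(x, x)  # 50 x 50 prior covariance of the zero-mean GP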
Example #29
0
def test_ndarray_copy():
    c = mx.nd.array(np.random.uniform(-10, 10, (10, 10)))
    d = c.copyto(mx.Context('cpu', 0))
    assert np.sum(np.abs(c.asnumpy() != d.asnumpy())) == 0.0
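A small sketch of the same copy semantics across devices (assumes a GPU is present): copyto always allocates on the target context, while as_in_context only copies when the context actually differs.

import numpy as np
import mxnet as mx

a = mx.nd.array(np.random.uniform(-10, 10, (10, 10)))  # lives on cpu(0) by default
b = a.copyto(mx.gpu(0))                                # explicit cpu -> gpu copy
c = b.as_in_context(mx.cpu(0))                         # copies back because contexts differ
assert np.array_equal(a.asnumpy(), c.asnumpy())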
Example #30
0
def generate_poisoning_fun(index=0, total_round=5):
    # load dataset
    dev = mx.gpu(1)
    batch_size = 1
    # data_shape = (batch_size, 3072)
    data_shape = (batch_size, 3, 28, 28)
    train_iter = mx.io.ImageRecordIter(
        path_imgrec="data/cifar10/cifar10_train.rec",
        data_shape=(3, 28, 28),
        batch_size=batch_size,
        # mean_img="data/cifar10/mean.bin",
        rand_crop=True,
        rand_mirror=True,
        round_batch=True)

    val_iter = mx.io.ImageRecordIter(
        path_imgrec="data/cifar10/cifar10_val.rec",
        data_shape=(3, 28, 28),
        batch_size=batch_size,
        # mean_img="data/cifar10/mean.bin",
        rand_crop=True,
        rand_mirror=True,
        round_batch=True)

    all_data = []
    all_label = []
    poisoning_d = []
    poisoning_l = []
    poisoning_data_list = []
    poisoning_label_list = []
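    # Cache the first `index` training batches and shift each label by one class;
    # these serve as the initial poisoned examples that the loops below refine.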

    for i in range(index):
        batch_data = copy.deepcopy(train_iter.next())
        poisoning_d.append(batch_data.data[0])
        poisoning_l.append(batch_data.label[0] + 1)

    for j in range(index):
        with mx.Context(dev):
            # load original model
            softmax, arg_params, aux_params = mx.model.load_checkpoint(
                'model/cifar10_model', 300)
            model = mx.mod.Module(softmax, context=dev)
            model.bind(data_shapes=train_iter.provide_data,
                       label_shapes=train_iter.provide_label)
            model.set_params(arg_params, aux_params)
            # -------- parameters ----------------
            train_iter.reset()

            dataBatchP = copy.deepcopy(train_iter.next())
            dataBatchP.label[0] = dataBatchP.label[0] + 1
            dataBatchP.data[0] = poisoning_d[j]
            num_normal = 10
            attacked_model_lr = 0.01
            model.init_optimizer(kvstore='local',
                                 optimizer='sgd',
                                 optimizer_params=(('learning_rate',
                                                    attacked_model_lr), ))
            # -----------get normal data loss and accuracy----------
            loss = 0
            for num in range(num_normal):
                dataBatch = copy.deepcopy(train_iter.next())
                label = dataBatch.label[0]
                model.forward(dataBatch)
                output = model.get_outputs()[0].asnumpy()
                loss += CalLogLoss(output, label.asnumpy())
            print('normal data loss: %.4f' % loss)

            val_iter.reset()
            val_acc = model.score(val_iter, 'acc')
            print('Val Acc: %.4f' % val_acc[0][1])

            # -----------get loss and accuracy with initial poisoned data----------
            # load pre-trained weight
            model.forward(dataBatchP, is_train=True)
            model.backward()
            model.update()
            val_iter.reset()
            val_acc = model.score(val_iter, 'acc')
            print('Val Acc: %.4f' % val_acc[0][1])
            # re-evaluate normal data loss
            loss = 0
            train_iter.reset()
            dataBatch = copy.deepcopy(train_iter.next())
            for num in range(num_normal):
                dataBatch = copy.deepcopy(train_iter.next())
                label = dataBatch.label[0]
                model.forward(dataBatch)
                loss += CalLogLoss(model.get_outputs()[0].asnumpy(),
                                   label.asnumpy())
            print('normal data loss: %.4f' % loss)
            # ---------generate poisoned data------------
            plt.figure('poisoned data')
            # initial poisoned data
            plt.subplot(1, 5, 1)
            plt.imshow(
                (dataBatchP.data[0]).asnumpy()[0].astype(np.uint8).transpose(
                    1, 2, 0))
            # print dataBatchP.data[0].asnumpy()[0]

            pre_loss = loss
            for round in range(total_round):
                start = time.time()
                print('round %d' % round)
                # calculate gradient wrt poisoned data
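                # Zeroth-order estimate: perturb each of the 3*28*28 = 2352 pixels by +1,
                # retrain the attacked model on the perturbed batch, and keep only the sign
                # of the resulting change in normal-data loss as the ascent direction.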
                dir = np.zeros(data_shape).reshape(1, 2352)
                label_tmp = copy.deepcopy(dataBatchP.label)
                for gradient_round in range(data_shape[-1] * data_shape[-2] *
                                            data_shape[-3]):
                    data_tmp = copy.deepcopy(dataBatchP.data[0])
                    data_tmp = data_tmp.asnumpy().reshape(1, 2352)
                    data_tmp[0][gradient_round] += 1
                    # load pre-trained weight
                    model.set_params(arg_params, aux_params)

                    dataBatch_tmp = copy.deepcopy(
                        mx.io.DataBatch(
                            [mx.nd.array(data_tmp.reshape(1, 3, 28, 28))],
                            label_tmp))
                    model.forward(dataBatch_tmp, is_train=True)
                    model.backward()
                    # update attacked model
                    model.update()
                    # calculate normal data loss
                    loss = 0
                    train_iter.reset()
                    dataBatch = copy.deepcopy(train_iter.next())
                    for num in range(num_normal):
                        dataBatch = copy.deepcopy(train_iter.next())
                        label = dataBatch.label[0]
                        model.forward(dataBatch)
                        output = model.get_outputs()[0].asnumpy()
                        loss += CalLogLoss(output, label.asnumpy())
                    dir[0][gradient_round] = np.sign(loss - pre_loss)
                tmp = (dataBatchP.data[0]).asnumpy().reshape(1,
                                                             2352) + dir * 10
                tmp[tmp > 255] = 255
                tmp[tmp < 0] = 0
                # print dataBatchP.data[0].asnumpy()[0]
                dataBatchP.data[0] = mx.nd.array(tmp.reshape(1, 3, 28, 28))
                end = time.time()
                print('time: %.4f' % (end - start))
                if round % 4 == 0:
                    plt.subplot(1, 5, round // 2 + 2)
                    plt.imshow(dataBatchP.data[0].asnumpy()[0].astype(
                        np.uint8).transpose(1, 2, 0))

                # print dataBatchP.data[0].asnumpy()[0]
                # make one attack
                # load pre-trained weight
                model.set_params(arg_params, aux_params)

                model.forward(dataBatchP, is_train=True)
                model.backward()
                # update attacked model
                model.update()

                val_iter.reset()
                val_acc = model.score(val_iter, 'acc')
                print('Val Acc: %.4f' % val_acc[0][1])

                # re-evaluate normal data loss
                loss = 0
                dataBatch = copy.deepcopy(train_iter.next())
                for num in range(num_normal):
                    dataBatch = copy.deepcopy(train_iter.next())
                    label = dataBatch.label[0]
                    model.forward(dataBatch)
                    output = model.get_outputs()[0].asnumpy()
                    loss += CalLogLoss(output, label.asnumpy())
                print('normal data loss: %.4f' % loss)

                pre_loss = loss

                poisoning_data_list.append(dataBatchP.data[0].asnumpy()[0] / 255)
                poisoning_label_list.append(dataBatchP.label[0].asnumpy())
            all_data.append(poisoning_data_list)
            all_label.append(poisoning_label_list)
    return all_data, all_label
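A hypothetical invocation of the routine above; the CIFAR-10 .rec files under data/cifar10/ and the model/cifar10_model checkpoint are assumed to already exist on disk.

if __name__ == '__main__':
    poisoned_data, poisoned_labels = generate_poisoning_fun(index=1, total_round=5)
    print('generated %d poisoned batch lists' % len(poisoned_data))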