def setup_dynet(random_seed, weight_decay, mem, cuda):
    """
    Configure and initialise DyNet and report the settings that were applied.

    The returned dictionary can be passed to the model as additional
    parameters so that the DyNet configuration is stored with it.

    :param random_seed: value handed to ``DynetParams.set_random_seed``
    :param weight_decay: value handed to ``DynetParams.set_weight_decay``
    :param mem: value handed to ``DynetParams.set_mem``
    :param cuda: accepted for interface compatibility; not read by this function
    :return: dict with the keys ``random_seed``, ``weight_decay``,
        ``autobatch`` (always ``True``) and ``mem``
    """
    params = dn.DynetParams()
    params.set_random_seed(random_seed)
    params.set_weight_decay(weight_decay)
    params.set_autobatch(True)  # autobatching is unconditionally enabled
    params.set_mem(mem)

    # Initialize with the given parameters
    params.init()

    return {
        'random_seed': random_seed,
        'weight_decay': weight_decay,
        'autobatch': True,
        'mem': mem,
    }
Ejemplo n.º 2
0
    def _create_backend(self, **kwargs):
        """Create the ``Backend`` for the configured framework and load this task into it.

        The backend name is read from ``self.config_params['backend']``
        (default ``'tf'``). ``self.config_params['preproc']`` is created if
        missing and its ``trim`` flag is set per backend: ``True`` for
        ``'pytorch'`` and ``'dy'``, ``False`` otherwise.

        For ``'dy'``, DyNet is initialised from the command line with one
        requested GPU and autobatching; ``backend.params`` receives a fresh
        ``ParameterCollection`` and ``batched=False``. For any other
        non-PyTorch backend a TensorFlow exporter class is attached.

        :param kwargs: ``exporter_type`` may be ``'default'`` (the default) or
            ``'preproc'``; any other value leaves ``backend.exporter`` unset
        :return: the loaded backend
        :raises Exception: if the backend is ``'dy'`` and ``'autobatchsz'`` is
            not present in ``self.config_params['train']``
        """
        backend = Backend(self.config_params.get('backend', 'tf'))
        if 'preproc' not in self.config_params:
            self.config_params['preproc'] = {}
        if backend.name == 'pytorch':
            self.config_params['preproc']['trim'] = True
        elif backend.name == 'dy':
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                dy_params.set_autobatch(True)
            else:
                # The trailing space keeps the two sentences from running
                # together when the adjacent literals are concatenated.
                raise Exception('Tagger currently only supports autobatching. '
                                'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
            dy_params.init()
            backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
            self.config_params['preproc']['trim'] = True
        else:
            self.config_params['preproc']['trim'] = False
            # FIXME These should be registered instead
            exporter_type = kwargs.get('exporter_type', 'default')
            if exporter_type == 'default':
                from mead.tf.exporters import TaggerTensorFlowExporter
                backend.exporter = TaggerTensorFlowExporter
            elif exporter_type == 'preproc':
                from mead.tf.preproc_exporters import TaggerTensorFlowPreProcExporter
                # Imported for its side effects only; the name is not used here.
                import mead.tf.preprocessors
                backend.exporter = TaggerTensorFlowPreProcExporter

        backend.load(self.task_name())

        return backend
Ejemplo n.º 3
0
    def _create_backend(self, **kwargs):
        """Create the ``Backend`` for the configured framework and load this task into it.

        The backend name is read from ``self.config_params['backend']``
        (default ``'tf'``). ``self.config_params['preproc']`` is created if
        missing and its ``trim`` flag is set per backend: ``True`` for
        ``'pytorch'`` and ``'dy'``, ``False`` otherwise.

        For ``'dy'``, DyNet is initialised from the command line with one
        requested GPU and autobatching; ``backend.params`` receives a fresh
        ``ParameterCollection`` and ``batched=False``.

        :param kwargs: accepted but not read by this implementation
        :return: the loaded backend
        :raises Exception: if the backend is ``'dy'`` and ``'autobatchsz'`` is
            not present in ``self.config_params['train']``
        """
        backend = Backend(self.config_params.get('backend', 'tf'))
        if 'preproc' not in self.config_params:
            self.config_params['preproc'] = {}
        if backend.name == 'pytorch':
            self.config_params['preproc']['trim'] = True
        elif backend.name == 'dy':
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                dy_params.set_autobatch(True)
            else:
                # The trailing space keeps the two sentences from running
                # together when the adjacent literals are concatenated.
                raise Exception('Tagger currently only supports autobatching. '
                                'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
            dy_params.init()
            backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
            self.config_params['preproc']['trim'] = True
        else:
            self.config_params['preproc']['trim'] = False

        backend.load(self.task_name())

        return backend
Ejemplo n.º 4
0
    def _create_backend(self, **kwargs):
        """Create the ``Backend`` named in the config and load this task into it.

        The backend name comes from ``self.config_params['backend']`` (default
        ``'tf'``).

        * ``'dy'``: DyNet is initialised from the command line with one
          requested GPU. When ``'autobatchsz'`` is present under ``'train'``,
          the trainer type is switched to ``'autobatch'``, DyNet autobatching
          is enabled and the model is marked as not batched; otherwise it is
          marked as batched.
        * ``'tf'``: an exporter class is attached according to the
          ``exporter_type`` keyword (``'default'`` or ``'preproc'``); other
          values leave ``backend.exporter`` unset.

        :return: the loaded backend
        """
        backend = Backend(self.config_params.get('backend', 'tf'))
        framework = backend.name

        if framework == 'dy':
            import _dynet
            dynet_params = _dynet.DynetParams()
            dynet_params.from_args()
            dynet_params.set_requested_gpus(1)

            train_cfg = self.config_params['train']
            use_autobatch = 'autobatchsz' in train_cfg
            if use_autobatch:
                train_cfg['trainer_type'] = 'autobatch'
                dynet_params.set_autobatch(True)

            dynet_params.init()
            backend.params = {
                'pc': _dynet.ParameterCollection(),
                # Autobatched models are fed unbatched examples.
                'batched': not use_autobatch,
            }
        elif framework == 'tf':
            # FIXME this should be registered as well!
            exporter_type = kwargs.get('exporter_type', 'default')
            if exporter_type == 'default':
                from mead.tf.exporters import ClassifyTensorFlowExporter as exporter_cls
                backend.exporter = exporter_cls
            elif exporter_type == 'preproc':
                from mead.tf.preproc_exporters import ClassifyTensorFlowPreProcExporter as exporter_cls
                # Imported for its side effects only; the name is not used here.
                import mead.tf.preprocessors
                backend.exporter = exporter_cls

        backend.load(self.task_name())
        return backend
Ejemplo n.º 5
0
    def _create_backend(self, **kwargs):
        """Create the ``Backend`` named in the config and load this task into it.

        ``self.config_params['preproc']`` is created if missing, receives
        ``show_examples`` under ``'show_ex'`` and always ends up with
        ``trim = True``. For the ``'dy'`` backend DyNet is additionally
        initialised from the command line with one requested GPU; when
        ``'autobatchsz'`` is present under ``'train'`` the trainer type becomes
        ``'autobatch'`` and the model is marked as not batched.

        :param kwargs: accepted but not read by this implementation
        :return: the loaded backend
        """
        backend = Backend(self.config_params.get('backend', 'tf'))

        preproc = self.config_params.setdefault('preproc', {})
        preproc['show_ex'] = show_examples

        if backend.name == 'dy':
            import _dynet
            dynet_params = _dynet.DynetParams()
            dynet_params.from_args()
            dynet_params.set_requested_gpus(1)

            train_cfg = self.config_params['train']
            use_autobatch = 'autobatchsz' in train_cfg
            if use_autobatch:
                train_cfg['trainer_type'] = 'autobatch'
                dynet_params.set_autobatch(True)

            dynet_params.init()
            backend.params = {
                'pc': _dynet.ParameterCollection(),
                'batched': not use_autobatch,
            }

        # Every backend (pytorch, dy and the fallback) enables trimming.
        preproc['trim'] = True

        backend.load(self.task_name())
        return backend
Ejemplo n.º 6
0
def init_dynet(args):
    """Initialise DyNet from the command line, seeded with ``args.seed``."""
    params = dynet.DynetParams()

    # Pick up any DyNet options present on the command line (optional)
    params.from_args()

    # Set the random seed manually (see the command line arguments
    # documentation for the other available settings)
    params.set_random_seed(args.seed)

    # Apply the collected configuration
    params.init()
Ejemplo n.º 7
0
    def _setup_task(self):
        """Select the classification backend module and fill in preprocessing hooks.

        Reads ``backend`` from ``self.config_params`` (default
        ``'tensorflow'``), imports the matching classify module and stores it
        in ``self.task``. It also writes ``vec_alloc``, ``clean_fn`` and
        ``src_vec_trans`` into ``self.config_params['preproc']``.

        NOTE: ``self.config_params['preproc']`` is indexed directly and never
        created here, so it must already exist when this method runs.
        """
        backend = self.config_params.get('backend', 'tensorflow')
        if backend == 'pytorch':
            print('PyTorch backend')
            from baseline.pytorch import long_0_tensor_alloc
            from baseline.pytorch import tensor_reverse_2nd as rev2nd
            import baseline.pytorch.classify as classify
            self.config_params['preproc']['vec_alloc'] = long_0_tensor_alloc

        else:
            # Every non-PyTorch backend allocates vectors with numpy.
            self.config_params['preproc']['vec_alloc'] = np.zeros

            if backend == 'keras':
                print('Keras backend')
                import baseline.keras.classify as classify
                from baseline.data import reverse_2nd as rev2nd
            elif backend == 'dynet':
                print('Dynet backend')
                import _dynet
                dy_params = _dynet.DynetParams()
                dy_params.from_args()
                dy_params.set_requested_gpus(1)
                # Autobatching is opt-in: it is enabled only when the training
                # config carries an 'autobatchsz' entry.
                if 'autobatchsz' in self.config_params['train']:
                    self.config_params['model']['batched'] = False
                    dy_params.set_autobatch(True)
                dy_params.init()
                import baseline.dy.classify as classify
                from baseline.data import reverse_2nd as rev2nd
                # 'trim' is only written for the DyNet backend in this method.
                self.config_params['preproc']['trim'] = True
            else:
                # Any other backend name falls through to TensorFlow.
                print('TensorFlow backend')
                import baseline.tf.classify as classify
                from baseline.data import reverse_2nd as rev2nd
                from mead.tf.exporters import ClassifyTensorFlowExporter
                self.ExporterType = ClassifyTensorFlowExporter

        self.task = classify

        # Pick the text cleaning function; 'clean' takes precedence over
        # 'lower'. The flags must be exactly True, not merely truthy.
        if self.config_params['preproc'].get('clean', False) is True:
            self.config_params['preproc'][
                'clean_fn'] = baseline.TSVSeqLabelReader.do_clean
            print('Clean')
        elif self.config_params['preproc'].get('lower', False) is True:
            self.config_params['preproc']['clean_fn'] = baseline.lowercase
            print('Lower')
        else:
            self.config_params['preproc']['clean_fn'] = None

        # Use the backend-specific reversal function when 'rev' is truthy.
        self.config_params['preproc'][
            'src_vec_trans'] = rev2nd if self.config_params['preproc'].get(
                'rev', False) else None
Ejemplo n.º 8
0
    def _setup_task(self):
        """Select the tagger backend module and fill in preprocessing hooks.

        Reads ``backend`` from ``self.config_params`` (default
        ``'tensorflow'``), imports the matching tagger module and stores it in
        ``self.task``. It also writes ``vec_alloc``, ``vec_shape``, ``trim``
        and ``word_trans_fn`` into ``self.config_params['preproc']``, which
        must already exist.

        For the DyNet backend, DyNet is initialised from the command line with
        one requested GPU and autobatching. For the TensorFlow fallback,
        ``self.ExporterType`` is set as well.

        :raises Exception: if the backend is ``'dynet'`` and ``'autobatchsz'``
            is not present in ``self.config_params['train']``
        """
        backend = self.config_params.get('backend', 'tensorflow')
        if backend == 'pytorch':
            print('PyTorch backend')
            from baseline.pytorch import long_0_tensor_alloc as vec_alloc
            from baseline.pytorch import tensor_shape as vec_shape
            import baseline.pytorch.tagger as tagger
            self.config_params['preproc']['vec_alloc'] = vec_alloc
            self.config_params['preproc']['vec_shape'] = vec_shape
            self.config_params['preproc']['trim'] = True
        elif backend == 'dynet':
            print('Dynet backend')
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                self.config_params['model']['batched'] = False
                dy_params.set_autobatch(True)
            else:
                # The trailing space keeps the two sentences from running
                # together when the adjacent literals are concatenated.
                raise Exception(
                    'Tagger currently only supports autobatching. '
                    'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz'
                )
            dy_params.init()
            import baseline.dy.tagger as tagger
            self.config_params['preproc']['vec_alloc'] = np.zeros
            self.config_params['preproc']['vec_shape'] = np.shape
            self.config_params['preproc']['trim'] = True
        else:
            # Any other backend name falls through to TensorFlow.
            self.config_params['preproc']['vec_alloc'] = np.zeros
            self.config_params['preproc']['vec_shape'] = np.shape
            print('TensorFlow backend')
            self.config_params['preproc']['trim'] = False
            import baseline.tf.tagger as tagger
            import mead.tf
            self.ExporterType = mead.tf.TaggerTensorFlowExporter

        self.task = tagger

        # Pick the word transform; 'web-cleanup' takes precedence over
        # 'lower'. The flags must be exactly True, not merely truthy.
        if self.config_params['preproc'].get('web-cleanup', False) is True:
            self.config_params['preproc'][
                'word_trans_fn'] = baseline.CONLLSeqReader.web_cleanup
            print('Web-ish data cleanup')
        elif self.config_params['preproc'].get('lower', False) is True:
            self.config_params['preproc']['word_trans_fn'] = baseline.lowercase
            print('Lower')
        else:
            self.config_params['preproc']['word_trans_fn'] = None
Ejemplo n.º 9
0
    def _setup_task(self):
        """Select the seq2seq backend module and fill in preprocessing hooks.

        Reads ``backend`` from ``self.config_params`` (default
        ``'tensorflow'``), imports the matching seq2seq module and stores it in
        ``self.task``. It also writes ``vec_alloc``, ``vec_shape``, ``trim``,
        ``word_trans_fn`` and ``show_ex`` into
        ``self.config_params['preproc']``, which must already exist.
        """

        # If its not vanilla seq2seq, dont bother reversing
        do_reverse = self.config_params['model']['model_type'] == 'default'
        backend = self.config_params.get('backend', 'tensorflow')
        if backend == 'pytorch':
            print('PyTorch backend')
            from baseline.pytorch import long_0_tensor_alloc as vec_alloc
            from baseline.pytorch import tensor_shape as vec_shape
            from baseline.pytorch import tensor_reverse_2nd as rev2nd
            import baseline.pytorch.seq2seq as seq2seq
            self.config_params['preproc']['vec_alloc'] = vec_alloc
            self.config_params['preproc']['vec_shape'] = vec_shape
            # The reversal function is stored under the 'word_trans_fn' key.
            src_vec_trans = rev2nd if do_reverse else None
            self.config_params['preproc']['word_trans_fn'] = src_vec_trans
            self.config_params['preproc'][
                'show_ex'] = baseline.pytorch.show_examples_pytorch
            self.config_params['preproc']['trim'] = True
        else:

            # Settings shared by the DyNet and TensorFlow branches; 'trim' is
            # overridden to True below for DyNet.
            self.config_params['preproc']['vec_alloc'] = np.zeros
            self.config_params['preproc']['vec_shape'] = np.shape
            self.config_params['preproc']['trim'] = False
            src_vec_trans = baseline.reverse_2nd if do_reverse else None
            self.config_params['preproc']['word_trans_fn'] = src_vec_trans
            if backend == 'dynet':
                print('Dynet backend')
                import _dynet
                self.config_params['preproc']['trim'] = True
                dy_params = _dynet.DynetParams()
                dy_params.from_args()
                dy_params.set_requested_gpus(1)
                dy_params.init()
                import baseline.dy.seq2seq as seq2seq
                self.config_params['preproc'][
                    'show_ex'] = baseline.dy.show_examples_dynet
                # Repeats the assignment made before DyNet was initialised.
                self.config_params['preproc']['trim'] = True
            else:
                # Any other backend name falls through to TensorFlow.
                import baseline.tf.seq2seq as seq2seq
                self.config_params['preproc'][
                    'show_ex'] = baseline.tf.show_examples_tf
                from mead.tf.exporters import Seq2SeqTensorFlowExporter
                self.ExporterType = Seq2SeqTensorFlowExporter

        self.task = seq2seq
Ejemplo n.º 10
0
def init(opts):
    """Initialise DyNet from the ``dynet-*`` entries of ``opts``.

    DyNet reads its settings from ``sys.argv``, so the process arguments are
    temporarily replaced with ones built from ``opts`` and restored afterwards,
    even when initialisation fails.

    :param opts: mapping providing ``dynet-mem``, ``dynet-autobatch``,
        ``dynet-devices``, ``dynet-seed`` and ``dynet-immed``
    """
    # Declared up front; the names are only rebound when no GPU is requested.
    global topk
    global count_larger

    # todo: manipulating sys.argv
    utils.zlog("Using BACKEND of DYNET on %s." % (opts["dynet-devices"], ))
    params = dy.DynetParams()
    saved_argv = sys.argv
    sys.argv = [
        saved_argv[0], "--dynet-mem", opts["dynet-mem"], "--dynet-autobatch",
        opts["dynet-autobatch"], "--dynet-devices", opts["dynet-devices"],
        "--dynet-seed", opts["dynet-seed"]
    ]
    try:
        DY_CONFIG.immediate_compute = opts["dynet-immed"]
        params.from_args(None)
        params.init()
    finally:
        # Never leave the process with the substituted argument list.
        sys.argv = saved_argv

    if "GPU" not in opts["dynet-devices"]:
        # Swap in the CPU implementations of the two helpers.
        topk = topk_cpu
        count_larger = cl_cpu
        utils.zlog("Currently using numpy for topk_cpu/count_larger.")
Ejemplo n.º 11
0
    def _setup_task(self):
        """Select the language-model backend module and fill in preprocessing hooks.

        Reads ``backend`` from ``self.config_params`` (default
        ``'tensorflow'``), imports the matching lm module and stores it in
        ``self.task``. It also writes ``vec_alloc``, ``vec_shape``, ``trim``
        and ``word_trans_fn`` into ``self.config_params['preproc']``, which
        must already exist.
        """
        backend = self.config_params.get('backend', 'tensorflow')

        if backend == 'pytorch':
            print('PyTorch backend')
            from baseline.pytorch import long_0_tensor_alloc as vec_alloc
            from baseline.pytorch import tensor_shape as vec_shape
            import baseline.pytorch.lm as lm
            preproc = self.config_params['preproc']
            preproc['vec_alloc'] = vec_alloc
            preproc['vec_shape'] = vec_shape
            preproc['trim'] = True
        elif backend == 'dynet':
            preproc = self.config_params['preproc']
            preproc['vec_alloc'] = np.zeros
            preproc['vec_shape'] = np.shape
            print('Dynet backend')
            import _dynet
            dynet_params = _dynet.DynetParams()
            dynet_params.from_args()
            dynet_params.set_requested_gpus(1)
            dynet_params.init()
            preproc['trim'] = False
            import baseline.dy.lm as lm
        else:
            # Any other backend name falls through to TensorFlow.
            preproc = self.config_params['preproc']
            preproc['vec_alloc'] = np.zeros
            preproc['vec_shape'] = np.shape
            print('TensorFlow backend')
            preproc['trim'] = False
            import baseline.tf.lm as lm

        self.task = lm

        # These two flags are read from the top level of the config (not from
        # 'preproc') and must be exactly True, not merely truthy.
        if self.config_params.get('web-cleanup', False) is True:
            word_trans_fn = baseline.CONLLSeqReader.web_cleanup
            print('Web-ish data cleanup')
        elif self.config_params.get('lower', False) is True:
            word_trans_fn = baseline.lowercase
            print('Lower')
        else:
            word_trans_fn = None
        self.config_params['preproc']['word_trans_fn'] = word_trans_fn
Ejemplo n.º 12
0
Archivo: dynmt.py Proyecto: ufwt/TraFix
    ax.set_xticklabels([u'begin'] + list(input_seq) + [u'end'])
    ax.set_yticklabels(list(output_seq) + [u'end'])

    # set title
    input_word = u' '.join(input_seq)
    output_word = u' '.join(output_seq)
    ax.set_title(u'attention-based alignment:\n{}->\n{}'.format(
        input_word, output_word))
    plt.savefig(filename)
    plt.close()


if __name__ == '__main__':
    arguments = docopt(__doc__)

    dnparams = dn.DynetParams()
    if arguments['--seed']:
        dnparams.set_random_seed(int(arguments['--seed']))
    dnparams.init()

    max_prediction_len = int(
        arguments['--max-pred']) if arguments['--max-pred'] else None
    plot_param = arguments['--plot']
    beam_param = int(arguments['--beam-size'])
    results_file_path_param = arguments['RESULTS_PATH']

    main(arguments['TRAIN_INPUTS_PATH'], arguments['TRAIN_OUTPUTS_PATH'],
         arguments['DEV_INPUTS_PATH'], arguments['DEV_OUTPUTS_PATH'],
         arguments['TEST_INPUTS_PATH'], arguments['TEST_OUTPUTS_PATH'],
         arguments['RESULTS_PATH'], arguments['VOCAB_INPUT_PATH'],
         arguments['VOCAB_OUTPUT_PATH'], int(arguments['--input-dim']),
Ejemplo n.º 13
0
def main():
    """Train a CNN sentence classifier with DyNet.

    Parses the command line, initialises DyNet, loads pretrained word2vec
    embeddings, builds the train/validation sets and trains for ``--n_epochs``
    epochs. After every epoch it prints loss, F1 and accuracy for both sets
    and saves the model under ``RESULTS_DIR``. The vocabulary mappings are
    pickled to ``RESULTS_DIR`` once, before training.

    ``RANDOM_SEED`` and ``RESULTS_DIR`` are expected at module level.
    """
    parser = argparse.ArgumentParser(
        description=
        'Convolutional Neural Networks for Sentence Classification in DyNet')

    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument(
        '--train_x_path',
        type=str,
        default='./data/train_x.txt',
        help='File path of train x data [default: `./data/train_x.txt`]')
    parser.add_argument(
        '--train_y_path',
        type=str,
        default='./data/train_y.txt',
        help='File path of train y data [default: `./data/train_y.txt`]')
    parser.add_argument(
        '--valid_x_path',
        type=str,
        default='./data/valid_x.txt',
        help='File path of valid x data [default: `./data/valid_x.txt`]')
    parser.add_argument(
        '--valid_y_path',
        type=str,
        default='./data/valid_y.txt',
        help='File path of valid y data [default: `./data/valid_y.txt`]')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=10,
                        help='Number of epochs [default: 10]')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Mini batch size [default: 64]')
    parser.add_argument('--win_sizes',
                        type=int,
                        nargs='*',
                        default=[3, 4, 5],
                        help='Window sizes of filters [default: [3, 4, 5]]')
    parser.add_argument(
        '--num_fil',
        type=int,
        default=100,
        help='Number of filters in each window size [default: 100]')
    parser.add_argument('--s',
                        type=float,
                        default=3.0,
                        help='L2 norm constraint on w [default: 3.0]')
    parser.add_argument('--dropout_prob',
                        type=float,
                        default=0.5,
                        help='Dropout probability [default: 0.5]')
    # Restricting the choices rejects an unknown strategy up front; without
    # it the run only failed later, when the undefined second embedding table
    # was first used.
    parser.add_argument(
        '--v_strategy',
        type=str,
        default='static',
        choices=['rand', 'static', 'non-static', 'multichannel'],
        help=
        'Embedding strategy. rand: Random  initialization. static: Load pretrained embeddings and do not update during the training. non-static: Load pretrained embeddings and update during the training. [default: static]'
    )
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=4096,
        help='Amount of memory to allocate [mb] [default: 4096]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    N_EPOCHS = args.n_epochs
    WIN_SIZES = args.win_sizes
    BATCH_SIZE = args.batch_size
    EMB_DIM = 300
    OUT_DIM = 1
    L2_NORM_LIM = args.s
    NUM_FIL = args.num_fil
    DROPOUT_PROB = args.dropout_prob
    V_STRATEGY = args.v_strategy
    ALLOC_MEM = args.alloc_mem

    # One embedding channel for every strategy except multichannel.
    single_channel = V_STRATEGY in ['rand', 'static', 'non-static']
    NUM_CHA = 1 if single_channel else 2

    # FILE paths
    W2V_PATH = './GoogleNews-vectors-negative300.bin'
    TRAIN_X_PATH = args.train_x_path
    TRAIN_Y_PATH = args.train_y_path
    VALID_X_PATH = args.valid_x_path
    VALID_Y_PATH = args.valid_y_path

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load pretrained embeddings
    pretrained_model = gensim.models.KeyedVectors.load_word2vec_format(
        W2V_PATH, binary=True)
    vocab = pretrained_model.wv.vocab.keys()
    w2v = pretrained_model.wv

    # Build dataset =======================================================================================================
    w2c = build_w2c(TRAIN_X_PATH, vocab=vocab)
    w2i, i2w = build_w2i(TRAIN_X_PATH, w2c, unk='unk')
    train_x, train_y = build_dataset(TRAIN_X_PATH,
                                     TRAIN_Y_PATH,
                                     w2i,
                                     unk='unk')
    valid_x, valid_y = build_dataset(VALID_X_PATH,
                                     VALID_Y_PATH,
                                     w2i,
                                     unk='unk')

    train_x, train_y = sort_data_by_length(train_x, train_y)
    valid_x, valid_y = sort_data_by_length(valid_x, valid_y)

    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)

    V_init = init_V(w2v, w2i)

    with open(os.path.join(RESULTS_DIR, './w2i.dump'),
              'wb') as f_w2i, open(os.path.join(RESULTS_DIR, './i2w.dump'),
                                   'wb') as f_i2w:
        pickle.dump(w2i, f_w2i)
        pickle.dump(i2w, f_i2w)

    # Build model =================================================================================
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    # V1
    V1 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    if V_STRATEGY in ['static', 'non-static', 'multichannel']:
        V1.init_from_array(V_init)
    if V_STRATEGY in ['static', 'multichannel']:
        V1_UPDATE = False
    else:  # 'rand', 'non-static'
        V1_UPDATE = True
    padding_ids = [w2i['<s>'], w2i['</s>']]
    make_emb_zero(V1, padding_ids, EMB_DIM)

    # V2
    if V_STRATEGY == 'multichannel':
        V2 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
        V2.init_from_array(V_init)
        V2_UPDATE = True
        make_emb_zero(V2, padding_ids, EMB_DIM)

    # The dense layer's input size follows the number of window sizes
    # instead of assuming exactly three of them.
    # NOTE(review): assumes CNNText emits NUM_FIL features per window size;
    # confirm against CNNText.
    layers = [
        CNNText(model, EMB_DIM, WIN_SIZES, NUM_CHA, NUM_FIL, dy.tanh,
                DROPOUT_PROB),
        Dense(model, len(WIN_SIZES) * NUM_FIL, OUT_DIM, dy.logistic)
    ]

    def _embed_batch(x):
        """Return the embedding expression for one mini batch ``x``."""
        if single_channel:
            sen_len = x.shape[0]
            x_embs = dy.concatenate_cols(
                [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
            x_embs = dy.transpose(x_embs)
            return dy.reshape(x_embs, (sen_len, EMB_DIM, 1))
        # multichannel: the two tables are stacked along dimension 2
        x_embs1 = dy.concatenate_cols(
            [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
        x_embs2 = dy.concatenate_cols(
            [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x])
        x_embs1 = dy.transpose(x_embs1)
        x_embs2 = dy.transpose(x_embs2)
        return dy.concatenate([x_embs1, x_embs2], d=2)

    # Train model ================================================================================
    n_batches_train = math.ceil(len(train_x) / BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_x) / BATCH_SIZE)

    start_time = time.time()
    for epoch in range(N_EPOCHS):
        # Train
        loss_all_train = []
        pred_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(train_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(train_y[start:end])

            x_embs = _embed_batch(x)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=False)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_train.append(mb_loss.value())
            pred_all_train.extend(list(binary_pred(y.npvalue().flatten())))

            # Backward prop
            mb_loss.backward()
            trainer.update()

            # L2 norm constraint
            layers[1].scale_W(L2_NORM_LIM)

            # Make padding embs zero (only the tables that are being updated)
            if V_STRATEGY in ['rand', 'non-static']:
                make_emb_zero(V1, padding_ids, EMB_DIM)
            elif V_STRATEGY in ['multichannel']:
                make_emb_zero(V2, padding_ids, EMB_DIM)

        # Valid
        loss_all_valid = []
        pred_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(valid_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(valid_y[start:end])

            x_embs = _embed_batch(x)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=True)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_valid.append(mb_loss.value())
            pred_all_valid.extend(list(binary_pred(y.npvalue().flatten())))

        print(
            'EPOCH: %d, Train Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Valid Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Time:: %.3f[s]'
            % (
                epoch + 1,
                np.mean(loss_all_train),
                f1_score(train_y, pred_all_train),
                accuracy_score(train_y, pred_all_train),
                np.mean(loss_all_valid),
                f1_score(valid_y, pred_all_valid),
                accuracy_score(valid_y, pred_all_valid),
                time.time() - start_time,
            ))

        # Save model =========================================================================================================================
        model_path = os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1))
        if single_channel:
            dy.save(model_path, [V1] + layers)
        else:
            dy.save(model_path, [V1, V2] + layers)
Ejemplo n.º 14
0
# `os` is needed for the package_dir computation below.
import os
import sys

# No support for python2
if sys.version_info[0] == 2:
  raise RuntimeError("XNMT does not support python2 any longer.")

# Put this file's directory on the module search path (appended, so it has
# the lowest priority) unless it is already there.
package_dir = os.path.dirname(os.path.abspath(__file__))
if package_dir not in sys.path:
  sys.path.append(package_dir)

import logging
logger = logging.getLogger('xnmt')
yaml_logger = logging.getLogger('yaml')

# DyNet parameters are populated from the command line at import time;
# init() is not called in this part of the file.
import _dynet
dyparams = _dynet.DynetParams()
dyparams.from_args()


# all Serializable objects must be imported here in order to be parsable
# using the !Classname YAML syntax
import xnmt.attender
import xnmt.batcher
import xnmt.conv
import xnmt.decoder
import xnmt.embedder
import xnmt.eval_task
import xnmt.evaluator
import xnmt.exp_global
import xnmt.experiment
import xnmt.ff
Ejemplo n.º 15
0
import math
import json
from __main__ import args

# Pick the DyNet module by requested GPU count: `_dynet` when no GPU is
# requested, `_gdynet` otherwise (presumably the GPU build - confirm).
if args.gpus != 0:
    import _gdynet as dy
else:
    import _dynet as dy

dyparams = dy.DynetParams()
if args.gpus != 0:
    dyparams.set_requested_gpus(args.gpus)

# Settings shared by both variants, then initialise DyNet.
dyparams.set_mem(args.memory)
dyparams.set_random_seed(args.seed)
dyparams.init()

from lstm_common import *
from sklearn.base import BaseEstimator

# Module-level model constants; the code that consumes them is outside this
# fragment. NOTE(review): judging by the names these are the LSTM depth, its
# hidden size and the embedding sizes of the path components - confirm.
NUM_LAYERS = 2
LSTM_HIDDEN_DIM = 60
LEMMA_DIM = 50
POS_DIM = 4
DEP_DIM = 5
DIR_DIM = 1

# NOTE(review): presumably the placeholder used when an instance has no
# path (a single all-zero 4-tuple) - confirm against its users.
EMPTY_PATH = ((0, 0, 0, 0), )
LOSS_EPSILON = 0.0  # 0.01
MINIBATCH_SIZE = 100
Ejemplo n.º 16
0
def main():
    """Train the selective-encoding summarization model with DyNet.

    Parses the command line, initialises DyNet with autobatching, builds the
    dataset and the encoder/decoder, then trains for ``--n_epochs`` epochs.
    After every epoch it prints the mean train/validation loss and writes the
    model (``./model_e<epoch>``) and the vocabulary mappings (``./w2i.dump``,
    ``./i2w.dump``) to the current directory.

    ``RANDOM_SEED`` is expected at module level.
    """
    parser = argparse.ArgumentParser(description='Selective Encoding for Abstractive Sentence Summarization in DyNet')

    # The help texts state the defaults that are actually configured.
    parser.add_argument('--gpu', type=str, default='0', help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--n_epochs', type=int, default=3, help='Number of epochs [default: 3]')
    parser.add_argument('--n_train', type=int, default=3803957, help='Number of training data (up to 3803957 in gigaword) [default: 3803957]')
    parser.add_argument('--n_valid', type=int, default=189651, help='Number of validation data (up to 189651 in gigaword) [default: 189651]')
    parser.add_argument('--batch_size', type=int, default=32, help='Mini batch size [default: 32]')
    parser.add_argument('--vocab_size', type=int, default=124404, help='Vocabulary size [default: 124404]')
    parser.add_argument('--emb_dim', type=int, default=256, help='Embedding size [default: 256]')
    parser.add_argument('--hid_dim', type=int, default=256, help='Hidden state size [default: 256]')
    parser.add_argument('--maxout_dim', type=int, default=2, help='Maxout size [default: 2]')
    parser.add_argument('--alloc_mem', type=int, default=10000, help='Amount of memory to allocate [mb] [default: 10000]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_EPOCHS   = args.n_epochs
    N_TRAIN    = args.n_train
    N_VALID    = args.n_valid
    BATCH_SIZE = args.batch_size
    VOCAB_SIZE = args.vocab_size
    EMB_DIM    = args.emb_dim
    HID_DIM    = args.hid_dim
    MAXOUT_DIM = args.maxout_dim
    ALLOC_MEM  = args.alloc_mem

    # File paths
    TRAIN_X_FILE = './data/train.article.txt'
    TRAIN_Y_FILE = './data/train.title.txt'
    VALID_X_FILE = './data/valid.article.filter.txt'
    VALID_Y_FILE = './data/valid.title.filter.txt'

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Build dataset
    dataset = Dataset(
        TRAIN_X_FILE,
        TRAIN_Y_FILE,
        VALID_X_FILE,
        VALID_Y_FILE,
        vocab_size=VOCAB_SIZE,
        batch_size=BATCH_SIZE,
        n_train=N_TRAIN,
        n_valid=N_VALID
    )
    # The effective vocabulary size is whatever the dataset actually built.
    VOCAB_SIZE = len(dataset.w2i)
    print('VOCAB_SIZE', VOCAB_SIZE)

    # Build model
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    V = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    encoder = SelectiveBiGRU(model, EMB_DIM, HID_DIM)
    decoder = AttentionalGRU(model, EMB_DIM, HID_DIM, MAXOUT_DIM, VOCAB_SIZE)

    def _minibatch_loss(x_mb, y_mb):
        """Build the mean per-example loss expression for one mini batch."""
        # Create a new computation graph
        dy.renew_cg()
        associate_parameters([encoder, decoder])
        losses = []
        for x, t in zip(x_mb, y_mb):
            # The decoder input is the target without its last token; the
            # expected output is the target without its first token.
            t_in, t_out = t[:-1], t[1:]

            # Encoder
            x_embs = [dy.lookup(V, x_t) for x_t in x]
            hp, hb_1 = encoder(x_embs)

            # Decoder
            decoder.set_initial_states(hp, hb_1)
            t_embs = [dy.lookup(V, t_t) for t_t in t_in]
            y = decoder(t_embs)

            # Loss
            loss = dy.esum(
                [dy.pickneglogsoftmax(y_t, t_t) for y_t, t_t in zip(y, t_out)]
            )
            losses.append(loss)

        return dy.average(losses)

    # Train model
    start_time = time.time()
    for epoch in range(N_EPOCHS):
        # Train
        loss_all_train = []
        dataset.reset_train_iter()
        for train_x_mb, train_y_mb in tqdm(dataset.train_iter):
            mb_loss = _minibatch_loss(train_x_mb, train_y_mb)

            # Forward prop
            loss_all_train.append(mb_loss.value())

            # Backward prop
            mb_loss.backward()
            trainer.update()

        # Valid
        loss_all_valid = []
        dataset.reset_valid_iter()
        for valid_x_mb, valid_y_mb in dataset.valid_iter:
            mb_loss = _minibatch_loss(valid_x_mb, valid_y_mb)

            # Forward prop
            loss_all_valid.append(mb_loss.value())

        print('EPOCH: %d, Train Loss: %.3f, Valid Loss: %.3f, Time: %.3f[s]' % (
            epoch+1,
            np.mean(loss_all_train),
            np.mean(loss_all_valid),
            time.time()-start_time
        ))

        # Save model
        dy.save('./model_e'+str(epoch+1), [V, encoder, decoder])
        with open('./w2i.dump', 'wb') as f_w2i, open('./i2w.dump', 'wb') as f_i2w:
            pickle.dump(dataset.w2i, f_w2i)
            pickle.dump(dataset.i2w, f_i2w)
Ejemplo n.º 17
0
def _abs_minibatch_loss(rush_abs, X_mb, y_mb, c):
    """Return the mini-batch loss expression for the ABS model.

    For every (source, target) pair the model is called as
    ``rush_abs(x, t[:-1])``; the negative log-softmax of the target tokens
    ``t[c:]`` is summed per example and the per-example sums are averaged
    with ``dy.average``.

    The caller must renew the computation graph and call
    ``rush_abs.associate_parameters()`` before invoking this helper.

    :param rush_abs: callable model, invoked as ``rush_abs(x, t_in)``
    :param X_mb: source sequences of the mini batch
    :param y_mb: target sequences of the mini batch (already ``<s>``-padded)
    :param c: window size of the neural language model
    :return: DyNet expression holding the averaged mini-batch loss
    """
    losses = []
    for x, t in zip(X_mb, y_mb):
        # Inputs drop the last token; targets start at index c.
        # presumably the model emits one prediction per c-token window — confirm against ABS
        t_in, t_out = t[:-1], t[c:]

        y = rush_abs(x, t_in)
        loss = dy.esum([dy.pickneglogsoftmax(y_t, t_t) for y_t, t_t in zip(y, t_out)])
        losses.append(loss)

    return dy.average(losses)


def main():
    """Train the ABS summarization model and save a checkpoint after every epoch.

    Parses the command line, configures DyNet, builds the vocabulary and the
    train/validation sets from the hard-coded ``./data`` files, then runs
    ``--n_epochs`` epochs of mini-batch training with Adam. After each epoch
    the mean train/validation loss is printed, the model is saved to
    ``./model_e<epoch>`` and the vocabulary mappings are pickled to
    ``./w2i.dump`` and ``./i2w.dump``.
    """
    parser = argparse.ArgumentParser(description='A Neural Attention Model for Abstractive Sentence Summarization in DyNet')

    parser.add_argument('--gpu', type=str, default='0', help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--n_epochs', type=int, default=10, help='Number of epochs [default: 10]')
    parser.add_argument('--n_train', type=int, default=3803957, help='Number of training data (up to 3803957 in gigaword) [default: 3803957]')
    parser.add_argument('--n_valid', type=int, default=189651, help='Number of validation data (up to 189651 in gigaword) [default: 189651]')
    parser.add_argument('--batch_size', type=int, default=32, help='Mini batch size [default: 32]')
    parser.add_argument('--vocab_size', type=int, default=60000, help='Vocabulary size [default: 60000]')
    parser.add_argument('--emb_dim', type=int, default=256, help='Embedding size [default: 256]')
    parser.add_argument('--hid_dim', type=int, default=256, help='Hidden state size [default: 256]')
    parser.add_argument('--encoder_type', type=str, default='attention', help='Encoder type. bow: Bag-of-words encoder. attention: Attention-based encoder [default: attention]')
    parser.add_argument('--c', type=int, default=5, help='Window size in neural language model [default: 5]')
    parser.add_argument('--q', type=int, default=2, help='Window size in attention-based encoder [default: 2]')
    parser.add_argument('--alloc_mem', type=int, default=4096, help='Amount of memory to allocate [mb] [default: 4096]')
    args = parser.parse_args()
    print(args)

    # Restrict the visible GPUs before DyNet is initialised.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_EPOCHS     = args.n_epochs
    N_TRAIN      = args.n_train
    N_VALID      = args.n_valid
    BATCH_SIZE   = args.batch_size
    VOCAB_SIZE   = args.vocab_size
    EMB_DIM      = args.emb_dim
    HID_DIM      = args.hid_dim
    ENCODER_TYPE = args.encoder_type
    C            = args.c
    Q            = args.q
    ALLOC_MEM    = args.alloc_mem

    # File paths
    TRAIN_X_FILE = './data/train.article.txt'
    TRAIN_Y_FILE = './data/train.title.txt'
    VALID_X_FILE = './data/valid.article.filter.txt'
    VALID_Y_FILE = './data/valid.title.filter.txt'

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_STATE)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Build dataset ====================================================================================
    # Word counts are accumulated over both the articles and the titles.
    w2c = build_word2count(TRAIN_X_FILE, n_data=N_TRAIN)
    w2c = build_word2count(TRAIN_Y_FILE, w2c=w2c, n_data=N_TRAIN)

    train_X, w2i, i2w = build_dataset(TRAIN_X_FILE, w2c=w2c, padid=False, eos=True, unksym='<unk>', target=False, n_data=N_TRAIN, vocab_size=VOCAB_SIZE)
    train_y, _, _     = build_dataset(TRAIN_Y_FILE, w2i=w2i, target=True, n_data=N_TRAIN)

    valid_X, _, _ = build_dataset(VALID_X_FILE, w2i=w2i, target=False, n_data=N_VALID)
    valid_y, _, _ = build_dataset(VALID_Y_FILE, w2i=w2i, target=True, n_data=N_VALID)

    # From here on the vocabulary size is whatever the built mapping contains.
    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)

    # Build model ======================================================================================
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    rush_abs = ABS(model, EMB_DIM, HID_DIM, VOCAB_SIZE, Q, C, encoder_type=ENCODER_TYPE)

    # Padding: prepend C-1 extra '<s>' ids to every target sequence.
    train_y = [[w2i['<s>']]*(C-1)+instance_y for instance_y in train_y]
    valid_y = [[w2i['<s>']]*(C-1)+instance_y for instance_y in valid_y]

    n_batches_train = math.ceil(len(train_X)/BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_X)/BATCH_SIZE)

    for epoch in range(N_EPOCHS):
        # Train
        train_X, train_y = shuffle(train_X, train_y)
        loss_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            rush_abs.associate_parameters()

            # Create a mini batch
            start = i*BATCH_SIZE
            end = start + BATCH_SIZE
            mb_loss = _abs_minibatch_loss(rush_abs, train_X[start:end], train_y[start:end], C)

            # Forward prop
            loss_all_train.append(mb_loss.value())

            # Backward prop
            mb_loss.backward()
            trainer.update()

        # Valid: same loss, but no backward pass and no parameter update.
        loss_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            rush_abs.associate_parameters()

            # Create a mini batch
            start = i*BATCH_SIZE
            end = start + BATCH_SIZE
            mb_loss = _abs_minibatch_loss(rush_abs, valid_X[start:end], valid_y[start:end], C)

            # Forward prop
            loss_all_valid.append(mb_loss.value())

        print('EPOCH: %d, Train Loss: %.3f, Valid Loss: %.3f' % (
            epoch+1,
            np.mean(loss_all_train),
            np.mean(loss_all_valid)
        ))

        # Save model ========================================================================
        dy.save('./model_e'+str(epoch+1), [rush_abs])
        with open('./w2i.dump', 'wb') as f_w2i, open('./i2w.dump', 'wb') as f_i2w:
            pickle.dump(w2i, f_w2i)
            pickle.dump(i2w, f_i2w)
Ejemplo n.º 18
0
def main():
    """Decode summaries with a trained DRGD model using beam search.

    Loads the vocabulary mappings and the saved ``[V, encoder, decoder]``
    triple, runs a length-normalised beam search of width ``--beam_size``
    (at most ``--max_len`` steps) over every input sequence and writes one
    space-joined prediction per line to ``--output_file``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Deep Recurrent Generative Decoder for Abstractive Text Summarization in DyNet'
    )

    # The help text now reports the real default ('0'); it used to say '-'.
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--n_test',
                        type=int,
                        default=189651,
                        help='Number of test examples [default: 189651]')
    parser.add_argument('--beam_size',
                        type=int,
                        default=5,
                        help='Beam size [default: 5]')
    parser.add_argument('--max_len',
                        type=int,
                        default=100,
                        help='Maximum length of decoding [default: 100]')
    parser.add_argument('--model_file',
                        type=str,
                        default='./model_e1',
                        help='Trained model file path [default: ./model_e1]')
    parser.add_argument(
        '--input_file',
        type=str,
        default='./data/valid.article.filter.txt',
        help='Test file path [default: ./data/valid.article.filter.txt]')
    parser.add_argument('--output_file',
                        type=str,
                        default='./pred_y.txt',
                        help='Output file path [default: ./pred_y.txt]')
    parser.add_argument('--w2i_file',
                        type=str,
                        default='./w2i.dump',
                        help='Word2Index file path [default: ./w2i.dump]')
    parser.add_argument('--i2w_file',
                        type=str,
                        default='./i2w.dump',
                        help='Index2Word file path [default: ./i2w.dump]')
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=1024,
        help='Amount of memory to allocate [mb] [default: 1024]')
    args = parser.parse_args()

    # Restrict the visible GPUs before DyNet is initialised.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_TEST = args.n_test
    K = args.beam_size
    MAX_LEN = args.max_len
    ALLOC_MEM = args.alloc_mem

    # File paths
    MODEL_FILE = args.model_file
    INPUT_FILE = args.input_file
    OUTPUT_FILE = args.output_file
    W2I_FILE = args.w2i_file
    I2W_FILE = args.i2w_file

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load trained model ==============================================================================================
    # NOTE: pickle.load can execute arbitrary code; only point these options
    # at vocabulary dumps you produced yourself.
    with open(W2I_FILE, 'rb') as f_w2i, open(I2W_FILE, 'rb') as f_i2w:
        w2i = pickle.load(f_w2i)
        i2w = pickle.load(f_i2w)

    test_X, _, _ = build_dataset(INPUT_FILE,
                                 w2i=w2i,
                                 n_data=N_TEST,
                                 target=False)

    model = dy.Model()
    V, encoder, decoder = dy.load(MODEL_FILE, model)

    # Special token ids are loop invariants; look them up once.
    bos_id = w2i['<s>']
    eos_id = w2i['</s>']

    # Decode
    pred_y = []
    for x in tqdm(test_X):
        dy.renew_cg()
        associate_parameters([encoder, decoder])

        # Initial states
        x_embs = [dy.lookup(V, x_t) for x_t in x]
        hp, hb_1 = encoder(x_embs)
        decoder.set_initial_states(hp, hb_1)
        s_0, c_0 = decoder.s_0, decoder.c_0

        # Each candidate is
        # [accumulated log prob, last token id, state s, state c, decoded ids]
        candidates = [[0, bos_id, s_0, c_0, []]]

        for _ in range(MAX_LEN):
            tmp_candidates = []
            end_flag = True
            for score_tm1, y_tm1, s_tm1, c_tm1, y_02tm1 in candidates:
                if y_tm1 == eos_id:
                    # Finished hypotheses are carried over unchanged.
                    tmp_candidates.append(
                        [score_tm1, y_tm1, s_tm1, c_tm1, y_02tm1])
                else:
                    end_flag = False
                    y_tm1_emb = dy.lookup(V, y_tm1)
                    s_t, c_t, _q_t = decoder(y_tm1_emb,
                                             tm1s=[s_tm1, c_tm1],
                                             test=True)
                    _q_t = np.log(_q_t.npvalue())  # Calculate log probs
                    # Pick the K highest log probs and their ids with a
                    # single sort instead of sorting the vector twice.
                    y_t = np.argsort(_q_t)[::-1][:K]
                    q_t = _q_t[y_t]
                    score_t = score_tm1 + q_t  # Accumulate log probs
                    tmp_candidates.extend(
                        [[score_tk, y_tk, s_t, c_t, y_02tm1 + [y_tk]]
                         for score_tk, y_tk in zip(score_t, y_t)])
            if end_flag:
                # Every hypothesis on the beam has emitted '</s>'.
                break
            # Rank by length-normalised log prob and keep the K best.
            candidates = sorted(
                tmp_candidates, key=lambda cand: -cand[0] / len(cand[-1])
            )[:K]

        # Pick the candidate with the highest score
        pred = candidates[0][-1]
        if eos_id in pred:
            pred.remove(eos_id)
        pred_y.append(pred)

    # Build the whole text with join; repeated '+=' is quadratic in the
    # number of predictions.
    pred_y_txt = ''.join(
        ' '.join([i2w[com] for com in pred]) + '\n' for pred in pred_y)

    with open(OUTPUT_FILE, 'w') as f:
        f.write(pred_y_txt)
Ejemplo n.º 19
0
def _drgd_minibatch_loss(V, encoder, decoder, X_mb, y_mb):
    """Return the mini-batch loss expression for the DRGD model.

    For every (source, target) pair the source is embedded with ``V`` and
    encoded, the decoder is initialised from the encoder output and run on
    the embedded target without its last token. The per-step loss is the
    negative log-softmax of the next target token plus the KL term the
    decoder returns for that step; steps are summed per example and the
    per-example sums are averaged with ``dy.average``.

    The caller must renew the computation graph and associate the encoder
    and decoder parameters before invoking this helper.

    :param V: lookup parameters used for both source and target tokens
    :param encoder: callable encoder, invoked as ``encoder(x_embs)``
    :param decoder: decoder exposing ``set_initial_states`` and ``__call__``
    :param X_mb: source sequences of the mini batch
    :param y_mb: target sequences of the mini batch
    :return: DyNet expression holding the averaged mini-batch loss
    """
    losses = []
    for x, t in zip(X_mb, y_mb):
        # Teacher forcing: feed t[:-1], predict t[1:].
        t_in, t_out = t[:-1], t[1:]

        # Encoder
        x_embs = [dy.lookup(V, x_t) for x_t in x]
        he = encoder(x_embs)

        # Decoder
        t_embs = [dy.lookup(V, t_t) for t_t in t_in]
        decoder.set_initial_states(he)
        y, KL = decoder(t_embs)

        loss = dy.esum([
            dy.pickneglogsoftmax(y_t, t_t) + KL_t
            for y_t, t_t, KL_t in zip(y, t_out, KL)
        ])
        losses.append(loss)

    return dy.average(losses)


def main():
    """Train the DRGD summarization model and save a checkpoint per epoch.

    Parses the command line, configures DyNet, builds the vocabulary and the
    train/validation sets from the hard-coded ``./data`` files, then runs
    ``--n_epochs`` epochs of mini-batch training with Adam. After each epoch
    the mean train/validation loss is printed, ``[V, encoder, decoder]`` is
    saved to ``./model_e<epoch>`` and the vocabulary mappings are pickled to
    ``./w2i.dump`` and ``./i2w.dump``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Deep Recurrent Generative Decoder for Abstractive Text Summarization in DyNet'
    )

    # The help text now reports the real default ('0'); it used to say '-1'.
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=3,
                        help='Number of epochs [default: 3]')
    parser.add_argument(
        '--n_train',
        type=int,
        default=3803957,
        help=
        'Number of training examples (up to 3803957 in gigaword) [default: 3803957]'
    )
    parser.add_argument(
        '--n_valid',
        type=int,
        default=189651,
        help=
        'Number of validation examples (up to 189651 in gigaword) [default: 189651]'
    )
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='Mini batch size [default: 32]')
    # Previously hard-coded to 60000; the default keeps that behaviour.
    parser.add_argument('--vocab_size',
                        type=int,
                        default=60000,
                        help='Vocabulary size [default: 60000]')
    parser.add_argument('--emb_dim',
                        type=int,
                        default=256,
                        help='Embedding size [default: 256]')
    parser.add_argument('--hid_dim',
                        type=int,
                        default=256,
                        help='Hidden state size [default: 256]')
    parser.add_argument('--lat_dim',
                        type=int,
                        default=256,
                        help='Latent size [default: 256]')
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=8192,
        help='Amount of memory to allocate [mb] [default: 8192]')
    args = parser.parse_args()
    print(args)

    # Restrict the visible GPUs before DyNet is initialised.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_EPOCHS = args.n_epochs
    N_TRAIN = args.n_train
    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    VOCAB_SIZE = args.vocab_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim
    LAT_DIM = args.lat_dim
    ALLOC_MEM = args.alloc_mem

    # File paths
    TRAIN_X_FILE = './data/train.article.txt'
    TRAIN_Y_FILE = './data/train.title.txt'
    VALID_X_FILE = './data/valid.article.filter.txt'
    VALID_Y_FILE = './data/valid.title.filter.txt'

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_STATE)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Build dataset ====================================================================================
    # Word counts are accumulated over both the articles and the titles.
    w2c = build_word2count(TRAIN_X_FILE, n_data=N_TRAIN)
    w2c = build_word2count(TRAIN_Y_FILE, w2c=w2c, n_data=N_TRAIN)

    train_X, w2i, i2w = build_dataset(TRAIN_X_FILE,
                                      w2c=w2c,
                                      padid=False,
                                      eos=True,
                                      unksym='<unk>',
                                      target=False,
                                      n_data=N_TRAIN,
                                      vocab_size=VOCAB_SIZE)
    train_y, _, _ = build_dataset(TRAIN_Y_FILE,
                                  w2i=w2i,
                                  target=True,
                                  n_data=N_TRAIN)

    valid_X, _, _ = build_dataset(VALID_X_FILE,
                                  w2i=w2i,
                                  target=False,
                                  n_data=N_VALID)
    valid_y, _, _ = build_dataset(VALID_Y_FILE,
                                  w2i=w2i,
                                  target=True,
                                  n_data=N_VALID)

    # From here on the vocabulary size is whatever the built mapping contains.
    VOCAB_SIZE = len(w2i)
    OUT_DIM = VOCAB_SIZE
    print(VOCAB_SIZE)

    # Build model ======================================================================================
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    # One embedding table shared by source and target tokens.
    V = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))

    encoder = BiGRU(model, EMB_DIM, 2 * HID_DIM)
    decoder = RecurrentGenerativeDecoder(model, EMB_DIM, 2 * HID_DIM, LAT_DIM,
                                         OUT_DIM)

    # Train model =======================================================================================
    n_batches_train = math.ceil(len(train_X) / BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_X) / BATCH_SIZE)

    for epoch in range(N_EPOCHS):
        # Train
        train_X, train_y = shuffle(train_X, train_y)
        loss_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            encoder.associate_parameters()
            decoder.associate_parameters()

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            mb_loss = _drgd_minibatch_loss(V, encoder, decoder,
                                           train_X[start:end],
                                           train_y[start:end])

            # Forward prop
            loss_all_train.append(mb_loss.value())

            # Backward prop
            mb_loss.backward()
            trainer.update()

        # Valid: same loss, but no backward pass and no parameter update.
        loss_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            encoder.associate_parameters()
            decoder.associate_parameters()

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            mb_loss = _drgd_minibatch_loss(V, encoder, decoder,
                                           valid_X[start:end],
                                           valid_y[start:end])

            # Forward prop
            loss_all_valid.append(mb_loss.value())

        print('EPOCH: %d, Train Loss: %.3f, Valid Loss: %.3f' %
              (epoch + 1, np.mean(loss_all_train), np.mean(loss_all_valid)))

        # Save model ======================================================================================
        dy.save('./model_e' + str(epoch + 1), [V, encoder, decoder])
        with open('./w2i.dump', 'wb') as f_w2i, open('./i2w.dump',
                                                     'wb') as f_i2w:
            pickle.dump(w2i, f_w2i)
            pickle.dump(i2w, f_i2w)
Ejemplo n.º 20
0
def main():
    """Predict labels for a text file with a trained sentence-classification CNN.

    Loads the saved model and the vocabulary dumps, pads every input
    sequence with zeros on both sides by the largest convolution window,
    runs the network on each sequence and writes one predicted label per
    line (without a trailing newline) to ``--output_file``.
    """
    arg_parser = argparse.ArgumentParser(description='Convolutional Neural Networks for Sentence Classification in DyNet')

    # (flag, type, default, help) for every command-line option.
    cli_options = (
        ('--gpu', int, -1, 'GPU ID to use. For cpu, set -1 [default: -1]'),
        ('--model_file', str, './model', 'Model to use for prediction [default: ./model]'),
        ('--input_file', str, './data/valid_x.txt', 'Input file path [default: ./data/valid_x.txt]'),
        ('--output_file', str, './pred_y.txt', 'Output file path [default: ./pred_y.txt]'),
        ('--w2i_file', str, './w2i.dump', 'Word2Index file path [default: ./w2i.dump]'),
        ('--i2w_file', str, './i2w.dump', 'Index2Word file path [default: ./i2w.dump]'),
        ('--alloc_mem', int, 1024, 'Amount of memory to allocate [mb] [default: 1024]'),
    )
    for flag, value_type, default_value, help_text in cli_options:
        arg_parser.add_argument(flag, type=value_type, default=default_value, help=help_text)
    opts = arg_parser.parse_args()

    # Restrict the visible GPUs before DyNet is initialised.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(opts.gpu)

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_mem(opts.alloc_mem)
    dyparams.init()

    # Load model: three saved objects mean one embedding table followed by
    # the layers; otherwise two embedding tables precede the layers.
    model = dy.Model()
    loaded = dy.load(opts.model_file, model)
    multichannel = len(loaded) != 3
    if multichannel:
        V1, V2, layers = loaded[0], loaded[1], loaded[2:]
    else:
        V1, layers = loaded[0], loaded[1:]

    emb_dim = V1.shape()[0]
    win_sizes = layers[0].win_sizes

    # Load test data
    with open(opts.w2i_file, 'rb') as f_w2i, open(opts.i2w_file, 'rb') as f_i2w:
        w2i = pickle.load(f_w2i)
        i2w = pickle.load(f_i2w)

    # Pad with id 0 on both sides by the widest window.
    padding = [0] * max(win_sizes)
    raw_X, _, _ = build_dataset(opts.input_file, w2i=w2i, unksym='unk')
    test_X = [padding + token_ids + padding for token_ids in raw_X]

    def embed(table, token_ids):
        # Frozen lookups concatenated along d=1, then transposed.
        columns = [dy.lookup(table, tok, update=False) for tok in token_ids]
        return dy.transpose(dy.concatenate(columns, d=1))

    # Pred
    labels = []
    for token_ids in tqdm(test_X):
        # Create a new computation graph
        dy.renew_cg()
        associate_parameters(layers)

        if multichannel:
            # Stack the two embedding channels along d=2.
            x_embs = dy.concatenate([embed(V1, token_ids), embed(V2, token_ids)], d=2)
        else:
            # Single channel: add an explicit channel dimension of size 1.
            x_embs = dy.reshape(embed(V1, token_ids), (len(token_ids), emb_dim, 1))

        y = f_props(layers, x_embs, train=False)
        labels.append(str(int(binary_pred(y.value()))))

    with open(opts.output_file, 'w') as f:
        f.write('\n'.join(labels))
Ejemplo n.º 21
0
def main():
    """Decode summaries with a trained ABS model using beam search.

    Loads the vocabulary mappings and the saved model, runs a
    length-normalised beam search of width ``--beam_size`` (at most
    ``--max_len`` steps) over every input sequence and writes one
    space-joined prediction per line to ``--output_file``.
    """
    parser = argparse.ArgumentParser(
        description=
        'A Neural Attention Model for Abstractive Sentence Summarization in DyNet'
    )

    # The help text now reports the real default ('0'); it used to say '-'.
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='GPU ID to use. For cpu, set -1 [default: `0`]')
    parser.add_argument('--n_test',
                        type=int,
                        default=189651,
                        help='Number of test examples [default: `189651`]')
    parser.add_argument('--beam_size',
                        type=int,
                        default=5,
                        help='Beam size [default: `5`]')
    parser.add_argument('--max_len',
                        type=int,
                        default=100,
                        help='Maximum length of decoding [default: `100`]')
    parser.add_argument('--model_file',
                        type=str,
                        default='./model_e1',
                        help='Trained model file path [default: `./model_e1`]')
    parser.add_argument(
        '--input_file',
        type=str,
        default='./data/valid.article.filter.txt',
        help='Test file path [default: `./data/valid.article.filter.txt`]')
    parser.add_argument('--output_file',
                        type=str,
                        default='./pred_y.txt',
                        help='Output file path [default: `./pred_y.txt`]')
    parser.add_argument('--w2i_file',
                        type=str,
                        default='./w2i.dump',
                        help='Word2Index file path [default: `./w2i.dump`]')
    parser.add_argument('--i2w_file',
                        type=str,
                        default='./i2w.dump',
                        help='Index2Word file path [default: `./i2w.dump`]')
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=1024,
        help='Amount of memory to allocate [mb] [default: `1024`]')
    args = parser.parse_args()

    # Restrict the visible GPUs before DyNet is initialised.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_TEST = args.n_test
    K = args.beam_size
    MAX_LEN = args.max_len
    ALLOC_MEM = args.alloc_mem

    # File paths
    MODEL_FILE = args.model_file
    INPUT_FILE = args.input_file
    OUTPUT_FILE = args.output_file
    W2I_FILE = args.w2i_file
    I2W_FILE = args.i2w_file

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_STATE)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load trained model ==============================================================================================
    # NOTE: pickle.load can execute arbitrary code; only point these options
    # at vocabulary dumps you produced yourself.
    with open(W2I_FILE, 'rb') as f_w2i, open(I2W_FILE, 'rb') as f_i2w:
        w2i = pickle.load(f_w2i)
        i2w = pickle.load(f_i2w)

    test_X, _, _ = build_dataset(INPUT_FILE, w2i=w2i, n_data=N_TEST)

    model = dy.Model()
    rush_abs = dy.load(MODEL_FILE, model)[0]
    # The language-model window size is read back from the saved model.
    C = rush_abs.c

    # Special token ids are loop invariants; look them up once.
    bos_id = w2i['<s>']
    eos_id = w2i['</s>']

    # Decode
    pred_y = []
    for x in tqdm(test_X):
        dy.renew_cg()
        rush_abs.associate_parameters()

        # Initial states
        rush_abs.set_initial_states(x)

        # Each candidate is
        # [accum log prob, last token id, context window of C ids, decoded ids]
        candidates = [[0, bos_id, [bos_id] * C, []]]

        for _ in range(MAX_LEN):
            tmp_candidates = []
            end_flag = True
            for score_tm1, y_tm1, y_c, y_02tm1 in candidates:
                if y_tm1 == eos_id:
                    # Finished hypotheses are carried over unchanged.
                    tmp_candidates.append([score_tm1, y_tm1, y_c, y_02tm1])
                else:
                    end_flag = False
                    _q_t = rush_abs(t=y_c, test=True)
                    _q_t = np.log(_q_t.npvalue())  # Log probs
                    # Pick the K highest log probs and their ids with a
                    # single sort instead of sorting the vector twice.
                    y_t = np.argsort(_q_t)[::-1][:K]
                    q_t = _q_t[y_t]
                    score_t = score_tm1 + q_t  # Accum log probs
                    # Slide the context window: drop the oldest id, append
                    # the newly chosen one.
                    tmp_candidates.extend(
                        [[score_tk, y_tk, y_c[1:] + [y_tk], y_02tm1 + [y_tk]]
                         for score_tk, y_tk in zip(score_t, y_t)])

            if end_flag:
                # Every hypothesis on the beam has emitted '</s>'.
                break
            # Rank by length-normalised score and keep the K best.
            candidates = sorted(
                tmp_candidates, key=lambda cand: -cand[0] / len(cand[-1])
            )[:K]

        # Pick the highest-scored candidate
        # NOTE(review): the decoded ids keep a trailing '</s>' when one was
        # generated, so it is written to the output file — confirm that the
        # downstream evaluation expects that.
        pred_y.append(candidates[0][-1])

    # Build the whole text with join; repeated '+=' is quadratic in the
    # number of predictions.
    pred_y_txt = ''.join(
        ' '.join([i2w[com] for com in pred]) + '\n' for pred in pred_y)

    with open(OUTPUT_FILE, 'w') as f:
        f.write(pred_y_txt)