Code example #1
 def __init__(self, config):
     super().__init__()
     self.encoder_word = Encoder(config, config.src_vocab_size)
     self.encoder_char = Encoder(config, config.tgt_vocab_size)
     self.pointer = Pointer(config)
     self.attention = Luong_Attention(config)
     self.decoder = Decoder(config)
     self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
     self.softmax = nn.Softmax(dim=-1)
     self.s_len = config.s_len
     self.bos = config.bos
Code example #2
    def __init__(self, args):
        """
        Basic initialization of Transformer.

        Arguments
        ---------
        args: <argparse.Namespace> Arguments used for overall process.
        """
        super().__init__()

        self.args = args
        self.num_stacks = self.args.num_stacks
        self.d_model = self.args.d_model
        self.vocab_size = self.args.vocab_size

        self.emb = EmbeddingLayer(self.args)

        encoders = [Encoder(self.args) for _ in range(self.num_stacks)]
        self.encoder_stack = nn.Sequential(*encoders)

        decoders = [Decoder(self.args) for _ in range(self.num_stacks)]
        self.decoder_stack = nn.ModuleList(decoders)

        self.output_linear = nn.Linear(in_features=self.d_model,
                                       out_features=self.vocab_size,
                                       bias=False)
        self.output_linear.weight = self.emb.embedding_layer.weight

        self.softmax = nn.LogSoftmax(dim=-1)
        self.dropout = nn.Dropout(p=0.1)
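For orientation, the following is a minimal, hypothetical forward pass showing how the modules above are usually chained; the Encoder/Decoder call signatures and the omission of attention masks are assumptions, not code from this project.

    def forward(self, src_ids, tgt_ids):
        # Hypothetical wiring of the modules defined above; masking is omitted
        # and the sub-module call signatures are assumed.
        enc = self.dropout(self.emb(src_ids))
        memory = self.encoder_stack(enc)        # nn.Sequential runs the encoders back to back
        dec = self.dropout(self.emb(tgt_ids))
        for decoder in self.decoder_stack:      # nn.ModuleList is iterated explicitly
            dec = decoder(dec, memory)
        logits = self.output_linear(dec)        # weights tied to the embedding matrix
        return self.softmax(logits)             # log-probabilities over vocab_size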
Code example #3
 def __init__(self, config):
     super().__init__()
     self.encoder = Encoder(config, config.src_vocab_size)
     self.decoder = Decoder(config)
     self.bos = config.bos
     self.s_len = config.s_len
     self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
Code example #4
 def __init__(self,
              vocabulary_size,
              sos_token,
              eos_token,
              pad_token,
              max_string_length=default_eda['string_max_length'],
              attention_size=default_attention['size'],
              embedding_size=default_embedding['size'],
              hidden_size=default_gru['hidden_size'],
              num_layers=default_gru['num_layers'],
              dropout=default_gru['dropout'],
              fixed_encoder=None):
     super().__init__()
     self.max_string_length = max_string_length
     self.attention_size = attention_size
     self.vocabulary_size = vocabulary_size
     if fixed_encoder:
         # Fix encoder's weights
         for p in fixed_encoder.parameters():
             p.requires_grad_(False)
         self.encoder = fixed_encoder
     else:
         self.encoder = Encoder(vocabulary_size, embedding_size,
                                hidden_size, num_layers, dropout)
     # self.decoder = Decoder(vocabulary_size)
     self.decoder = DecoderAndPointer(vocabulary_size,
                                      embedding_size,
                                      hidden_size,
                                      num_layers,
                                      dropout,
                                      attention_size,
                                      pad_token,
                                      shift_focus=True)
     self.sos_token = sos_token
     self.eos_token = eos_token
Code example #5
    def __init__(self, args, text_data):

        super(ModelGumbel, self).__init__()
        self.args = args
        self.text_data = text_data
        # embedding layer
        if self.args.pre_embedding and not self.args.elmo:
            # pre_trained embeddings are 300 dimensional, trainable
            self.embedding_layer = nn.Embedding.from_pretrained(torch.Tensor(
                self.text_data.pre_trained_embedding),
                                                                freeze=False)

        elif self.args.elmo:
            self.embedding_layer = Elmo(options_file,
                                        weight_file,
                                        1,
                                        dropout=1.0 - self.args.drop_out,
                                        requires_grad=self.args.train_elmo)
        else:
            self.embedding_layer = nn.Embedding(
                num_embeddings=self.text_data.getVocabularySize(),
                embedding_dim=self.args.embedding_size)

        # first generator
        self.generator = Generator(args=self.args)

        # then encoder
        self.encoder = Encoder(args=self.args)
Code example #6
File: vqvae.py Project: yifr/brainqa
    def __init__(self,
                 h_dim,
                 res_h_dim,
                 n_res_layers,
                 n_embeddings,
                 embedding_dim,
                 beta,
                 restart=True):
        super(VQVAE, self).__init__()
        # encode image into continuous latent space
        self.encoder = Encoder(in_dim=256,
                               h_dim=h_dim,
                               n_res_layers=n_res_layers,
                               res_h_dim=res_h_dim)
        self.pre_quantization_conv = nn.Conv1d(h_dim,
                                               embedding_dim,
                                               kernel_size=3,
                                               stride=1,
                                               padding=1)

        # Define discretization bottleneck
        if not restart:
            self.vector_quantization = VectorQuantizer(n_embeddings,
                                                       embedding_dim, beta)
        else:
            self.vector_quantization = VectorQuantizerRandomRestart(
                n_embeddings, embedding_dim, beta)

        # decode the discrete latent representation
        self.decoder = Decoder(embedding_dim, h_dim, n_res_layers, res_h_dim)

        # e_indices is used in sampling; only the last batch is saved to represent the last latent state
        self.e_indices = None
Code example #7
    def __init__(self,
                 channels,
                 h_dim,
                 res_h_dim,
                 n_res_layers,
                 n_embeddings,
                 embedding_dim,
                 beta,
                 save_img_embedding_map=False):
        super(VQVAE, self).__init__()
        # encode image into continuous latent space
        self.encoder = Encoder(channels, h_dim, n_res_layers, res_h_dim)
        self.pre_quantization_conv = nn.Conv2d(h_dim,
                                               embedding_dim,
                                               kernel_size=1,
                                               stride=1)
        # pass continuous latent vector through discretization bottleneck
        self.vector_quantization = VectorQuantizer(n_embeddings, embedding_dim,
                                                   beta)
        # decode the discrete latent representation
        self.decoder = Decoder(channels, embedding_dim, h_dim, n_res_layers,
                               res_h_dim)

        if save_img_embedding_map:
            self.img_to_embedding_map = {i: [] for i in range(n_embeddings)}
        else:
            self.img_to_embedding_map = None
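A hedged sketch of the forward pass these modules imply; the return values of the quantizer are an assumption, not taken from the project.

    def forward(self, x):
        # Assumed data flow: image -> continuous latents -> quantized codes -> reconstruction.
        z_e = self.encoder(x)
        z_e = self.pre_quantization_conv(z_e)   # project to embedding_dim channels
        # The quantizer's return signature is assumed here.
        embedding_loss, z_q, perplexity, _, _ = self.vector_quantization(z_e)
        x_hat = self.decoder(z_q)
        return embedding_loss, x_hat, perplexity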
Code example #8
File: model.py Project: zgfatipe/Informer2020
    def __init__(self,
                 enc_in,
                 dec_in,
                 c_out,
                 seq_len,
                 label_len,
                 out_len,
                 factor=5,
                 d_model=512,
                 n_heads=8,
                 e_layers=3,
                 d_layers=2,
                 d_ff=512,
                 dropout=0.0,
                 attn='prob',
                 embed='fixed',
                 data='ETTh',
                 activation='gelu',
                 device=torch.device('cuda:0')):
        super(Informer, self).__init__()
        self.pred_len = out_len
        self.attn = attn

        # Encoding
        self.enc_embedding = DataEmbedding(enc_in, d_model, embed, data,
                                           dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, embed, data,
                                           dropout)
        # Attention
        Attn = ProbAttention if attn == 'prob' else FullAttention
        # Encoder
        self.encoder = Encoder([
            EncoderLayer(AttentionLayer(
                Attn(False, factor, attention_dropout=dropout), d_model,
                n_heads),
                         d_model,
                         d_ff,
                         dropout=dropout,
                         activation=activation) for l in range(e_layers)
        ], [ConvLayer(d_model) for l in range(e_layers - 1)],
                               norm_layer=torch.nn.LayerNorm(d_model))
        # Decoder
        self.decoder = Decoder([
            DecoderLayer(
                AttentionLayer(
                    FullAttention(True, factor, attention_dropout=dropout),
                    d_model, n_heads),
                AttentionLayer(
                    FullAttention(False, factor, attention_dropout=dropout),
                    d_model, n_heads),
                d_model,
                d_ff,
                dropout=dropout,
                activation=activation,
            ) for l in range(d_layers)
        ],
                               norm_layer=torch.nn.LayerNorm(d_model))
        # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
        # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
        self.projection = nn.Linear(d_model, c_out, bias=True)
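For reference, a hedged sketch of how an Informer-style model built this way is typically called; the embedding and decoder call signatures are assumptions, and the final slice keeps only the pred_len forecast steps.

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        # Assumed forward pass: embed both streams, encode with distilling
        # ConvLayers, decode with masked self- plus cross-attention, project.
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out = self.encoder(enc_out)
        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out)
        dec_out = self.projection(dec_out)
        return dec_out[:, -self.pred_len:, :]   # [batch, out_len, c_out]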
Code example #9
    def _build(self, images):
        embedder = Embedder()
        embedded_grads_weights = embedder.embed_all_grads_weights(
            self._placeholders)
        # Fake batching
        embedded_grads_weights = tf.expand_dims(embedded_grads_weights, 0)
        encoder = Encoder(self._source_num_way, self._target_num_way)
        encoded = encoder.encode(embedded_grads_weights)
        decoded = encoder.decode(encoded)
        # Fake batching
        decoded = tf.squeeze(decoded, [0])
        weight_updates = embedder.unembed_all_weights(decoded)

        the_list = [tf.nn.moments(w, [0]) for w in weight_updates]
        mean_means = tf.reduce_mean([tf.reduce_mean(v[0]) for v in the_list])
        mean_vars = tf.reduce_mean([tf.reduce_mean(v[1]) for v in the_list])
        tf.summary.scalar('weight_updates_mean', mean_means,
                          [META_TRAIN_COMBINED_SUMMARIES])
        tf.summary.scalar('weight_updates_var', mean_vars,
                          [META_TRAIN_COMBINED_SUMMARIES])
        # Get the updated model
        new_weights = [
            self._placeholders[0][1] + weight_updates[0],
            self._placeholders[1][1] + weight_updates[1],
            self._placeholders[2][1] + weight_updates[2],
            self._placeholders[3][1] + weight_updates[3],
            self._placeholders[4][1] + weight_updates[4]
        ]
        self.outputs = self.new_model_forward(new_weights, images)
        return self.outputs
Code example #10
File: builder.py Project: SaiKeshav/coordparser
    def _build_encoder(self):
        loader = self.loader
        inputs = self.inputs

        contextualized_embeddings = None
        if sum(('elmo' in inputs, 'bert-base' in inputs, 'bert-large'
                in inputs)) > 1:
            raise ValueError(
                'at most 1 contextualized embedding can be chosen')
        elif 'elmo' in inputs:
            contextualized_embeddings = ElmoEmbedding(usage='weighted_sum')
        elif 'bert-base' in inputs:
            contextualized_embeddings \
                = BertBaseEmbedding(usage='second_to_last')
        elif 'bert-large' in inputs:
            contextualized_embeddings \
                = BertLargeEmbedding(usage='second_to_last')

        encoder = Encoder(
            loader.get_embeddings(
                'word',
                normalize=lambda W: W / np.std(W)
                if loader.use_pretrained_embed and np.std(W) > 0. else W),
            loader.get_embeddings('pos') if 'postag' in inputs else None,
            loader.get_embeddings('char') if 'char' in inputs else None,
            contextualized_embeddings, self.char_feature_size,
            self.char_pad_id, self.char_window_size, self.char_dropout,
            self.n_lstm_layers, self.lstm_hidden_size, self.embeddings_dropout,
            self.lstm_dropout, self.recurrent_dropout, self.bert_model,
            self.bert_dir)
        return encoder
Code example #11
File: trainer.py Project: maozhiqiang/TTS-Cube
    def phase_3_train_encoder(params):
        from io_modules.dataset import Dataset
        from models.encoder import Encoder
        from trainers.encoder import Trainer
        trainset = Dataset("data/processed/train")
        devset = Dataset("data/processed/dev")
        sys.stdout.write('Found ' + str(len(trainset.files)) +
                         ' training files and ' + str(len(devset.files)) +
                         ' development files\n')

        character2int = {}
        for train_file in trainset.files:
            from io_modules.dataset import DatasetIO
            dio = DatasetIO()
            lab_list = dio.read_lab(train_file + ".txt")
            for entry in lab_list:
                if entry.phoneme not in character2int:
                    character2int[entry.phoneme] = len(character2int)
        sys.stdout.write('Found ' + str(len(character2int)) +
                         ' unique phonemes\n')

        # Write the phoneme-to-index map as UTF-8 text (avoids mixing bytes and str).
        f = open('data/models/encoder.chars', 'w', encoding='utf-8')
        for char in character2int:
            f.write(char + '\t' + str(character2int[char]) + '\n')
        f.close()

        encoder = Encoder(params, len(character2int), character2int)
        if params.resume:
            sys.stdout.write('Resuming from previous checkpoint\n')
            encoder.load('data/models/rnn_encoder')
        trainer = Trainer(encoder, trainset, devset)
        trainer.start_training(10, 1000)
Code example #12
File: synthesis.py Project: rinleit/TTS-Cube
def synthesize(speaker, input_file, output_file, params):
    print("[Encoding]")
    from io_modules.dataset import Dataset
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    from trainers.encoder import Trainer
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')

    seq = create_lab_input(input_file, speaker)
    mgc, att = encoder.generate(seq)
    _render_spectrogram(mgc, output_file + '.png')

    print("[Vocoding]")
    from models.vocoder import Vocoder
    from trainers.vocoder import Trainer
    vocoder = Vocoder(params, runtime=True)
    vocoder.load('data/models/rnn_vocoder')

    import time
    start = time.time()
    signal = vocoder.synthesize(mgc,
                                batch_size=1000,
                                temperature=params.temperature)
    stop = time.time()
    sys.stdout.write(" execution time=" + str(stop - start))
    sys.stdout.write('\n')
    sys.stdout.flush()
    from io_modules.dataset import DatasetIO
    dio = DatasetIO()
    enc = dio.b16_dec(signal, discreete=True)
    dio.write_wave(output_file, enc, params.target_sample_rate)
Code example #13
File: preprocess.py Project: Alqatf/BUTD_model
def extract_imgs_feat():
    encoder = Encoder(opt.resnet101_file)
    encoder.to(opt.device)
    encoder.eval()

    imgs = os.listdir(opt.imgs_dir)
    imgs.sort()

    if not os.path.exists(opt.out_feats_dir):
        os.makedirs(opt.out_feats_dir)
    with h5py.File(os.path.join(opt.out_feats_dir, '%s_fc.h5' % opt.dataset_name)) as file_fc, \
            h5py.File(os.path.join(opt.out_feats_dir, '%s_att.h5' % opt.dataset_name)) as file_att:
        try:
            for img_nm in tqdm.tqdm(imgs, ncols=100):
                img = skimage.io.imread(os.path.join(opt.imgs_dir, img_nm))
                with torch.no_grad():
                    img = encoder.preprocess(img)
                    img = img.to(opt.device)
                    img_fc, img_att = encoder(img)
                file_fc.create_dataset(img_nm,
                                       data=img_fc.cpu().float().numpy())
                file_att.create_dataset(img_nm,
                                        data=img_att.cpu().float().numpy())
        except BaseException as e:
            file_fc.close()
            file_att.close()
            print(
                '--------------------------------------------------------------------'
            )
            raise e
Code example #14
def main(model_filename, pitch_model_filename, output_dir, batch_size):
    model = torch.nn.Module()
    model.add_module('encoder', Encoder(**encoder_config))
    model.add_module('generator',
                     Generator(sum(encoder_config['n_out_channels'])))
    model = load_checkpoint(model_filename, model).cuda()
    model.eval()

    if os.path.isfile(pitch_model_filename):
        global pitch_model, use_predicted_pitch
        use_predicted_pitch = True
        pitch_model = PitchModel(**pitch_config)
        pitch_model = load_checkpoint(pitch_model_filename, pitch_model).cuda()
        pitch_model.eval()

    testset = TestSet(**(data_config))
    cond, name = testset[0]
    for files in chunker(testset, batch_size):
        files = list(zip(*files))
        cond_input, file_paths = files[:-1], files[-1]
        cond_input = [
            utils.to_gpu(torch.from_numpy(np.stack(x))).float()
            for x in cond_input
        ]

        #cond_input = model.encoder(cond_input.transpose(1, 2)).transpose(1, 2)
        cond_input = model.encoder(cond_input[0])
        audio = model.generator(cond_input)

        for i, file_path in enumerate(file_paths):
            print("writing {}".format(file_path))
            wav = audio[i].cpu().squeeze().detach().numpy() * 32768.0
            write("{}/{}.wav".format(output_dir, file_path),
                  data_config['sampling_rate'], wav.astype(np.int16))
Code example #15
    def test_train_method(self):
        file_name = 'test/test_data/attention_test.txt'
        fine_tune_model_name = '../models/glove_model_40.pth'
        self.test_data_loader_attention = DataLoaderAttention(
            file_name=file_name)
        self.test_data_loader_attention.load_data()
        source2index, index2source, target2index, index2target, train_data = \
            self.test_data_loader_attention.load_data()
        EMBEDDING_SIZE = 50
        HIDDEN_SIZE = 32

        encoder = Encoder(len(source2index), EMBEDDING_SIZE, HIDDEN_SIZE, 3,
                          True)
        decoder = Decoder(len(target2index), EMBEDDING_SIZE, HIDDEN_SIZE * 2)

        self.trainer = Trainer(fine_tune_model=fine_tune_model_name)

        self.trainer.train_attention(
            train_data=train_data,
            source2index=source2index,
            target2index=target2index,
            index2source=index2source,
            index2target=index2target,
            encoder_model=encoder,
            decoder_model=decoder,
        )
Code example #16
def detect(path, encoder=None, decoder=None):
    torch.backends.cudnn.benchmark = True

    dataset = LoadImages(path, img_size=config.IMAGE_SIZE, used_layers=config.USED_LAYERS)

    if not encoder or not decoder:
        in_channels = num_channels(config.USED_LAYERS)
        encoder = Encoder(in_channels=in_channels)
        decoder = Decoder(num_classes=config.NUM_CLASSES+1)
        encoder = encoder.to(config.DEVICE)
        decoder = decoder.to(config.DEVICE)

        _, encoder, decoder = load_checkpoint(encoder, decoder, config.CHECKPOINT_FILE, config.DEVICE)

    encoder.eval()
    decoder.eval()

    for _, layers, path in dataset:
        with torch.no_grad():
            layers = torch.from_numpy(layers).to(config.DEVICE, non_blocking=True)
            if layers.ndimension() == 3:
                layers = layers.unsqueeze(0)

            features = encoder(layers)
            predictions = decoder(features)
            _, out = predictions, predictions.sigmoid()

            plot_volumes(to_volume(out, config.VOXEL_THRESH).cpu(), [path], config.NAMES)
Code example #17
 def __init__(self, d_model, d_ff, d_K, d_V, n_heads, n_layers,
              sourceVocabSize, sourceLength, targetVocabSize, targetLength):
     super(Transformer, self).__init__()
     self.encoder = Encoder(sourceVocabSize, sourceLength, d_model, d_ff,
                            d_K, d_V, n_heads, n_layers)
     self.decoder = Decoder(targetVocabSize, targetLength, d_model, d_ff,
                            d_K, d_V, n_heads, n_layers)
     self.projection = nn.Linear(d_model, targetVocabSize, bias=False)
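A minimal, assumed forward pass for this constructor; the Encoder/Decoder call signatures below are guesses based on the arguments above, not code from the source.

 def forward(self, encoderInputs, decoderInputs):
     # Assumed wiring: encode the source ids, decode the target ids against
     # the encoder memory, then project to target-vocabulary logits.
     encoderOutputs = self.encoder(encoderInputs)
     decoderOutputs = self.decoder(decoderInputs, encoderInputs, encoderOutputs)
     return self.projection(decoderOutputs)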
Code example #18
File: model.py Project: toandaominh1997/OCR
 def __init__(self, num_classes, fixed_height = 48, net='efficientnet'):
     super(Model, self).__init__()
     self.encoder = Encoder(net = net)
     self.decoder = Decoder(input_dim=int(fixed_height * 288 / 8), num_class=num_classes)
     self.crnn = nn.Sequential(
         self.encoder,
         self.decoder
     )
     self.log_softmax = nn.LogSoftmax(dim=2)
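A hedged sketch of the matching forward pass; the shape handling is an assumption, though nn.LogSoftmax(dim=2) suggests per-time-step class log-probabilities suitable for CTC-style training.

 def forward(self, images):
     # Assumed flow: CNN encoder -> sequence decoder -> per-step class log-probs.
     logits = self.crnn(images)          # Sequential(encoder, decoder)
     return self.log_softmax(logits)     # log-probabilities along dim=2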
Code example #19
File: concat.py Project: raineydavid/Kaggle-CV
    def __init__(self, que_dim: int, que_input_embs: list,
                 que_output_embs: list, pro_dim: int, pro_input_embs: list,
                 pro_output_embs: list, inter_dim: int, output_dim: int):
        super().__init__()

        self.que_model = Encoder(que_dim, inter_dim, output_dim,
                                 que_input_embs, que_output_embs)
        self.pro_model = Encoder(pro_dim, inter_dim, output_dim,
                                 pro_input_embs, pro_output_embs)

        self.merged = Concatenate()(
            [self.que_model.outputs[0], self.pro_model.outputs[0]])

        self.inter = Dense(16, activation='tanh')(self.merged)
        self.outputs = Dense(1, activation='sigmoid')(self.inter)

        super().__init__([self.que_model.inputs[0], self.pro_model.inputs[0]],
                         self.outputs)
Code example #20
 def test_forward(self):
     encoder = Encoder(self.input_size,
                       self.hidden_size,
                       self.num_layers,
                       bidirectional=self.bidirectional,
                       rnn_type=self.rnn_type)
     output, hidden = encoder(self.padded_input, self.input_lengths)
     self.assertEqual(output.size(),
                      torch.Size([self.N, self.T, self.hidden_size]))
Code example #21
def evaluate_hand_draw_net(cfg):
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W
    CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W

    eval_transforms = utils.data_transforms.Compose([
        utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE),
        utils.data_transforms.RandomBackground(cfg.TEST.RANDOM_BG_COLOR_RANGE),
        utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN,
                                        std=cfg.DATASET.STD),
        utils.data_transforms.ToTensor(),
    ])

    # Set up networks
    encoder = Encoder(cfg)
    decoder = Decoder(cfg)
    azi_classes, ele_classes = int(360 / cfg.CONST.BIN_SIZE), int(
        180 / cfg.CONST.BIN_SIZE)
    view_estimater = ViewEstimater(cfg,
                                   azi_classes=azi_classes,
                                   ele_classes=ele_classes)

    if torch.cuda.is_available():
        encoder = torch.nn.DataParallel(encoder).cuda()
        decoder = torch.nn.DataParallel(decoder).cuda()
        view_estimater = torch.nn.DataParallel(view_estimater).cuda()

    # Load weight
    # Load weight for encoder, decoder
    print('[INFO] %s Loading reconstruction weights from %s ...' %
          (dt.now(), cfg.EVALUATE_HAND_DRAW.RECONSTRUCTION_WEIGHTS))
    rec_checkpoint = torch.load(cfg.EVALUATE_HAND_DRAW.RECONSTRUCTION_WEIGHTS)
    encoder.load_state_dict(rec_checkpoint['encoder_state_dict'])
    decoder.load_state_dict(rec_checkpoint['decoder_state_dict'])
    print('[INFO] Best reconstruction result at epoch %d ...' %
          rec_checkpoint['epoch_idx'])

    # Load weight for view estimater
    print('[INFO] %s Loading view estimation weights from %s ...' %
          (dt.now(), cfg.EVALUATE_HAND_DRAW.VIEW_ESTIMATION_WEIGHTS))
    view_checkpoint = torch.load(
        cfg.EVALUATE_HAND_DRAW.VIEW_ESTIMATION_WEIGHTS)
    view_estimater.load_state_dict(
        view_checkpoint['view_estimator_state_dict'])
    print('[INFO] Best view estimation result at epoch %d ...' %
          view_checkpoint['epoch_idx'])

    for img_path in os.listdir(cfg.EVALUATE_HAND_DRAW.INPUT_IMAGE_FOLDER):
        eval_id = int(img_path[:-4])
        input_img_path = os.path.join(
            cfg.EVALUATE_HAND_DRAW.INPUT_IMAGE_FOLDER, img_path)
        print(input_img_path)
        evaluate_hand_draw_img(cfg, encoder, decoder, view_estimater,
                               input_img_path, eval_transforms, eval_id)
Code example #22
 def __init__(self, h_dim, res_h_dim, n_res_layers, embedding_dim,
              n_dimension_changes):
     super(E2EEncoder, self).__init__()
     # encode image into continuous latent space
     self.encoder = Encoder(3, h_dim, n_res_layers, res_h_dim,
                            n_dimension_changes)
     self.pre_quantization_conv = nn.Conv2d(h_dim,
                                            embedding_dim,
                                            kernel_size=1,
                                            stride=1)
Code example #23
def main():
    parser = argparse.ArgumentParser(description="Training attention model")

    parser.add_argument(
        "-t",
        "--train_data",
        metavar="train_data",
        type=str,
        default='../data/processed/source_replay_twitter_data.txt',
        dest="train_data",
        help="set the training data ")
    parser.add_argument("-e",
                        "--embedding_size",
                        metavar="embedding_size",
                        type=int,
                        default=50,
                        dest="embedding_size",
                        help="set the embedding size ")
    parser.add_argument("-H",
                        "--hidden_size",
                        metavar="hidden_size",
                        type=int,
                        default=512,
                        dest="hidden_size",
                        help="set the hidden size ")
    parser.add_argument("-f",
                        "--fine_tune_model_name",
                        metavar="fine_tune_model_name",
                        type=str,
                        default='../models/glove_wiki/glove_model_40.pth',
                        dest="fine_tune_model_name",
                        help="set the fine tune model name ")
    args = parser.parse_args()

    data_loader_attention = DataLoaderAttention(file_name=args.train_data)
    data_loader_attention.load_data()
    source2index, index2source, target2index, index2target, train_data = \
        data_loader_attention.load_data()
    EMBEDDING_SIZE = args.embedding_size
    HIDDEN_SIZE = args.hidden_size

    encoder = Encoder(len(source2index), EMBEDDING_SIZE, HIDDEN_SIZE, 3, True)
    decoder = Decoder(len(target2index), EMBEDDING_SIZE, HIDDEN_SIZE * 2)

    trainer = Trainer(epoch=600,
                      batch_size=64,
                      fine_tune_model=args.fine_tune_model_name)

    trainer.train_attention(train_data=train_data,
                            source2index=source2index,
                            target2index=target2index,
                            index2source=index2source,
                            index2target=index2target,
                            encoder_model=encoder,
                            decoder_model=decoder)
Code example #24
    def __init__(self, faqdataset, embedding_layer, args):
        '''
            INFObot/qbot

            Uses an encoder network for input sequences (questions, answers and
            history) and a decoder network for generating a response (question).
        '''
        super(INFOBOT, self).__init__()

        self.args = args
        self.tagw2i = faqdataset.tagw2i
        self.tagi2w = faqdataset.tagi2w

        self.tagw2i['STOP'] = len(self.tagw2i)
        self.tagi2w.append('STOP')

        self.faqpool = faqdataset.faqlist
        self.faqnum = len(faqdataset)
        self.actiondim = len(
            self.tagw2i)  #The last action is 'STOP guessing'/'provide faq'

        self.statedim = 300  #args.embedding_dim

        self.hidden_size = self.statedim

        self.state_encoder = Encoder(embedding_layer, args)

        self.policynet = DQN(self.state_encoder, self.statedim, self.actiondim)
        self.targetnet = DQN(self.state_encoder, self.statedim, self.actiondim)

        if args.sharing_encoder:
            self.faq_encoder = self.state_encoder  #
            print('The faq embedding and state encoding are shared')
        else:
            self.faq_encoder = Encoder(embedding_layer, args)

        self.faqguessed = 0  #make a change here
        self.steps = 0
        print('action size: {}'.format(self.actiondim))
        print('Infobot initialized: {} {}'.format(self.faqnum,
                                                  self.faqpool[1]))
Code example #25
File: synthesis.py Project: tiberiu44/TTS-Cube
def load_encoder(params, base_path='data/models'):
    from io_modules.dataset import Encodings
    from models.encoder import Encoder

    encodings = Encodings()
    encodings.load('%s/encoder.encodings' % base_path)

    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('%s/rnn_encoder' % base_path)

    return encoder
Code example #26
def train():
    torch.backends.cudnn.benchmark = True

    _, dataloader = create_dataloader(config.IMG_DIR + "/train", config.MESH_DIR + "/train",
                                            batch_size=config.BATCH_SIZE, used_layers=config.USED_LAYERS,
                                            img_size=config.IMAGE_SIZE, map_size=config.MAP_SIZE,
                                            augment=config.AUGMENT, workers=config.NUM_WORKERS,
                                            pin_memory=config.PIN_MEMORY, shuffle=True)

    in_channels = num_channels(config.USED_LAYERS)
    encoder = Encoder(in_channels=in_channels)
    decoder = Decoder(num_classes=config.NUM_CLASSES+1)
    encoder.apply(init_weights)
    decoder.apply(init_weights)
    encoder_solver = torch.optim.Adam(filter(lambda p: p.requires_grad, encoder.parameters()),
                                      lr=config.ENCODER_LEARNING_RATE,
                                      betas=config.BETAS)
    decoder_solver = torch.optim.Adam(decoder.parameters(),
                                      lr=config.DECODER_LEARNING_RATE,
                                      betas=config.BETAS)
    encoder_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(encoder_solver,
                                                                milestones=config.ENCODER_LR_MILESTONES,
                                                                gamma=config.GAMMA)
    decoder_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(decoder_solver,
                                                                milestones=config.DECODER_LR_MILESTONES,
                                                                gamma=config.GAMMA)
    encoder = encoder.to(config.DEVICE)
    decoder = decoder.to(config.DEVICE)

    loss_fn = LossFunction()

    init_epoch = 0
    if config.CHECKPOINT_FILE and config.LOAD_MODEL:
        init_epoch, encoder, decoder = load_checkpoint(encoder, decoder, config.CHECKPOINT_FILE, config.DEVICE)

    output_dir = os.path.join(config.OUT_PATH, re.sub("[^0-9a-zA-Z]+", "-", dt.now().isoformat()))

    for epoch_idx in range(init_epoch, config.NUM_EPOCHS):
        encoder.train()
        decoder.train()
        train_one_epoch(encoder, decoder, dataloader, loss_fn, encoder_solver, decoder_solver, epoch_idx)
        encoder_lr_scheduler.step()
        decoder_lr_scheduler.step()

        if config.TEST:
            test(encoder, decoder)
        if config.SAVE_MODEL:
            save_checkpoint(epoch_idx, encoder, decoder, output_dir)

    if not config.TEST:
        test(encoder, decoder)
    if not config.SAVE_MODEL:
        save_checkpoint(config.NUM_EPOCHS - 1, encoder, decoder, output_dir)
Code example #27
File: distance.py Project: raineydavid/Kaggle-CV
    def __init__(self, que_dim: int, que_input_embs: list,
                 que_output_embs: list, pro_dim: int, pro_input_embs: list,
                 pro_output_embs: list, inter_dim: int, output_dim: int):
        """
        :param que_dim: dimension of question's raw feature vector
        :param que_input_embs: number of unique classes in question's categorical features
        :param que_output_embs: embedding dimensions of question's categorical features
        :param pro_dim: dimension of professional's raw feature vector
        :param pro_input_embs: number of unique classes in professional's categorical features
        :param pro_output_embs: embedding dimensions of professional's categorical features
        :param inter_dim: dimension of Encoder's intermediate layer
        :param output_dim: dimension of high-level feature vectors
        """
        super().__init__()

        # build an Encoder model for questions
        self.que_model = Encoder(que_dim,
                                 inter_dim,
                                 output_dim,
                                 que_input_embs,
                                 que_output_embs,
                                 reg=2.0)
        # same for professionals
        self.pro_model = Encoder(pro_dim,
                                 inter_dim,
                                 output_dim,
                                 pro_input_embs,
                                 pro_output_embs,
                                 reg=0.2)

        # calculate distance between high-level feature vectors
        self.merged = Lambda(
            lambda x: tf.reduce_sum(tf.square(x[0] - x[1]), axis=-1))(
                [self.que_model.outputs[0], self.pro_model.outputs[0]])
        # and apply activation - e^-x here, actually
        self.outputs = Lambda(lambda x: tf.reshape(tf.exp(-self.merged),
                                                   (-1, 1)))(self.merged)

        super().__init__([self.que_model.inputs[0], self.pro_model.inputs[0]],
                         self.outputs)
Code example #28
    def phase_3_train_encoder(params):
        from io_modules.dataset import Dataset
        from io_modules.dataset import Encodings
        from models.encoder import Encoder
        from trainers.encoder import Trainer
        trainset = Dataset("data/processed/train")
        devset = Dataset("data/processed/dev")
        sys.stdout.write('Found ' + str(len(trainset.files)) +
                         ' training files and ' + str(len(devset.files)) +
                         ' development files\n')

        encodings = Encodings()
        count = 0
        if not params.resume:
            for train_file in trainset.files:
                count += 1
                if count % 100 == 0:
                    sys.stdout.write('\r' + str(count) + '/' +
                                     str(len(trainset.files)) +
                                     ' processed files')
                    sys.stdout.flush()
                from io_modules.dataset import DatasetIO
                dio = DatasetIO()
                lab_list = dio.read_lab(train_file + ".lab")
                for entry in lab_list:
                    encodings.update(entry)
            sys.stdout.write('\r' + str(count) + '/' +
                             str(len(trainset.files)) + ' processed files\n')
            sys.stdout.write('Found ' + str(len(encodings.char2int)) +
                             ' unique symbols, ' +
                             str(len(encodings.context2int)) +
                             ' unique features and ' +
                             str(len(encodings.speaker2int)) +
                             ' unique speakers\n')
            encodings.store('data/models/encoder.encodings')
        else:
            encodings.load('data/models/encoder.encodings')
        if params.resume:
            runtime = True  # avoid orthonormal initialization
        else:
            runtime = False
        encoder = Encoder(params, encodings, runtime=runtime)
        if params.resume:
            sys.stdout.write('Resuming from previous checkpoint\n')
            encoder.load('data/models/rnn_encoder')
        if params.no_guided_attention:
            sys.stdout.write('Disabling guided attention\n')
        if params.no_bounds:
            sys.stdout.write(
                'Using internal stopping condition for synthesis\n')
        trainer = Trainer(encoder, trainset, devset)
        trainer.start_training(10, 1000, params)
Code example #29
 def __init__(self, vocabulary_size, sos_token, eos_token, pad_token,
              max_string_length=default_eda['string_max_length'], attention_size=default_attention['size'],
              embedding_size=default_embedding['size'], hidden_size=default_gru['hidden_size'],
              num_layers=default_gru['num_layers'], dropout=default_gru['dropout']):
     super().__init__()
     self.max_string_length = max_string_length
     self.attention_size = attention_size
     self.vocabulary_size = vocabulary_size
     self.encoder = Encoder(vocabulary_size, embedding_size, hidden_size, num_layers, dropout)
     self.decoder = Decoder(vocabulary_size, embedding_size, hidden_size, num_layers, dropout, attention_size,
                            pad_token)
     self.sos_token = sos_token
     self.eos_token = eos_token
Code example #30
    def __init__(self, config, device):
        super(MedicalFSS, self).__init__()
        self.config = config
        resize_dim = self.config['input_size']
        self.encoded_h = int(resize_dim[0] / 2**self.config['n_pool'])
        self.encoded_w = int(resize_dim[1] / 2**self.config['n_pool'])

        self.encoder = Encoder(self.config['path']['init_path'],
                               device)  # .to(device)
        self.decoder = Decoder(input_res=(self.encoded_h, self.encoded_w),
                               output_res=resize_dim).to(device)
        self.q_slice_n = self.config['q_slice']
        self.ch = 256  # number of channels of embedding vector