import torch
import torch.nn as nn


class EncoderDecoder(nn.Module):
    def __init__(self, embedding_size, hidden_size, vocab_size, gru_layers):
        super(EncoderDecoder, self).__init__()
        self.encoder = Encoder(embedding_size)
        self.decoder = Decoder(embedding_size, hidden_size, vocab_size, gru_layers)

    def forward(self, images, captions):
        features = self.encoder(images)
        outputs = self.decoder(features, captions)
        return outputs

    def caption_image(self, image, vocabulary, max_length=50):
        """Greedily decode a caption for a single image, one token at a time."""
        result_caption = []
        with torch.no_grad():
            # Encode the image and add a time dimension for the GRU.
            x = self.encoder(image).unsqueeze(0)
            states = None
            for _ in range(max_length):
                hiddens, states = self.decoder.gru(x, states)
                output = self.decoder.linear(hiddens.squeeze(0))
                predicted = output.argmax(1)
                item = predicted.item()
                result_caption.append(item)
                # Feed the predicted token back in as the next input.
                x = self.decoder.embed(predicted).unsqueeze(0)
                if vocabulary.itos[item] == "<end>":
                    break
        return [vocabulary.itos[idx] for idx in result_caption]
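# A minimal usage sketch for caption_image, assuming `vocab` exposes an
# `itos` index-to-string mapping and the encoder accepts 224x224 RGB input;
# the sizes and the dummy image are illustrative, not from the source.
model = EncoderDecoder(embedding_size=256, hidden_size=512,
                       vocab_size=len(vocab), gru_layers=1)
model.eval()
image = torch.randn(1, 3, 224, 224)  # stand-in for a preprocessed image
tokens = model.caption_image(image, vocab)
print(" ".join(tokens))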
from tkinter import END, filedialog, messagebox

from PIL import Image, ImageTk


class Controller:
    def __init__(self, root):
        # Decoder and MainWindow come from the surrounding project.
        self.model = Decoder()
        self.view = MainWindow(root)
        # Wire up the button click handlers.
        self.view.decode_button.config(command=self.decode_qr)
        self.view.search_qr_button.config(command=self.search_qr)

    def decode_qr(self):
        image_file = filedialog.askopenfilename(
            initialdir='/',
            title="Select file",
            filetypes=(("all files", "*.*"),
                       ("jpeg files", "*.jpg"),
                       ("png files", "*.png"),
                       ("gif files", "*.gif"),
                       ("bmp files", "*.bmp")))
        if image_file != '':
            try:
                image = Image.open(image_file).convert('RGB')
                # Normalize the preview to 400x400 unless it already is.
                if image.width != 400 or image.height != 400:
                    image = image.resize((400, 400))
                output_image = ImageTk.PhotoImage(image)
                self.view.img = output_image
                self.view.image_uploaded = image
                self.view.image_label.config(image=output_image)
                decoded_list = self.model.decodeImage(image)
                self.view.codes_list.delete(0, END)
                if len(decoded_list) == 0:
                    self.view.codes_list.insert(0, "No QR codes found!")
                else:
                    for qr_data in decoded_list:
                        string_data = qr_data.data.decode('utf-8')
                        self.view.codes_list.insert(0, string_data)
            except OSError:
                messagebox.showerror('Error', 'Unable to open file')

    def search_qr(self):
        output_image = ImageTk.PhotoImage(
            self.model.search_qr_code(self.view.image_uploaded))
        self.view.img = output_image
        self.view.image_label.config(image=output_image)
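# A minimal wiring sketch showing how this MVC Controller attaches to a Tk
# root window; Decoder and MainWindow are assumed to come from the project.
import tkinter as tk

root = tk.Tk()
controller = Controller(root)
root.mainloop()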
import torch.nn as nn


class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()
        # Contracting path: each Encoder block doubles the channel count.
        self.enc1 = Encoder(3, 128)
        self.enc2 = Encoder(128, 256)
        self.enc3 = Encoder(256, 512)
        self.enc4 = Encoder(512, 1024)
        # Bottleneck convolutions and the first upsampling step.
        self.conv1 = nn.Conv2d(1024, 2048, 3, padding=1)
        self.conv2 = nn.Conv2d(2048, 2048, 3, padding=1)
        self.t_conv = nn.ConvTranspose2d(2048, 1024, 2, stride=2)
        # Expanding path: input widths leave room for the skip connections.
        self.dec4 = Decoder(2048, 1024)
        self.dec3 = Decoder(1024, 512)
        self.dec2 = Decoder(512, 256)
        self.dec1 = Decoder(256, 128, is_final=True)
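# One plausible forward wiring for the sizes above, assuming each Encoder
# returns (skip, downsampled) and each Decoder consumes (x, skip), doing its
# own upsampling and concatenation; this is a sketch of typical U-Net data
# flow, not the project's actual forward().
def forward(self, x):
    s1, x = self.enc1(x)   # skip: 128 channels
    s2, x = self.enc2(x)   # skip: 256
    s3, x = self.enc3(x)   # skip: 512
    s4, x = self.enc4(x)   # skip: 1024
    x = self.conv2(self.conv1(x))  # bottleneck at 2048 channels
    x = self.t_conv(x)             # upsample back to 1024
    x = self.dec4(x, s4)   # 1024 + 1024-channel skip -> 1024
    x = self.dec3(x, s3)   # -> 512
    x = self.dec2(x, s2)   # -> 256
    return self.dec1(x, s1)  # -> 128, final head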
import json
import os

# TF1-style Graph/Session plus Keras Model; the exact import paths depend on
# the project's TensorFlow/Keras version.
from tensorflow import Graph, Session
from keras.models import Model


def load():
    global session
    global graph
    global model
    global data_result

    data_result = DataResult(None, None)
    with open(script_dir + '/../temp/processed_data.json', 'r') as f:
        json_data = json.load(f)
    data_result.loadJSON(json_data)

    # Build the model inside its own graph/session so it can be reused
    # safely later on.
    graph = Graph()
    with graph.as_default():
        session = Session(graph=graph)
        with session.as_default():
            temp_encoder = Encoder(data_result.input_data)
            temp_decoder = Decoder(data_result.output_data, temp_encoder)
            temp_model = Model([temp_encoder.inputs, temp_decoder.inputs],
                               temp_decoder.outputs)
            temp_model.compile(optimizer='rmsprop',
                               loss='categorical_crossentropy')
            temp_model.load_weights(
                os.path.dirname(__file__) + '/../model_weights.h5')
            model = temp_model
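# A minimal sketch of using the loaded globals: with the TF1 graph/session
# pattern above, any later predict() must run inside the same graph and
# session. `encode_input` is a hypothetical helper, not from the source.
def predict(text):
    with graph.as_default():
        with session.as_default():
            return model.predict(encode_input(text))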
class Transformer(nn.Module):
    def __init__(self, mc):
        """Create transformer structure from Encoder and Decoder blocks.

        mc carries the hyperparameters:
            d_input: int
            d_model: int
            d_output: int
            q: int
            v: int
            h: int
            N: int
            attention_size: int = None
            dropout: float = 0.3
            chunk_mode: str = 'chunk'
            pe: str = None
            pe_period: int = 24
        """
        super().__init__()
        self.mc = mc
        self._d_model = mc.d_model

        self.layers_encoding = nn.ModuleList([
            Encoder(mc.d_model,
                    mc.q,
                    mc.v,
                    mc.h,
                    attention_size=mc.attention_size,
                    dropout=mc.dropout,
                    chunk_mode=mc.chunk_mode) for _ in range(mc.N)
        ])
        self.layers_decoding = nn.ModuleList([
            Decoder(mc.d_model,
                    mc.q,
                    mc.v,
                    mc.h,
                    attention_size=mc.attention_size,
                    dropout=mc.dropout,
                    chunk_mode=mc.chunk_mode) for _ in range(mc.N)
        ])

        # Project inputs up to the model dimension, outputs down to d_output.
        self._embedding = nn.Linear(mc.d_input, mc.d_model)
        self._linear = nn.Linear(mc.d_model, mc.d_output)

        pe_functions = {
            'original': generate_original_PE,
            'regular': generate_regular_PE,
        }
        if mc.pe in pe_functions:
            self._generate_PE = pe_functions[mc.pe]
            self._pe_period = mc.pe_period
        elif mc.pe is None:
            self._generate_PE = None
        else:
            raise NameError(
                f'PE "{mc.pe}" not understood. Must be one of '
                f'{", ".join(pe_functions.keys())} or None.')

        self.name = 'transformer'
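# A minimal configuration sketch showing the attributes mc must expose;
# the concrete values are illustrative, not from the source.
from types import SimpleNamespace

mc = SimpleNamespace(d_input=38, d_model=64, d_output=1, q=8, v=8, h=4, N=2,
                     attention_size=None, dropout=0.3, chunk_mode='chunk',
                     pe='regular', pe_period=24)
net = Transformer(mc)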
from mxnet.gluon import nn


class Transformer(nn.Block):  # gluon block; the exact base class is assumed
    def __init__(self, ch_vocab_size, **kwargs):
        super(Transformer, self).__init__(**kwargs)
        # No Encoder is needed here: the pre-trained BERT word embeddings
        # already encode the English input.
        self.decoder = Decoder(ch_vocab_size)
        with self.name_scope():
            # Project the English word embeddings to model_dim.
            self.en_input_dense = nn.Dense(ghp.model_dim,
                                           use_bias=False,
                                           flatten=False)
            # Project predictions to the size of the Chinese vocabulary.
            self.linear = nn.Dense(ch_vocab_size, use_bias=False, flatten=False)
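# A minimal instantiation sketch; Decoder and ghp.model_dim come from the
# surrounding project, and the vocabulary size here is illustrative.
net = Transformer(ch_vocab_size=8000)
net.initialize()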
import os

from keras.layers import Input
from keras.models import Model


def __init__(self, data_result, model=None):
    self.data_result = data_result
    if model is None:
        # Rebuild the training model and load its weights.
        temp_encoder = Encoder(self.data_result.input_data)
        temp_decoder = Decoder(self.data_result.output_data, temp_encoder)
        temp_model = Model([temp_encoder.inputs, temp_decoder.inputs],
                           temp_decoder.outputs)
        temp_model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy')
        temp_model.load_weights(
            os.path.dirname(__file__) + '/../model_weights.h5')
        self.model = temp_model
    else:
        self.model = model

    self.input_token_index = dict(
        (char, i) for i, char in enumerate(self.data_result.input_data.chars))
    self.target_token_index = dict(
        (char, i) for i, char in enumerate(self.data_result.output_data.chars))

    # Inference encoder: maps the input sequence to the LSTM states.
    self.encoder_inputs = self.model.input[0]  # input_1
    self.encoder_outputs, state_h_enc, state_c_enc = self.model.layers[2].output  # lstm_1
    self.encoder_states = [state_h_enc, state_c_enc]
    self.encoder_model = Model(self.encoder_inputs, self.encoder_states)

    # Inference decoder: steps one token at a time, carrying the states.
    self.decoder_inputs = self.model.input[1]  # input_2
    self.decoder_state_input_h = Input(shape=(UNIT_SIZE,), name='input_3')
    self.decoder_state_input_c = Input(shape=(UNIT_SIZE,), name='input_4')
    self.decoder_states_inputs = [
        self.decoder_state_input_h, self.decoder_state_input_c
    ]
    self.decoder_lstm = self.model.layers[3]
    self.decoder_outputs, self.state_h_dec, self.state_c_dec = self.decoder_lstm(
        self.decoder_inputs, initial_state=self.decoder_states_inputs)
    self.decoder_states = [self.state_h_dec, self.state_c_dec]
    self.decoder_dense = self.model.layers[4]
    self.decoder_outputs = self.decoder_dense(self.decoder_outputs)
    self.decoder_model = Model(
        [self.decoder_inputs] + self.decoder_states_inputs,
        [self.decoder_outputs] + self.decoder_states)

    # Reverse-lookup token index to decode sequences back to
    # something readable.
    self.reverse_input_char_index = dict(
        (i, char) for char, i in self.input_token_index.items())
    self.reverse_target_char_index = dict(
        (i, char) for char, i in self.target_token_index.items())
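# A sketch of the standard Keras seq2seq decode loop built on the two
# inference sub-models above; the '\t' start and '\n' stop characters follow
# the classic Keras seq2seq tutorial and are assumptions here.
import numpy as np

def decode_sequence(self, input_seq, max_len=100):
    states_value = self.encoder_model.predict(input_seq)
    target_seq = np.zeros((1, 1, len(self.target_token_index)))
    target_seq[0, 0, self.target_token_index['\t']] = 1.0
    decoded = ''
    while len(decoded) < max_len:
        output_tokens, h, c = self.decoder_model.predict(
            [target_seq] + states_value)
        idx = int(np.argmax(output_tokens[0, -1, :]))
        char = self.reverse_target_char_index[idx]
        if char == '\n':
            break
        decoded += char
        # Feed the sampled character back in and carry the states forward.
        target_seq = np.zeros((1, 1, len(self.target_token_index)))
        target_seq[0, 0, idx] = 1.0
        states_value = [h, c]
    return decoded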
import torch.nn as nn
import torch.optim as optim


def __init__(self, args, device):
    super().__init__()
    self.args = args
    self.model_infos = args.model_infos
    # Width of the encoder's last layer vs. the sampled latent code.
    self.latent_dim = self.model_infos["encoder_dims"][-1]
    self.latent_sample_dim = self.model_infos["latent_dim"]

    self.encoder = Encoder(self.args)
    self.decoder = Decoder(self.args)
    self.optimizer = optim.Adam(self.parameters(),
                                lr=self.model_infos["learning_rate"])
    self.device = device
    self.mse_loss = nn.MSELoss()
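# A minimal reconstruction-training sketch, assuming the Encoder and Decoder
# compose directly as a plain autoencoder; the latent_sample_dim field hints
# at a variational sampling step that this sketch deliberately omits, and the
# batch variable is illustrative.
def train_step(self, batch):
    self.optimizer.zero_grad()
    batch = batch.to(self.device)
    latent = self.encoder(batch)
    recon = self.decoder(latent)
    loss = self.mse_loss(recon, batch)
    loss.backward()
    self.optimizer.step()
    return loss.item()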
    train_targets_vocab,
    args.test_input_path,
    None,
    shuffle=False,
    batch_size=args.batch_size,
    device=args.device,
    is_train=False)

###############################
# get models
###############################
encoder = Encoder(train_loader.train_inputs_vocab.word_counts,
                  args.encoder_embedded_size,
                  args.encoder_hidden_size).to(args.device)
decoder = Decoder(train_loader.train_targets_vocab.word_counts,
                  args.decoder_embedded_size,
                  args.decoder_hidden_size,
                  train_loader.SOS_IDX,
                  train_loader.EOS_IDX,
                  args.teacher_forcing_ratio,
                  args.device).to(args.device)
seq2seq = Seq2Seq(encoder, decoder, args.device)

###############################
# get optimizer
###############################
optimizer = torch.optim.Adam(seq2seq.parameters(), lr=args.learning_rate)

###############################
# check directories exist
###############################
os.makedirs(args.save_dir_path, exist_ok=True)


def main():
torch.cuda.empty_cache()
print(f'Working on {str(device).upper()}')

'''Initializations'''
# Initialize and load dataset
train_loader, valid_loader = initialize_data(args)

# Initialize models
encoder = Encoder(num_nodes=args.num_nodes,
                  node_size=args.inputNodeSize,
                  latent_node_size=args.latentNodeSize,
                  num_hidden_node_layers=args.num_hiddenNodeLayers,
                  hidden_edge_size=args.hiddenEdgeSize,
                  output_edge_size=args.outputEdgeSize,
                  num_mps=args.num_mps,
                  dropout=args.dropout,
                  alpha=args.alpha,
                  intensity=args.intensity,
                  batch_norm=args.batch_norm,
                  device=device).to(device)
decoder = Decoder(num_nodes=args.num_nodes,
                  node_size=args.inputNodeSize,
                  latent_node_size=args.latentNodeSize,
                  num_hidden_node_layers=args.num_hiddenNodeLayers,
                  hidden_edge_size=args.hiddenEdgeSize,
                  output_edge_size=args.outputEdgeSize,
                  num_mps=args.num_mps,
                  dropout=args.dropout,
                  alpha=args.alpha,
                  intensity=args.intensity,
                  batch_norm=args.batch_norm,
                  device=device).to(device)

# Both on GPU
if next(encoder.parameters()).is_cuda and next(decoder.parameters()).is_cuda:
    print('The models are initialized on GPU...')
# One on CPU and the other on GPU
elif next(encoder.parameters()).is_cuda or next(decoder.parameters()).is_cuda:
    raise AssertionError("The encoder and decoder are not trained on the same device!")
# Both on CPU
else:
    print('The models are initialized on CPU...')

print(f'Training over {args.num_epochs} epochs...')

'''Training'''
with open(script_dir + '/../temp/processed_data.json', 'r') as f:
    json_data = json.load(f)
data_result.loadJSON(json_data)
print('END: loading_data')
print('')

# Create the encoder
print('STARTING: create encoder')
encoder = Encoder(data_result.input_data)
print('END: create encoder')
print('')

# Create the decoder
print('STARTING: create decoder')
decoder = Decoder(data_result.output_data, encoder)
print('END: create decoder')
print('')

# Create the model
print('STARTING: create model')
model = Model([encoder.inputs, decoder.inputs], decoder.outputs)
print('END: create model')
print('')

# Compile the model
print('STARTING: compile model')
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
print('END: compile model')
print('')
import argparse
import os
import queue
import random
import time

import torch
import torch.nn as nn
import torch.optim as optim


def main():
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size', type=int, default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--bidirectional', action='store_true',
                        help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument('--use_attention', action='store_true',
                        help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4,
                        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--pause', type=int, default=0)
    parser.add_argument('--visdom', type=bool, default=False)

    # Encoder
    parser.add_argument('--d_input', default=80, type=int,
                        help='Dim of encoder input (before LFR)')
    parser.add_argument('--LFR_m', default=1, type=int,
                        help='Low Frame Rate: number of stacked frames '
                             '(assumed default; required by the Encoder below)')
    parser.add_argument('--n_layers_enc', default=6, type=int,
                        help='Number of encoder stacks')
    parser.add_argument('--n_head', default=8, type=int,
                        help='Number of Multi Head Attention (MHA)')
    parser.add_argument('--d_k', default=64, type=int,
                        help='Dimension of key')
    parser.add_argument('--d_v', default=64, type=int,
                        help='Dimension of value')
    parser.add_argument('--d_model', default=512, type=int,
                        help='Dimension of model')
    parser.add_argument('--d_inner', default=2048, type=int,
                        help='Dimension of inner')
    # Defined once only: argparse forbids duplicate option strings.
    parser.add_argument('--dropout', default=0.1, type=float,
                        help='Dropout rate')
    parser.add_argument('--pe_maxlen', default=5000, type=int,
                        help='Positional Encoding max len')

    # Decoder
    parser.add_argument('--d_word_vec', default=512, type=int,
                        help='Dim of decoder embedding')
    parser.add_argument('--n_layers_dec', default=6, type=int,
                        help='Number of decoder stacks')
    parser.add_argument('--tgt_emb_prj_weight_sharing', default=1, type=int,
                        help='share decoder embedding with decoder projection')
    args = parser.parse_args()

    char2index, index2char = label_loader.load_label('./data/hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    feature_size = 256  # log-mel spectrogram
    # feature_size = N_FFT / 2 + 1  # stft

    encoder = Encoder(args.d_input * args.LFR_m, args.n_layers_enc,
                      args.n_head, args.d_k, args.d_v,
                      args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
    decoder = Decoder(SOS_token, EOS_token, len(char2index),
                      args.d_word_vec, args.n_layers_dec, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    model.flatten_parameters()

    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum',
                                    ignore_index=PAD_token).to(device)

    bind_model(model, optimizer)

    if args.pause == 1:
        nsml.paused(scope=locals())

    if args.mode != "train":
        return

    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"
            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(os.path.join(DATASET_PATH, 'train_data', script_path))

    best_loss = 1e10
    best_cer = 1e10
    begin_epoch = 0

    # Load all target scripts up front to reduce disk I/O.
    target_path = os.path.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.05)

    logger.info('start')

    if args.visdom:
        train_visual = Visual(train_batch_num)
        eval_visual = Visual(1)

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):
        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()

        if args.visdom:
            train_loss, train_cer = train(model, train_batch_num, train_queue,
                                          criterion, optimizer, device,
                                          train_begin, args.workers, 10,
                                          args.teacher_forcing, train_visual)
        else:
            train_loss, train_cer = train(model, train_batch_num, train_queue,
                                          criterion, optimizer, device,
                                          train_begin, args.workers, 10,
                                          args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' %
                    (epoch, train_loss, train_cer))

        train_loader.join()

        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                      args.batch_size, 0)
        valid_loader.start()

        if args.visdom:
            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue,
                                           criterion, device, eval_visual)
        else:
            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue,
                                           criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' %
                    (epoch, eval_loss, eval_cer))

        valid_loader.join()

        nsml.report(False, step=epoch,
                    train_epoch__loss=train_loss, train_epoch__cer=train_cer,
                    eval__loss=eval_loss, eval__cer=eval_cer)

        best_loss_model = (eval_loss < best_loss)
        best_cer_model = (eval_cer < best_cer)
        nsml.save(args.save_name)

        if best_loss_model:
            nsml.save('best_loss')
            best_loss = eval_loss

        if best_cer_model:
            nsml.save('best_cer')
            best_cer = eval_cer
import torch
import torch.nn as nn
from torch.distributions import Normal


class ParaphraseModel(nn.Module):
    '''
    dictionary_size = the size of the dictionary in the dataset
    embedding_dim = each word in the dictionary is embedded into a vector
        space with that dimension
    rnn_hidden_size = the hidden size of each LSTM in the model
    rnn_num_layers = the number of layers in each LSTM in the model
    z_dim = the encoder encodes the sentence into a z-vector space with
        that dimension
    '''

    def __init__(self,
                 dictionary_size=100,
                 embedding_dim=1100,
                 rnn_hidden_size=600,
                 rnn_num_layers=2,
                 z_dim=1100):
        # Should embedding_dim be the same as z_dim?
        super(ParaphraseModel, self).__init__()
        self.embedding = nn.Embedding(
            dictionary_size,
            embedding_dim)  # could be replaced by a word embedding like word2vec
        self.encoder = Encoder(embedding_dim, rnn_hidden_size, rnn_num_layers,
                               z_dim)
        self.decoder = Decoder(embedding_dim, rnn_hidden_size, rnn_num_layers,
                               dictionary_size)
        self.cel = nn.CrossEntropyLoss(ignore_index=-1)  # cross entropy
        self.dictionary_size = dictionary_size
        self.embedding_dim = embedding_dim

    def train_model(self, xo, xp, xo_len, xp_len, kld_coef=1):
        logits, z, mu, logvar = self.AE_forward(xo, xp, xo_len, xp_len)
        cel_loss = self.cel(
            logits.view(-1, self.dictionary_size).contiguous(),
            xp.cuda().view(-1))
        # KL divergence between the posterior N(mu, sigma) and N(0, I).
        kl_loss = -0.5 * (1 + logvar - mu.pow(2) - logvar.exp()).sum(1).mean()
        total_loss = cel_loss + kld_coef * kl_loss
        # print(cel_loss, kl_loss)
        return total_loss

    def AE_forward(self, xo, xp, xo_len, xp_len):
        xo_embed = self.embedding(xo.cuda())
        xp_embed = self.embedding(xp.cuda())
        mu, logvar = self.encoder(xo_embed, xp_embed, xo_len, xp_len)
        # Reparameterization trick: draw from the standard normal N(0, I)
        # and rescale by the predicted std and mean.
        std = torch.exp(0.5 * logvar)
        nd = Normal(torch.zeros_like(mu), torch.ones_like(std))
        z = nd.sample() * std + mu
        logits = self.decoder(xo_embed, z, xo_len, xp_len)
        return logits, z, mu, logvar

    def infer(self, xo, xo_len):
        xo_embed = self.embedding(xo.cuda())
        _, (hT, cT) = self.decoder.ose(xo_embed, xo_len)
        completed_sentences = torch.zeros(len(xo_embed))
        sentences = []
        mu, sigma = torch.zeros(len(xo), self.embedding_dim), torch.ones(
            len(xo), self.embedding_dim)
        nd = Normal(mu, sigma)
        z = nd.sample().cuda()
        out = hT[-1]
        steps = 0
        while not all(completed_sentences):
            real_inp = torch.cat((z, out), 1).unsqueeze(1)
            output, (hT, cT) = self.decoder.pse(real_inp,
                                                torch.tensor([1] * len(z)),
                                                h0=hT,
                                                c0=cT)
            out = hT[-1]
            probs = self.decoder.linear(out)
            topwords = [word_probs.topk(1)[1] for word_probs in probs]
            for j, result in enumerate(topwords):
                if int(result) == EOS_TOKEN:
                    completed_sentences[j] = 1
            sentences.append(topwords)
            steps += 1
            if steps == MAX_PARA_LENGTH:
                break
        return sentences
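# A minimal training-loop sketch using train_model; the data loader, the
# optimizer settings, and the linear KL-annealing schedule for kld_coef are
# illustrative assumptions, not from the source.
model = ParaphraseModel(dictionary_size=10000).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
for step, (xo, xp, xo_len, xp_len) in enumerate(loader):
    kld_coef = min(1.0, step / 10000)  # anneal the KL weight from 0 to 1
    loss = model.train_model(xo, xp, xo_len, xp_len, kld_coef=kld_coef)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()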