def main():
    # get and process data
    data = utils.DateData(2000)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3],
          "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print(
        "x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
        "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_HEAD, data.num_word, DROP_RATE)

    # training
    t0 = time.time()
    for t in range(1000):
        bx, by, seq_len = data.sample(64)
        bx, by = utils.pad_zero(bx, max_len=MAX_LEN), utils.pad_zero(by, max_len=MAX_LEN + 1)
        loss = model.step(bx, by)
        if t % 50 == 0:
            logits = model(bx[:1], by[:1, :-1], False)[0].numpy()
            t1 = time.time()
            print(
                "step: ", t,
                "| time: %.2f" % (t1 - t0),
                "| loss: %.4f" % loss.numpy(),
                "| target: ", "".join([data.i2v[i] for i in by[0, 1:] if i != data.v2i["<PAD>"]]),
                "| inference: ", "".join([data.i2v[i] for i in np.argmax(logits, axis=1) if i != data.v2i["<PAD>"]]),
            )
            t0 = t1

    os.makedirs("./visual_helper/transformer", exist_ok=True)
    model.save_weights("./visual_helper/transformer/model.ckpt")
    with open("./visual_helper/transformer_v2i_i2v.pkl", "wb") as f:
        pickle.dump({"v2i": data.v2i, "i2v": data.i2v}, f)

    # prediction
    src_seq = "02-11-30"
    print("src: ", src_seq, "\nprediction: ", model.translate(src_seq, data.v2i, data.i2v))

    # save attention matrix for visualization
    _ = model(bx[:1], by[:1, :-1], training=False)
    data = {
        "src": [data.i2v[i] for i in data.x[0]],
        "tgt": [data.i2v[i] for i in data.y[0]],
        "attentions": model.attentions,
    }
    with open("./visual_helper/transformer_attention_matrix.pkl", "wb") as f:
        pickle.dump(data, f)
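# NOTE: utils.pad_zero itself does not appear in this listing. The sketch below is an
# assumption, not the project's actual implementation: a sequence-padding variant that
# right-pads batches of integer token ids with zeros (0 assumed to be the <PAD> index)
# up to max_len, matching the utils.pad_zero(batch, max_len=...) calls above.
import numpy as np

def pad_zero_sketch(seqs, max_len):
    padded = np.zeros((len(seqs), max_len), dtype=np.int64)
    for i, seq in enumerate(seqs):
        n = min(len(seq), max_len)
        padded[i, :n] = seq[:n]  # truncate sequences longer than max_len
    return padded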
def train(model, data, step):
    # training
    t0 = time.time()
    for t in range(step):
        bx, by, seq_len = data.sample(64)
        bx, by = utils.pad_zero(bx, max_len=MAX_LEN), utils.pad_zero(by, max_len=MAX_LEN + 1)
        loss, logits = model.step(bx, by)
        if t % 50 == 0:
            logits = logits[0].numpy()
            t1 = time.time()
            print(
                "step: ", t,
                "| time: %.2f" % (t1 - t0),
                "| loss: %.4f" % loss.numpy(),
                "| target: ", "".join([data.i2v[i] for i in by[0, 1:10]]),
                "| inference: ", "".join([data.i2v[i] for i in np.argmax(logits, axis=1)[:10]]),
            )
            t0 = t1
    os.makedirs("./visual/models/transformer", exist_ok=True)
    model.save_weights("./visual/models/transformer/model.ckpt")
    with open("./visual/tmp/transformer_v2i_i2v.pkl", "wb") as f:
        pickle.dump({"v2i": data.v2i, "i2v": data.i2v}, f)
def translate(self, src, v2i, i2v):
    src_pad = utils.pad_zero(src, self.max_len)
    # decoder input starts with <GO> and is filled in position by position
    tgt = utils.pad_zero(
        np.array([[v2i["<GO>"], ] for _ in range(len(src))]), self.max_len + 1)
    tgti = 0
    x_embed = self.embed(src_pad)
    encoded_z = self.encoder.call(x_embed, False, mask=self._pad_mask(src_pad))
    # greedy decoding: feed the tokens predicted so far back in and take the argmax
    # at the current position, until <EOS> or max_len is reached
    while True:
        y = tgt[:, :-1]
        y_embed = self.embed(y)
        decoded_z = self.decoder.call(
            y_embed, encoded_z, False,
            look_ahead_mask=self._look_ahead_mask(y),
            pad_mask=self._pad_mask(y))
        logit = self.o(decoded_z)[0, tgti, :].numpy()
        idx = np.argmax(logit)
        tgti += 1
        tgt[0, tgti] = idx
        if idx == v2i["<EOS>"] or tgti >= self.max_len:
            break
    return "".join([i2v[i] for i in tgt[0, 1:tgti]])
def get_date_url(self, dateobj):
    qs = [('datef', utils.dateobj_to_str(dateobj, '-', reverse=True)),
          ('selfday', utils.pad_zero(dateobj.day)),
          ('selfmonth', utils.pad_zero(dateobj.month)),
          ('selfyear', utils.pad_zero(dateobj.year)),
          ('B1', 'Search')]
    query = string.join(['%s=%s' % (q[0], q[1]) for q in qs], '&')
    return self.courturl + 'dojqry.asp?' + query
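# NOTE: in the crawler snippets (get_date_url, post_data, date_in_form, ...) pad_zero is
# applied to single day/month/year values rather than to sequences. A minimal sketch of
# that scalar variant, assuming it only zero-pads an integer to at least two digits as a
# string; this is an illustration, not the project's actual helper.
def pad_zero_scalar_sketch(value):
    return '%02d' % int(value)

# e.g. pad_zero_scalar_sketch(5) -> '05', pad_zero_scalar_sketch(12) -> '12'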
def train(emb_dim=32, n_layer=3, n_head=4):
    dataset = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ", dataset.date_cn[:3],
          "\nEnglish time order: dd/M/yyyy", dataset.date_en[:3])
    print("Vocabularies: ", dataset.vocab)
    print(
        f"x index sample: \n{dataset.idx2str(dataset.x[0])}\n{dataset.x[0]}",
        f"\ny index sample: \n{dataset.idx2str(dataset.y[0])}\n{dataset.y[0]}")
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    model = Transformer(n_vocab=dataset.num_word, max_len=MAX_LEN, n_layer=n_layer,
                        emb_dim=emb_dim, n_head=n_head, drop_rate=0.1, padding_idx=0)
    if torch.cuda.is_available():
        print("GPU training available")
        device = torch.device("cuda")
        model = model.cuda()
    else:
        device = torch.device("cpu")
        model = model.cpu()
    for i in range(100):
        for batch_idx, batch in enumerate(loader):
            bx, by, decoder_len = batch
            bx = torch.from_numpy(utils.pad_zero(bx, max_len=MAX_LEN)).type(torch.LongTensor).to(device)
            by = torch.from_numpy(utils.pad_zero(by, MAX_LEN + 1)).type(torch.LongTensor).to(device)
            loss, logits = model.step(bx, by)
            if batch_idx % 50 == 0:
                target = dataset.idx2str(by[0, 1:-1].cpu().data.numpy())
                pred = model.translate(bx[0:1], dataset.v2i, dataset.i2v)
                res = dataset.idx2str(pred[0].cpu().data.numpy())
                src = dataset.idx2str(bx[0].cpu().data.numpy())
                print(
                    "Epoch: ", i,
                    "| t: ", batch_idx,
                    "| loss: %.3f" % loss,
                    "| input: ", src,
                    "| target: ", target,
                    "| inference: ", res,
                )
def translate(self, src, v2i, i2v):
    src_pad = utils.pad_zero(np.array([v2i[v] for v in src])[None, :], self.max_len)
    tgt_seq = "<GO>"
    tgt = utils.pad_zero(np.array([v2i[tgt_seq], ])[None, :], self.max_len + 1)
    tgti = 0
    while True:
        logit = self.sess.run(self.logits,
                              {self.tfx: src_pad, self.tfy: tgt, self.training: False})[0, tgti, :]
        idx = np.argmax(logit)
        tgti += 1
        tgt[0, tgti] = idx
        if idx == v2i["<EOS>"] or tgti >= self.max_len:
            break
    return "".join([i2v[i] for i in tgt[0, 1:tgti]])
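# Hedged usage sketch for the translate() above: it expects the raw source string (each
# character is looked up in v2i) plus the vocab mappings; model, v2i and i2v are assumed
# to already be in scope, and the sample date reuses the src_seq from the training script.
print("prediction: ", model.translate("02-11-30", v2i, i2v))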
def post_data(self, dateobj):
    postdata = [('user_id', ''), ('find_val', ''),
                ('cic_val', 'all'), ('val', 'CA'),
                ('fromday', utils.pad_zero(dateobj.day)),
                ('frommonth', utils.pad_zero(dateobj.month)),
                ('fromyear', dateobj.year),
                ('today', utils.pad_zero(dateobj.day)),
                ('tomonth', utils.pad_zero(dateobj.month)),
                ('toyear', dateobj.year),
                ('submit', 'Go')]
    return postdata
def translate(self, src, v2i, i2v):
    self.eval()
    device = next(self.parameters()).device
    src_pad = src
    # Initialize the decoder input by constructing a matrix M of shape [n, self.max_len + 1] with
    # M[n, 0] = start token id and M[n, 1:] = 0.
    target = torch.from_numpy(
        utils.pad_zero(
            np.array([[v2i["<GO>"], ] for _ in range(len(src))]), self.max_len + 1)).to(device)
    x_embed = self.embed(src_pad)
    encoded_z = self.encoder(x_embed, False, mask=self._pad_mask(src_pad))
    for i in range(0, self.max_len):
        y = target[:, :-1]
        y_embed = self.embed(y)
        decoded_z = self.decoder(y_embed, encoded_z, False,
                                 self._look_ahead_mask(y), self._pad_mask(src_pad))
        o = self.o(decoded_z)[:, i, :]
        idx = o.argmax(dim=1).detach()
        # Update the decoder input so the next iteration predicts the following position.
        target[:, i + 1] = idx
    self.train()
    return target
def translate(self, src, i2v, v2i):
    src = tf.reshape(src, (-1, src.shape[-1]))
    src_pad = utils.pad_zero(src, self.max_len)
    tgt = utils.pad_zero(v2i["<GO>"] * tf.ones_like(src), self.max_len + 1)
    tgti = 0
    x_embed = self.embed(src_pad)
    encoded_z = self.encoder(x_embed, mask=self._pad_mask(src_pad))
    while True:
        y = tgt[:, :-1]
        y_embed = self.embed(y)
        decoded_z = self.decoder((encoded_z, y_embed),
                                 look_ahead_mask=self._look_ahead_mask(y),
                                 pad_mask=self._pad_mask(src_pad))
        logit = self.o(decoded_z)[:, tgti, :].numpy()
        idx = np.argmax(logit, 1)
        tgti += 1
        tgt[:, tgti] = idx
        if tgti >= self.max_len:
            break
    return ["".join([i2v[i] for i in tgt[j, 1:tgti]]) for j in range(len(src))]
def translate(self, src, v2i, i2v):
    src_pad = utils.pad_zero(np.array([v2i[v] for v in src])[None, :], self.max_len)
    tgt = utils.pad_zero(np.array([v2i["<GO>"], ])[None, :], self.max_len + 1)
    tgti = 0
    x_embed = self.embed(src_pad)
    encoded_z = self.encoder(x_embed, False, mask=self._pad_mask(src_pad))
    while True:
        y_embed = self.embed(tgt[:, :-1])
        decoded_z = self.decoder(y_embed, encoded_z, False, mask=self._look_ahead_mask())
        logit = self.o(decoded_z)[0, tgti, :].numpy()
        idx = np.argmax(logit)
        tgti += 1
        tgt[0, tgti] = idx
        if idx == v2i["<EOS>"] or tgti >= self.max_len:
            break
    return "".join([i2v[i] for i in tgt[0, 1:tgti]])
def translate(self, src, v2i, i2v):
    src_pad = utils.pad_zero(src, self.max_len)
    tgt = utils.pad_zero(
        np.array([[v2i["<GO>"], ] for _ in range(len(src))]), self.max_len + 1)
    tgti = 0
    x_embed = self.embed(src_pad)
    encoded_z = self.encoder.call(x_embed, False, mask=self._pad_mask(src_pad))
    # use the previously generated token to predict the next one,
    # until the maximum sentence length is reached
    while True:
        y = tgt[:, :-1]
        y_embed = self.embed(y)
        decoded_z = self.decoder.call(
            y_embed, encoded_z, False,
            yz_look_ahead_mask=self._look_ahead_mask(y),
            xz_pad_mask=self._pad_mask(src_pad))
        # all positions are passed through the decoder together (even those that have
        # not been generated yet and are still empty); after decoding, only the position
        # currently being predicted is read out
        logits = self.o(decoded_z)[:, tgti, :].numpy()
        idx = np.argmax(logits, axis=1)
        tgti += 1
        # store the predicted token indices as the result for this position
        tgt[:, tgti] = idx
        if tgti >= self.max_len:
            break
    return ["".join([i2v[i] for i in tgt[j, 1:tgti]]) for j in range(len(src))]
def get_post_data(self, tags, dateobj):
    postdata = []
    today = datetime.date.today()
    for tag in tags:
        name = None
        value = None
        if tag.name == 'input':
            name = tag.get('name')
            value = tag.get('value')
            t = tag.get('type')
            if t == 'image':
                continue
            if name in ['btnGono', 'btnOfficeName', 'btnreset']:
                continue
        elif tag.name == 'select':
            name = tag.get('name')
            if name == 'ddloffcode':
                value = '0'
            elif name == 'ddlcatcode':
                value = '0'
            elif name == 'ddldeptcode':
                value = 'All Department'
            elif name == 'ddlgodatefrom' or name == 'ddlgodateto':
                value = utils.pad_zero(dateobj.day)
            elif name == 'ddlgomonfrom' or name == 'ddlgomonto':
                value = utils.pad_zero(dateobj.month)
            elif name == 'ddlgoyearfrom' or name == 'ddlgoyearto':
                value = utils.pad_zero(dateobj.year)
            elif name == 'ddlfromdate_day' or name == 'ddlfromdate_mon':
                value = '01'
            elif name == 'ddlfromdate_year':
                value = '2013'
            elif name == 'ddltodate_date':
                value = utils.pad_zero(today.day)
            elif name == 'ddltodate_mon':
                value = utils.pad_zero(today.month)
            elif name == 'ddltodate_year':
                value = utils.pad_zero(today.year)
        if name:
            if value is None:
                value = u''
            postdata.append((name, value))
    return postdata
def date_in_form(self, dateobj):
    return [
        ("jday", utils.pad_zero(dateobj.day)),
        ("jmonth", utils.pad_zero(dateobj.month)),
        ("jyear", utils.pad_zero(dateobj.year)),
    ]
def date_in_form(self, dateobj):
    return [('jday', utils.pad_zero(dateobj.day)),
            ('jmonth', utils.pad_zero(dateobj.month)),
            ('jyear', utils.pad_zero(dateobj.year))]
def get_post_data(self, dateobj):
    return [('bsubmit', 'Submit'),
            ('select', utils.pad_zero(dateobj.day)),
            ('select2', utils.pad_zero(dateobj.month)),
            ('select3', utils.pad_zero(dateobj.year % 2000))]
def load_data(data, size):
    x, y, seq_len = data.sample(size)
    x = utils.pad_zero(x, MAX_LEN)
    y = utils.pad_zero(y, MAX_LEN + 1)
    return (x, y[:, :-1]), y[:, 1:]
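# Hedged usage sketch for load_data(): the returned pair follows the usual teacher-forcing
# layout, where the decoder input is the target shifted right (keeps <GO>, drops the last
# token) and the training target is shifted left (drops <GO>). Only names from the snippet
# above are used.
(enc_in, dec_in), dec_target = load_data(data, 64)
# enc_in:     [64, MAX_LEN]  encoder token ids
# dec_in:     [64, MAX_LEN]  y[:, :-1], starts with <GO>
# dec_target: [64, MAX_LEN]  y[:, 1:], what the decoder should predict at each step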
def get_date_string(self, dateobj):
    return '%s/%s/%s' % (utils.pad_zero(dateobj.day),
                         calendar.month_abbr[dateobj.month].lower(),
                         utils.pad_zero(dateobj.year))
def date_in_form(self, dateobj):
    return '%s/%s/%s' % (utils.pad_zero(dateobj.day),
                         utils.pad_zero(dateobj.month),
                         utils.pad_zero(dateobj.year))
def date_in_form(self, dateobj):
    return [('juddt', '%s/%s/%s' % (utils.pad_zero(dateobj.day),
                                    utils.pad_zero(dateobj.month),
                                    utils.pad_zero(dateobj.year)))]
def download_oneday(self, relpath, dateobj):
    dls = []
    if dateobj >= self.flip_date1:
        if dateobj >= self.flip_date2:
            datestr = '%d-%d-%d' % (dateobj.day, dateobj.month, dateobj.year)
        else:
            datestr = '%s-%s-%d' % (utils.pad_zero(dateobj.day),
                                    utils.pad_zero(dateobj.month), dateobj.year)
        mainhref = 'Contents-(%s).pdf' % datestr
    else:
        datestr = utils.dateobj_to_str(dateobj, '', reverse=True)
        mainhref = 'Contents(%s-%s-%s).pdf' % (utils.pad_zero(dateobj.day),
                                               utils.pad_zero(dateobj.month),
                                               utils.pad_zero(dateobj.year % 100))

    dateurl = self.baseurl % datestr
    docurl = urllib.basejoin(dateurl, mainhref)

    mainmeta = utils.MetaInfo()
    mainmeta.set_date(dateobj)
    mainmeta.set_url(self.url_fix(docurl))

    response = self.download_url(docurl)
    if not response or not response.webpage or response.error:
        return dls

    mainrelurl = os.path.join(relpath, 'main')
    updated = False
    if self.storage_manager.save_rawdoc(self.name, mainrelurl, response.srvresponse, response.webpage):
        self.logger.info(u'Saved rawfile %s' % mainrelurl)
        updated = True

    page_type = self.get_file_extension(response.webpage)
    if page_type != 'pdf':
        self.logger.warn('Got a non-pdf page and we can\'t handle it for date %s', dateobj)
        return dls

    links = []
    linknames = []
    hrefs = utils.extract_links_from_pdf(StringIO(response.webpage))
    for href in hrefs:
        reobj = re.search('(?P<num>Part-\w+)', href)
        if reobj:
            partnum = reobj.groupdict()['num']
        else:
            partnum = '%s' % href
            reobj = re.search('.pdf$', partnum)
            if reobj:  # only strip the extension when the href actually ends in .pdf
                partnum = partnum[:reobj.start()]

        relurl = os.path.join(relpath, partnum)
        docurl = urllib.basejoin(dateurl, href)

        metainfo = utils.MetaInfo()
        metainfo.set_date(dateobj)
        metainfo['partnum'] = partnum

        links.append(relurl)
        linknames.append(partnum)
        if self.save_gazette(relurl, docurl, metainfo):
            dls.append(relurl)

    mainmeta['links'] = links
    mainmeta['linknames'] = linknames
    if self.storage_manager.save_metainfo(self.name, mainrelurl, mainmeta):
        updated = True
        self.logger.info(u'Saved metainfo %s' % mainrelurl)

    if updated:
        dls.append(mainrelurl)

    return dls
                break
        return "".join([i2v[i] for i in tgt[0, 1:tgti]])


# get and process data
vocab, x, y, v2i, i2v, date_cn, date_en = utils.get_date_data()
print("Chinese time order: ", date_cn[:3], "\nEnglish time order: ", date_en[:3])
print("vocabularies: ", vocab)
print("x index sample: \n", x[:2], "\ny index sample: \n", y[:2])

model = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_HEAD, len(vocab), DROP_RATE)

# training
t0 = time.time()
for t in range(1000):
    bi = np.random.randint(0, len(x), size=64)
    bx, by = utils.pad_zero(x[bi], max_len=MAX_LEN), utils.pad_zero(y[bi], max_len=MAX_LEN + 1)
    _, loss_ = model.sess.run([model.train_op, model.loss],
                              {model.tfx: bx, model.tfy: by, model.training: True})
    if t % 50 == 0:
        logits_ = model.sess.run(model.logits,
                                 {model.tfx: bx[:1, :], model.tfy: by[:1, :], model.training: False})
        t1 = time.time()
        print(
            "step: ", t,
            "| time: %.2f" % (t1 - t0),
            "| loss: %.3f" % loss_,
            "| target: ", "".join([i2v[i] for i in by[0, 1:] if i != v2i["<PAD>"]]),
            "| inference: ", "".join([i2v[i] for i in np.argmax(logits_[0], axis=1) if i != v2i["<PAD>"]]),
        )
        t0 = t1

# prediction
src_seq = "02-11-30"
# forward path - joint demosaicing & denoising in action
for t in range(num_steps):
    # get the parameters
    c = np.reshape(Theta[t, :Nk*(Ks**2*Kd-1)], (Nk, (Ks**2*Kd-1)))
    normalized_c = c / np.sqrt(np.sum(c**2, axis=0))
    k = B.dot(normalized_c)
    k = np.reshape(k.T, (Nk, Ks, Ks, Kd))
    w = np.reshape(Theta[t, Nk*(Ks**2*Kd-1):-1], (Nk, Nw))
    l = np.reshape(Theta[t, -1], (1,))

    tmp = np.zeros((Kd, u_t[t,0].shape[0] + 2*padding, u_t[t,0].shape[1] + 2*padding))
    for i in range(Nk):
        conv_k_sum = np.zeros((u_t[t,0].shape[0], u_t[t,0].shape[1]))
        for d in range(Kd):
            ki = k[i,:,:,d]
            u_conv_k = pad_zero(u_t[t,d], padding, padding)
            conv_k_sum += conv2(u_conv_k, ki, 'valid')
        phi_u = RBFforward(conv_k_sum, w[i,:])
        for d in range(Kd):
            ki = k[i,:,:,d]
            tmp[d,:,:] += conv2(phi_u, ki[::-1,::-1], 'full')
    u_t[t+1] = np.clip(u_t[t] - crop_zero(tmp, padding, padding)
                       - l*bwd_bayer(fwd_bayer(u_t[t]) - f), 0.0, 255.0)
    print '.',

# Evaluate
print "\nTest image: %d" % data_config['indices'][example]

# get the result
result = u_t[num_steps]
plt.figure(1)
plt.imshow(swapimdims_3HW_HW3(result).astype('uint8'), interpolation="none")
plt.show()
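# NOTE: pad_zero / crop_zero here are image-space helpers, not the sequence padding used in
# the NLP snippets above. A minimal sketch under that assumption (and assuming ph, pw > 0):
# zero-pad a 2D array on each border so a 'valid' convolution keeps the original size, then
# crop the border back off afterwards. These are illustrative names, not the project's code.
import numpy as np

def pad_zero_image_sketch(img, ph, pw):
    return np.pad(img, ((ph, ph), (pw, pw)), mode='constant')

def crop_zero_image_sketch(img, ph, pw):
    # handles 2D (H, W) and 3D (C, H, W) arrays by cropping the last two axes
    return img[..., ph:-ph, pw:-pw]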