def generateInstance_3a(seed):
    """Draw one sample of the two-bit equality task.

    Two bits are drawn from NumPy's global random state. The target is 1.0
    when both bits are equal and 0.0 otherwise.

    Args:
        seed: Not read by this function.

    Returns:
        ``Instance(x, y)`` where ``x`` is a list of two floats (the bits) and
        ``y`` is a single float.
    """
    first_bit = np.random.randint(0, 2)
    second_bit = np.random.randint(0, 2)
    features = [float(first_bit), float(second_bit)]
    if first_bit == second_bit:
        target = 1.0
    else:
        target = 0.0
    return Instance(features, target)
def generateInstance_5(seed):
    """Draw one sample of a nested-sine sequence task.

    An integer ``n`` is drawn from ``[0, 100]``. For every integer ``m`` the
    emitted value is ``sin(m + sin(m) ** 2)``; the inputs cover the ten
    integers ``n - 10 .. n - 1`` and the targets cover ``n + 1 .. n + 3``.

    Args:
        seed: Not read by this function.

    Returns:
        ``Instance(x, y)`` with ``x`` a list of 10 values and ``y`` a list of
        3 values.
    """
    anchor = np.random.randint(0, 101)

    inputs = []
    for step in range(anchor - 10, anchor):
        inner = np.sin(step)
        inputs.append(np.sin(step + inner * inner))

    targets = []
    for step in range(anchor + 1, anchor + 4):
        inner = np.sin(step)
        targets.append(np.sin(step + inner * inner))

    return Instance(inputs, targets)
def generateInstance_4(seed):
    """Draw one 2-D point and label it with one of eight regions.

    The point is given in polar form: the angle is uniform in ``[0, 2*pi)``
    and the radius is the sum of two uniform draws, folded back as ``2 - u``
    whenever the sum exceeds 1. The label combines the quadrant
    (0: x>=0,y>=0; 1: x<0,y>=0; 2: x<0,y<0; 3: x>=0,y<0) with an offset of 4
    when ``|y| > 1 - |x|``.

    Args:
        seed: Not read by this function.

    Returns:
        ``Instance(x, y)`` with ``x`` the ``[x, y]`` coordinates and ``y`` a
        one-hot list of length 8.
    """
    # The three draws below must stay in this order to consume the global
    # random stream exactly as before.
    angle = 2 * np.pi * np.random.sample(1)[0]
    radial_sum = np.random.sample(1)[0] + np.random.sample(1)[0]
    radius = radial_sum if radial_sum <= 1 else 2 - radial_sum

    px = radius * np.cos(angle)
    py = radius * np.sin(angle)

    in_upper_half = py >= 0.0
    if px >= 0.0:
        region = 0 if in_upper_half else 3
    else:
        region = 1 if in_upper_half else 2

    if np.absolute(py) > 1.0 - np.absolute(px):
        region += 4

    one_hot = [0.0] * 8
    one_hot[region] = 1.0
    return Instance([px, py], one_hot)
def generateInstance_1(seed):
    """Draw one 3-D point near a unit-cube corner and label it by corner.

    Three uniform draws are rounded to pick a corner with 0/1 coordinates.
    A displacement given in spherical coordinates (radius
    ``0.1 * cbrt(u)``, polar angle ``arccos`` of a uniform value in
    ``[-1, 1)``, azimuth uniform in ``[0, 2*pi)``) is added to that corner.
    The label reads the corner coordinates as a 3-bit binary number with the
    first coordinate as the most significant bit.

    Args:
        seed: Not read by this function.

    Returns:
        ``Instance(x, y)`` with ``x`` a list of three coordinates and ``y`` a
        one-hot list of length 8.
    """
    # Draw order (corner, azimuth, cos(polar), radius) is kept as-is so the
    # global random stream is consumed identically.
    corner = np.round(np.random.sample(3))
    azimuth = np.random.random() * 2.0 * np.pi
    cos_polar = np.random.random() * 2.0 - 1.0
    polar = np.arccos(cos_polar)
    radius = 0.1 * np.cbrt(np.random.random())

    displacement = [
        radius * np.sin(polar) * np.cos(azimuth),
        radius * np.sin(polar) * np.sin(azimuth),
        radius * np.cos(polar),
    ]
    features = [base + delta for base, delta in zip(corner, displacement)]

    # Horner-style accumulation: b0 * 4 + b1 * 2 + b2.
    label = 0
    for bit in corner:
        label = label * 2 + int(bit)

    one_hot = [0.0] * 8
    one_hot[label] = 1.0
    return Instance(features, one_hot)
def parse_sentence(line):
    """Split a raw sentence into characters and character bigrams.

    Each bigram is the previous character concatenated with the current one;
    the first character is paired with the ``'-NULL-'`` placeholder.

    Args:
        line: The sentence to split, indexed element by element.

    Returns:
        A fresh ``Instance`` with ``chars`` and ``bichars`` set; both lists
        have one entry per element of ``line``.
    """
    chars = list(line)
    # Shift the sequence right by one so every char lines up with its
    # predecessor, with the placeholder in front.
    predecessors = ['-NULL-'] + chars[:-1]
    bichars = [left + right for left, right in zip(predecessors, chars)]

    inst = Instance()
    inst.chars = chars
    inst.bichars = bichars
    return inst
def parse_conll(info):
    """Build an instance from tab-separated ``id<TAB>char<TAB>label`` lines.

    Character bigrams are built the same way as in the sibling sentence
    parsers: each character is prefixed with its predecessor, and the first
    character is prefixed with ``'-NULL-'``.

    Args:
        info: Iterable of strings, each holding exactly three tab-separated
            fields. The first field is ignored.

    Returns:
        A fresh ``Instance`` with ``chars``, ``bichars`` and ``gold_labels``
        set.

    Raises:
        ValueError: If a line does not split into exactly three fields.
    """
    chars = []
    gold_labels = []
    for line in info:
        # The leading id column is unused; `_` avoids shadowing builtin `id`.
        # NOTE(review): lines are not stripped, so a trailing newline would
        # end up inside the label -- confirm callers strip beforehand.
        _, char, label = line.split("\t")
        chars.append(char)
        gold_labels.append(label)

    predecessors = ['-NULL-'] + chars[:-1]
    bichars = [left + right for left, right in zip(predecessors, chars)]

    inst = Instance()
    inst.chars = chars
    # Previously the bigrams were computed and then dropped; store them so
    # this parser agrees with the other parsers that populate `bichars`.
    inst.bichars = bichars
    inst.gold_labels = gold_labels
    return inst
def parse_sent(info):
    """Build an instance from a space-segmented sentence.

    The sentence is split on single spaces into words; the characters of all
    words are then concatenated in order and paired into bigrams, with
    ``'-NULL-'`` standing in for the predecessor of the first character.
    No end-of-sentence bigram is appended.

    Args:
        info: Sentence string with words separated by ``' '``.

    Returns:
        A fresh ``Instance`` with ``words``, ``chars`` and ``bichars`` set.
    """
    words = info.split(' ')
    inst = Instance()

    chars = [char for word in words for char in word]

    bichars = []
    previous = '-NULL-'
    for char in chars:
        bichars.append(previous + char)
        previous = char

    inst.words = words
    inst.chars = chars
    inst.bichars = bichars
    return inst
def batch_variable_inst(insts, tagids, vocab):
    """Yield a re-tagged copy of each instance plus a correctness flag.

    Instances and tag ids are walked in lockstep. Each tag id is mapped to a
    tag via ``vocab.id2tag`` and a new ``Instance`` is built from the source
    and target fields of the original together with that predicted tag.

    Args:
        insts: Iterable of instances exposing ``src_words``, ``src_heads``,
            ``src_rels``, ``tgt_words``, ``tgt_heads``, ``tgt_rels`` and
            ``tag``.
        tagids: Iterable of predicted tag ids, aligned with ``insts``.
        vocab: Object providing ``id2tag(tagid)``.

    Yields:
        ``(new_instance, is_correct)`` where ``is_correct`` is the result of
        comparing the predicted tag with ``inst.tag``.
    """
    for original, tag_id in zip(insts, tagids):
        predicted_tag = vocab.id2tag(tag_id)
        rebuilt = Instance(
            original.src_words,
            original.src_heads,
            original.src_rels,
            original.tgt_words,
            original.tgt_heads,
            original.tgt_rels,
            predicted_tag,
        )
        is_correct = predicted_tag == original.tag
        yield rebuilt, is_correct
'edu.stanford.nlp.process.PTBTokenizer', '-preserveLines', '-ioFileList', MAP_PATH ] subprocess.call(" ".join(command), shell=True) to_predict = [] if CFG['dir_mode']: for tok in sorted(os.listdir(TOK_DIR)): tok_path = os.path.join(TOK_DIR, tok) to_predict.append(tok_path) else: to_predict.append(TOK_PATH) for tok in to_predict: article = read_text_file(tok) instance = Instance(" ".join(article), None, STEPPER.vocab, CONFIG, None) print("Article: ", " ".join(article)) oovs = [instance.encoder_oovs] idx = torch.from_numpy(instance.encoder_pointer_idx).unsqueeze(0) idx_no_oov = mask_oov(idx, STEPPER.vocab) if CONFIG.encoder == 'Recurrent': enc_outputs, enc_state = STEPPER.encoder(idx_no_oov) dec_first_state = STEPPER.encoder.hidden_final(enc_state) else: # Transformer enc_outputs = STEPPER.encoder(idx_no_oov) dec_first_state = STEPPER.encoder.hidden_final(enc_outputs) STEPPER.bsdecoder.batch_size = 1 STEPPER.bsdecoder.dec_max_len = CONFIG.dec_max_len
def generateInstance_3b(seed):
    """Draw one sample of the ``sin(pi * x) / (pi * x)`` regression task.

    ``x`` is a uniform draw scaled to ``[0.001, 4.001)``; the 0.001 offset
    keeps the denominator away from zero.

    Args:
        seed: Not read by this function.

    Returns:
        ``Instance(x, y)`` where both ``x`` and ``y`` are one-element lists.
    """
    sample = np.random.random() * 4.0 + 0.001
    scaled = sample * np.pi
    return Instance([sample], [np.sin(scaled) / scaled])
def batch_variable_inst(insts, tagids, vocab, tag_logits):
    """Yield a re-tagged copy of each instance, the original, and a flag.

    Instances, tag ids and per-instance logits are walked in lockstep. Each
    tag id is mapped to a tag via ``vocab.id2tag`` and a new ``Instance`` is
    built from the original's fields, the predicted tag and its logits.

    Args:
        insts: Iterable of instances exposing ``src_words``, ``src_heads``,
            ``src_childs``, ``src_key``, ``tgt_words``, ``tgt_heads``,
            ``tgt_childs``, ``tgt_key``, ``type`` and ``tag``.
        tagids: Iterable of predicted tag ids, aligned with ``insts``.
        vocab: Object providing ``id2tag(tagid)``.
        tag_logits: Iterable of per-instance logits, aligned with ``insts``.

    Yields:
        ``(new_instance, original_instance, is_correct)`` where
        ``is_correct`` is the result of comparing the predicted tag with
        ``inst.tag``.
    """
    for original, tag_id, logits in zip(insts, tagids, tag_logits):
        predicted_tag = vocab.id2tag(tag_id)
        rebuilt = Instance(
            original.src_words,
            original.src_heads,
            original.src_childs,
            original.src_key,
            original.tgt_words,
            original.tgt_heads,
            original.tgt_childs,
            original.tgt_key,
            predicted_tag,
            original.type,
            logits,
        )
        is_correct = predicted_tag == original.tag
        yield rebuilt, original, is_correct
def batch_variable_inst(insts, tagids, vocab, tokenizer):
    """Yield a re-tagged copy of each instance plus a correctness flag.

    Instances and tag ids are walked in lockstep. Each tag id is mapped to a
    tag via ``vocab.id2tag`` and a new ``Instance`` is built from the
    original's source/target words, the predicted tag and ``tokenizer``.

    Args:
        insts: Iterable of instances exposing ``src_words``, ``tgt_words``
            and ``tag``.
        tagids: Iterable of predicted tag ids, aligned with ``insts``.
        vocab: Object providing ``id2tag(tagid)``.
        tokenizer: Passed through unchanged to every new ``Instance``.

    Yields:
        ``(new_instance, is_correct)`` where ``is_correct`` is the result of
        comparing the predicted tag with ``inst.tag``.
    """
    for original, tag_id in zip(insts, tagids):
        predicted_tag = vocab.id2tag(tag_id)
        rebuilt = Instance(
            original.src_words,
            original.tgt_words,
            predicted_tag,
            tokenizer,
        )
        is_correct = predicted_tag == original.tag
        yield rebuilt, is_correct
def vis_dict():
    """Summarise the configured article and collect attention data.

    Steps, all driven by module-level configuration:

    1. Append a ``PATH -> TOK_PATH`` line to ``MAP_PATH`` and run the
       Stanford ``PTBTokenizer`` over that file list.
    2. Read the tokenised article from ``TOK_PATH`` and wrap it in an
       ``Instance``.
    3. Run ``STEPPER``'s encoder and decoder and turn the arg-max ids into a
       sentence with ``ids2sentence``.
    4. Post-process the prediction: with dynamic windowing the text is cut at
       the ``num_w``-th stop token (or the last one, if fewer exist) and the
       remaining stop tokens become ``"-->"``; otherwise ``make_readable`` is
       applied.
    5. Remove ``MAP_PATH`` again, whether or not the steps above succeeded.

    Returns:
        dict with keys ``"weights"`` (attention weights cropped to
        summary-length x article-length), ``"summary"`` and ``"article"``
        (space-split token lists) and ``"transitions"`` (``None`` without
        windowing, the windower output for static windowing, or the 1-based
        positions following each ``"-->"`` token for dynamic windowing).
    """
    with open(MAP_PATH, "a") as mf:
        mf.write(f"{PATH} \t {TOK_PATH}\n")

    # MAP_PATH is opened in append mode, so a file left behind by a failed
    # run would feed stale entries to the tokenizer next time. Clean it up
    # on every exit path, not only on success.
    try:
        command = [
            'java', '-cp', PARSER_JAR_PATH,
            'edu.stanford.nlp.process.PTBTokenizer', '-preserveLines',
            '-ioFileList', MAP_PATH
        ]
        # NOTE(review): the command is joined into a shell string, so paths
        # containing spaces or shell metacharacters will break. Left as-is
        # because the configured paths may rely on shell expansion.
        subprocess.call(" ".join(command), shell=True)

        article = " ".join(read_text_file(TOK_PATH))
        instance = Instance(article, None, STEPPER.vocab, CONFIG, None)

        idx = torch.from_numpy(instance.encoder_pointer_idx).unsqueeze(0)
        idx_no_oov = mask_oov(idx, STEPPER.vocab)

        if CONFIG.encoder == 'Recurrent':
            enc_outputs, enc_state = STEPPER.encoder(idx_no_oov)
            dec_first_state = STEPPER.encoder.hidden_final(enc_state)
        else:  # Transformer
            enc_outputs = STEPPER.encoder(idx_no_oov)
            dec_first_state = STEPPER.encoder.hidden_final(enc_outputs)

        STEPPER.decoder.dec_max_len = CONFIG.dec_max_len
        dec_outputs, att_weights = STEPPER.decoder(
            enc_outputs, dec_first_state, None, idx)

        pred = torch.argmax(
            dec_outputs.transpose(1, 2), dim=-1).squeeze().cpu().numpy()
        pred = ids2sentence(pred, STEPPER.vocab, instance.encoder_oovs)

        if CONFIG.windowing and CONFIG.w_type == 'dynamic':
            num_w = STEPPER.decoder.windower.scheduler.num_w(
                instance.encoder_pointer_idx, CONFIG.ws, CONFIG.ss)
            if pred.find(STOP_DEC) != -1:
                # Escape the token so the regex search matches it literally,
                # exactly like the str.find / str.replace calls around it.
                eos_pos = tuple(re.finditer(re.escape(STOP_DEC), pred))
                last_eos = min(num_w, len(eos_pos))
                last_eos_pos = eos_pos[last_eos - 1].start()
                pred = pred[:last_eos_pos].strip()
                pred = pred.replace(STOP_DEC, "-->")
        else:
            pred = make_readable(pred, False)

        summary_tokens = pred.split(" ")
        article_tokens = article.split(" ")

        transitions = None
        if CONFIG.windowing:
            if CONFIG.w_type == 'static':
                transitions = STEPPER.decoder.windower(
                    instance.encoder_pointer_idx)
            else:  # dynamic
                transitions = np.where(
                    np.array(summary_tokens) == "-->")[0] + 1

        weights = att_weights.squeeze().detach().cpu().numpy()
        return {
            "weights": weights[:len(summary_tokens), :len(article_tokens)],
            "summary": summary_tokens,
            "article": article_tokens,
            "transitions": transitions
        }
    finally:
        os.remove(MAP_PATH)