def __init__(self):
        """Prepare two trained-and-reloaded decision tree results plus a refiner.

        For each of the two data files the instances are read, a
        ``DecisionTree`` is trained with an ``Id3`` algorithm object, the
        training result is dumped beside the data file and then loaded back
        from that dump, so ``path1``/``path2`` hold the reloaded results.
        """
        # First data set: read, train, dump, reload.
        self.instances1 = Instance.read('./data/test1.dat')
        trained = DecisionTree(self.instances1, Id3()).train()
        trained.dump('./data/test1.dat.path')
        self.path1 = DecisionTreeResult.load('./data/test1.dat.path')

        # Second data set: identical pipeline on the other file.
        self.instances2 = Instance.read('./data/test2.dat')
        trained = DecisionTree(self.instances2, Id3()).train()
        trained.dump('./data/test2.dat.path')
        self.path2 = DecisionTreeResult.load('./data/test2.dat.path')

        self.dtr = DecisionTreeRefiner()
예제 #2
0
def training(train_filepath):
    """Train a decision tree on the instances stored in ``train_filepath``.

    The instances are read with ``Instance.read`` and handed, together with
    a fresh ``Id3`` algorithm object, to ``DecisionTree``; whatever
    ``train()`` returns is passed straight back to the caller.
    """
    return DecisionTree(Instance.read(train_filepath), Id3()).train()
예제 #3
0
def validation(valid_filepath, path):
    """Refine ``path`` against the instances stored in ``valid_filepath``.

    The validation instances are read first, then a new
    ``DecisionTreeRefiner`` refines ``path`` with them; the refiner's result
    is returned unchanged.
    """
    held_out = Instance.read(valid_filepath)
    return DecisionTreeRefiner().refine(path, held_out)
def generateInstance_3a(seed):
    """Return a random two-bit instance labelled by whether the bits agree.

    Two values are drawn from ``np.random.randint(0, 2)``; the input is the
    pair as floats and the target is ``1.0`` when both are equal, else
    ``0.0``.

    NOTE: ``seed`` is accepted but never used; the global NumPy random
    state decides the outcome.
    """
    bits = [np.random.randint(0, 2) for _ in range(2)]
    features = [float(bit) for bit in bits]

    if bits[0] == bits[1]:
        target = 1.0
    else:
        target = 0.0

    return Instance(features, target)
def generateInstance_5(seed):
    """Return an instance built from a warped sine sequence around a random n.

    ``n`` is drawn from ``np.random.randint(0, 101)``.  Every integer
    position ``m`` is mapped to ``sin(m + sin(m) * sin(m))``; the input uses
    the ten positions ``n - 10 .. n - 1`` and the target the three positions
    ``n + 1 .. n + 3``.

    NOTE: ``seed`` is accepted but never used; the global NumPy random
    state decides the outcome.
    """
    def warped(position):
        # Same arithmetic as sin(position + s * s) with s = sin(position).
        base = np.sin(position)
        return np.sin(position + base * base)

    n = np.random.randint(0, 101)

    x = [warped(m) for m in range(n - 10, n)]
    y = [warped(m) for m in range(n + 1, n + 4)]

    return Instance(x, y)
예제 #6
0
 def __init__(self):
     """Load both data files and build the two expected decision tree results.

     ``gt1`` and ``gt2`` are ``DecisionTreeResult`` objects assembled from
     hand-written ``Path`` entries; ``gt1`` has an additional ``'q'`` test
     on the ``a=False, c=False`` branch that ``gt2`` lacks.
     """
     self.instances1 = Instance.read('./data/test1.dat')
     self.instances2 = Instance.read('./data/test2.dat')

     # Each row holds the two positional arguments handed to ``Path``.
     expected1 = [
         ([('a', True), ('b', True), ('c', True)], 0),
         ([('a', True), ('b', True), ('c', False)], 1),
         ([('a', True), ('b', False)], 0),
         ([('a', False), ('c', True)], 1),
         ([('a', False), ('c', False), ('q', True)], 1),
         ([('a', False), ('c', False), ('q', False), ('e', True)], 1),
         ([('a', False), ('c', False), ('q', False), ('e', False)], 0),
     ]
     expected2 = [
         ([('a', True), ('b', True), ('c', True)], 0),
         ([('a', True), ('b', True), ('c', False)], 1),
         ([('a', True), ('b', False)], 0),
         ([('a', False), ('c', True)], 1),
         ([('a', False), ('c', False), ('e', True)], 1),
         ([('a', False), ('c', False), ('e', False)], 0),
     ]

     self.gt1 = DecisionTreeResult(
         [Path(conditions, outcome) for conditions, outcome in expected1])
     self.gt2 = DecisionTreeResult(
         [Path(conditions, outcome) for conditions, outcome in expected2])
예제 #7
0
def parse_sentence(line):
    """Split ``line`` into characters and character bigrams.

    Returns an ``Instance`` whose ``chars`` is the list of characters of
    ``line`` and whose ``bichars`` pairs every character with its
    predecessor; the first character is paired with ``'-NULL-'``.
    """
    chars = list(line)

    # Shift the sequence right by one, padding the front with the marker.
    preceding = ['-NULL-'] + chars[:-1]
    bichars = [left + right for left, right in zip(preceding, chars)]

    inst = Instance()
    inst.chars = chars
    inst.bichars = bichars
    return inst
예제 #8
0
def parse_conll(info):
    """Build an ``Instance`` from tab-separated ``id / char / label`` lines.

    Args:
        info: iterable of strings, each containing exactly three
            tab-separated fields.  The first field is ignored.  Fields are
            not stripped, so any trailing newline on a line stays in the
            label.

    Returns:
        An ``Instance`` with ``chars`` (second column), ``gold_labels``
        (third column) and ``bichars`` (each character prefixed by its
        predecessor, ``'-NULL-'`` for the first one).

    Raises:
        ValueError: if a line does not split into exactly three fields.
    """
    chars = []
    gold_labels = []
    for line in info:
        # The leading id column is not needed; avoid shadowing builtin ``id``.
        _, char, label = line.split("\t")
        chars.append(char)
        gold_labels.append(label)

    preceding = ['-NULL-'] + chars[:-1]
    bichars = [left + right for left, right in zip(preceding, chars)]

    inst = Instance()
    inst.chars = chars
    # The bigrams were previously computed and then dropped; store them so
    # the instance matches what parse_sentence / parse_sent produce.
    inst.bichars = bichars
    inst.gold_labels = gold_labels
    return inst
예제 #9
0
def parse_sent(info):
    """Turn a space-separated sentence into words, characters and bigrams.

    ``info`` is split on single spaces to obtain ``words``; ``chars`` is the
    concatenation of the characters of every word, and ``bichars`` pairs
    each character with the one before it (``'-NULL-'`` before the first).
    No end-of-sentence bigram is appended.
    """
    words = info.split(' ')
    inst = Instance()

    chars = [char for word in words for char in word]

    # Shift the sequence right by one, padding the front with the marker.
    preceding = ['-NULL-'] + chars[:-1]
    bichars = [left + right for left, right in zip(preceding, chars)]

    inst.words = words
    inst.chars = chars
    inst.bichars = bichars
    return inst
def generateInstance_4(seed):
    """Return a random 2-D point with a one-hot label over eight regions.

    The point is ``r * (cos t, sin t)`` with ``t`` drawn in ``[0, 2*pi)``
    and ``r`` obtained by folding the sum of two uniform samples back into
    ``[0, 1]``.  The class index is the quadrant (0: x>=0,y>=0; 1: x<0,y>=0;
    2: x<0,y<0; 3: x>=0,y<0), plus 4 when ``|y| > 1 - |x|``.

    NOTE: ``seed`` is accepted but never used; the global NumPy random
    state decides the outcome.
    """
    angle = 2 * np.pi * np.random.sample(1)[0]
    first_draw = np.random.sample(1)[0]
    second_draw = np.random.sample(1)[0]
    u = first_draw + second_draw
    if u > 1:
        radius = 2 - u
    else:
        radius = u
    x = [radius * np.cos(angle), radius * np.sin(angle)]

    # Quadrant lookup keyed by (x is non-negative, y is non-negative).
    quadrant_of = {
        (True, True): 0,
        (False, True): 1,
        (False, False): 2,
        (True, False): 3,
    }
    n = quadrant_of[(bool(x[0] >= 0.0), bool(x[1] >= 0.0))]

    if np.absolute(x[1]) > 1.0 - np.absolute(x[0]):
        n += 4

    y = [1.0 if k == n else 0.0 for k in range(8)]

    return Instance(x, y)
def generateInstance_1(seed):
    """Return a noisy cube-corner point with a one-hot label for its corner.

    Three uniform samples are rounded to give a corner of the unit cube.  A
    small 3-D offset is added, built in spherical coordinates from a random
    azimuth, a random ``cos(theta)`` in ``[-1, 1)`` and a radius of
    ``0.1 * cbrt(u)``.  The label is one-hot over eight classes, indexed by
    reading the three corner bits as a binary number (first bit highest).

    NOTE: ``seed`` is accepted but never used; the global NumPy random
    state decides the outcome.
    """
    corner = np.round(np.random.sample(3))

    # Draw order matters for reproducibility: azimuth, cos(theta), radius.
    azimuth = np.random.random() * 2.0 * np.pi
    polar = np.arccos(np.random.random() * 2.0 - 1.0)
    radius = 0.1 * np.cbrt(np.random.random())

    planar = radius * np.sin(polar)
    offset = [
        planar * np.cos(azimuth),
        planar * np.sin(azimuth),
        radius * np.cos(polar),
    ]
    x = [base + delta for base, delta in zip(corner, offset)]

    n = int(corner[0]) * 4 + int(corner[1]) * 2 + int(corner[2])
    y = [0.0] * 8
    y[n] = 1.0

    return Instance(x, y)
예제 #12
0
def vis_dict():
    """Summarise the configured article and collect data for visualisation.

    The function appends a ``PATH -> TOK_PATH`` entry to ``MAP_PATH``, runs
    the Stanford ``PTBTokenizer`` on that file list, reads the tokenized
    article from ``TOK_PATH``, runs it through ``STEPPER``'s encoder and
    decoder, converts the predicted ids to text and finally removes
    ``MAP_PATH``.

    Returns:
        dict with the keys
        ``"weights"``: attention weights as a NumPy array, cropped to
        ``[:len(summary), :len(article)]``;
        ``"summary"``: the predicted text split on single spaces;
        ``"article"``: the article text split on single spaces;
        ``"transitions"``: ``None`` when ``CONFIG.windowing`` is falsy, the
        windower output for static windowing, otherwise the indices right
        after each ``"-->"`` token of the summary.
    """
    with open(MAP_PATH, "a") as mf:
        mf.write(f"{PATH} \t {TOK_PATH}\n")

    command = [
        'java', '-cp', PARSER_JAR_PATH,
        'edu.stanford.nlp.process.PTBTokenizer', '-preserveLines',
        '-ioFileList', MAP_PATH
    ]
    # NOTE(review): the arguments are joined into one shell string, so paths
    # containing spaces or shell metacharacters will not survive; left as is
    # because switching to an argument list could change how the configured
    # paths are expanded.
    subprocess.call(" ".join(command), shell=True)

    article = read_text_file(TOK_PATH)
    article = " ".join(article)
    instance = Instance(article, None, STEPPER.vocab, CONFIG, None)

    idx = torch.from_numpy(instance.encoder_pointer_idx).unsqueeze(0)
    idx_no_oov = mask_oov(idx, STEPPER.vocab)

    if CONFIG.encoder == 'Recurrent':
        enc_outputs, enc_state = STEPPER.encoder(idx_no_oov)
        dec_first_state = STEPPER.encoder.hidden_final(enc_state)
    else:  # Transformer
        enc_outputs = STEPPER.encoder(idx_no_oov)
        dec_first_state = STEPPER.encoder.hidden_final(enc_outputs)

    STEPPER.decoder.dec_max_len = CONFIG.dec_max_len
    dec_outputs, att_weights = STEPPER.decoder(enc_outputs, dec_first_state,
                                               None, idx)
    pred = torch.argmax(dec_outputs.transpose(1, 2),
                        dim=-1).squeeze().cpu().numpy()
    pred = ids2sentence(pred, STEPPER.vocab, instance.encoder_oovs)

    if CONFIG.windowing and CONFIG.w_type == 'dynamic':
        num_w = STEPPER.decoder.windower.scheduler.num_w(
            instance.encoder_pointer_idx, CONFIG.ws, CONFIG.ss)
        if STOP_DEC in pred:
            # STOP_DEC is treated as a literal marker by the membership test
            # above and by str.replace below, so it must be escaped before
            # being used as a regex; otherwise a marker containing regex
            # metacharacters (e.g. brackets) matches the wrong positions.
            eos_pos = tuple(re.finditer(re.escape(STOP_DEC), pred))
            # Cut the prediction at the last stop marker that is allowed by
            # the number of windows, then show remaining markers as arrows.
            last_eos = min(num_w, len(eos_pos))
            last_eos_pos = eos_pos[last_eos - 1].start()
            pred = pred[:last_eos_pos].strip()
            pred = pred.replace(STOP_DEC, "-->")
    else:
        pred = make_readable(pred, False)

    # pred is final from here on; split once and reuse the token lists.
    summary_tokens = pred.split(" ")
    article_tokens = article.split(" ")

    transitions = None
    if CONFIG.windowing:
        if CONFIG.w_type == 'static':
            transitions = STEPPER.decoder.windower(
                instance.encoder_pointer_idx)
        else:  # dynamic
            transitions = np.where(np.array(summary_tokens) == "-->")[0] + 1

    slen_ = len(summary_tokens)
    alen_ = len(article_tokens)

    w_d_ = {
        "weights":
        att_weights.squeeze().detach().cpu().numpy()[:slen_, :alen_],
        "summary": summary_tokens,
        "article": article_tokens,
        "transitions": transitions
    }

    os.remove(MAP_PATH)

    return w_d_
예제 #13
0
파일: Dataloader.py 프로젝트: HMJW/DepSAWR
def batch_variable_inst(insts, tagids, vocab):
    """Yield a re-tagged copy of each instance and whether the tag was right.

    ``insts`` and ``tagids`` are walked in lockstep.  Every tag id is mapped
    to a tag with ``vocab.id2tag``; a new ``Instance`` carrying the source
    and target words/heads/relations plus the predicted tag is yielded
    together with ``predicted == original.tag``.
    """
    for original, tag_index in zip(insts, tagids):
        predicted = vocab.id2tag(tag_index)
        rebuilt = Instance(
            original.src_words, original.src_heads, original.src_rels,
            original.tgt_words, original.tgt_heads, original.tgt_rels,
            predicted,
        )
        yield rebuilt, predicted == original.tag
예제 #14
0
        'edu.stanford.nlp.process.PTBTokenizer', '-preserveLines',
        '-ioFileList', MAP_PATH
    ]
    subprocess.call(" ".join(command), shell=True)

    to_predict = []
    if CFG['dir_mode']:
        for tok in sorted(os.listdir(TOK_DIR)):
            tok_path = os.path.join(TOK_DIR, tok)
            to_predict.append(tok_path)
    else:
        to_predict.append(TOK_PATH)

    for tok in to_predict:
        article = read_text_file(tok)
        instance = Instance(" ".join(article), None, STEPPER.vocab, CONFIG,
                            None)
        print("Article: ", " ".join(article))

        oovs = [instance.encoder_oovs]
        idx = torch.from_numpy(instance.encoder_pointer_idx).unsqueeze(0)
        idx_no_oov = mask_oov(idx, STEPPER.vocab)

        if CONFIG.encoder == 'Recurrent':
            enc_outputs, enc_state = STEPPER.encoder(idx_no_oov)
            dec_first_state = STEPPER.encoder.hidden_final(enc_state)
        else:  # Transformer
            enc_outputs = STEPPER.encoder(idx_no_oov)
            dec_first_state = STEPPER.encoder.hidden_final(enc_outputs)

        STEPPER.bsdecoder.batch_size = 1
        STEPPER.bsdecoder.dec_max_len = CONFIG.dec_max_len
예제 #15
0
def batch_variable_inst(insts, tagids, vocab, tokenizer):
    """Yield a re-tagged copy of each instance and whether the tag was right.

    ``insts`` and ``tagids`` are walked in lockstep.  Every tag id is mapped
    to a tag with ``vocab.id2tag``; a new ``Instance`` built from the source
    words, target words, predicted tag and ``tokenizer`` is yielded together
    with ``predicted == original.tag``.
    """
    for original, tag_index in zip(insts, tagids):
        predicted = vocab.id2tag(tag_index)
        rebuilt = Instance(
            original.src_words, original.tgt_words, predicted, tokenizer)
        yield rebuilt, predicted == original.tag
예제 #16
0
def batch_variable_inst(insts, tagids, vocab, tag_logits):
    """Yield re-tagged instances with their originals and a correctness flag.

    ``insts``, ``tagids`` and ``tag_logits`` are walked in lockstep.  Every
    tag id is mapped to a tag with ``vocab.id2tag``; each step yields a
    3-tuple of (new ``Instance`` carrying the predicted tag and that item's
    logits, the original instance, ``predicted == original.tag``).
    """
    for original, tag_index, logits in zip(insts, tagids, tag_logits):
        predicted = vocab.id2tag(tag_index)
        rebuilt = Instance(
            original.src_words, original.src_heads,
            original.src_childs, original.src_key,
            original.tgt_words, original.tgt_heads,
            original.tgt_childs, original.tgt_key,
            predicted, original.type, logits,
        )
        yield rebuilt, original, predicted == original.tag
def generateInstance_3b(seed):
    """Return a random sample of ``sin(pi * x) / (pi * x)``.

    ``x`` is ``np.random.random() * 4.0 + 0.001``; both the input and the
    target are wrapped in single-element lists.

    NOTE: ``seed`` is accepted but never used; the global NumPy random
    state decides the outcome.
    """
    abscissa = np.random.random() * 4.0 + 0.001
    scaled = abscissa * np.pi

    return Instance([abscissa], [np.sin(scaled) / scaled])
예제 #18
0
    from data import Instance
    from algo import Id3, Gini
    from fmeasure import Fmeasure
    import time
    import optparse
    import os
    parser = optparse.OptionParser(usage="usage: %prog [options] filepath")
    parser.add_option("-a", type="choice", choices=['id3', 'gini', 'f'],
        dest="algo", help="algorithm for decision tree", default="id3")
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.error("needs filepath")

    start_time = time.clock()
    filepath = args[0]
    instances = Instance.read(filepath)
    print '%s used (#pos: %s, #neg: %s)' % (filepath, 
        len([d for d in instances if d.label == 1]),
        len([d for d in instances if d.label != 1]))

    if options.algo == 'id3':
        algo = Id3()
    elif options.algo == 'gini':
        algo = Gini()
    elif options.algo == 'f':
        relnum = len([d for d in instances if d.label == 1])
        algo = Fmeasure(relnum)

    dt = DecisionTree(instances, algo)
    dpath = dt.train()
    print "Paths generated:"