def main():
    """Print oracle data for every bracketed tree in the dev file.

    Expects exactly two command-line arguments: the train file, whose lines
    are handed to ``get_dictionary.get_dict`` to build the vocabulary, and
    the dev file, whose lines are processed one by one.

    For each dev line the following is written to stdout: the line itself
    prefixed with ``# ``, the space-joined ``tags``, ``tokens`` and
    ``lowercase`` sequences returned by ``get_tags_tokens_lowercase``, the
    space-joined result of ``unkify``, every action returned by
    ``get_actions`` on its own line, and finally an empty line.

    Raises:
        NotImplementedError: if the number of arguments is wrong, or if a
            dev line contains a different number of ``(`` and ``)``.
    """
    if len(sys.argv) != 3:
        raise NotImplementedError(
            'Program only takes two arguments:  train file and dev file (for vocabulary mapping purposes)'
        )
    # Context managers guarantee both files are closed even if reading fails.
    with open(sys.argv[1], 'r') as train_file:
        lines = train_file.readlines()
    with open(sys.argv[2], 'r') as dev_file:
        dev_lines = dev_file.readlines()
    words_list = get_dictionary.get_dict(lines)
    # Emit the oracle for each dev line; line numbers are 1-based so that
    # the error message matches what an editor shows.
    for line_ctr, line in enumerate(dev_lines, 1):
        # Reject lines whose parentheses are not balanced in count.
        if line.count('(') != line.count(')'):
            raise NotImplementedError(
                'Unbalanced number of parenthesis in line ' + str(line_ctr))
        # First output line: the bracketed tree itself.
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('# ' + line.rstrip())
        tags, tokens, lowercase = get_tags_tokens_lowercase(line)
        assert len(tags) == len(tokens)
        assert len(tokens) == len(lowercase)
        print(' '.join(tags))
        print(' '.join(tokens))
        print(' '.join(lowercase))
        unkified = unkify(tokens, words_list)
        print(' '.join(unkified))
        for action in get_actions(line):
            print(action)
        # Blank line separates consecutive examples.
        print('')
# Example #2
# 0
def main():
    """Print tokens, unkified tokens and actions for each dev-file tree.

    Expects exactly two command-line arguments: the train file, whose lines
    are handed to ``get_dictionary.get_dict`` to build the vocabulary, and
    the dev file, whose lines are processed one by one.

    For each dev line the following is written to stdout: the line itself
    prefixed with ``# ``, the space-joined ``tokens``, the space-joined
    result of ``unkify``, every action returned by ``get_actions`` on its
    own line, and finally an empty line. This variant does not print the
    ``tags`` or ``lowercase`` sequences, although it still checks that they
    have the same length as ``tokens``.

    Raises:
        NotImplementedError: if the number of arguments is wrong, or if a
            dev line contains a different number of ``(`` and ``)``.
    """
    if len(sys.argv) != 3:
        raise NotImplementedError('Program only takes two arguments:  train file and dev file (for vocabulary mapping purposes)')
    # Context managers guarantee both files are closed even if reading fails.
    with open(sys.argv[1], 'r') as train_file:
        lines = train_file.readlines()
    with open(sys.argv[2], 'r') as dev_file:
        dev_lines = dev_file.readlines()
    words_list = get_dictionary.get_dict(lines)
    # Emit the oracle for each dev line; line numbers are 1-based so that
    # the error message matches what an editor shows.
    for line_ctr, line in enumerate(dev_lines, 1):
        # Reject lines whose parentheses are not balanced in count.
        if line.count('(') != line.count(')'):
            raise NotImplementedError('Unbalanced number of parenthesis in line ' + str(line_ctr))
        # First output line: the bracketed tree itself.
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('# ' + line.rstrip())
        tags, tokens, lowercase = get_tags_tokens_lowercase(line)
        assert len(tags) == len(tokens)
        assert len(tokens) == len(lowercase)
        print(' '.join(tokens))
        unkified = unkify(tokens, words_list)
        print(' '.join(unkified))
        for action in get_actions(line):
            print(action)
        # Blank line separates consecutive examples.
        print('')
def main():
    """Print oracle data, with re-derived actions, for each dev-file tree.

    Expects exactly three command-line arguments: a language code (``en``
    or ``ch``), the train file, whose lines are handed to
    ``get_dictionary.get_dict`` to build the vocabulary, and the dev file,
    whose lines are processed one by one.

    For each dev line the following is written to stdout: the line itself
    prefixed with ``!# ``, the space-joined ``tags``, ``tokens`` and
    ``lowercase`` sequences returned by ``get_tags_tokens_lowercase``, the
    space-joined result of ``unkify`` (called with the language code), every
    action returned by ``get_actions2`` on its own line, a ``TERM`` line,
    and finally an empty line.

    Raises:
        NotImplementedError: if the number of arguments is wrong, or if a
            dev line contains a different number of ``(`` and ``)``.
        AssertionError: if the language code is neither ``ch`` nor ``en``.
    """
    if len(sys.argv) != 4:
        raise NotImplementedError(
            'Program only takes three arguments:  en|ch train file and dev file (for vocabulary mapping purposes)'
        )
    assert sys.argv[1] in ("ch", "en"), "language code must be 'ch' or 'en'"

    # Context managers guarantee both files are closed even if reading fails.
    with open(sys.argv[2], 'r') as train_file:
        lines = train_file.readlines()
    with open(sys.argv[3], 'r') as dev_file:
        dev_lines = dev_file.readlines()
    words_list = get_dictionary.get_dict(lines)
    # Emit the oracle for each dev line; line numbers are 1-based so that
    # the error message matches what an editor shows.
    for line_ctr, line in enumerate(dev_lines, 1):
        # Reject lines whose parentheses are not balanced in count.
        if line.count('(') != line.count(')'):
            raise NotImplementedError(
                'Unbalanced number of parenthesis in line ' + str(line_ctr))
        # First output line: the bracketed tree itself. The leading '!' is
        # there so the output matches what LIU sent (original author's note).
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('!# ' + line.rstrip())
        tags, tokens, lowercase = get_tags_tokens_lowercase(line)
        assert len(tags) == len(tokens)
        assert len(tokens) == len(lowercase)
        print(' '.join(tags))
        print(' '.join(tokens))
        print(' '.join(lowercase))
        unkified = unkify(tokens, words_list, sys.argv[1])
        print(' '.join(unkified))
        output_actions = get_actions(line)
        # Per the original author's note, `trees` holds the tree in which NT
        # entries correspond to non-terminals and SHIFT entries to lexical
        # items (terminals). Only the first tree is used.
        _, trees = construct(output_actions, [])

        for action in get_actions2(trees[0], []):
            print(action)
        print('TERM')
        # Blank line separates consecutive examples.
        print('')
# Example #4
# 0
def main(args):
    """Unkify every whitespace-tokenized line of ``args.input_file``.

    The vocabulary handed to ``unkify`` is obtained as follows:

    * if ``args.vocab_file`` is set and the path exists, it is unpickled
      from that file;
    * otherwise, if ``args.train_file`` is set, it is built by
      ``get_dictionary.get_dict`` from that file's lines and, when
      ``args.vocab_file`` is set, pickled to that path.

    Each input line is stripped, split on whitespace, passed through
    ``unkify`` and printed space-joined on its own line.

    Args:
        args: object exposing ``vocab_file``, ``train_file`` and
            ``input_file`` attributes (presumably an argparse namespace --
            confirm against the caller).

    Raises:
        NotImplementedError: if an input line contains a different number
            of ``(`` and ``)``.
    """
    words_list = None
    if args.vocab_file is not None and os.path.exists(args.vocab_file):
        # Load vocab.
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # vocab_file at files this program (or a trusted party) produced.
        with open(args.vocab_file, "rb") as vocab_f:
            words_list = pickle.load(vocab_f)
    elif args.train_file is not None:
        with open(args.train_file, "r") as train_f:
            train_lines = train_f.readlines()
        words_list = get_dictionary.get_dict(train_lines)

        if args.vocab_file is not None:
            # Save.
            with open(args.vocab_file, "wb") as vocab_f:
                pickle.dump(words_list, vocab_f)
    # NOTE(review): when neither branch above runs, words_list stays None and
    # is passed to unkify as-is -- confirm unkify accepts that.

    # Stream the input instead of loading it whole; line numbers are 1-based
    # so that the error message matches what an editor shows.
    with open(args.input_file, "r") as input_f:
        for line_ctr, line in enumerate(input_f, 1):
            # Reject lines whose parentheses are not balanced in count.
            if line.count('(') != line.count(')'):
                raise NotImplementedError(
                    'Unbalanced number of parenthesis in line ' + str(line_ctr))

            tokens = line.strip().split()
            unkified = unkify(tokens, words_list)
            print(' '.join(unkified))