def main():
    """Print the oracle for every bracketed tree in the dev file.

    Command line: ``<train file> <dev file>``. The lines of the train file
    are passed to ``get_dictionary.get_dict`` to obtain the vocabulary that
    ``unkify`` receives; the dev file holds one bracketed tree per line.

    For each tree the following is written to stdout:
      1. ``# `` followed by the tree itself,
      2. the tags, 3. the tokens, 4. the lowercased tokens,
      5. the result of ``unkify(tokens, words_list)``,
      6. one action per line, followed by an empty line.

    Raises:
        NotImplementedError: if the number of command-line arguments is
            wrong, or a dev line has unbalanced parentheses.
    """
    if len(sys.argv) != 3:
        raise NotImplementedError(
            'Program only takes two arguments: train file and dev file (for vocabulary mapping purposes)'
        )

    # Context managers close the files even if reading raises.
    with open(sys.argv[1], 'r') as train_file:
        lines = train_file.readlines()
    with open(sys.argv[2], 'r') as dev_file:
        dev_lines = dev_file.readlines()

    words_list = get_dictionary.get_dict(lines)

    # Line numbers are 1-based so the error message matches the file.
    for line_ctr, line in enumerate(dev_lines, 1):
        # The parentheses must be balanced before the tree is processed.
        if line.count('(') != line.count(')'):
            raise NotImplementedError(
                'Unbalanced number of parenthesis in line ' + str(line_ctr))

        # First output line: the bracketed tree itself.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('# ' + line.rstrip())

        tags, tokens, lowercase = get_tags_tokens_lowercase(line)
        assert len(tags) == len(tokens)
        assert len(tokens) == len(lowercase)
        print(' '.join(tags))
        print(' '.join(tokens))
        print(' '.join(lowercase))

        unkified = unkify(tokens, words_list)
        print(' '.join(unkified))

        for action in get_actions(line):
            print(action)
        # An empty line separates consecutive trees.
        print('')
def main():
    """Print tokens, unkified tokens and actions for each dev-file tree.

    Command line: ``<train file> <dev file>``. The lines of the train file
    are passed to ``get_dictionary.get_dict`` to obtain the vocabulary that
    ``unkify`` receives; the dev file holds one bracketed tree per line.

    For each tree the following is written to stdout:
      1. ``# `` followed by the tree itself,
      2. the tokens,
      3. the result of ``unkify(tokens, words_list)``,
      4. one action per line, followed by an empty line.

    The tag and lowercase lines are not printed here; those values are only
    used for the length sanity checks.

    Raises:
        NotImplementedError: if the number of command-line arguments is
            wrong, or a dev line has unbalanced parentheses.
    """
    if len(sys.argv) != 3:
        raise NotImplementedError('Program only takes two arguments: train file and dev file (for vocabulary mapping purposes)')

    # Context managers close the files even if reading raises.
    with open(sys.argv[1], 'r') as train_file:
        lines = train_file.readlines()
    with open(sys.argv[2], 'r') as dev_file:
        dev_lines = dev_file.readlines()

    words_list = get_dictionary.get_dict(lines)

    # Line numbers are 1-based so the error message matches the file.
    for line_ctr, line in enumerate(dev_lines, 1):
        # The parentheses must be balanced before the tree is processed.
        if line.count('(') != line.count(')'):
            raise NotImplementedError('Unbalanced number of parenthesis in line ' + str(line_ctr))

        # First output line: the bracketed tree itself.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('# ' + line.rstrip())

        tags, tokens, lowercase = get_tags_tokens_lowercase(line)
        assert len(tags) == len(tokens)
        assert len(tokens) == len(lowercase)
        print(' '.join(tokens))

        unkified = unkify(tokens, words_list)
        print(' '.join(unkified))

        for action in get_actions(line):
            print(action)
        # An empty line separates consecutive trees.
        print('')
def main():
    """Print the oracle for every bracketed tree in the dev file.

    Command line: ``<en|ch> <train file> <dev file>``. The first argument is
    forwarded to ``unkify`` as its third argument. The lines of the train
    file are passed to ``get_dictionary.get_dict`` to obtain the vocabulary;
    the dev file holds one bracketed tree per line.

    For each tree the following is written to stdout:
      1. ``!# `` followed by the tree itself,
      2. the tags, 3. the tokens, 4. the lowercased tokens,
      5. the result of ``unkify(tokens, words_list, lang)``,
      6. one action per line (from ``get_actions2``), then ``TERM``,
         followed by an empty line.

    Raises:
        NotImplementedError: if the number of command-line arguments is
            wrong, or a dev line has unbalanced parentheses.
        AssertionError: if the first argument is neither "ch" nor "en".
    """
    if len(sys.argv) != 4:
        raise NotImplementedError(
            'Program only takes three arguments: en|ch train file and dev file (for vocabulary mapping purposes)'
        )
    lang = sys.argv[1]
    # Raised explicitly (instead of a bare `assert`) so the check is not
    # stripped under `python -O`; the exception type is unchanged.
    if lang not in ("ch", "en"):
        raise AssertionError("first argument must be 'ch' or 'en'")

    # Context managers close the files even if reading raises.
    with open(sys.argv[2], 'r') as train_file:
        lines = train_file.readlines()
    with open(sys.argv[3], 'r') as dev_file:
        dev_lines = dev_file.readlines()

    words_list = get_dictionary.get_dict(lines)

    # Line numbers are 1-based so the error message matches the file.
    for line_ctr, line in enumerate(dev_lines, 1):
        # The parentheses must be balanced before the tree is processed.
        if line.count('(') != line.count(')'):
            raise NotImplementedError(
                'Unbalanced number of parenthesis in line ' + str(line_ctr))

        # First output line: the bracketed tree itself. The leading "!" is
        # there so the output matches what LIU sent (per the original note).
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('!# ' + line.rstrip())

        tags, tokens, lowercase = get_tags_tokens_lowercase(line)
        assert len(tags) == len(tokens)
        assert len(tokens) == len(lowercase)
        print(' '.join(tags))
        print(' '.join(tokens))
        print(' '.join(lowercase))

        unkified = unkify(tokens, words_list, lang)
        print(' '.join(unkified))

        output_actions = get_actions(line)
        # Per the original note: `trees` is the tree in which NT entries
        # correspond to non-terminals and SHIFT entries to the lexical
        # items (terminals). Only the first tree is used.
        _, trees = construct(output_actions, [])
        for action in get_actions2(trees[0], []):
            print(action)
        # NOTE(review): TERM is emitted once per tree, after its actions —
        # confirm against the expected oracle format.
        print('TERM')
        # An empty line separates consecutive trees.
        print('')
def main(args):
    """Print the unkified form of every line in ``args.input_file``.

    The vocabulary handed to ``unkify`` is obtained as follows:
      * if ``args.vocab_file`` is set and exists, it is unpickled from there;
      * otherwise, if ``args.train_file`` is set, it is built with
        ``get_dictionary.get_dict`` from that file's lines and, when
        ``args.vocab_file`` is set, pickled to that path for later runs;
      * otherwise it stays ``None``.

    Each input line is split on whitespace into tokens, and the result of
    ``unkify(tokens, words_list)`` is printed space-joined, one line per
    input line.

    Args:
        args: object with ``vocab_file``, ``train_file`` and ``input_file``
            attributes (the first two may be ``None``).

    Raises:
        NotImplementedError: if an input line has unbalanced parentheses.
    """
    words_list = None
    if args.vocab_file is not None and os.path.exists(args.vocab_file):
        # Load the cached vocabulary.
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # vocabulary files from a trusted source.
        with open(args.vocab_file, "rb") as vocab_f:
            words_list = pickle.load(vocab_f)
    elif args.train_file is not None:
        with open(args.train_file, "r") as train_f:
            train_lines = train_f.readlines()
        words_list = get_dictionary.get_dict(train_lines)
        if args.vocab_file is not None:
            # Save the freshly built vocabulary.
            with open(args.vocab_file, "wb") as vocab_f:
                pickle.dump(words_list, vocab_f)

    with open(args.input_file, "r") as input_f:
        lines = input_f.readlines()

    # Line numbers are 1-based so the error message matches the file.
    for line_ctr, line in enumerate(lines, 1):
        # The parentheses must be balanced on every input line.
        if line.count('(') != line.count(')'):
            raise NotImplementedError(
                'Unbalanced number of parenthesis in line ' + str(line_ctr))
        # Tokens are taken by plain whitespace splitting of the line.
        tokens = line.strip().split()
        unkified = unkify(tokens, words_list)
        print(' '.join(unkified))