Ejemplo n.º 1
0
def main():
    """Print every sentence from ``sentences.txt`` followed by its parses.

    A parser is obtained from ``pcfg_chartparser("wsjp.cfg")``; each sentence
    returned by ``cfg_parse.read_sentences`` is echoed and then every item
    produced by iterating ``parse(sentence)`` is printed on its own line.
    """
    parser = pcfg_chartparser("wsjp.cfg")
    for sentence in cfg_parse.read_sentences("sentences.txt"):
        print(sentence)
        for parse_tree in parser.parse(sentence):
            print(parse_tree)
Ejemplo n.º 2
0
def main():
    """Echo each input sentence and then each parse produced for it.

    The parser comes from ``pcfg_chartparser("wsjp.cfg")`` and the sentences
    from ``cfg_parse.read_sentences("sentences.txt")``.
    """
    chart_parser = pcfg_chartparser("wsjp.cfg")
    sentences = cfg_parse.read_sentences("sentences.txt")

    for sentence in sentences:
        print(sentence)
        # Iterate the result lazily; one output line per item it produces.
        parses = chart_parser.parse(sentence)
        for parse in parses:
            print(parse)
Ejemplo n.º 3
0
def main():
    """Time the parse of each sentence in ``sentences.txt`` and report it.

    Builds a parser with ``pcfg_chartparser("wsjp.cfg")``, prints a
    tab-separated header line, and then, for every sentence returned by
    ``cfg_parse.read_sentences``, prints the sentence together with the
    wall-clock time spent in ``parse`` (in microseconds), followed by the
    value ``parse`` returned.
    """
    grammarp = pcfg_chartparser("wsjp.cfg")
    sents = cfg_parse.read_sentences("sentences.txt")
    # A parenthesised single-argument print produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("sentence\ttime elapsed (us)")
    for sent in sents:
        start = datetime.now()
        tree = grammarp.parse(sent)
        elapsed = datetime.now() - start
        # timedelta is normalised into days/seconds/microseconds, so ``days``
        # must be folded in too; otherwise an interval of a day or more (or a
        # negative one after a backwards clock step) is misreported.
        microseconds = (
            (elapsed.days * 86400 + elapsed.seconds) * 1000000
            + elapsed.microseconds
        )
        print("%s\t%d" % (sent, microseconds))
        print(tree)
Ejemplo n.º 4
0
def main():
  """Report how long each sentence takes to parse.

  Output is a header line, then for each sentence read from
  ``sentences.txt`` one tab-separated line holding the sentence and the
  elapsed wall-clock microseconds of the ``parse`` call, followed by the
  result of that call. The parser is created by
  ``pcfg_chartparser("wsjp.cfg")``.
  """
  grammarp = pcfg_chartparser("wsjp.cfg")
  sents = cfg_parse.read_sentences("sentences.txt")
  # print(...) with exactly one argument is valid, and prints the same text,
  # on both Python 2 and Python 3.
  print("sentence\ttime elapsed (us)")
  for sent in sents:
    start = datetime.now()
    tree = grammarp.parse(sent)
    end = datetime.now()
    elapsed = end - start
    # Include the ``days`` component: a timedelta stores whole days
    # separately (and uses days=-1 for negative spans), so summing only
    # seconds and microseconds gives a wrong total in those cases.
    total_seconds = elapsed.days * 86400 + elapsed.seconds
    microseconds = total_seconds * 1000000 + elapsed.microseconds
    print("%s\t%d" % (sent, microseconds))
    print(tree)
Ejemplo n.º 5
0
	tokens = line.split()
	total += len(tokens)
    fd.close()
    return total

if __name__ == "__main__":
    if len(sys.argv) >= 4:
	root = sys.argv[1]
	sentence_file = sys.argv[2]
	sent_num = int(sys.argv[3])
    else:
	print "usage %s <grammar directory> <sentence file> <sentence number>" %(sys.argv[0])
	exit(1)

    dirs = os.listdir(root)
    sentences = cfg_parse.read_sentences(sentence_file)
    if sent_num < 0 or sent_num >= len(sentences):
	print "error: sentence %d not in %s" %(sent_num, sentence_file)
	exit(1)

    sys.stdout.write("filename\tgrammar_size\tnum_branches\tnum_parses\ttime_taken\n");
    dirs.sort()
    for filename in dirs:

	if re.match(".*\.cfg$", filename):
	    grammar_file = "%s/%s" %(root, filename)
	    grammar_size = num_symbols(grammar_file)
	    sent, num_parses, time_taken, trees = \
		  cfg_parse.parse(sentences[sent_num], grammar_file)
	    nbranches = num_branches(grammar_file)