eventCounter[eventType] += 1 print "------ Event Statistics ------" for k, v in eventCounter.items(): print "Event Type {}, count {}".format(k, v) return eventCounter if __name__ == '__main__': p = argparse.ArgumentParser() p.add_argument('--parse', type=str, default='McCC') p.add_argument('--tokenization', type=str, default=None) p.add_argument('--corpus_file', type=str, default='../reparse_from_installed_GE09/GE09-devel.xml') p.add_argument('--merge', action='store_true') p.add_argument('--apply_alg', action='store_true', help='apply the heuristics of taking the longest chain for unmerging') args = p.parse_args() structureAnalyzer = StructureAnalyzer() structureAnalyzer.analyze(args.corpus_file) print >> sys.stderr, "--- Structure Analysis ----" print >> sys.stderr, structureAnalyzer.toString() corpus = loadCorpus(args.corpus_file, args.parse, tokenization=args.tokenization, removeNameInfo=False, removeIntersentenceInteractionsFromCorpusElements=True, merge=False) triggers = [[j for j in i.entities if j.get('type') != 'Protein' and j.get('type') != 'Entity'] for i in corpus.sentences] triggers = [i for j in triggers for i in j] entities = [[j for j in i.entities if j.get('type') == 'Protein' or j.get('type') == 'Entity'] for i in corpus.sentences] entities = [i for j in entities for i in j] interactions = [i.interactions for i in corpus.sentences] interactions = [i for j in interactions for i in j] print "------In original annotation:------" print "Total triggers {}, Total named entities {}, Total interactions {}".format(len(triggers), format(len(entities)), format(len(interactions)))