Example #1
0
            eventCounter[eventType] += 1
    print "------ Event Statistics ------"
    for k, v in eventCounter.items():
        print "Event Type {}, count {}".format(k, v)
    return eventCounter
if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('--parse', type=str, default='McCC')
    p.add_argument('--tokenization', type=str, default=None)
    p.add_argument('--corpus_file', type=str, default='../reparse_from_installed_GE09/GE09-devel.xml')
    p.add_argument('--merge', action='store_true')
    p.add_argument('--apply_alg', action='store_true', help='apply the heuristics of taking the longest chain for unmerging')
    args = p.parse_args()

    structureAnalyzer = StructureAnalyzer()
    structureAnalyzer.analyze(args.corpus_file)
    print >> sys.stderr, "--- Structure Analysis ----"
    print >> sys.stderr, structureAnalyzer.toString()

    corpus = loadCorpus(args.corpus_file, args.parse, tokenization=args.tokenization,
                        removeNameInfo=False, removeIntersentenceInteractionsFromCorpusElements=True,
                        merge=False)

    triggers = [[j for j in i.entities if j.get('type') != 'Protein' and j.get('type') != 'Entity'] for i in corpus.sentences]
    triggers = [i for j in triggers for i in j]
    entities = [[j for j in i.entities if j.get('type') == 'Protein' or j.get('type') == 'Entity'] for i in corpus.sentences]
    entities = [i for j in entities for i in j]
    interactions = [i.interactions for i in corpus.sentences]
    interactions = [i for j in interactions for i in j]
    print "------In original annotation:------"
    print "Total triggers {}, Total named entities {}, Total interactions {}".format(len(triggers), format(len(entities)), format(len(interactions)))