Beispiel #1
0
def per_sentence_bionlp_fscores_nbest(test_filenames, gold_dir):
    import tempfile
    from DataSet import DataSet
    from cStringIO import StringIO

    gold_dir = path(gold_dir)

    sentences = DataSet.from_filenames(*test_filenames)
    for docid, sentences_in_doc in sentences.group_by_metadata('DOC'):
        # if docid != '9361029':
        # continue

        print 'DOC:', docid
        our_total_proposed = 0
        our_total_matched = 0
        for i, sentence in enumerate(sentences_in_doc):
            for j, parse in enumerate(sentence):
                print "DOC:", docid, 'Sentence:', i, 'Parse:', j
                our_score_components = parse.bionlp_fscore_components(sentence)
                matched, gold, proposed = our_score_components
                our_total_proposed += proposed
                our_total_matched += matched

                conll_version = StringIO()
                parse.write_conll(conll_version,
                                  include_metadata=False,
                                  sentence=sentence)
                conll_version.seek(0)
                conll_version = conll_version.read()

                import BioNLPConversionDB
                converter = BioNLPConversionDB.get_converter()
                bionlp_events_string = converter.convert(conll_version)

                if 0:
                    print 'Events ---'
                    print bionlp_events_string
                    print 'Events ---'

                temp_test_dir = path(tempfile.mkdtemp(prefix=docid + '-'))
                temp_test_filename = path(temp_test_dir / docid + '.a2.t1')
                temp_test_file = file(temp_test_filename, 'w')
                temp_test_file.write(bionlp_events_string)
                temp_test_file.close()
                real_score_components = real_evaluation_bionlp_components(
                    temp_test_dir, gold_dir)
                if our_score_components != real_score_components:
                    real_evaluation_bionlp_components(temp_test_dir,
                                                      gold_dir,
                                                      show_output=True)
                temp_test_dir.rmtree()
                if our_score_components != real_score_components:
                    print "Ours:", our_score_components
                    print 'Real:', real_score_components

                    print 'Events ---'
                    print bionlp_events_string
                    print 'Events ---'
                    raise 'mismatch'
Beispiel #2
0
def per_sentence_bionlp_fscores(test_filename, test_dir, gold_dir):
    import tempfile
    from DataSet import DataSet
    from cStringIO import StringIO
    test_dir = path(test_dir)
    gold_dir = path(gold_dir)

    sentences = DataSet.from_filenames(test_filename)
    for docid, sentences_in_doc in sentences.group_by_metadata('DOC'):
        # if docid != '9015187':
        # if docid != '9081693':
        # if docid != '9257843':
        # if docid != '8108127':
        # if docid != '9115366':
        # if docid != '9361029':
            # continue

        print 'DOC:', docid
        our_total_proposed = 0
        our_total_matched = 0
        for sentence in sentences_in_doc:
            parse = sentence.gold_parse
            sentence.parses = [parse]
            our_score_components = parse.bionlp_fscore_components(sentence)
            matched, gold, proposed = our_score_components
            our_total_proposed += proposed
            our_total_matched += matched

            conll_version = StringIO()
            parse.write_conll(conll_version, include_metadata=False, sentence=sentence)
            conll_version.seek(0)
            conll_version = conll_version.read()

            import BioNLPConversionDB
            converter = BioNLPConversionDB.get_converter()
            bionlp_events_string = converter.convert(conll_version)

            if 1:
                print 'Events ---'
                print bionlp_events_string
                print 'Events ---'
            
            print "Ours:", our_score_components

            temp_test_dir = path(tempfile.mkdtemp(prefix=docid + '-'))
            temp_test_filename = path(temp_test_dir/docid + '.a2.t1')
            temp_test_file = file(temp_test_filename, 'w')
            temp_test_file.write(bionlp_events_string)
            temp_test_file.close()
            real_score_components = real_evaluation_bionlp_components(temp_test_dir, gold_dir)
            print 'Real:', real_score_components 
            if our_score_components != real_score_components:
                real_evaluation_bionlp_components(temp_test_dir, gold_dir, show_output=True)
            temp_test_dir.rmtree()
            if our_score_components != real_score_components:
                raise 'mismatch'
Beispiel #3
0
def per_sentence_bionlp_fscores_nbest(test_filenames, gold_dir):
    import tempfile
    from DataSet import DataSet
    from cStringIO import StringIO

    gold_dir = path(gold_dir)

    sentences = DataSet.from_filenames(*test_filenames)
    for docid, sentences_in_doc in sentences.group_by_metadata('DOC'):
        # if docid != '9361029':
            # continue

        print 'DOC:', docid
        our_total_proposed = 0
        our_total_matched = 0
        for i, sentence in enumerate(sentences_in_doc):
            for j, parse in enumerate(sentence):
                print "DOC:", docid, 'Sentence:', i, 'Parse:', j
                our_score_components = parse.bionlp_fscore_components(sentence)
                matched, gold, proposed = our_score_components
                our_total_proposed += proposed
                our_total_matched += matched

                conll_version = StringIO()
                parse.write_conll(conll_version, include_metadata=False, sentence=sentence)
                conll_version.seek(0)
                conll_version = conll_version.read()

                import BioNLPConversionDB
                converter = BioNLPConversionDB.get_converter()
                bionlp_events_string = converter.convert(conll_version)

                if 0:
                    print 'Events ---'
                    print bionlp_events_string
                    print 'Events ---'

                temp_test_dir = path(tempfile.mkdtemp(prefix=docid + '-'))
                temp_test_filename = path(temp_test_dir/docid + '.a2.t1')
                temp_test_file = file(temp_test_filename, 'w')
                temp_test_file.write(bionlp_events_string)
                temp_test_file.close()
                real_score_components = real_evaluation_bionlp_components(temp_test_dir, gold_dir)
                if our_score_components != real_score_components:
                    real_evaluation_bionlp_components(temp_test_dir, gold_dir, show_output=True)
                temp_test_dir.rmtree()
                if our_score_components != real_score_components:
                    print "Ours:", our_score_components
                    print 'Real:', real_score_components 

                    print 'Events ---'
                    print bionlp_events_string
                    print 'Events ---'
                    raise 'mismatch'
Beispiel #4
0
def per_sentence_bionlp_fscores(test_filename, test_dir, gold_dir):
    import tempfile
    from DataSet import DataSet
    from cStringIO import StringIO
    test_dir = path(test_dir)
    gold_dir = path(gold_dir)

    sentences = DataSet.from_filenames(test_filename)
    for docid, sentences_in_doc in sentences.group_by_metadata('DOC'):
        # if docid != '9015187':
        # if docid != '9081693':
        # if docid != '9257843':
        # if docid != '8108127':
        # if docid != '9115366':
        # if docid != '9361029':
        # continue

        print 'DOC:', docid
        our_total_proposed = 0
        our_total_matched = 0
        for sentence in sentences_in_doc:
            parse = sentence.gold_parse
            sentence.parses = [parse]
            our_score_components = parse.bionlp_fscore_components(sentence)
            matched, gold, proposed = our_score_components
            our_total_proposed += proposed
            our_total_matched += matched

            conll_version = StringIO()
            parse.write_conll(conll_version,
                              include_metadata=False,
                              sentence=sentence)
            conll_version.seek(0)
            conll_version = conll_version.read()

            import BioNLPConversionDB
            converter = BioNLPConversionDB.get_converter()
            bionlp_events_string = converter.convert(conll_version)

            if 1:
                print 'Events ---'
                print bionlp_events_string
                print 'Events ---'

            print "Ours:", our_score_components

            temp_test_dir = path(tempfile.mkdtemp(prefix=docid + '-'))
            temp_test_filename = path(temp_test_dir / docid + '.a2.t1')
            temp_test_file = file(temp_test_filename, 'w')
            temp_test_file.write(bionlp_events_string)
            temp_test_file.close()
            real_score_components = real_evaluation_bionlp_components(
                temp_test_dir, gold_dir)
            print 'Real:', real_score_components
            if our_score_components != real_score_components:
                real_evaluation_bionlp_components(temp_test_dir,
                                                  gold_dir,
                                                  show_output=True)
            temp_test_dir.rmtree()
            if our_score_components != real_score_components:
                raise 'mismatch'