Esempio n. 1
0
 def finalize(self):
     """
     Finalize this resource, the reference resource and all secondary
     resources, printing the path of each one to ``self.logger``.
     """
     # Main result file and reference file are finalized before anything
     # is reported.
     CorpusFile.finalize(self)
     self.reference.finalize()
     print('Wrote results to', self.path, file=self.logger)
     print('Wrote reference to', self.reference.path, file=self.logger)
     # Each secondary resource is finalized and reported in turn.
     for index, secondary in enumerate(self.secondaries):
         secondary.finalize()
         prefix = 'Wrote sec %d to ' % index
         print(prefix, secondary.path, file=self.logger)
Esempio n. 2
0
class ScorerAndWriter(ConstituentScorer, CorpusFile):
    """
    A resource to which parsing results can be written.
    Computes LF1 score (inhouse implementation) and writes resulting parse tree to a file.

    Next to the system output (this object itself, used as a ``CorpusFile``),
    a reference ``CorpusFile`` for the gold structures and a configurable
    number of secondary ``CorpusFile`` outputs are maintained.
    """
    def __init__(self, experiment, path=None, directory=None, logger=None, secondary_scores=0):
        """
        :param experiment: object on which ``serialize``, ``obtain_sentence``,
            ``obtain_label`` and ``compute_fallback`` are called
        :param path: path of the system output file; if ``None``, a fresh
            temporary file is created in ``directory``
        :param directory: directory passed to ``tempfile`` and to every
            ``CorpusFile`` created here
        :param logger: file-like object that status messages are printed to;
            ``sys.stdout`` is used if ``None``
        :param secondary_scores: number of secondary output files to create
        """
        ConstituentScorer.__init__(self)
        if path is None:
            # Only create a temporary file when the caller gave no path.
            # The previous one-liner unpacked the conditional's result, so a
            # caller-supplied path string was itself unpacked (ValueError),
            # and the descriptor returned by mkstemp was never closed.
            with tempfile.NamedTemporaryFile(dir=directory, delete=False) as tmp:
                path = tmp.name
        CorpusFile.__init__(self, path=path, directory=directory, logger=logger)
        self.experiment = experiment
        self.reference = CorpusFile(directory=directory, logger=logger)
        # Set after CorpusFile.__init__ so the stdout fallback is what the
        # print calls in finalize() see.
        self.logger = logger if logger is not None else sys.stdout
        self.secondaries = [CorpusFile(directory=directory, logger=logger) for _ in range(secondary_scores)]

    def init(self):
        """Initialize this resource, the reference and all secondary resources."""
        CorpusFile.init(self)
        self.reference.init()
        for sec in self.secondaries:
            sec.init()

    def finalize(self):
        """
        Finalize this resource, the reference and all secondary resources,
        printing the path of each one to ``self.logger``.
        """
        CorpusFile.finalize(self)
        self.reference.finalize()
        print('Wrote results to', self.path, file=self.logger)
        print('Wrote reference to', self.reference.path, file=self.logger)
        for i, sec in enumerate(self.secondaries):
            sec.finalize()
            print('Wrote sec %d to ' % i, sec.path, file=self.logger)

    def score(self, system, gold, secondaries=None):
        """
        Score ``system`` against ``gold`` and write both to their files.

        :param system: system output, passed to ``ConstituentScorer.score``
            and serialized to this resource's file
        :param gold: gold structure, serialized to the reference file
        :param secondaries: optional iterable of additional system outputs;
            they are paired with ``self.secondaries`` via ``zip``, so surplus
            items on either side are ignored
        """
        ConstituentScorer.score(self, system, gold)
        self.file.writelines(self.experiment.serialize(system))
        self.reference.file.writelines(self.experiment.serialize(gold))
        if secondaries:
            for system_sec, corpus in zip(secondaries, self.secondaries):
                corpus.file.writelines(self.experiment.serialize(system_sec))

    def failure(self, gold):
        """
        Record a failure for ``gold`` and write a fallback in its place.

        The fallback is obtained from ``experiment.compute_fallback`` and is
        written to this resource's file and to every secondary file; the gold
        structure is written to the reference file.
        """
        ConstituentScorer.failure(self, gold)
        sentence = self.experiment.obtain_sentence(gold)
        label = self.experiment.obtain_label(gold)
        fallback = self.experiment.compute_fallback(sentence, label)
        self.file.writelines(self.experiment.serialize(fallback))
        self.reference.file.writelines(self.experiment.serialize(gold))
        for sec in self.secondaries:
            sec.file.writelines(self.experiment.serialize(fallback))

    def __str__(self):
        """Use the ``CorpusFile`` string representation."""
        return CorpusFile.__str__(self)
    def evaluate(self, result_resource, gold_resource):
        """
        Run the external ``semeval-run.sh`` scorer on ``result_resource``
        against ``gold_resource``.

        If any of ``end``, ``limit`` or ``length_limit`` is set on the gold
        resource, the corpus is first read via ``self.read_corpus`` and
        exported to a fresh ``CorpusFile``, which then replaces the gold
        resource for scoring.
        """
        unrestricted = (gold_resource.end is None
                        and gold_resource.limit is None
                        and gold_resource.length_limit is None)
        if not unrestricted:
            selected_corpus = self.read_corpus(gold_resource)
            selection_file = CorpusFile()
            # init() followed directly by finalize(), then the export writes
            # to the resource's path.
            selection_file.init()
            selection_file.finalize()
            export_corpus(selected_corpus, selection_file.path)
            gold_resource = selection_file

        # The script path is relative to the current working directory.
        command = [
            "sh", "../util/semeval-run.sh", "Scorer", gold_resource.path,
            result_resource.path, "representation=DM"
        ]
        call(command)