Example #1
import os

# NOTE: Standard is defined elsewhere in the same package.


def loadAllStandard(path):
    """Load all standard markup files from the provided directory. Returns a sorted list."""

    # each document's files share a common name stem (file name without its extensions)
    names = set(x.split('.')[0] for x in os.listdir(path))
    res = []
    for name in names:
        res.append(Standard(name, path))

    # document names follow the 'book_XXX' pattern; sort by the numeric suffix
    return sorted(res, key=lambda x: int(x.name[5:]))
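
A minimal usage sketch, assuming the function above is importable and the standard markup lives in a hypothetical 'devset/standard' directory:

standards = loadAllStandard('devset/standard')
print('%d standard documents loaded' % len(standards))
print(standards[0].name)  # document names follow the 'book_XXX' pattern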
Example #2
    def _doEvaluate(self, std_dir, test_dir, is_locorg_allowed=True):
        """
            Evaluate the submission by comparing it to the standard

            Returns a dictionary of metrics by mention type.
            Metrics include precision, recall, f1,
            true positive count, standard size and test size
            in that order.

            std_dir - standard markup files directory
            test_dir - response files directory
            is_locorg_allowed - enables/disables 'locorg' mention type
        """

        # collect the document names present in the standard (file name minus extension)
        unique_names = set()

        for filename in os.listdir(std_dir):
            parts = filename.split('.')
            unique_names.add('.'.join(parts[:-1]))

        names = list(unique_names)

        # cross every '.task1' response file off the set of expected documents
        for filename in os.listdir(test_dir):
            parts = filename.split('.')
            if parts[-1] != 'task1':
                continue
            unique_names.remove('.'.join(parts[:-1]))

        # validate the submission integrity:
        if unique_names:
            raise Exception('Missing files in the response:\n' +
                            '\n'.join(sorted(unique_names)))

        allowed_tags = ['per', 'org', 'loc']
        if is_locorg_allowed:
            allowed_tags.append('locorg')

        doc_results = {}
        # calculate partial metrics:
        tp = {x: 0.0 for x in allowed_tags}
        n_std = {x: 0.0 for x in allowed_tags}
        n_test = {x: 0.0 for x in allowed_tags}
        for name in names:
            std = Standard(name, std_dir)
            test = Test(name, test_dir)
            res = self.evaluateDocument(std, test, is_locorg_allowed)
            doc_results[name] = res
            for tag in allowed_tags:
                # res[tag] = (precision, recall, f1, tp, std size, test size)
                tp[tag] += res[tag][3]
                n_std[tag] += res[tag][4]
                n_test[tag] += res[tag][5]

        # calculate global metrics:
        return ({tag: calcMetrics(tp[tag], n_std[tag], n_test[tag])
                 for tag in allowed_tags},
                doc_results)
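
A hedged sketch of consuming the return value, assuming 'evaluator' is an instance of the class that defines _doEvaluate, the directories are hypothetical, and calcMetrics returns the metrics in the order described in the docstring above:

overall, per_doc = evaluator._doEvaluate('devset/standard', 'devset/response')
for tag in sorted(overall):
    precision, recall, f1 = overall[tag][:3]
    print('%-7s P=%.3f R=%.3f F1=%.3f' % (tag, precision, recall, f1))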
Example #3
    def generate(self, std_dir, out_dir):
        """
            Generate a response based on all the standard documents in the directory

            std_dir - standard markup directory
            out_dir - directory where the generated response will be saved to
        """

        unique_names = set()

        for filename in os.listdir(std_dir):
            parts = filename.split('.')
            unique_names.add('.'.join(parts[:-1]))

        names = list(unique_names)
        # write one '.task1' response file per standard document
        for name in names:
            std = Standard(name, std_dir)
            self.generateDoc(std, os.path.join(out_dir, name + '.task1'))
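
A short usage sketch; 'generator' is assumed to be an instance of the class that defines generate, and out_dir must already exist because generate() does not create it:

generator.generate('devset/standard', 'devset/generated')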
Example #4
    def createResponse(self, std_path, test_path):
        """Create a response for every standard document in std_path and save it under test_path."""
        os.makedirs(test_path, exist_ok=True)
        names = set(x.split('.')[0] for x in os.listdir(std_path))
        for name in names:
            s = Standard(name, std_path)
            self.createDocumentResponse(s, test_path)
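
Since os.makedirs(..., exist_ok=True) creates the output directory on demand, the caller only needs an existing standard directory; a hedged call sketch with a hypothetical instance and paths:

responder.createResponse('devset/standard', 'devset/generated')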