Esempio n. 1
0
    def load(self, filename):
        """Load tagged mentions from `filename` into ``self.mentions``.

        The file is read as space-delimited rows of the form
        ``[TAG] [START_SYMBOL_INDEX] [LENGTH]``. TAG is matched
        case-insensitively against ``self.allowed_tags``; the two remaining
        fields are passed to ``Interval`` unchanged. Empty rows are skipped.

        Side effects: (re)initialises ``self.allowed_tags`` and
        ``self.mentions`` (one list of intervals per allowed tag).

        Raises:
            Exception: if a row does not have exactly three fields, carries
                an unknown tag, or is rejected by ``Interval``. The message
                reports the zero-based row index produced by the csv reader.
        """
        # set the allowed tags for later
        self.allowed_tags = {'org', 'per', 'loc', 'locorg'}
        self.mentions = {tag: [] for tag in self.allowed_tags}

        # read the file that should consist of lines like
        # [TAG] [START_SYMBOL_INDEX] [LENGTH]
        with safeOpen(filename) as f:
            r = csv.reader(f, delimiter=' ', quotechar=Config.QUOTECHAR)
            for index, parts in enumerate(r):
                # skip the empty lines
                if not parts:
                    continue

                try:
                    # Explicit checks instead of `assert`: assertions are
                    # removed under `python -O` and carry no message.
                    if len(parts) != 3:
                        raise ValueError(
                            'expected 3 fields, got {}'.format(len(parts)))
                    tag = parts[0].lower()
                    if tag not in self.allowed_tags:
                        raise ValueError(
                            'unknown tag "{}"'.format(parts[0]))
                    self.mentions[tag].append(Interval(*parts[1:]))
                except Exception as e:
                    # Sorted so the hint is the same on every run (set
                    # iteration order is not stable across interpreters).
                    line_descr = '[{}] [START_SYMBOL_INDEX] [LENGTH]'.format(
                        '/'.join(sorted(self.allowed_tags)))
                    raise Exception(
                        'Error: "{}", line {}.\nExpected: {}\nReceived: {}\nDetails: {}'
                        .format(filename, index, line_descr, ' '.join(parts),
                                str(e)))
Esempio n. 2
0
 def load(self, filename):
     """Load tagged mentions from `filename` into ``self.mentions``.

     The file is read as space-delimited rows of the form
     ``[TAG] [START_SYMBOL_INDEX] [LENGTH]``. TAG is matched
     case-insensitively against ``self.allowed_tags``; the two remaining
     fields are passed to ``Interval`` unchanged. Empty rows are skipped.

     Side effects: (re)initialises ``self.allowed_tags`` and
     ``self.mentions`` (one list of intervals per allowed tag).

     Raises:
         Exception: if a row does not have exactly three fields, carries
             an unknown tag, or is rejected by ``Interval``. The message
             reports the zero-based row index produced by the csv reader.
     """
     # set the allowed tags for later
     self.allowed_tags = {'org', 'per', 'loc', 'locorg'}
     self.mentions = {tag: [] for tag in self.allowed_tags}

     # read the file that should consist of lines like
     # [TAG] [START_SYMBOL_INDEX] [LENGTH]
     with safeOpen(filename) as f:
         r = csv.reader(f, delimiter=' ', quotechar=Config.QUOTECHAR)
         for index, parts in enumerate(r):
             # skip the empty lines
             if not parts:
                 continue

             try:
                 # Explicit checks instead of `assert`: assertions are
                 # removed under `python -O` and carry no message.
                 if len(parts) != 3:
                     raise ValueError(
                         'expected 3 fields, got {}'.format(len(parts)))
                 tag = parts[0].lower()
                 if tag not in self.allowed_tags:
                     raise ValueError(
                         'unknown tag "{}"'.format(parts[0]))
                 self.mentions[tag].append(Interval(*parts[1:]))
             except Exception as e:
                 # Sorted so the hint is the same on every run (set
                 # iteration order is not stable across interpreters).
                 line_descr = '[{}] [START_SYMBOL_INDEX] [LENGTH]'.format(
                     '/'.join(sorted(self.allowed_tags)))
                 raise Exception(
                     'Error: "{}", line {}.\nExpected: {}\nReceived: {}\nDetails: {}'.format(
                         filename, index, line_descr, ' '.join(parts), str(e)))
Esempio n. 3
0
    def load(self, filename):
        """Read blank-line-separated blocks from `filename` into ``self.facts``.

        Every line is passed through ``normalize``. Consecutive non-empty
        normalized lines form one block; each block (its lines joined, every
        line newline-terminated) is handed to ``Fact.fromTest`` and the
        result is appended to ``self.facts``. A trailing block that is not
        followed by an empty line is flushed as well.
        """
        self.facts = []

        with safeOpen(filename) as stream:
            # Newline-terminated lines of the block currently being read.
            pending = []
            for raw_line in stream:
                text = normalize(raw_line)
                if len(text) != 0:
                    pending.append(text + '\n')
                    continue
                # Empty line: close the current block, if there is one.
                if pending:
                    self.facts.append(Fact.fromTest(''.join(pending)))
                    pending = []
            # The file may end without a terminating empty line.
            if pending:
                self.facts.append(Fact.fromTest(''.join(pending)))
Esempio n. 4
0
    def load(self, filename):
        """Populate ``self.facts`` from the blocks stored in `filename`.

        Lines are normalized with ``normalize``; an empty normalized line
        ends the current block. Each non-empty block is converted with
        ``Fact.fromTest`` (receiving the block's lines, each followed by a
        newline) and appended to ``self.facts``.
        """
        self.facts = []

        def emit(chunk):
            # Convert one accumulated block; empty blocks are ignored.
            if len(chunk) > 0:
                self.facts.append(Fact.fromTest(chunk))

        with safeOpen(filename) as src:
            chunk = ''
            for raw_line in src:
                cleaned = normalize(raw_line)
                if len(cleaned) > 0:
                    chunk = chunk + cleaned + '\n'
                else:
                    # Separator line: flush whatever was collected so far.
                    emit(chunk)
                    chunk = ''
            # Flush the last block when the file lacks a trailing separator.
            emit(chunk)
Esempio n. 5
0
    def load(self, filename):
        """Read blank-line-separated blocks from `filename` into ``self.entities``.

        Each line is stripped of surrounding spaces, tabs, and line breaks.
        Runs of non-empty stripped lines form a block; every block (lines
        joined, each newline-terminated) is passed to ``Entity.fromTest``
        and the result is appended to ``self.entities``. A final block
        without a following empty line is also processed.
        """
        self.entities = []

        with safeOpen(filename) as stream:
            # Newline-terminated lines of the block currently being read.
            pending = []
            for raw_line in stream:
                text = raw_line.strip(' \t\n\r')
                if len(text) != 0:
                    pending.append(text + '\n')
                    continue
                # Empty line: close the current block, if there is one.
                if pending:
                    self.entities.append(Entity.fromTest(''.join(pending)))
                    pending = []
            # The file may end without a terminating empty line.
            if pending:
                self.entities.append(Entity.fromTest(''.join(pending)))