Esempio n. 1
0
    def start(self):
        """Run the interactive labeling loop over the not-yet-labeled messages.

        Starting after ``self.labeled_num_lines``, each remaining line of
        ``<self.dataset>.json`` is parsed as one JSON object, its ``body`` is
        passed to an ``Analyzer`` and the analyzer's guess is displayed.  The
        user then either presses Enter to accept the guess, types one of the
        keys of ``self.labels`` to override it, or types ``q`` to stop.

        Every confirmed label is stored under ``json_object['label']`` and
        handed to ``Utilities.write_json`` together with the path
        ``<self.dataset>.labeled.json``; ``self.labeled_num_lines`` is then
        incremented, so the counter always reflects the number of messages
        processed so far.

        Returns:
            None.  The method returns early (without writing the current
            message) as soon as the user enters ``q``.
        """
        analyzer = Analyzer('id')

        # Build the human-readable list of choices shown in the prompt.  When
        # the shortcut key occurs inside the label name it is highlighted in
        # place; otherwise the highlighted key is prepended to the name.
        options = []
        for key, name in self.labels.items():
            opt = '[{}]'.format(Utilities.get_colored_text(name, key))
            if key in name:
                options.append(name.replace(key, opt))
            else:
                options.append("{} {}".format(opt, name))
        choices = ', '.join(options)

        # Both paths depend only on self.dataset, so compute them once.
        source_path = self.dataset + '.json'
        labeled_path = self.dataset + '.labeled.json'

        while self.labeled_num_lines < self.original_num_lines:
            # linecache line numbers are 1-based.
            line = self.labeled_num_lines + 1

            # linecache provides random access to lines in (large) text files
            raw_json = linecache.getline(source_path, line)
            json_object = json.loads(raw_json)
            # The analyzer returns a 3-tuple; the middle element is not used
            # here.  NOTE(review): `label` is later passed to
            # Utilities.score_to_label, so it is presumably a score -- confirm.
            (label, _disp, message) = analyzer.analyze(json_object['body'])

            print(Utilities.get_colored_text('head', '--- Labeling message {} (ID: {}) ---'.format(line, json_object['id'])))
            print(message + '\n')
            print('Guess: {}'.format(Utilities.get_colored_text(label)))

            # Keep prompting until the answer is empty (accept the guess) or
            # a known label key.  'q' aborts immediately, even if it also
            # happens to be a label key.
            choice = '?'
            while choice != '' and choice not in self.labels:
                choice = raw_input('Label (Enter to confirm, or {}): '.format(choices))
                if choice == 'q':
                    return

            # Compare by value, not identity: `choice is not ''` only worked
            # by accident of string interning.
            if choice:
                text = self.labels[choice]
            else:
                text = Utilities.score_to_label(label)
            print('You entered: {}\n'.format(Utilities.get_colored_text(text, text)))

            json_object['label'] = text
            # NOTE(review): the ["id", "label"] argument presumably selects the
            # fields to write -- confirm against Utilities.write_json.
            Utilities.write_json(labeled_path, json_object, ["id", "label"])
            self.labeled_num_lines += 1