Esempio n. 1
0
    def start(self):
        """Interactively label every message that has not been labeled yet.

        Starting after ``self.labeled_num_lines``, each remaining line of
        ``self.dataset + '.json'`` is parsed as JSON, its ``'body'`` is run
        through an ``Analyzer`` and the resulting guess is printed. The user
        is then prompted until they answer with one of:

        * an empty string (Enter) -- accept the guess, converted with
          ``Utilities.score_to_label``;
        * a key of ``self.labels`` -- use the corresponding label text;
        * ``q`` -- return immediately without labeling the current message.

        The chosen label is stored under ``'label'`` in the JSON object, which
        is passed to ``Utilities.write_json`` for
        ``self.dataset + '.labeled.json'`` together with the field list
        ``["id", "label"]``; ``self.labeled_num_lines`` is then incremented.
        """
        analyzer = Analyzer('id')

        # raw_input only exists on Python 2; fall back to input on Python 3.
        try:
            read_input = raw_input
        except NameError:
            read_input = input

        # Build the prompt hint: each label's key is shown in brackets, either
        # in place inside the label text or prefixed to it.
        options = []
        for k, v in self.labels.items():
            opt = '[{}]'.format(Utilities.get_colored_text(v, k))
            options.append(v.replace(k, opt) if k in v else "{} {}".format(opt, v))
        choices = ', '.join(options)

        while self.labeled_num_lines < self.original_num_lines:
            line = self.labeled_num_lines + 1

            # linecache provides random access to lines in (large) text files
            raw_json = linecache.getline(self.dataset + '.json', line)
            json_object = json.loads(raw_json)
            message = json_object['body']
            (label, _disp, message) = analyzer.analyze(message)

            print(Utilities.get_colored_text('head', '--- Labeling message {} (ID: {}) ---'.format(line, json_object['id'])))
            print(message + '\n')
            # NOTE(review): every other visible call passes two arguments to
            # get_colored_text -- confirm the single-argument form is intended.
            print('Guess: {}'.format(Utilities.get_colored_text(label)))

            # Keep asking until the answer is empty (confirm) or a known key.
            choice = '?'
            while choice != '' and choice not in self.labels:
                choice = read_input('Label (Enter to confirm, or {}): '.format(choices))
                if choice == 'q':
                    return

            # Compare by value: identity checks against a string literal are
            # implementation-dependent.
            if choice != '':
                text = self.labels[choice]
            else:
                text = Utilities.score_to_label(label)
            print('You entered: {}\n'.format(Utilities.get_colored_text(text, text)))

            json_object['label'] = text
            Utilities.write_json(self.dataset + '.labeled.json', json_object, ["id", "label"])
            self.labeled_num_lines += 1
    def analyze(self, message):
        """Score *message* against ``self.words`` and optionally highlight matches.

        Every token produced by ``Utilities.split(message)`` that is a key of
        ``self.words`` adds its value to the running total. When
        ``self.display`` is true, each matched token is additionally wrapped
        with ``Utilities.get_colored_text`` inside the message text.

        Returns:
            A tuple ``(label, disp, message)`` where ``label`` is the mean
            value of the matched tokens (``0.0`` when nothing matched),
            ``disp`` is the highlighted matches each followed by one space
            (empty unless ``self.display`` is true), and ``message`` is the
            message text with the highlights inserted.
        """
        # NOTE(review): an earlier debugging note in this method suggests that
        # Utilities.split raised AttributeError when message was None --
        # callers are presumably expected to pass a string; confirm.
        score = 0
        found = 0
        highlighted = []

        # Offset in ``message`` from which the next token is searched, so
        # that already inserted highlight markup is never matched again.
        cursor = 0
        for w in Utilities.split(message):
            if w not in self.words:
                continue

            weight = self.words[w]
            score += weight
            found += 1
            if not self.display:
                continue

            pos = message.lower().find(w, cursor)
            if pos == -1:
                # The token cannot be located in the (lower-cased) text.
                # Using -1 as an index would splice markup into the wrong
                # place and reset the search offset, so leave the message
                # untouched and only report the token itself.
                highlighted.append(Utilities.get_colored_text(weight, w))
                continue

            d = Utilities.get_colored_text(weight, message[pos:pos + len(w)])
            message = message[:pos] + d + message[pos + len(w):]
            cursor = pos + len(d)
            highlighted.append(d)

        label = score / float(found) if found != 0 else 0.0
        disp = "".join(d + " " for d in highlighted)
        return (label, disp, message)
Esempio n. 3
0
 def output(self, predictions):
     """Print one formatted line per prediction.

     Each line holds the prediction formatted with two decimals. Unless
     ``self.group`` equals ``"score"``, the line is prefixed with the
     matching entry of ``self.test_group`` and a tab. When ``self.display``
     is true, a tab and the matching entry of ``self.test_data`` (passed
     through ``Utilities.get_colored_text``, newlines replaced by spaces)
     are appended.

     Args:
         predictions: sequence of numeric predictions; index ``i`` is also
             used to look up ``self.test_data`` and ``self.test_group``.
     """
     # enumerate replaces the index loop and works on Python 2 and 3 alike.
     for i, prediction in enumerate(predictions):
         message = ""
         if self.display:
             message = "\t" + Utilities.get_colored_text(prediction, self.test_data[i]).replace('\n', ' ')

         g = "{}\t".format(self.test_group[i]) if self.group != "score" else ""
         print("{}{:.2f}{}".format(g, prediction, message))
Esempio n. 4
0
    def analyze(self, message):
        """Compute the average word score of *message*, highlighting matches.

        The message is tokenized with ``Utilities.split``; each token that is
        a key of ``self.words`` contributes its value to the score. If
        ``self.display`` is true, the matched text inside the message is
        replaced by the result of ``Utilities.get_colored_text``.

        Returns:
            ``(label, disp, message)``: ``label`` is the sum of matched values
            divided by the number of matches (``0.0`` without matches),
            ``disp`` concatenates every highlighted match followed by a space
            (empty unless ``self.display`` is true), and ``message`` is the
            text including any inserted highlights.
        """
        score = 0
        found = 0
        shown = []

        # Search offset: advanced past each inserted highlight so markup
        # that was already added is not searched again.
        start_at = 0
        for w in Utilities.split(message):
            if w not in self.words:
                continue

            value = self.words[w]
            score += value
            found += 1
            if not self.display:
                continue

            at = message.lower().find(w, start_at)
            if at == -1:
                # str.find signals "not found" with -1; slicing with it would
                # corrupt the message and rewind the offset. Keep the message
                # as is and list the token itself among the matches.
                shown.append(Utilities.get_colored_text(value, w))
                continue

            d = Utilities.get_colored_text(value, message[at:at + len(w)])
            message = message[:at] + d + message[at + len(w):]
            start_at = at + len(d)
            shown.append(d)

        label = score / float(found) if found != 0 else 0.0
        disp = "".join(d + " " for d in shown)
        return (label, disp, message)