def start(self):
    """Interactively label the not-yet-labeled messages of the dataset.

    Resumes at line ``self.labeled_num_lines + 1`` of ``<dataset>.json``
    (one JSON object per line) and continues until
    ``self.original_num_lines`` lines have been labeled.  For every message
    the analyzer's guess is printed and the user is prompted for a label
    key; an empty answer accepts the guess.  Each labeled object is passed
    to ``Utilities.write_json`` for ``<dataset>.labeled.json`` and the
    progress counter ``self.labeled_num_lines`` is incremented.

    Returns early (without labeling the current message) when the
    confirmed choice is ``'q'``.
    """
    analyzer = Analyzer('id')

    # Build the prompt hint: the key is shown in brackets inside the label
    # name when the name contains it, otherwise in front of the name.
    options = []
    for key, name in self.labels.items():
        opt = '[{}]'.format(Utilities.get_colored_text(name, key))
        if key in name:
            options.append(name.replace(key, opt))
        else:
            options.append("{} {}".format(opt, name))
    choices = ', '.join(options)

    while self.labeled_num_lines < self.original_num_lines:
        line = self.labeled_num_lines + 1
        # linecache provides random access to lines in (large) text files
        raw_json = linecache.getline(self.dataset + '.json', line)
        json_object = json.loads(raw_json)
        # analyze() returns (label, disp, message); disp is not needed here.
        label, _, message = analyzer.analyze(json_object['body'])

        header = '--- Labeling message {} (ID: {}) ---'.format(line, json_object['id'])
        print(Utilities.get_colored_text('head', header))
        print(message + '\n')
        print('Guess: {}'.format(Utilities.get_colored_text(label)))

        # Keep asking until the answer is empty (confirm guess) or a known key.
        choice = '?'
        while choice != '' and choice not in self.labels:
            choice = raw_input('Label (Enter to confirm, or {}): '.format(choices))

        if choice == 'q':
            return

        # Compare by value: identity checks against a string literal
        # ("is not ''") only work by accident of interning.
        if choice != '':
            text = self.labels[choice]
        else:
            # NOTE(review): presumably maps the numeric guess to a label name.
            text = Utilities.score_to_label(label)
        print('You entered: {}\n'.format(Utilities.get_colored_text(text, text)))

        json_object['label'] = text
        Utilities.write_json(self.dataset + '.labeled.json', json_object, ["id", "label"])
        self.labeled_num_lines += 1
def analyze(self, message):
    """Score a message as the average score of its known words.

    The message is tokenized with ``Utilities.split``; every token that is
    a key of ``self.words`` contributes ``self.words[token]`` to the score.
    When ``self.display`` is true, each matched token is also replaced in
    ``message`` by its ``Utilities.get_colored_text`` rendering and
    collected in ``disp``.

    Returns:
        A ``(label, disp, message)`` tuple: ``label`` is the mean score of
        the matched tokens (``0.0`` when none matched), ``disp`` is the
        space-terminated concatenation of the highlighted tokens (empty
        unless ``self.display``), and ``message`` is the possibly
        highlighted message text.
    """
    score = 0
    found = 0
    highlights = []
    # Search offset into message; moved past each inserted highlight so
    # repeated words are matched left to right.
    offset = 0

    for w in Utilities.split(message):
        if w not in self.words:
            continue
        score += self.words[w]
        found += 1
        if not self.display:
            continue

        at = message.lower().find(w, offset)
        if at == -1:
            # The token does not occur verbatim after the offset.  Using -1
            # as a slice index would splice the highlight into the wrong
            # place, so leave the message alone and only report the word.
            highlights.append(Utilities.get_colored_text(self.words[w], w))
            continue

        end = at + len(w)
        d = Utilities.get_colored_text(self.words[w], message[at:end])
        message = message[:at] + d + message[end:]
        offset = at + len(d)
        highlights.append(d)

    disp = "".join(d + " " for d in highlights)
    label = score / float(found) if found != 0 else 0.0
    return (label, disp, message)
def output(self, predictions):
    """Print one line per prediction, formatted to two decimals.

    Each line is ``<group>\\t<prediction><\\tmessage>`` where the group
    prefix (``self.test_group[i]``) is omitted when ``self.group`` equals
    ``"score"`` and the message suffix (the colored ``self.test_data[i]``
    with newlines flattened to spaces) is only added when ``self.display``
    is true.

    Args:
        predictions: Iterable of numeric predictions, positionally aligned
            with ``self.test_data`` and ``self.test_group``.
    """
    # enumerate() instead of xrange(len(...)): works on Python 2 and 3 and
    # still indexes test_data/test_group only when they are actually needed.
    for i, prediction in enumerate(predictions):
        message = ""
        if self.display:
            colored = Utilities.get_colored_text(prediction, self.test_data[i])
            message = "\t" + colored.replace('\n', ' ')

        if self.group != "score":
            g = "{}\t".format(self.test_group[i])
        else:
            g = ""

        print("{}{:.2f}{}".format(g, prediction, message))
def analyze(self, message):
    """Compute the mean word score of a message, optionally highlighting it.

    Tokens come from ``Utilities.split(message)``.  Each token present in
    ``self.words`` adds ``self.words[token]`` to the running score.  If
    ``self.display`` is true, the matched text inside ``message`` is
    replaced by the result of ``Utilities.get_colored_text`` and the same
    highlighted fragment is appended to ``disp``.

    Returns:
        A ``(label, disp, message)`` tuple.  ``label`` is the score sum
        divided by the number of matched tokens, or ``0.0`` if there were
        none.  ``disp`` holds the highlighted tokens, each followed by a
        space (empty when ``self.display`` is false).  ``message`` is the
        input text with highlights inserted where applicable.
    """
    score = 0
    found = 0
    highlights = []
    # Where to resume searching in message; kept behind the last inserted
    # highlight so earlier replacements are never matched again.
    offset = 0

    for w in Utilities.split(message):
        if w not in self.words:
            continue
        score += self.words[w]
        found += 1
        if not self.display:
            continue

        at = message.lower().find(w, offset)
        if at == -1:
            # find() signals "not found" with -1; slicing with it would
            # corrupt the message, so only record the word itself.
            highlights.append(Utilities.get_colored_text(self.words[w], w))
            continue

        end = at + len(w)
        d = Utilities.get_colored_text(self.words[w], message[at:end])
        message = message[:at] + d + message[end:]
        offset = at + len(d)
        highlights.append(d)

    disp = "".join(d + " " for d in highlights)
    label = score / float(found) if found != 0 else 0.0
    return (label, disp, message)