def test_naivebayes_compare(self):
    """
    Check that NaiveBayes.predict_all returns every known country label as-is.

    The classifier's human_labels are loaded from the 'translate' ->
    'country' entry of ../resource/ocr.json, then each sample name is
    predicted and compared with the input at the same position.

    @raise {Exception} when a prediction differs from its input name
    """
    resource_dir = '../resource/'
    classifier = NaiveBayes()
    settings = Serializer.load_json(os.path.join(resource_dir, 'ocr.json'))
    country_labels = settings['translate']['country']
    classifier.human_labels = country_labels

    expected_names = [
        'ネツァワル王国',
        'カセドリア連合王国',
        'ゲブランド帝国',
        'ホルデイン王国',
        'エルソード王国',
    ]
    print(country_labels)

    predictions = classifier.predict_all(expected_names)
    for index, predicted in enumerate(predictions):
        expected = expected_names[index]
        if expected != predicted:
            message = 'compare x:{0},predict:{1}'.format(expected, predicted)
            raise Exception(message)
class Ranking(object):
    """
    Run OCR on a ranking image and feed the recognized names to a
    NaiveBayes classifier.

    The classifier's human_labels are taken from the OCR engine's
    settings['translate']['country'] entry.
    """

    def __init__(self, config):
        """
        @param {object} config  accepted for interface compatibility;
                                it is not read by this constructor
        """
        self.ocr = OCREngine()
        self.naivebayes = NaiveBayes()
        self.naivebayes.human_labels = self.ocr.settings['translate'][
            'country']

    def create_TemporyFile(self, buffer, verbose=False):
        """
        Write the whole content of buffer to a new named temporary file.

        The file is created with delete=False, so it stays on disk after
        this method returns; the caller is responsible for removing it.

        @param {io.BytesIO} buffer   bytes to write (read via getvalue())
               {bool}       verbose  log the created path at info level
        @return {string} path of the created temporary file
        """
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            temp.write(buffer.getvalue())
            temp_file_name = temp.name
        if verbose:
            logger.info(temp_file_name)
        return temp_file_name

    def getResult(self, src, save_image=False):
        """
        Recognize the image at src and return the resulting document.

        The recognized names are also written, below a '#<timestamp>'
        header line, to ../temp/corpus.txt (opened with mode='a').

        @param {string} src         image source handed to DataProcessor
               {bool}   save_image  output debug image
        @return {OCRDocument} document, or None when the image could not
                              be prepared
        """
        pro = DataProcessor(src, ImageType.RAW, save_image=save_image)
        if pro.prepare() is None:
            logger.error('image error:{0}'.format(src))
            return None

        buffer = pro.tobinary(pro.batch())
        temp_file_name = self.create_TemporyFile(buffer, True)
        try:
            document = self.ocr.recognize(temp_file_name)
        finally:
            # Remove the temporary file even when recognition fails;
            # it was created with delete=False and would otherwise leak.
            os.remove(temp_file_name)

        # Spelled-out equivalent of '%F %T.%f': the C99 shorthands are not
        # guaranteed on every platform. [:-3] trims microseconds to
        # milliseconds.
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        output = '#' + timestamp + '\n'
        output += '\n'.join(document.names()) + '\n'
        with Serializer.open_stream('../temp/corpus.txt', mode='a') as file:
            file.write(output)

        # ocr corpus data -> NaiveBayes classifier
        # TODO: the ranking name swap (document.changeNames with the
        # predicted names) is currently disabled, so the prediction result
        # is not applied to the document. The call is kept as in the
        # original in case predict_all has side effects.
        self.naivebayes.predict_all(document.names())
        document.dump()
        return document