def _outputTablePrettyTranslate(self, page, p, table_format='github'):
    """Write one translated, pretty-printed text file per table on a page.

    Every non-empty cell text is passed through
    ``TextTranslater.getTranslation``; empty cell text is kept unchanged.
    The resulting rows are rendered with ``tabulate`` and handed to
    ``FileHelper.writeToFile`` under the name
    ``<fileName>-page-<p>-table-<index>-tables-pretty-translated.txt``,
    where ``<index>`` is the zero-based position of the table in
    ``page.tables``.

    Args:
        page: Object exposing ``tables``; each table exposes ``rows``, each
            row exposes ``cells``, and each cell exposes ``text``.
        p: Page number, used only to build the output file name.
        table_format: Forwarded to ``tabulate`` as ``tablefmt``.
    """
    # NOTE(review): arguments are presumably (source language, target
    # language, AWS region) -- confirm against TextTranslater.
    translator = TextTranslater('auto', 'en', 'us-east-1')

    for table_index, table in enumerate(page.tables):
        translated_rows = [
            [
                translator.getTranslation(cell.text) if cell.text != "" else cell.text
                for cell in row.cells
            ]
            for row in table.rows
        ]
        rendered = tabulate(translated_rows, tablefmt=table_format)
        output_name = "{}-page-{}-table-{}-tables-pretty-translated.txt".format(
            self.fileName, p, table_index)
        FileHelper.writeToFile(output_name, rendered)
def _outputFormTranslate(self, page, p):
    """Write the translated form fields of a page to a CSV file.

    For every field in ``page.form.fields`` one row is produced with the
    columns ``Key``, ``KeyConfidence``, ``Value`` and ``ValueConfidence``.
    Key and value text are passed through ``TextTranslater.getTranslation``;
    confidences are copied unchanged. A missing key or value contributes two
    empty strings. The rows are handed to ``FileHelper.writeCSV`` under the
    name ``<fileName>-page-<p>-forms-translated.csv``.

    Args:
        page: Object exposing ``form.fields``; each field exposes ``key`` and
            ``value``, which (when truthy) expose ``text`` and ``confidence``.
        p: Page number, used only to build the output file name.
    """
    # NOTE(review): arguments are presumably (source language, target
    # language, AWS region) -- confirm against TextTranslater.
    translator = TextTranslater('auto', 'en', 'us-east-1')

    def translate_text(text):
        # Empty text is passed through untouched, mirroring the empty-cell
        # guard in _outputTablePrettyTranslate. Presumably the translation
        # backend rejects empty input -- confirm against TextTranslater.
        if not text:
            return text
        return translator.getTranslation(text)

    def text_and_confidence(part):
        # A field may lack a key or a value; keep the column layout stable
        # by emitting two empty strings in that case.
        if not part:
            return ["", ""]
        return [translate_text(part.text), part.confidence]

    csv_rows = [
        text_and_confidence(field.key) + text_and_confidence(field.value)
        for field in page.form.fields
    ]

    csv_field_names = ['Key', 'KeyConfidence', 'Value', 'ValueConfidence']
    FileHelper.writeCSV(
        "{}-page-{}-forms-translated.csv".format(self.fileName, p),
        csv_field_names,
        csv_rows)
def generateInsights(self, insights, medicalInsights, translate, awsRegion):
    """Run per-page insight generation over every page of the document.

    Prints a progress message, then returns immediately when
    ``self.document.pages`` is empty or otherwise falsy. Otherwise the
    analyzers are built once and ``self._generateInsightsPerDocument`` is
    called for each page with a page number starting at 1.

    Args:
        insights: Forwarded unchanged to ``_generateInsightsPerDocument``.
        medicalInsights: Forwarded unchanged to
            ``_generateInsightsPerDocument``.
        translate: When truthy, a ``TextTranslater('auto', translate,
            awsRegion)`` is created and forwarded; otherwise ``None`` is
            forwarded as the translator. The value itself is forwarded too.
        awsRegion: Passed to the ``TextAnalyzer``, ``TextMedicalAnalyzer``
            and ``TextTranslater`` constructors.
    """
    print("Generating insights...")

    if not self.document.pages:
        return

    # Build the helpers once and share them across all pages.
    text_analyzer = TextAnalyzer('en', awsRegion)
    medical_analyzer = TextMedicalAnalyzer(awsRegion)
    translator = TextTranslater('auto', translate, awsRegion) if translate else None

    for page_number, page in enumerate(self.document.pages, start=1):
        self._generateInsightsPerDocument(
            page,
            page_number,
            insights,
            medicalInsights,
            translate,
            text_analyzer,
            medical_analyzer,
            translator,
        )