def _outputForm(self, page, p):
    """Write the page's form fields, with their bounding boxes, to a CSV file.

    One row is produced per entry in ``page.form.fields``: key text and
    confidence, value text and confidence, then the ``Top``, ``Height``,
    ``Width`` and ``Left`` entries of the field's bounding box. A missing key
    or value contributes two empty strings so the columns stay aligned.

    Args:
        page: Page object exposing ``form.fields``.
        p: Page identifier embedded in the output file name.
    """
    # NOTE(review): another `_outputForm` is defined later in this file; if
    # both belong to the same class the later definition replaces this one.
    header = [
        'Key', 'KeyConfidence', 'Value', 'ValueConfidence',
        "Top", "Height", "Width", "Left",
    ]

    rows = []
    for field in page.form.fields:
        key = field.key
        value = field.value
        box = field.boundingBox

        row = [key.text, key.confidence] if key else ["", ""]
        row.extend([value.text, value.confidence] if value else ["", ""])
        row.extend([box["Top"], box["Height"], box["Width"], box["Left"]])
        rows.append(row)

    outputName = "{}-page-{}-forms.csv".format(self.fileName, p)
    FileHelper.writeCSV(outputName, header, rows)
def _generateInsightsPerDocument(self, page, p, insights, medicalInsights, translate, ta, tma, tt):
    """Run the enabled text analyses over one page and write the results to files.

    The page text is processed in consecutive chunks of at most ``maxLen``
    characters. Each enabled analysis runs once per chunk, and the results of
    all chunks are accumulated before the output files are written. An empty
    page text produces empty output files for the enabled analyses.

    Args:
        page: Page object; only ``page.text`` is read here.
        p: Page identifier embedded in every output file name.
        insights: When truthy, call ``self._insights`` for each chunk and
            write the sentiment, entities, syntax and key-phrase CSV files.
        medicalInsights: When truthy, call ``self._medicalInsights`` for each
            chunk and write the medical entities CSV and the PHI JSON file.
        translate: When truthy, translate each chunk with ``tt`` and write the
            concatenated translation to a text file.
        ta: Passed through unchanged to ``self._insights``.
        tma: Passed through unchanged to ``self._medicalInsights``.
        tt: Object whose ``getTranslation(text)`` returns the translated
            string for ``text``.
    """
    maxLen = 2000

    text = page.text
    textLen = len(text)

    sentiment = []
    syntax = []
    entities = []
    keyPhrases = []
    medicalEntities = []
    phi = []
    translatedChunks = []

    start = 0
    while start < textLen:
        end = min(start + maxLen, textLen)
        subText = text[start:end]

        # Hand each helper only the current chunk. Passing the whole page
        # text on every iteration would defeat the chunking and repeat the
        # same results once per chunk. `start` is supplied alongside the
        # chunk, presumably so the helper can shift chunk-relative offsets
        # back to page positions -- confirm against the helpers.
        if insights:
            self._insights(start, subText, sentiment, syntax, entities, keyPhrases, ta)

        if medicalInsights:
            self._medicalInsights(start, subText, medicalEntities, phi, tma)

        if translate:
            # Each translated chunk is terminated by a newline in the output.
            translatedChunks.append(tt.getTranslation(subText) + "\n")

        start = end

    if insights:
        FileHelper.writeCSV("{}-page-{}-insights-sentiment.csv".format(self.fileName, p),
                            ["Sentiment"], sentiment)
        FileHelper.writeCSV("{}-page-{}-insights-entities.csv".format(self.fileName, p),
                            ["Type", "Text", "Score", "BeginOffset", "EndOffset"], entities)
        FileHelper.writeCSV("{}-page-{}-insights-syntax.csv".format(self.fileName, p),
                            ["PartOfSpeech-Tag", "PartOfSpeech-Score", "Text", "BeginOffset", "EndOffset"], syntax)
        FileHelper.writeCSV("{}-page-{}-insights-keyPhrases.csv".format(self.fileName, p),
                            ["Text", "Score", "BeginOffset", "EndOffset"], keyPhrases)

    if medicalInsights:
        FileHelper.writeCSV("{}-page-{}-medical-insights-entities.csv".format(self.fileName, p),
                            ["Text", "Type", "Category", "Score", "BeginOffset", "EndOffset"], medicalEntities)
        FileHelper.writeToFile("{}-page-{}-medical-insights-phi.json".format(self.fileName, p),
                               json.dumps(phi))

    if translate:
        FileHelper.writeToFile("{}-page-{}-text-translation.txt".format(self.fileName, p),
                               "".join(translatedChunks))
def _outputWords(self, page, p):
    """Write the id and text of every word on the page to a CSV file.

    Words are emitted in the order they appear while walking ``page.lines``
    and, within each line, ``line.words``. A word whose text is falsy is
    written with an empty string in the text column.

    Args:
        page: Page object exposing ``lines``, each of which exposes ``words``.
        p: Page identifier embedded in the output file name.
    """
    header = ['Word-Id', 'Word-Text']
    rows = [
        [word.id, word.text or ""]
        for line in page.lines
        for word in line.words
    ]

    outputName = "{}-page-{}-words.csv".format(self.fileName, p)
    FileHelper.writeCSV(outputName, header, rows)
def _outputForm(self, page, p):
    """Write the page's form fields (key/value text and confidence) to a CSV file.

    One row is produced per entry in ``page.form.fields``. A missing key or
    value contributes two empty strings so the four columns stay aligned.

    Args:
        page: Page object exposing ``form.fields``.
        p: Page identifier. Not used by this method.
    """
    # NOTE(review): `p` is unused and the file name has no page component, so
    # calls for different pages all target the same "<fileName>-forms.csv".
    # Confirm this is intended. An earlier `_outputForm` also appears in this
    # file; if both belong to the same class, this later one takes effect.
    header = ['Key', 'KeyConfidence', 'Value', 'ValueConfidence']

    def textAndConfidence(part):
        # Two cells for one side of the field, blank when that side is absent.
        if part:
            return [part.text, part.confidence]
        return ["", ""]

    rows = [
        textAndConfidence(field.key) + textAndConfidence(field.value)
        for field in page.form.fields
    ]

    outputName = "{}-forms.csv".format(self.fileName)
    FileHelper.writeCSV(outputName, header, rows)
def _outputFormTranslate(self, page, p):
    """Write the page's form fields to a CSV file with key/value text translated.

    Each key and value text is passed through ``getTranslation`` of a
    ``TextTranslater`` built with the fixed arguments ``'auto'``, ``'en'`` and
    ``'us-east-1'``. Confidence values are written unchanged. A missing key or
    value contributes two empty strings and triggers no translation call.

    Args:
        page: Page object exposing ``form.fields``.
        p: Page identifier embedded in the output file name.
    """
    # NOTE(review): the arguments look like (source language, target
    # language, region) -- confirm against TextTranslater.
    translator = TextTranslater('auto', 'en', 'us-east-1')
    header = ['Key', 'KeyConfidence', 'Value', 'ValueConfidence']

    def translatedPair(part):
        # Two cells for one side of the field, blank when that side is absent.
        if part:
            return [translator.getTranslation(part.text), part.confidence]
        return ["", ""]

    rows = []
    for field in page.form.fields:
        # Key is handled before value, so translation calls keep that order.
        row = translatedPair(field.key)
        row.extend(translatedPair(field.value))
        rows.append(row)

    outputName = "{}-page-{}-forms-translated.csv".format(self.fileName, p)
    FileHelper.writeCSV(outputName, header, rows)