コード例 #1
0
    def _outputForm(self, page, p):
        """Write the form fields of one page, with bounding boxes, to a CSV file.

        Each row holds the key text and confidence, the value text and
        confidence, and the field's bounding box (Top, Height, Width, Left).
        A missing key or value contributes two empty strings so the columns
        stay aligned.

        Args:
            page: Object exposing ``form.fields``; each field provides ``key``,
                ``value`` and a ``boundingBox`` mapping.
            p: Page identifier interpolated into the output file name.
        """
        boxSides = ("Top", "Height", "Width", "Left")
        rows = []
        for field in page.form.fields:
            keyPart = field.key
            keyCells = [keyPart.text, keyPart.confidence] if keyPart else ["", ""]

            valuePart = field.value
            valueCells = ([valuePart.text, valuePart.confidence]
                          if valuePart else ["", ""])

            box = field.boundingBox
            boxCells = [box[side] for side in boxSides]

            rows.append(keyCells + valueCells + boxCells)

        header = [
            'Key', 'KeyConfidence', 'Value', 'ValueConfidence', "Top",
            "Height", "Width", "Left"
        ]
        target = "{}-page-{}-forms.csv".format(self.fileName, p)
        FileHelper.writeCSV(target, header, rows)
コード例 #2
0
    def _generateInsightsPerDocument(self, page, p, insights, medicalInsights, translate, ta, tma, tt):
        """Run the requested text analyses on one page and write the results.

        The page text is processed in consecutive chunks of at most ``maxLen``
        characters. Each chunk is handed to the enabled analysers together with
        its starting offset in the full page text; the accumulated results are
        then written to per-page output files.

        Args:
            page: Object exposing the page's full text as ``page.text``.
            p: Page identifier interpolated into the output file names.
            insights: When truthy, collect sentiment, syntax, entities and key
                phrases via ``self._insights`` and write them as CSV files.
            medicalInsights: When truthy, collect medical entities and PHI via
                ``self._medicalInsights`` and write them as CSV / JSON.
            translate: When truthy, translate each chunk with
                ``tt.getTranslation`` and write the joined result as text.
            ta: Handle forwarded to ``self._insights``.
            tma: Handle forwarded to ``self._medicalInsights``.
            tt: Translator object providing ``getTranslation(text)``.
        """
        # Chunk size in characters. Presumably chosen to stay below the
        # request-size limits of the downstream services -- TODO confirm.
        maxLen = 2000

        text = page.text
        sl = len(text)

        sentiment = []
        syntax = []
        entities = []
        keyPhrases = []
        medicalEntities = []
        phi = []
        translatedChunks = []

        for start in range(0, sl, maxLen):
            # Slicing clamps at the end of the string, so the last chunk may
            # be shorter than maxLen.
            subText = text[start:start + maxLen]

            # Pass the chunk, not the whole page text: the original forwarded
            # ``text`` here, which defeated the chunking and re-analysed the
            # full page once per chunk.
            if insights:
                self._insights(start, subText, sentiment, syntax, entities, keyPhrases, ta)

            if medicalInsights:
                self._medicalInsights(start, subText, medicalEntities, phi, tma)

            if translate:
                translatedChunks.append(tt.getTranslation(subText) + "\n")

        if insights:
            FileHelper.writeCSV("{}-page-{}-insights-sentiment.csv".format(self.fileName, p),
                            ["Sentiment"], sentiment)
            FileHelper.writeCSV("{}-page-{}-insights-entities.csv".format(self.fileName, p),
                            ["Type", "Text", "Score", "BeginOffset", "EndOffset"], entities)
            FileHelper.writeCSV("{}-page-{}-insights-syntax.csv".format(self.fileName, p),
                            ["PartOfSpeech-Tag", "PartOfSpeech-Score", "Text", "BeginOffset", "EndOffset"], syntax)
            FileHelper.writeCSV("{}-page-{}-insights-keyPhrases.csv".format(self.fileName, p),
                            ["Text", "Score", "BeginOffset", "EndOffset"], keyPhrases)

        if medicalInsights:
            FileHelper.writeCSV("{}-page-{}-medical-insights-entities.csv".format(self.fileName, p),
                            ["Text", "Type", "Category", "Score", "BeginOffset", "EndOffset"], medicalEntities)

            FileHelper.writeToFile("{}-page-{}-medical-insights-phi.json".format(self.fileName, p), json.dumps(phi))

        if translate:
            # Each chunk's translation is followed by a newline, as before.
            translation = "".join(translatedChunks)
            FileHelper.writeToFile("{}-page-{}-text-translation.txt".format(self.fileName, p), translation)
コード例 #3
0
 def _outputWords(self, page, p):
     """Write every word of one page to a CSV file.

     Each row holds the word's id and its text; a word whose text is falsy
     is written with an empty string instead.

     Args:
         page: Object exposing ``lines``; each line exposes ``words``.
         p: Page identifier interpolated into the output file name.
     """
     rows = [
         [word.id, word.text if word.text else ""]
         for line in page.lines
         for word in line.words
     ]
     header = ['Word-Id', 'Word-Text']
     target = "{}-page-{}-words.csv".format(self.fileName, p)
     FileHelper.writeCSV(target, header, rows)
コード例 #4
0
ファイル: og.py プロジェクト: yoditgetahun/decevals
 def _outputForm(self, page, p):
     """Write the form fields of a page to a CSV file.

     Each row holds the key text and confidence followed by the value text
     and confidence. A missing key or value contributes two empty strings.

     Args:
         page: Object exposing ``form.fields``; each field provides ``key``
             and ``value``.
         p: Page identifier. It is not used: the output file name contains
             only ``self.fileName``.
     """
     # NOTE(review): the file name has no page component, so calls for
     # different pages target the same path -- confirm this is intended.

     def cellsFor(part):
         # Text and confidence of a key/value, or blanks when it is absent.
         if part:
             return [part.text, part.confidence]
         return ["", ""]

     rows = []
     for field in page.form.fields:
         keyCells = cellsFor(field.key)
         valueCells = cellsFor(field.value)
         rows.append(keyCells + valueCells)

     header = ['Key', 'KeyConfidence', 'Value', 'ValueConfidence']
     target = "{}-forms.csv".format(self.fileName)
     FileHelper.writeCSV(target, header, rows)
コード例 #5
0
 def _outputFormTranslate(self, page, p):
     """Write the translated form fields of one page to a CSV file.

     Key and value texts are passed through ``getTranslation`` of a
     ``TextTranslater`` created here; confidences are written unchanged.
     A missing key or value contributes two empty strings.

     Args:
         page: Object exposing ``form.fields``; each field provides ``key``
             and ``value``.
         p: Page identifier interpolated into the output file name.
     """
     # Presumably (source language, target language, region) -- TODO confirm
     # against the TextTranslater constructor.
     translator = TextTranslater('auto', 'en', 'us-east-1')

     rows = []
     for field in page.form.fields:
         row = []
         for part in (field.key, field.value):
             if part:
                 row.extend([translator.getTranslation(part.text),
                             part.confidence])
             else:
                 row.extend(["", ""])
         rows.append(row)

     header = ['Key', 'KeyConfidence', 'Value', 'ValueConfidence']
     target = "{}-page-{}-forms-translated.csv".format(self.fileName, p)
     FileHelper.writeCSV(target, header, rows)