def inputAnalysis(self, corpus):
    """Process an incoming corpus and mark duplicates in every message.

    For each row of ``corpus.metas`` the text field is passed through
    ``prepareText`` and handed to ``countPhrases`` together with the row's
    timestamp (interpreted as UTC) and its row index.  The value returned
    by ``countPhrases`` is shown in the widget label and stored, as a list,
    in the extra field; the text field is replaced by the result of
    ``markDuplicates``.  The corpus (or ``None`` when no corpus was
    supplied) is then sent on the output channel.

    Args:
        corpus: The corpus received on the input channel, or ``None``.
    """
    self.resetWidget()
    self.corpus = corpus
    OWWidget.progressBarInit(self)
    try:
        if self.corpus is None:
            self.label.setText("No corpus available")
        else:
            self.fieldIdDate = self.getFieldId(
                self.corpus, self.FIELDNAMEDATE)
            self.fieldIdText = self.getFieldId(
                self.corpus, self.FIELDNAMETEXT)
            self.fieldIdExtra = self.getFieldId(
                self.corpus, self.FIELDNAMEEXTRA)
            metas = self.corpus.metas
            total = len(metas)
            for msgId in range(total):
                date = datetime.datetime.fromtimestamp(
                    metas[msgId][self.fieldIdDate],
                    tz=datetime.timezone.utc)
                text = self.prepareText(
                    str(metas[msgId][self.fieldIdText]))
                duplicateRefStartEnds = self.countPhrases(date, text, msgId)
                self.label.setText(str(duplicateRefStartEnds))
                # Store a copy so later changes to the returned object do
                # not leak into the corpus.
                metas[msgId][self.fieldIdExtra] = list(duplicateRefStartEnds)
                metas[msgId][self.fieldIdText] = self.markDuplicates(
                    text, duplicateRefStartEnds)
                OWWidget.progressBarSet(self, 100 * (msgId + 1) / total)
    finally:
        # Every progressBarInit must be paired with progressBarFinished,
        # otherwise the widget keeps showing a running progress bar (this
        # also covers the "no corpus" branch and any exception above).
        OWWidget.progressBarFinished(self)
    self.Outputs.corpus.send(self.corpus)
def inputAnalysis(self, corpus):
    """Process an incoming corpus and store an average weight per message.

    For each row of ``corpus.metas`` the text field is passed through
    ``prepareText`` and then ``daap``; the ``np.mean`` of the values
    returned by ``daap`` is written to the extra field of that row.  The
    corpus (or ``None`` when no corpus was supplied) is then sent on the
    output channel.

    Args:
        corpus: The corpus received on the input channel, or ``None``.
    """
    self.resetWidget()
    self.corpus = corpus
    OWWidget.progressBarInit(self)
    try:
        if self.corpus is None:
            self.label.setText("No corpus available")
        else:
            self.fieldIdExtra = self.getFieldId(
                self.corpus, self.FIELDNAMEEXTRA)
            self.fieldIdText = self.getFieldId(
                self.corpus, self.FIELDNAMETEXT)
            metas = self.corpus.metas
            total = len(metas)
            for msgId in range(total):
                text = self.prepareText(
                    str(metas[msgId][self.fieldIdText]))
                averageWeights = np.mean(self.daap(text))
                # NOTE(review): a new OWFeatureConstructor is created for
                # every message and immediately discarded; this looks
                # accidental -- confirm it has no needed side effect, then
                # remove it.
                owfeatureconstructor.OWFeatureConstructor()
                metas[msgId][self.fieldIdExtra] = averageWeights
                # Report progress as a percentage of processed rows; the
                # previous code passed the corpus length, which does not
                # track progress.
                OWWidget.progressBarSet(self, 100 * (msgId + 1) / total)
    finally:
        # Every progressBarInit must be paired with progressBarFinished,
        # otherwise the widget keeps showing a running progress bar.
        OWWidget.progressBarFinished(self)
    self.Outputs.corpus.send(self.corpus)