def getClusterForDocument(self, document):
     """Return the id of the best-scoring candidate cluster, or None.

     Calls UtilityMethods.updatePhraseTextAndDimensionsMap with the document
     and ``self.clustering_settings``, sets the document's signature, and
     pools the ids returned by ``getNearestDocuments`` on every signature
     permutation. Each pooled id is used as a key into ``self.clusters`` and
     scored with that cluster's ``cosineSimilarity`` against the document.

     The top-scoring id is returned only when its score is at least
     ``self.thresholdForDocumentToBeInACluster``; in every other case
     (including an empty candidate pool) the result is None.
     """
     UtilityMethods.updatePhraseTextAndDimensionsMap(
         document, self.phraseTextAndDimensionMap, **self.clustering_settings)
     document.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)

     # Pool the candidates proposed by each permutation into a single set.
     candidateIds = set()
     for permutation in self.signaturePermutations:
         candidateIds.update(permutation.getNearestDocuments(document))
     if not candidateIds:
         return None

     scoredCandidates = [
         (clusterId, self.clusters[clusterId].cosineSimilarity(document))
         for clusterId in candidateIds]
     bestId, bestScore = max(scoredCandidates, key=itemgetter(1))
     if bestScore >= self.thresholdForDocumentToBeInACluster:
         return bestId
     return None
 def getNearestDocument(self, document):
     """Return the id of the best-scoring candidate document, or None.

     Calls UtilityMethods.updatePhraseTextAndDimensionsMap with the document
     and ``self.settings``, sets the document's signature, and pools the ids
     returned by ``getNearestDocuments`` on every signature permutation.
     Each pooled id is looked up in ``self.documentIdToDocumentMap`` and
     scored with ``cosineSimilarity`` against the query document.

     The best ``(docId, score)`` pair (or None when there are no candidates)
     is printed to stdout before the threshold test. The id is returned only
     when the score is at least ``self.nearestNeighborThreshold``.
     """
     UtilityMethods.updatePhraseTextAndDimensionsMap(
         document, self.phraseTextAndDimensionMap, **self.settings)
     document.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)

     # Pool the candidates proposed by each permutation into a single set.
     candidateIds = set()
     for permutation in self.signaturePermutations:
         candidateIds.update(permutation.getNearestDocuments(document))

     bestMatch = None
     if candidateIds:
         scoredCandidates = [
             (docId, self.documentIdToDocumentMap[docId].cosineSimilarity(document))
             for docId in candidateIds]
         bestMatch = max(scoredCandidates, key=itemgetter(1))

     # Single parenthesised argument: prints the same text as the
     # Python 2 statement form ``print bestMatch``.
     print(bestMatch)

     if bestMatch is None:
         return None
     bestId, bestScore = bestMatch
     if bestScore >= self.nearestNeighborThreshold:
         return bestId
     return None
 def update(self, newDocument):
     """Store ``newDocument`` and (re)index it in every signature permutation.

     Calls UtilityMethods.updatePhraseTextAndDimensionsMap with the document
     and ``self.settings``, records the document in
     ``self.documentIdToDocumentMap`` under ``newDocument.docId`` and sets
     its signature. If a document was already stored under that id, it is
     removed from each permutation before the new one is added.
     """
     UtilityMethods.updatePhraseTextAndDimensionsMap(newDocument, self.phraseTextAndDimensionMap, **self.settings)
     # Fetch the previous entry before it is overwritten on the next line.
     currentDocument = self.documentIdToDocumentMap.get(newDocument.docId, None)
     self.documentIdToDocumentMap[newDocument.docId] = newDocument
     newDocument.setSignatureUsingVectorPermutations(self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)
     for permutation in self.signaturePermutations:
         # Identity test against None: an equality operator would dispatch to
         # any comparison methods the document type defines.
         if currentDocument is not None:
             permutation.removeDocument(currentDocument)
         permutation.addDocument(newDocument)
Esempio n. 4
0
 def test_updatePhraseTextAndDimensionsMap_PhraseMapHasLesserDimensions(
         self):
     """With 'dimensions' set to 4, the forward map must equal
     ``self.finalPhraseToIdMap`` after the update call."""
     # ``settings`` is not local to this test; apply the override to a copy
     # so the value 4 does not leak into tests that run afterwards.
     localSettings = dict(settings, dimensions=4)
     UtilityMethods.updatePhraseTextAndDimensionsMap(
         self.phraseVector, self.phraseTextAndDimensionMap, **localSettings)
     self.assertEqual(
         self.finalPhraseToIdMap,
         self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
Esempio n. 5
0
 def test_updatePhraseTextAndDimensionsMap_PhraseMapHasMaximumDimensions(
         self):
     """With the unmodified ``settings``, the forward map must equal
     ``self.finalPhraseToIdMap`` minus 'streams' and 'highdimensional'."""
     phraseMap = self.phraseTextAndDimensionMap
     UtilityMethods.updatePhraseTextAndDimensionsMap(
         self.phraseVector, phraseMap, **settings)

     # Drop the two phrases that are expected to be absent from the map.
     expectedMap = self.finalPhraseToIdMap
     for absentPhrase in ('streams', 'highdimensional'):
         del expectedMap[absentPhrase]

     actualMap = phraseMap.getMap(TwoWayMap.MAP_FORWARD)
     self.assertEqual(expectedMap, actualMap)
 def update(self, newDocument):
     """Store ``newDocument`` and (re)index it in every signature permutation.

     Calls UtilityMethods.updatePhraseTextAndDimensionsMap with the document
     and ``self.settings``, records the document in
     ``self.documentIdToDocumentMap`` under ``newDocument.docId`` and sets
     its signature. If a document was already stored under that id, it is
     removed from each permutation before the new one is added.
     """
     UtilityMethods.updatePhraseTextAndDimensionsMap(
         newDocument, self.phraseTextAndDimensionMap, **self.settings)
     # Fetch the previous entry before it is overwritten on the next line.
     currentDocument = self.documentIdToDocumentMap.get(
         newDocument.docId, None)
     self.documentIdToDocumentMap[newDocument.docId] = newDocument
     newDocument.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations,
         self.phraseTextAndDimensionMap)
     for permutation in self.signaturePermutations:
         # Identity test against None: an equality operator would dispatch to
         # any comparison methods the document type defines.
         if currentDocument is not None:
             permutation.removeDocument(currentDocument)
         permutation.addDocument(newDocument)
Esempio n. 7
0
 def getClusterForDocument(self, document):
     """Return the id of the best-scoring candidate cluster, or None.

     Calls UtilityMethods.updatePhraseTextAndDimensionsMap with the document
     and ``self.clustering_settings``, sets the document's signature, and
     pools the ids returned by ``getNearestDocuments`` on every signature
     permutation. Each pooled id is used as a key into ``self.clusters`` and
     scored with that cluster's ``cosineSimilarity`` against the document.

     The top-scoring id is returned only when its score is at least
     ``self.thresholdForDocumentToBeInACluster``; otherwise the result is
     None.
     """
     phraseMap = self.phraseTextAndDimensionMap
     UtilityMethods.updatePhraseTextAndDimensionsMap(
         document, phraseMap, **self.clustering_settings)
     document.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations, phraseMap)

     # Union of the candidate ids proposed by all permutations.
     candidateIds = set()
     for permutation in self.signaturePermutations:
         candidateIds.update(permutation.getNearestDocuments(document))
     if not candidateIds:
         return None

     scoredCandidates = []
     for clusterId in candidateIds:
         similarity = self.clusters[clusterId].cosineSimilarity(document)
         scoredCandidates.append((clusterId, similarity))
     bestId, bestScore = max(scoredCandidates, key=itemgetter(1))

     if bestScore < self.thresholdForDocumentToBeInACluster:
         return None
     return bestId
 def getNearestDocument(self, document):
     """Return the id of the best-scoring candidate document, or None.

     Calls UtilityMethods.updatePhraseTextAndDimensionsMap with the document
     and ``self.settings``, sets the document's signature, and pools the ids
     returned by ``getNearestDocuments`` on every signature permutation.
     Each pooled id is looked up in ``self.documentIdToDocumentMap`` and
     scored with ``cosineSimilarity`` against the query document.

     The best ``(docId, score)`` pair (or None when there are no candidates)
     is printed to stdout before the threshold test. The id is returned only
     when the score is at least ``self.nearestNeighborThreshold``.
     """
     phraseMap = self.phraseTextAndDimensionMap
     UtilityMethods.updatePhraseTextAndDimensionsMap(
         document, phraseMap, **self.settings)
     document.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations, phraseMap)

     # Union of the candidate ids proposed by all permutations.
     candidateIds = set()
     for permutation in self.signaturePermutations:
         candidateIds.update(permutation.getNearestDocuments(document))

     bestMatch = None
     if candidateIds:
         knownDocuments = self.documentIdToDocumentMap
         scoredCandidates = [
             (docId, knownDocuments[docId].cosineSimilarity(document))
             for docId in candidateIds]
         bestMatch = max(scoredCandidates, key=itemgetter(1))

     # Single parenthesised argument: prints the same text as the
     # Python 2 statement form ``print bestMatch``.
     print(bestMatch)

     if bestMatch is None:
         return None
     bestId, bestScore = bestMatch
     if bestScore < self.nearestNeighborThreshold:
         return None
     return bestId
Esempio n. 9
0
 def test_updatePhraseTextAndDimensionsMap_PhraseMapHasMaximumDimensions(self):
     """With the unmodified ``settings``, the forward map must equal
     ``self.finalPhraseToIdMap`` minus 'streams' and 'highdimensional'."""
     phraseMap = self.phraseTextAndDimensionMap
     UtilityMethods.updatePhraseTextAndDimensionsMap(self.phraseVector, phraseMap, **settings)

     # Drop the two phrases that are expected to be absent from the map.
     expectedMap = self.finalPhraseToIdMap
     for absentPhrase in ('streams', 'highdimensional'):
         del expectedMap[absentPhrase]

     actualMap = phraseMap.getMap(TwoWayMap.MAP_FORWARD)
     self.assertEqual(expectedMap, actualMap)
Esempio n. 10
0
 def test_updatePhraseTextAndDimensionsMap_PhraseMapHasLesserDimensions(self):
     """With 'dimensions' set to 4, the forward map must equal
     ``self.finalPhraseToIdMap`` after the update call."""
     # ``settings`` is not local to this test; apply the override to a copy
     # so the value 4 does not leak into tests that run afterwards.
     localSettings = dict(settings, dimensions=4)
     UtilityMethods.updatePhraseTextAndDimensionsMap(self.phraseVector, self.phraseTextAndDimensionMap, **localSettings)
     self.assertEqual(self.finalPhraseToIdMap, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))