Exemple #1
0
 def __init__(self, eps, minPts, filePath):
     """Set up the clustering state and open the MongoDB connector.

     Args:
         eps: Kept on the instance as ``self.eps``. It was previously
             accepted but discarded. (Presumably the DBSCAN neighbourhood
             radius -- confirm against the rest of the class.)
         minPts: Kept on the instance as ``self.minPts``.
         filePath: Kept on the instance as ``self.filePath``.
     """
     self.eps = eps
     self.filePath = filePath
     self.minPts = minPts
     # Label counter starts at zero.
     self.label = 0
     # Maps a key to a list; missing keys start out as an empty list.
     self.pixelLabels = collections.defaultdict(list)
     self.visitedPixels = []
     self.mongoConnectInstance = mongoConnect.MongoDBConnector(
         "QuickDBScanDB")
Exemple #2
0
    def __init__(self, filePath):
        """Load an image and flatten it into ``self.pixelList``.

        Every entry of ``self.pixelList`` is ``[i, j, img[i][j]]``: the
        index along the first image axis, the index along the second axis,
        and the pixel value at that position (a numpy array). OpenCV returns
        colour images with the channels in B, G, R order, not R, G, B.

        Args:
            filePath: Path of the image file passed to ``cv2.imread``.

        Raises:
            OSError: If OpenCV could not read the file. ``cv2.imread``
                signals this by returning ``None`` instead of raising.
        """
        img = cv2.imread(filePath, cv2.IMREAD_COLOR)
        if img is None:
            # Fail with a clear message rather than a TypeError from
            # iterating over None further down.
            raise OSError("cv2.imread could not read image: %r" % (filePath,))

        self.pixelList = [
            [i, j, pixel]
            for i, row in enumerate(img)
            for j, pixel in enumerate(row)
        ]

        self.mongoConnectInstance = mongoConnect.MongoDBConnector(
            "QuickDBScanDB")
Exemple #3
0
    def __init__(self, eps, dataset):
        """Store the parameters and start from an empty result collection.

        Args:
            eps: Kept on the instance as ``self.eps``.
            dataset: Kept on the instance as ``self.dataset``.
        """
        self.eps, self.dataset = eps, dataset

        # Number of dimensions.
        self.no_dims = 2

        connector = mongoConnect.MongoDBConnector("QuickDBScanDB")
        self.mongoConnectInstance = connector
        # Clean the collection so earlier runs leave nothing behind.
        connector.dropCollection("kdTreeDBSCAN")
Exemple #4
0
    def __init__(self, eps, pivotChoosingStrategy):
        """Store the parameters and start from an empty result collection.

        Args:
            eps: Kept on the instance as ``self.eps`` (an int, according to
                the original comment).
            pivotChoosingStrategy: Pivot choosing strategy, where
                1 = corner and 2 = random.
        """
        self.eps = eps
        # Number of dimensions.
        self.no_dims = 2
        self.pivotChoosingStrategy = pivotChoosingStrategy

        connector = mongoConnect.MongoDBConnector("QuickDBScanDB")
        self.mongoConnectInstance = connector
        # Clean the collection so earlier runs leave nothing behind.
        connector.dropCollection("quickDBSCAN")
Exemple #5
0
 def createEpsChains(self):
     """Upsert every point/neighbour pair into the ``kdTreeDBSCAN`` collection.

     Opens a new ``MongoDBConnector`` for "QuickDBScanDB", then queries
     ``self.kdIndex`` against itself with radius ``self.eps``. For each
     point of ``self.dataset`` and each of its neighbours,
     ``self.upsertPixelValue`` is called twice with the same
     ``[neighbour, point]`` payload: first filtered on the neighbour's
     coordinates (with an extra ``False`` argument), then filtered on the
     point's own coordinates.

     NOTE(review): ``self.kdIndex`` is presumably a scipy.spatial KD-tree
     and ``self.dataset`` a numpy array (it is indexed with a list of
     indices) -- confirm against the rest of the class.
     """
     self.mongoConnectInstance = mongoConnect.MongoDBConnector(
         "QuickDBScanDB")
     neighbourhoods = self.kdIndex.query_ball_tree(self.kdIndex, self.eps)
     for pointIdx in range(len(self.dataset)):
         for neighbour in self.dataset[neighbourhoods[pointIdx]]:
             # Only the first two components of each point are used.
             nx, ny = neighbour[0], neighbour[1]
             px, py = self.dataset[pointIdx][0], self.dataset[pointIdx][1]

             # Match an empty bucket or the bucket holding the neighbour.
             self.upsertPixelValue(
                 'kdTreeDBSCAN',
                 {"$or": [{"bucket": []}, {"bucket": [nx, ny]}]},
                 [[nx, ny], [px, py]],
                 False)
             # Match an empty bucket or the bucket holding the point itself.
             self.upsertPixelValue(
                 'kdTreeDBSCAN',
                 {"$or": [{"bucket": []}, {"bucket": [px, py]}]},
                 [[nx, ny], [px, py]])
import numpy as np
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.corpus import stopwords
import mongoConnect
from nltk.tokenize import RegexpTokenizer

# Train one Doc2Vec model per pre-processed token field of the stored
# documents. This excerpt ends right after `X = []`; whatever consumes the
# trained model is not visible here.

# Project connector bound to the 'ARXIVRobert6Clusters' database.
mongoDb = mongoConnect.MongoDBConnector('ARXIVRobert6Clusters')

# Document field names; each is used below both as a projection key and as
# the key holding a document's tokens.
stemmers = ["porterStemmedWords", "lancasterStemmedWords", "lemmatizedWords"]

# NLTK English stop words, as a set for fast membership tests.
stop_words = set(stopwords.words('english'))

for stemmer in stemmers:

	# Presumably (collection, query filter, projection) -- confirm against
	# mongoConnect. "_id" and "category" are requested but not read in the
	# visible part of the loop.
	arxivRecordsCursor = mongoDb.getRecords("documents", {}, {"_id":1, stemmer: 1, "category": 1})

	# One stop-word-free token list per record, in cursor order.
	docs = []

	for document in arxivRecordsCursor:
		# Assumes document[stemmer] is an iterable of token strings -- TODO confirm.
		documentWithoutStop = [w for w in document[stemmer] if not w in stop_words]
		docs.append(documentWithoutStop)

	# Each document is tagged with its position in `docs`.
	taggedDocuments = [TaggedDocument(doc, [i]) for i, doc in enumerate(docs)]
	# 16-dimensional vectors, context window of 3, words seen fewer than
	# 2 times are ignored, 4 worker threads.
	model = Doc2Vec(vector_size=16, window=3, min_count=2, workers=4, epochs=200)

	model.build_vocab(taggedDocuments)

	# epochs=200 is passed again here in addition to the constructor.
	model.train(taggedDocuments, total_examples=len(taggedDocuments), epochs=200)

	# Not filled within this excerpt; presumably populated by code that follows.
	X = []
Exemple #7
0
 def __init__(self, eps):
     """Store ``eps``, open the MongoDB connector and reset the pair list.

     Args:
         eps: Kept on the instance as ``self.eps`` (an int, according to
             the original comment).
     """
     self.eps = eps
     connector = mongoConnect.MongoDBConnector("QuickDBScanDB")
     self.mongoConnectInstance = connector
     # Starts empty.
     self.allPairs = []