def __init__(self, eps, minPts, filePath):
    """Store the clustering parameters and create the bookkeeping state.

    Args:
        eps: stored as ``self.eps`` so the value passed by the caller is
            not lost (presumably the DBSCAN neighbourhood radius -- confirm
            against the methods that consume it).
        minPts: stored as ``self.minPts``.
        filePath: stored as ``self.filePath``.
    """
    self.eps = eps
    self.minPts = minPts
    self.filePath = filePath
    # Label counter starts at 0.
    self.label = 0
    # Missing keys yield an empty list on first access.
    self.pixelLabels = collections.defaultdict(list)
    self.visitedPixels = []
    self.mongoConnectInstance = mongoConnect.MongoDBConnector("QuickDBScanDB")
def __init__(self, filePath):
    """Load an image and flatten it into a list of per-pixel records.

    Args:
        filePath: path handed to ``cv2.imread``.

    Raises:
        IOError: if OpenCV could not read or decode the file.
    """
    img = cv2.imread(filePath, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising,
        # which would otherwise surface later as an opaque TypeError.
        raise IOError("Could not read image file: {}".format(filePath))
    # One record per pixel: [row index, column index, colour value].
    # The colour value is the numpy array OpenCV holds for that pixel; with
    # IMREAD_COLOR the channel order is BGR.
    self.pixelList = [
        [i, j, pixel]
        for i, row in enumerate(img)
        for j, pixel in enumerate(row)
    ]
    self.mongoConnectInstance = mongoConnect.MongoDBConnector("QuickDBScanDB")
def __init__(self, eps, dataset):
    """Keep the parameters, open the DB handle and reset the collection.

    Args:
        eps: stored as ``self.eps``.
        dataset: stored as ``self.dataset``.
    """
    self.eps, self.dataset = eps, dataset
    # number of dimensions
    self.no_dims = 2
    connector = mongoConnect.MongoDBConnector("QuickDBScanDB")
    self.mongoConnectInstance = connector
    # start from a clean collection
    connector.dropCollection("kdTreeDBSCAN")
def __init__(self, eps, pivotChoosingStrategy):
    """Keep the parameters, open the DB handle and reset the collection.

    Args:
        eps: stored as ``self.eps`` (an int, per the original comment).
        pivotChoosingStrategy: stored as ``self.pivotChoosingStrategy``;
            1 = corner, 2 = random.
    """
    self.eps, self.pivotChoosingStrategy = eps, pivotChoosingStrategy
    # number of dimensions
    self.no_dims = 2
    connector = mongoConnect.MongoDBConnector("QuickDBScanDB")
    self.mongoConnectInstance = connector
    # start from a clean collection
    connector.dropCollection("quickDBSCAN")
def createEpsChains(self):
    """Upsert every (neighbour, point) pair found within ``self.eps``.

    A fresh MongoDB connector replaces ``self.mongoConnectInstance``, then
    ``self.kdIndex`` is queried against itself with radius ``self.eps``
    (presumably a SciPy KD-tree built over ``self.dataset`` -- confirm).
    For each point and each of its neighbours, the pair is upserted twice
    into 'kdTreeDBSCAN': once filtered on the neighbour's coordinates and
    once on the point's. What ``upsertPixelValue`` does with the pair, and
    the meaning of its optional fourth argument, are defined elsewhere.
    """
    self.mongoConnectInstance = mongoConnect.MongoDBConnector("QuickDBScanDB")
    neighbourIndices = self.kdIndex.query_ball_tree(self.kdIndex, self.eps)

    def bucketFilter(x, y):
        # Matches a document whose bucket is empty or equals [x, y].
        return {"$or": [{"bucket": []}, {"bucket": [x, y]}]}

    for idx, point in enumerate(self.dataset):
        for neighbour in self.dataset[neighbourIndices[idx]]:
            # A new pair list is built for each call, as in the original,
            # so the two calls never share a mutable argument.
            self.upsertPixelValue(
                'kdTreeDBSCAN',
                bucketFilter(neighbour[0], neighbour[1]),
                [[neighbour[0], neighbour[1]], [point[0], point[1]]],
                False)
            self.upsertPixelValue(
                'kdTreeDBSCAN',
                bucketFilter(point[0], point[1]),
                [[neighbour[0], neighbour[1]], [point[0], point[1]]])
# Script: train Doc2Vec models on pre-stemmed records read from MongoDB.
#
# NOTE(review): the line breaks and indentation of this script have been lost,
# so the nesting described below is an assumption -- confirm against the
# original layout before relying on it.
#
# Visible steps:
#   * open a MongoDBConnector on 'ARXIVRobert6Clusters';
#   * for each field name in `stemmers`, fetch records from the "documents"
#     collection with a projection of `_id`, that field and `category`;
#   * drop every token found in NLTK's English stop-word set and collect the
#     remaining tokens per document in `docs`;
#   * wrap each document in a TaggedDocument tagged with its position in `docs`;
#   * build the vocabulary and train a Doc2Vec model (vector_size=16, window=3,
#     min_count=2, workers=4, epochs=200), presumably once per stemmer field;
#   * initialise `X` as an empty list -- it is not used in the visible part,
#     so the script presumably continues beyond this point.
import numpy as np from gensim.models.doc2vec import Doc2Vec, TaggedDocument from nltk.corpus import stopwords import mongoConnect from nltk.tokenize import RegexpTokenizer mongoDb = mongoConnect.MongoDBConnector('ARXIVRobert6Clusters') stemmers = ["porterStemmedWords", "lancasterStemmedWords", "lemmatizedWords"] stop_words = set(stopwords.words('english')) for stemmer in stemmers: arxivRecordsCursor = mongoDb.getRecords("documents", {}, {"_id":1, stemmer: 1, "category": 1}) docs = [] for document in arxivRecordsCursor: documentWithoutStop = [w for w in document[stemmer] if not w in stop_words] docs.append(documentWithoutStop) taggedDocuments = [TaggedDocument(doc, [i]) for i, doc in enumerate(docs)] model = Doc2Vec(vector_size=16, window=3, min_count=2, workers=4, epochs=200) model.build_vocab(taggedDocuments) model.train(taggedDocuments, total_examples=len(taggedDocuments), epochs=200) X = []
def __init__(self, eps):
    """Keep ``eps``, open the DB handle and start with no pairs.

    Args:
        eps: stored as ``self.eps`` (an int, per the original comment).
    """
    self.eps = eps
    connector = mongoConnect.MongoDBConnector("QuickDBScanDB")
    self.mongoConnectInstance = connector
    self.allPairs = list()