def __init__(self, id, system, workerArgs = None):
    """Prepare the worker's database handle and its bookkeeping counters.

    id and system are forwarded unchanged to IndependentWorker.__init__.
    workerArgs is accepted but not used by this constructor.
    """
    DatabaseHandler.__init__(self)

    # Incremented for each SQL Select emitted to determine where an item
    # is most pulled.
    self.i = 0

    # Incremented for SQL Updates used to transfer items from cluster to
    # cluster.
    self.changes = 0
    # NOTE(review): presumably guards self.changes -- confirm at the call sites.
    self.changesLock = Lock()

    self.itemExchangeLoopClosingFailures = 0

    # The worker base class is initialised last, after every attribute
    # above has been created.
    IndependentWorker.__init__(self, id, system)
def __init__(self, id="main", master=None):
    """Initialise either the main instance or one attached to a master.

    id     -- stored as self._id.
    master -- when given, this instance only records it as self.main and
              sets self._power to False; when omitted, this instance
              becomes its own self.main, loads the item list and sets up
              its progress counter and flags.
    """
    DatabaseHandler.__init__(self)
    Thread.__init__(self)
    self._id = id

    if master is not None:
        # Attached instance: nothing else is set up here.
        self.main = master
        self._power = False
        return

    # Main instance: fetch the items once and remember how many there are.
    items = self._getItems()
    self._itemList = items
    self._items = len(items)

    # Parsed-items counter with its lock.
    # NOTE(review): presumably the lock protects _itemsParsed -- confirm.
    self._itemsParsedLock = Lock()
    self._itemsParsed = 0

    self.main = self
    self._power = True
    self._sync = False
    self._report = False
def __init__(self):
    """Install the clustering SQL functions, load the clusters and start up.

    Steps, in order:
      1. Initialise the DatabaseHandler base.
      2. (Re)create five SQL functions in the public schema
         (CREATE OR REPLACE, so running this again is allowed by the SQL).
      3. Read the distinct cluster keys from public.itemclusters and build
         self.clusters (key -> Cluster), self.clusterList and
         self.num_clusters.
      4. Reset the task lock, task counter, statistics counters and the
         two timestamps.
      5. Call InvisibleHand.__init__ with ClusteringWorker as worker class.
      6. Print how long the whole initialisation took.

    The SQL text is written one statement per line: the "--" comments
    inside transfer_item_to_cluster run to the end of their line, so they
    must not share a line with the UPDATE statements they describe.
    """
    start = time.time()
    DatabaseHandler.__init__(self)

    # init SQL functions.
    # ITEMLINKS_TABLE is interpolated with % because a table name cannot be
    # passed as a bound query parameter.

    # measure_density: sum of link similarities from the item (tail) to the
    # items (head) that sit in the given cluster, floored at a tiny positive
    # value by GREATEST.
    self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.measure_density(item_key INT4, cluster_key INT4) RETURNS FLOAT8 AS $$
            SELECT GREATEST(SUM(similarity), 0.0000000000000001) AS sum
            FROM %s AS a, public.itemclusters AS b
            WHERE $1 = a.tail AND a.head = b.item_key AND b.cluster_key = $2
        $$ LANGUAGE 'SQL';
    ''' % (ITEMLINKS_TABLE,))

    # get_clustering_statistics: aggregates over the clusters whose summed
    # density exceeds 50 (number of such clusters, largest sum, largest item
    # count, total of the sums).
    self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.get_clustering_statistics() RETURNS TABLE(count INT4, maxsum FLOAT8, maxcount INT2, sum FLOAT8) AS $$
            SELECT COUNT(*)::INT4, MAX(sum), MAX(count)::INT2, SUM(sum)
            FROM (
                SELECT cluster_key, COUNT(*), SUM(density)
                FROM public.itemclusters
                GROUP BY cluster_key
                ORDER BY sum DESC
            ) AS foo
            WHERE sum > 50
            ;
        $$ LANGUAGE 'SQL';
    ''')

    # measure_density_for_transfer: density of affected_item inside its
    # current cluster, ignoring the link to transfered_item.
    self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.measure_density_for_transfer(affected_item INT4, transfered_item INT4) RETURNS FLOAT8 AS $$
            SELECT GREATEST(SUM(similarity), 0.000001) AS sum
            FROM (
                SELECT cluster_key
                FROM public.itemclusters
                WHERE item_key = $1
            ) AS a, %s AS b, public.itemclusters AS c
            WHERE $1 = b.tail AND b.head = c.item_key AND c.item_key != $2 AND c.cluster_key = a.cluster_key
            ;
        $$ LANGUAGE 'SQL';
    ''' % (ITEMLINKS_TABLE,))

    # transfer_item_to_cluster: three UPDATEs -- refresh the linked items in
    # the previous cluster, move the item, refresh the linked items in the
    # new cluster. Each SQL comment below is on a line of its own.
    self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.transfer_item_to_cluster(item INT4, new_cluster INT4) RETURNS VOID AS $$
            --Update density of all items in previous_cluster itemlinking the item:
            UPDATE public.itemclusters
            SET density = public.measure_density_for_transfer(item_key, $1)
            FROM (
                SELECT cluster_key AS previous_cluster
                FROM public.itemclusters
                WHERE item_key = $1
            ) AS a, (
                SELECT head AS item
                FROM %s
                WHERE $1 = tail
            ) AS b
            WHERE b.item = item_key AND cluster_key = a.previous_cluster;

            --transfer the item to new_cluster:
            UPDATE public.itemclusters
            SET cluster_key = $2, density = public.measure_density($1, $2)
            WHERE item_key = $1;

            --Update item_density of all items in the new_cluster itemlinking the item:
            UPDATE public.itemclusters
            SET density = public.measure_density(item_key, $2)
            FROM (
                SELECT head AS item
                FROM %s
                WHERE $1 = tail
            ) AS a
            WHERE a.item = item_key AND cluster_key = $2
            ;
        $$ LANGUAGE 'SQL';
    ''' % (ITEMLINKS_TABLE, ITEMLINKS_TABLE))

    # get_badly_clustered_item_from_cluster: the item with the lowest
    # density in the given cluster.
    self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.get_badly_clustered_item_from_cluster(cluster INT4) RETURNS INT4 AS $$
            SELECT item_key
            FROM public.itemclusters
            WHERE cluster_key = $1
            ORDER BY density ASC
            LIMIT 1
            ;
        $$ LANGUAGE 'SQL';
    ''')

    # get clusters: each row is a 1-tuple holding a cluster key.
    clusters = self.executeSQL(
        "SELECT DISTINCT cluster_key FROM public.itemclusters",
        action=self.FETCH_ALL)
    self.clusters = {}
    self.clusterList = []
    for (cluster_key,) in clusters:
        self.clusters[cluster_key] = Cluster(cluster_key)
        self.clusterList.append(cluster_key)
    self.num_clusters = len(self.clusters)

    self.taskLock = Lock()
    self.nextTask = 1

    # for real time (periodic online) statistics:
    self.taskings = ZERO
    self.changes = ZERO
    self.tries = ZERO
    self.loopFailures = 0
    self.lastTimeCheck = time.time()
    self.lastSync = time.time()

    # init workers:
    InvisibleHand.__init__(self, ClusteringWorker)

    # Single-argument print: emits the same text as the Python 2 statement
    # form ("system initialized in <secs> secs") and also parses on Python 3.
    print("system initialized in %s secs" % (time.time() - start,))