Ejemplo n.º 1
0
 def __init__(self, id, system, workerArgs = None):
     """Reset this worker's bookkeeping and initialise both base classes.

     ``id`` and ``system`` are handed unchanged to
     ``IndependentWorker.__init__``; ``workerArgs`` is accepted but is not
     read anywhere in this constructor.
     """
     DatabaseHandler.__init__(self)
     # Every counter starts at zero:
     #   changes - incremented for SQL updates used to transfer items from
     #             cluster to cluster
     #   i       - incremented for each SQL select emitted to determine
     #             where an item is most pulled
     #   itemExchangeLoopClosingFailures - no description in the original;
     #             presumably counts failed attempts to close an exchange
     #             loop (TODO confirm against the worker's run loop)
     self.itemExchangeLoopClosingFailures = self.i = self.changes = 0
     # presumably guards self.changes - verify against the code that updates it
     self.changesLock = Lock()
     # The worker base is initialised last, once the state above exists.
     IndependentWorker.__init__(self, id, system)
Ejemplo n.º 2
0
 def __init__(self, id="main", master=None):
     """Create either the main instance or one attached to a master.

     id     -- stored as ``self._id`` (defaults to "main").
     master -- when given, becomes ``self.main`` and this instance starts
               with ``_power`` off; when omitted, this instance loads the
               item list itself and becomes its own ``main``.
     """
     DatabaseHandler.__init__(self)
     Thread.__init__(self)
     self._id = id
     if master is not None:
         # Attached instance: only a reference to the master and the
         # power flag are set here.
         self._power = False
         self.main = master
     else:
         # Main instance: fetch the items first, then set up the shared
         # progress counter, its lock and the control flags.
         items = self._getItems()
         self._itemList = items
         self._items = len(items)
         self._itemsParsed = 0
         self._itemsParsedLock = Lock()
         self.main = self
         self._report = self._sync = False
         self._power = True
Ejemplo n.º 3
0
    def __init__(self):
		start = time.time()
		DatabaseHandler.__init__(self)
		#init SQL Functions:
		self.executeSQL('''
         CREATE OR REPLACE FUNCTION public.measure_density(item_key INT4, cluster_key INT4)
            RETURNS FLOAT8 AS $$
         SELECT GREATEST(SUM(similarity), 0.0000000000000001) AS sum
         FROM %s AS a, public.itemclusters AS b
         WHERE $1 = a.tail AND a.head = b.item_key AND b.cluster_key = $2
         $$ LANGUAGE 'SQL';
      ''' % (ITEMLINKS_TABLE,))
		self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.get_clustering_statistics()
                RETURNS TABLE(count INT4, maxsum FLOAT8, maxcount INT2, sum FLOAT8) AS $$
        SELECT COUNT(*)::INT4, MAX(sum), MAX(count)::INT2, SUM(sum)
        FROM    (
                SELECT cluster_key, COUNT(*), SUM(density)
                FROM public.itemclusters
                GROUP BY cluster_key
                ORDER BY sum DESC
                ) AS foo
        WHERE sum > 50
        ; $$ LANGUAGE 'SQL';
		''')
		self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.measure_density_for_transfer(affected_item INT4, transfered_item INT4)
            RETURNS FLOAT8 AS $$
        SELECT GREATEST(SUM(similarity), 0.000001) AS sum
		FROM 	(
			SELECT cluster_key
			FROM public.itemclusters
			WHERE item_key = $1
			) AS a, %s AS b, public.itemclusters AS c
		WHERE $1 = b.tail AND b.head = c.item_key AND c.item_key != $2 AND c.cluster_key = a.cluster_key
		; $$ LANGUAGE 'SQL';
		''' % (ITEMLINKS_TABLE,))
		self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.transfer_item_to_cluster(item INT4, new_cluster INT4)
            RETURNS VOID  AS $$
        --Update density of all items in previous_cluster itemlinking the item:
        UPDATE public.itemclusters
        SET density = public.measure_density_for_transfer(item_key, $1)
        FROM 	(
                SELECT cluster_key AS previous_cluster
                FROM public.itemclusters
                WHERE item_key = $1
                ) AS a, 
                (
				SELECT head AS item
				FROM %s
				WHERE $1 = tail
                ) AS b
        WHERE b.item = item_key AND cluster_key = a.previous_cluster;
        --transfer the item to new_cluster:
        UPDATE public.itemclusters
        SET cluster_key = $2, density = public.measure_density($1, $2)
        WHERE item_key = $1;
        --Update item_density of all items in the new_cluster itemlinking the item:
        UPDATE public.itemclusters
        SET density = public.measure_density(item_key, $2)
        FROM 	(
				SELECT head AS item
				FROM %s
				WHERE $1 = tail
				) AS a
        WHERE a.item = item_key AND cluster_key = $2
        ; $$ LANGUAGE 'SQL';
		''' % (ITEMLINKS_TABLE, ITEMLINKS_TABLE))
		self.executeSQL('''
        CREATE OR REPLACE FUNCTION public.get_badly_clustered_item_from_cluster(cluster INT4)
            RETURNS INT4 AS $$
        SELECT item_key
        FROM public.itemclusters
        WHERE cluster_key = $1
        ORDER BY density ASC 
        LIMIT 1
        ; $$ LANGUAGE 'SQL';
        ''')
        #get clusters:
		clusters = self.executeSQL("SELECT DISTINCT cluster_key FROM public.itemclusters", action=self.FETCH_ALL)
		self.clusters = {}
		self.clusterList = []
		for (cluster_key,) in clusters:
			self.clusters[cluster_key] = Cluster(cluster_key)
			self.clusterList.append(cluster_key)
		self.num_clusters = len(self.clusters)
		self.taskLock = Lock()
		self.nextTask = 1
		#for real time (periodic online) statistics:
		self.taskings = ZERO; self.changes = ZERO; self.tries = ZERO;
		self.loopFailures = 0;
		self.lastTimeCheck = time.time()
		self.lastSync = time.time()
		#init workers:
		InvisibleHand.__init__(self, ClusteringWorker)
		print "system initialized in", time.time() - start, "secs"