Example #1
    def distribute_to_nodes(self, crawler_queue):

        qsizes = self.node_qsizes()

        # Route each command to the node whose queue is currently shortest.
        while (cmd := crawler_queue.get(timeout=60)):

            node_id = get_keys_by_min_value(qsizes)[0]

            node = self.get_node(node_id)

            node.put(cmd)
            qsizes[node_id] += 1
Example #2
    def distribute_to_nodes(self, crawler_queue):

        qsizes = self.node_qsizes()

        cmd = crawler_queue.get(timeout=60)
        while cmd:
            node_id = get_keys_by_min_value(qsizes)[0]

            node = self.get_node(node_id)

            node.put(cmd)
            qsizes[node_id] += 1

            cmd = crawler_queue.get(timeout=60)
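
Each of these examples picks a target with get_keys_by_min_value(qsizes)[0], but the helper itself is not part of this listing. A minimal sketch of the assumed contract (return all keys of a dict whose value is the minimum, so [0] picks one least-loaded node):

    def get_keys_by_min_value(d):
        """Assumed helper: return all keys of d whose value equals the minimum.
        Callers index [0] to take one least-loaded node or bucket."""
        min_value = min(d.values())
        return [k for k, v in d.items() if v == min_value]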
Example #3
        def split(lst, n):
            lsize = {}
            results = {}
            for i in range(n):
                lsize[i] = 0
                results[i] = []

            for x in lst:
                idx = get_keys_by_min_value(lsize)[0]
                results[idx].append(x)
                lsize[idx] += 1

            for i in range(n):
                yield results[i]
Example #4
    def split(self, lst, n):
        """ Yield successive n chunks of even sized sub-lists from lst."""
        lsize = {}
        results = {}
        for i in range(n):
            lsize[i] = 0
            results[i] = []

        for x in lst:
            idx = get_keys_by_min_value(lsize)[0]
            results[idx].append(x)
            lsize[idx] += 1

        for i in range(n):
            yield results[i]
Example #5
        def split(lst, n):
            lsize = {}
            results = {}
            for i in range(n):
                lsize[i] = 0
                results[i] = []

            for x in lst:
                idx = get_keys_by_min_value(lsize)[0]
                results[idx].append(x)
                lsize[idx] += 1

            for i in range(n):
                yield results[i]
Example #6
    def split(self, lst, n):
        """Yield n evenly sized sub-lists from lst."""
        lsize = {}
        results = {}
        for i in range(n):
            lsize[i] = 0
            results[i] = []

        for x in lst:
            idx = get_keys_by_min_value(lsize)[0]
            results[idx].append(x)
            lsize[idx] += 1

        for i in range(n):
            yield results[i]
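
Because ties in lsize resolve to the lowest key, the distribution degenerates to round-robin and chunk lengths differ by at most one. A quick check, treating the standalone split from Example #3 as a module-level function and using the get_keys_by_min_value sketch above:

    # 10 items into 3 chunks: sizes 4, 3, 3.
    print(list(split(range(10), 3)))
    # [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]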
Example #7
def flush_cmd(bulk, data_type, template, redis_config):

    try:
        node_coordinator = NodeCoordinator(redis_config=redis_config)

        qsizes = node_coordinator.node_qsizes()

        logger.debug(qsizes)

        node_queues = {}

        for element in bulk:
            if data_type == "ids" and type(element) == int:
                user_id = element
            elif data_type == "users" and type(element) == dict and "id" in element:
                user_id = element['id']
            else:
                continue  # neither shape matched; user_id would be unbound below

            t = copy.copy(template)
            t["user_id"] = int(user_id)
            t["depth"] = int(t["depth"]) - 1

            node_id = get_keys_by_min_value(qsizes)[0]

            if node_id in node_queues:
                node_queue = node_queues[node_id]
            else:
                node_queue = NodeQueue(node_id, redis_config=redis_config)
                node_queues[node_id] = node_queue

            t['cmd_hash'] = hash_cmd(t)
            node_queue.put(t)
            qsizes[node_id] += 1

            logger.debug("send [%s] to node: %s" % (json.dumps(t), node_id))

        # intended to close all redis connections, but not sure yet...
        node_queues.clear()

        del node_coordinator

    except Exception as exc:
        logger.error('error during flush: %s' % exc)

    return True
Example #8
def flush_cmd(bulk, data_type, template, redis_config):

    try:
        node_coordinator = NodeCoordinator(redis_config=redis_config)

        qsizes = node_coordinator.node_qsizes()

        logger.debug(qsizes)

        node_queues = {}

        for element in bulk:
            if data_type == "ids" and type(element) == int:
                user_id = element
            elif data_type == "users" and type(
                    element) == dict and "id" in element:
                user_id = element['id']

            t = copy.copy(template)
            t["user_id"] = int(user_id)
            t["depth"] = int(t["depth"]) - 1

            node_id = get_keys_by_min_value(qsizes)[0]

            if node_id in node_queues:
                node_queue = node_queues[node_id]
            else:
                node_queue = NodeQueue(node_id, redis_config=redis_config)
                node_queues[node_id] = node_queue

            t['cmd_hash'] = hash_cmd(t)
            node_queue.put(t)
            qsizes[node_id] += 1

            logger.debug("send [%s] to node: %s" % (json.dumps(t), node_id))

        # intended to close all redis connections, but not sure yet...
        node_queues.clear()

        del node_coordinator

    except Exception as exc:
        logger.error('error during flush: %s' % exc)

    return True
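
flush_cmd leans on several pieces that are not shown in this listing: NodeCoordinator, NodeQueue, logger, and hash_cmd. Purely as an illustration of the cmd_hash step, here is one common way to derive a stable digest from a command dict; the project's actual hash_cmd may well differ:

    import hashlib
    import json

    def hash_cmd(cmd):
        """Hypothetical stand-in for the project's hash_cmd: a stable,
        key-order-independent digest of a command dict, e.g. for de-duplication."""
        payload = json.dumps(cmd, sort_keys=True).encode("utf-8")
        return hashlib.md5(payload).hexdigest()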