コード例 #1
0
ファイル: flickr.py プロジェクト: infinity0/tag-routing
	def pruneProducers(self, socgr, gumap, pddb, cutoff=1):
		"""
		Prunes group producers that have too few photos.

		Every key of [gumap] that either has no entry in [pddb], or whose [pddb]
		entry holds [cutoff] photos or fewer, is removed from [gumap]; its [pddb]
		entry (if there is one) is deleted as well.

		User pruning is currently switched off, so [socgr] is left untouched and
		the log line always reports 0 users.

		@param socgr: graph of users (not used while user pruning is disabled)
		@param gumap: mapping whose keys are groups; modified in place
		@param pddb: an open database of {producer:[photo]}; modified in place
		@param cutoff: producers with this many photos or less will be pruned
		       (default 1)
		"""
		# TODO NORM maybe also prune groups with >n users

		# FIXME HIGH if we prune users, then we also need to prune groups that
		# point to this user. Until that is sorted out the user pass stays
		# disabled, and this list stays empty:
		#for u in socgr.vs[NID]:
		#	if u in pddb:
		#		if len(pddb[u]) > cutoff:
		#			continue
		#		del pddb[u]
		#	pruned_users.append(u)
		pruned_users = []

		# Collect first and delete afterwards, so that gumap does not change
		# while it is being iterated.
		pruned_groups = []
		for gid in gumap:
			if gid not in pddb:
				# no photo record at all
				pruned_groups.append(gid)
			elif len(pddb[gid]) <= cutoff:
				del pddb[gid]
				pruned_groups.append(gid)

		#socgr.delete_vertices([v.index for v in socgr.vs.select(id_in=set(pruned_users))])
		for gid in pruned_groups:
			del gumap[gid]

		LOG.info("producer db: pruned %s users, %s groups" % (len(pruned_users), len(pruned_groups)))
コード例 #2
0
ファイル: flickr.py プロジェクト: infinity0/tag-routing
	def commitUserPhotos(self, users, pddb):
		"""
		Gets the photos of the given users and saves these to a database

		Two helpers are defined here: run(nsid) fetches the photo records for one
		user, and post(nsid, i, (stream, faves)) stores their ids under
		pddb[nsid].

		NOTE(review): neither helper is invoked in the visible part of this
		method - presumably they are handed to a job runner further down;
		confirm against the full source.

		@param users: a list of user ids
		@param pddb: an open database of {producer:[photo]}
		"""
		# Collections of 16 entries or fewer are left as they are; larger
		# non-set collections are converted once, since run() tests membership
		# for every favourite.
		if type(users) != set and len(users) > 16: users = set(users) # efficient membership test
		def run(nsid):
			# Returns (stream, faves) for one user, both fully materialised lists.
			# OPT HIGH decide whether we want this many, or whether "faves" only will do
			stream = list(self.data_walker(self.people_getPublicPhotos, user_id=nsid, per_page=500))
			# only favourites whose "owner" is one of [users] are kept
			faves = list(p for p in self.data_walker(self.favorites_getPublicList, user_id=nsid, per_page=500) if p.get("owner") in users)
			return stream, faves

		# The third argument is the pair returned by run(); it is unpacked in
		# the signature (Python 2 only syntax). [i] is not used in the body.
		def post(nsid, i, (stream, faves)):
			# ids of the user's own photos first, then of the kept favourites
			photos = [p.get(NID) for p in chain(stream, faves)]
			# only unusually large results are worth a log line
			if len(photos) >= 4096:
				LOG.info("producer db (user): got %s photos for user %s" % (len(photos), nsid))
			pddb[nsid] = photos
コード例 #3
0
ファイル: flickr.py プロジェクト: infinity0/tag-routing
	def scrapeIDs(self, seed, size):
		"""
		Collects a sample of ids by breadth-first traversal from a seed.

		Starting from self.getNSID(seed), ids are taken from the front of a
		queue. Each id not yet in the sample is turned into a node with
		self.makeID, the keys of the node's [out] mapping are appended to the
		queue, and the node is added to the sample.

		@param seed: passed to self.getNSID to obtain the starting id
		@param size: number of nodes to collect; must be an int
		@return: the NodeSample, after build() has been called on it. It holds
		       fewer than [size] nodes if there are no more ids left to visit.
		@raise TypeError: if [size] is not an int
		"""
		if not isinstance(size, int):
			raise TypeError("size must be an int, not %s" % type(size).__name__)

		def visit(sample, queue):
			# Takes one id off the queue; returns it if it was newly added to
			# the sample, or None if it had been seen before.
			cur = queue.popleft()
			if cur in sample: return None
			node = self.makeID(cur)
			queue.extend(node.out.keys())
			sample.add_node(node)
			return cur

		sample = NodeSample()
		queue = deque([self.getNSID(seed)])

		# Also stop when the queue runs dry: popleft() on an empty deque raises
		# IndexError, which would throw away everything collected so far.
		while len(sample) < size and queue:
			added = visit(sample, queue)
			if added is not None:
				LOG.info("id sample: %s/%s (added %s)" % (len(sample), size, added))

		if len(sample) < size:
			LOG.info("id sample: no more ids to visit, stopping at %s/%s" % (len(sample), size))

		sample.build()
		return sample
コード例 #4
0
ファイル: flickr.py プロジェクト: infinity0/tag-routing
		def post(gid, i, photos):
			# Stores the ids of a group's photos under pddb[gid]; [i] is not used.
			photo_ids = [photo.get(NID) for photo in photos]
			# only unusually large results are worth a log line
			if len(photos) >= 4096:
				LOG.info("producer db (group): got %s photos for group %s" % (len(photos), gid))
			pddb[gid] = photo_ids
コード例 #5
0
ファイル: flickr.py プロジェクト: infinity0/tag-routing
		"""
		if vkdb.writeback is not True:
			raise ValueError("[vkdb] must have writeback=True")

		def syncer(i, (key, items)):
			vkdb.sync()

		for i, (key, items) in enumerate_cb(kvdb.iteritems(), syncer, every=0x10000):
			for item in items:
				if item in vkdb:
					vkdb[item].append(key)
				else:
					vkdb[item] = [key]
		vkdb.sync()

		LOG.info("%s db: inverted %s keys to %s items" % (name, len(kvdb), len(vkdb)))


	def commitTagClusters(self, tags, tcdb):
		"""
		Gets the clusters of all the given tags and saves these to a database

		@param tags: a list of tags
		@param tcdb: an open database of {tag:[cluster]}
		"""
		def run(tag):
			try:
				# FIXME HIGH verify that this does the right thing for unicode tags
				# atm all evidence points to flickr not doing clustering anaylses for them...
				clusters = self.tags_getClusters(tag=tag).getchildren()[0].getchildren()
			except FlickrError, e: