def build_tag_index(recent_media, limit=10):
    """
    Transforms recent_media into dictionaries of tags and edges.

    (input)  recent_media: [list of media]; each item may expose a `tags`
             attribute holding objects with a `.name`. Items without
             `tags` (or with an empty/None value) contribute nothing.
             limit: how many leading media items to inspect (default 10;
             None inspects all of them).
    (return) {'nodes': {tag_name: occurrence_count},
              'edges': {tag_name: vec_count([co-occurring tag names])}}

    Tag names are ASCII-encoded with unencodable characters dropped, and
    that encoded name is the key used in both 'nodes' and 'edges'.
    Every tag is linked to every *other* tag on the same media item.
    The caller's `media.tags` lists are never modified.
    """
    nodes = {}
    edges = {}
    for media in recent_media[:limit]:
        media_tags = getattr(media, 'tags', None) or []
        # Encode once per media item instead of once per (tag, neighbour).
        names = [t.name.encode('ascii', 'ignore') for t in media_tags]
        for idx, tname in enumerate(names):
            nodes[tname] = nodes.get(tname, 0) + 1
            # Neighbours are built by position so the list being iterated
            # is never mutated (removing during iteration skips elements).
            neighbors = names[:idx] + names[idx + 1:]
            if tname in edges:
                edges[tname].extend(neighbors)
            else:
                # Parenthesised single-argument form prints the same text
                # under Python 2 and is valid syntax under Python 3.
                print("{1} new edges for {0}".format(tname, len(neighbors)))
                # Store under the encoded name, the same key tested above.
                edges[tname] = neighbors
    # Collapse each raw neighbour list with vec_count (defined elsewhere;
    # presumably a value -> frequency mapping -- confirm against helper).
    for name in list(edges):
        edges[name] = vec_count(edges[name])
    return {'nodes': nodes, 'edges': edges}
def tag_cooccurence(all_tags, min_count=3):
    """
    Creates a mapping of {tag: {neighbor: count}}.

    <all_tags>  = list of tag lists; each inner list holds the tags that
                  appeared together.
    <min_count> = forwarded unchanged to get_tag_counts() (presumably the
                  minimum frequency for a tag to be kept -- confirm
                  against that helper).

    Only tags present in the get_tag_counts() result receive an entry in
    the mapping. A tag's neighbors are all the other entries of each list
    it appears in; only one occurrence of the tag itself is dropped, so a
    tag repeated within one list still counts as its own neighbor.

    Returns {'counts': <get_tag_counts result>,
             'mapping': {tag: vec_count(neighbors)}}
    """
    tag_map = {}
    tag_counts = get_tag_counts(all_tags, min_count)
    for tags in all_tags:
        for tag in set(tags):
            if tag not in tag_counts:
                continue
            # Copy only for tags that are kept, then drop the tag itself.
            neighbors = list(tags)
            neighbors.remove(tag)
            # Extend in place; rebuilding the accumulated list with `+`
            # on every hit re-copies everything collected so far.
            tag_map.setdefault(tag, []).extend(neighbors)
    # Iterate over a snapshot of the keys while replacing the values.
    for tag in list(tag_map):
        tag_map[tag] = vec_count(tag_map[tag])
    return {'counts': tag_counts, 'mapping': tag_map}