def tag_set_with_tags(mc_api_key, tag_sets_id, only_public_tags=False, use_file_cache=False):
    """Return a tag set dict with its tags attached, sorted by label.

    When ``use_file_cache`` is true and a file named ``tags_in_<tag_sets_id>.json``
    exists in ``static_tag_set_cache_dir``, the result of
    ``cached_tag_set_file`` for that file is returned directly. Otherwise the
    tag set is fetched with ``MediaCloud.tagSet`` and its tags are collected
    page by page through ``_cached_tag_page``.

    Args:
        mc_api_key: key used to construct the MediaCloud client and passed to
            ``_cached_tag_page``.
        tag_sets_id: id of the tag set to load.
        only_public_tags: unless this is exactly ``False``, keep only tags whose
            ``show_on_media`` equals 1 (``True`` also compares equal to 1).
        use_file_cache: when true, prefer a static JSON file if one exists.

    Returns:
        The tag set dict with two keys added: ``'tags'`` (the filtered tags,
        sorted case-insensitively by label, or by tag when the label is empty)
        and ``'name'`` (a copy of ``'label'``).
    """
    # don't need to cache here, because either you are reading from a file, or each page is cached
    local_mc = MediaCloud(mc_api_key)
    if use_file_cache:
        file_name = "tags_in_{}.json".format(tag_sets_id)
        file_path = os.path.join(static_tag_set_cache_dir, file_name)
        if os.path.isfile(file_path):
            return cached_tag_set_file(file_path)  # more caching!
    tag_set = local_mc.tagSet(tag_sets_id)

    # page through tags; the last seen tags_id and 100 are passed on each call
    # (presumably the paging cursor and the page size - confirm against
    # _cached_tag_page)
    all_tags = []
    last_tags_id = 0
    while True:
        tags = _cached_tag_page(mc_api_key, tag_set['tag_sets_id'], last_tags_id, 100, only_public_tags)
        if not tags:
            break  # an empty page means there is nothing left to fetch
        # extend in place rather than rebuilding the whole list for every page
        all_tags.extend(tags)
        last_tags_id = tags[-1]['tags_id']

    # double check the show_on_media because that controls public or not
    if only_public_tags is False:
        tag_list = all_tags
    else:
        # True == 1 in Python, so this single comparison covers both encodings
        tag_list = [t for t in all_tags if t['show_on_media'] == 1]

    def _sort_key(tag):
        # sort by label (or tag if no label exists); fall back to '' so a tag
        # with neither value does not abort the whole sort
        return (tag.get('label') or tag.get('tag') or '').lower()

    # a key function avoids writing a temporary 'sort_key' entry into each tag dict
    tag_set['tags'] = sorted(tag_list, key=_sort_key)
    tag_set['name'] = tag_set['label']
    return tag_set
# setup a connection to the DB
db_name = config.get('db', 'name')
db_host = config.get('db', 'host')
db_port = config.get('db', 'port')
try:
    db = MongoStoryDatabase(db_name, db_host, int(db_port))
except pymongo.errors.ConnectionFailure as e:
    log.error(e)
    # exit with a non-zero status so callers can tell the run failed
    sys.exit(1)
log.info("Connected to %s on %s:%s", db_name, db_host, db_port)

# setup a connection to the geocoder
worked = clavin.connect()
if not worked:
    log.error("Unable to connect to the geocoder")
    sys.exit(1)

# setup the mediacloud connection
mc = MediaCloud(config.get('api', 'user'), config.get('api', 'pass'))

# set up my callback function that adds locations to the story before it is saved
pub.subscribe(mcgeo.algorithms.addLocationsToStory, StoryDatabase.EVENT_PRE_STORY_SAVE)

# save all the stories in the db (this will fire the callback above)
saved = 0
# NOTE(review): the configured first_page is offset by +1 here and by another
# +1 inside the loop, so the first page requested is first_page + 2. Left
# unchanged; confirm whether both offsets are intended.
first_page = int(config.get('api', 'first_page')) + 1
for page in xrange(MAX_PAGES_TO_FETCH):
    query_page = first_page + (page + 1)
    results = mc.allProcessed(query_page)
    log.info("Fetched %s stories (page %s)", len(results), query_page)
    for story in results:
        worked = db.addStory(story)
        if worked:
            saved += 1
def get_mc_client():
    """Return a new MediaCloud instance constructed with MC_API_KEY."""
    client = MediaCloud(MC_API_KEY)
    return client