def parse_new_items(soup, existing_content_by_name):
    """Parse the podcast episodes from the RSS feed that are not yet known.

    An episode is considered new when the first child of its <title> element
    is not contained in existing_content_by_name.

    @param soup: Soup with all podcast episodes.
    @type soup: bs4.BeautifulSoup
    @param existing_content_by_name: Collection with the names of HI episodes
        already parsed.
    @type existing_content_by_name: Collection of str
    @return: One parse_item result per new episode, in the order the <item>
        elements were returned by soup.findAll.
    @rtype: list
    """
    # Keep only the <item> elements whose title has not been seen before.
    # A comprehension is used instead of filter/lambda with tuple-parameter
    # unpacking, which is Python-2-only syntax.
    new_item_soups = [
        item_soup
        for item_soup in soup.findAll('item')
        if item_soup.find('title').contents[0] not in existing_content_by_name
    ]

    # The same stem_mapping dict is handed to every parse_item call and then
    # to consolidate_tags together with the parsed items.
    stem_mapping = {}
    new_items = [
        parse_item(item_soup, stem_mapping)
        for item_soup in new_item_soups
    ]

    # NOTE(review): the meaning of the third argument (1) is defined by
    # common.consolidate_tags, which is not visible here -- confirm there.
    common.consolidate_tags(new_items, stem_mapping, 1)

    return new_items
def main():
    """Driver for the 99 Percent Invisible parser.

    Expects exactly one command line argument, the path of the output file.
    When the argument count is wrong, USAGE_STR is printed and nothing else
    happens. Otherwise the tracks returned by load_tracks are processed and
    written to the output file as a JSON object with a single 'episodes' key.
    """
    # sys.argv[0] is the script name, so one user argument means length 2.
    if len(sys.argv) != 2:
        # Parenthesised single-argument form prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print(USAGE_STR)
        return

    raw_tracks_info = load_tracks()

    # The same stem_mapping dict is handed to every process_track call and
    # then to consolidate_tags together with the processed tracks.
    stem_mapping = {}
    # Build a real list (not a lazy map object) so that the result can be
    # passed to consolidate_tags and still be serialised afterwards.
    processed_tracks = [
        process_track(track, stem_mapping)
        for track in raw_tracks_info
    ]

    # NOTE(review): the meaning of the third argument (2) is defined by
    # common.consolidate_tags, which is not visible here -- confirm there.
    common.consolidate_tags(processed_tracks, stem_mapping, 2)

    with open(sys.argv[1], 'w') as f:
        f.write(common.DateJSONEncoder().encode({
            'episodes': processed_tracks
        }))