def parse_item(item_soup, stem_mapping):
    """Get information about a single HI episode.

    @param item_soup: Soup containing information about a single HI episode.
        Must contain non-empty 'pubdate' and 'title' elements; otherwise the
        lookups below fail (AttributeError / IndexError).
    @type item_soup: bs4.BeautifulSoup
    @param stem_mapping: Passed through unchanged to get_item_tags, which
        builds the episode's tags.
    @return: Dictionary describing the episode. Contains keys name (str
        value), date (datetime.date), loc (always the empty string in this
        parser), duration (seconds - int), and orig_tags (tags applied to
        episode - list of str)
    @rtype: dict
    """
    item_date = common.interpret_2822_date(
        item_soup.find('pubdate').contents[0]
    )

    # Look the duration element up once and reuse it instead of searching
    # the soup a second time.
    duration_soup = item_soup.find('itunes:duration')
    if duration_soup is not None:
        duration = common.interpret_duration(duration_soup.contents[0])
    else:
        # Fallback used when the feed gives no itunes:duration element.
        duration = 7200

    title = item_soup.find('title').contents[0]

    return {
        'name': title,
        'date': item_date,
        'loc': '',
        'duration': duration,
        'orig_tags': get_item_tags(title, item_soup, stem_mapping)
    }
def process_item(item_soup):
    """Parse information about a single podcast episode.

    @param item_soup: Soup containing information about a single podcast
        episode. Must contain non-empty 'title', 'guid' and 'pubdate'
        elements; otherwise the lookups below fail (AttributeError /
        IndexError).
    @type item_soup: bs4.BeautifulSoup
    @return: Dictionary describing the episode. Contains keys name (str
        value, surrounding whitespace stripped), date (datetime.date), tags
        (contents of the item's category elements - sorted list of unique
        str), loc (contents of the guid element - str value), and duration
        (seconds - int).
    @rtype: dict
    """
    title = item_soup.find('title').contents[0].strip()
    loc = item_soup.find('guid').contents[0]
    pub_date = common.interpret_2822_date(
        item_soup.find('pubdate').contents[0]
    )

    # Build a real list: on Python 3 a bare map() is a one-shot iterator, so
    # the 'shorts' membership test below would consume it and the returned
    # tag list would silently lose entries.
    tags = [
        category.contents[0] for category in item_soup.findAll('category')
    ]

    duration_soup = item_soup.find('itunes:duration')
    if duration_soup is None:
        # No explicit duration in the feed: fall back to a default, with a
        # smaller one for episodes tagged 'shorts'.
        duration = 1800 if 'shorts' in tags else 3600
    else:
        duration = common.interpret_duration(duration_soup.contents[0])

    return {
        'name': title,
        'date': pub_date,
        'tags': sorted(set(tags)),
        'loc': loc,
        'duration': duration
    }