def parse_item(item_soup, stem_mapping):
    """Get information about a single HI episode.

    @param item_soup: Soup containing information about a single HI episode.
    @type item_soup: bs4.BeautifulSoup
    @param stem_mapping: Mapping used by get_item_tags to derive canonical
        tags from the episode title / soup.
    @type stem_mapping: dict
    @return: Dictionary describing the episode. Contains keys name (str value),
        date (datetime.date), loc (url - str value; always the empty string
        here), duration (seconds - int), and orig_tags (tags applied to
        episode - list of str)
    @rtype: dict
    """
    item_date = common.interpret_2822_date(
        item_soup.find('pubdate').contents[0]
    )

    # Some items omit an itunes:duration tag; fall back to 2 hours (7200 s).
    duration_soup = item_soup.find('itunes:duration')
    if duration_soup:
        # Reuse the already-located tag instead of searching the soup again.
        duration = common.interpret_duration(duration_soup.contents[0])
    else:
        duration = 7200

    title = item_soup.find('title').contents[0]

    return {
        'name': title,
        'date': item_date,
        'loc': '',
        'duration': duration,
        'orig_tags': get_item_tags(title, item_soup, stem_mapping)
    }
def parse_episode_page(loc, contents):
    """Parse a page describing a single podcast episode.

    @param loc: The URL of this page.
    @type loc: basestring
    @param contents: The raw HTML contents of the episode page from which
        episode information should be parsed.
    @type contents: basestring
    @return: Dictionary describing the episode. Contains keys title (str
        value), date (datetime.date), tags (sorted list of str), loc
        (url - str value), and duration (seconds - int)
    @rtype: dict
    """
    soup = bs4.BeautifulSoup(contents)

    # The episode title is the bolded text inside the post header.
    header = soup.find(class_='centerPosts')
    title = header.find('strong').contents[0]

    # Date string looks like "Jan 5, 2014"; drop the comma so a plain
    # space split yields (month abbreviation, day, year).
    date_str = soup.find(class_='pdateS').find('em').contents[0]
    date_components = date_str.replace(',', ' ').split(' ')

    year = int(date_components[2])
    month = common.MONTH_ABBRV[date_components[0]]
    day = int(date_components[1])
    episode_date = datetime.date(year, month, day)

    # De-duplicate tag names and return them alphabetized.
    tags = sorted(set(map(
        lambda x: x.contents[0], soup.findAll('a', rel='tag')
    )))

    # Duration renders like "[ 1:23:45 ... ]"; strip the bracket framing
    # before interpreting it as a number of seconds.
    duration_str = soup.find(class_='podpress_mediafile_dursize').contents[0]
    duration_str_clean = duration_str.replace('[ ', '').replace(' ]', '')
    duration = common.interpret_duration(duration_str_clean)

    return {
        'title': title,
        'date': episode_date,
        'tags': tags,
        'loc': loc,
        'duration': duration
    }
def process_item(item_soup):
    """Parse information about a single podcast episode.

    @param item_soup: Soup containing information about a single podcast
        episode.
    @type item_soup: bs4.BeautifulSoup
    @return: Dictionary describing the episode. Contains keys name (str value),
        date (datetime.date), tags (sorted list of str), loc (url - str
        value), and duration (seconds - int)
    @rtype: dict
    """
    title = item_soup.find('title').contents[0].strip()
    loc = item_soup.find('guid').contents[0]

    pub_date_raw = item_soup.find('pubdate').contents[0]
    pub_date = common.interpret_2822_date(pub_date_raw)

    # Materialize the tags as a list: both the membership test below and the
    # sorted(set(...)) in the return must traverse them. Under Python 3 a
    # bare map() iterator would be exhausted by the first traversal, leaving
    # the returned tag list empty.
    tags = [category.contents[0] for category in item_soup.findAll('category')]

    duration_soup = item_soup.find('itunes:duration')
    if duration_soup is None:
        # No explicit duration: assume 30 min for "shorts", 1 hour otherwise.
        duration = 1800 if 'shorts' in tags else 3600
    else:
        duration = common.interpret_duration(duration_soup.contents[0])

    return {
        'name': title,
        'date': pub_date,
        'tags': sorted(set(tags)),
        'loc': loc,
        'duration': duration
    }