Ejemplo n.º 1
0
def handle_comment_element(comment_element):
    """Extract author, time and body from a comment element.

    Argument:

        comment_element: comment element of XML; must provide ``find()``
            returning children that expose a ``text`` attribute.

    Returns:

        A ``(username, timestamp, comment)`` tuple, where ``timestamp`` is
        the text of the ``timestamp`` child passed through
        ``utils.unix2ctime()``.
    """
    author = comment_element.find('username').text

    # The raw text of <timestamp> is handed to utils.unix2ctime as-is.
    posted_at = utils.unix2ctime(comment_element.find('timestamp').text)

    body = comment_element.find('body').text

    return author, posted_at, body
Ejemplo n.º 2
0
def get_metadata(string):
    """Get metadata of entry.

    Argument:

        string: title line string of hatena blog entry.

    Returns:

        A ``(time, categories, title)`` tuple, where ``time`` is the result
        of ``utils.unix2ctime(timestamp, date_enabled=False)`` and
        ``categories`` is the result of ``extract_categories()`` applied to
        the captured category text.  ``None`` is returned when ``string`` is
        empty or matches neither title pattern.
    """
    if not string:
        return None

    # pattern a)
    #
    #   timestamp: (\d*)
    #   category: (\[.*\])*
    #   title with uri converted: ( `.+ <.+>`_ )(.*)
    #
    # Raw strings keep the regex escapes intact without relying on
    # Python passing unknown escape sequences through unchanged.
    pat_title_with_link = re.compile(
        r'\*?(\d*)\*(\[.*\])*( `.+ <.+>`_ )(.*)',
        flags=re.U)

    # pattern b)
    #
    #   timestamp: (\d*)
    #   category: (\[.*\])*
    #   title: (.*)
    pat_title = re.compile(
        r'\*?(\d*)\*(\[.*\])*(.*)', flags=re.U)

    # Try the more specific pattern first; pattern b) would also match a
    # linked title but would not split the hyperlink part out.
    match = pat_title_with_link.search(string)
    if match:
        # pattern a)
        timestamp, str_categories, linked_title, str_title = match.groups()
        title = convert_hyperlink(linked_title) + str_title
    else:
        match = pat_title.search(string)
        if match is None:
            # Not a title line (no '*' marker at all): report "no metadata"
            # the same way as for empty input instead of failing later on
            # unbound local variables.
            return None
        # pattern b)
        timestamp, str_categories, title = match.groups()

    return (utils.unix2ctime(timestamp, date_enabled=False),
            extract_categories(str_categories), title)