Example #1
0
def process(thread):
    for url in pages(thread):
        stanza_template=Template(u"""
        {*
        <table id="firstAuthor">
		    <tr>
                <td>
                    <a>${author}</a> &nbsp;发表日期:{{ [stanzas].datetime }}
		        </td>
		    </tr>
	    </table>
	    <div id="pContentDiv">
	        <div class="post">
	        {{ [stanzas].content|html }}
	        </div>
	    </div>
        *}
        {*
        <table>
		    <tr>
                <td>
                    <a>${author}</a> 回复日期:{{ [stanzas].datetime }}
		        </td>
		    </tr>
	    </table>
        <div class="post">
        {{ [stanzas].content|html }}
        </div>
        *}
        """)
        logging.info(thread['author'])
        pattern=scrapemark.compile(stanza_template.substitute(author=thread['author']))
        logging.info(pattern)
        thread['stanzas'][url]=scrapemark.scrape(pattern, url=url, encoding=encoding)['stanzas']
        logging.info(thread['stanzas'][url])
Example #2
0
def process():
    url='http://www.tianya.cn/publicforum/content/develop/1/905898.shtml'
    template=Template(u"""
        {*
        <table id="firstAuthor">
		    <tr>
                <td>
                    <a>${author}</a> &nbsp;发表日期:{{ [stanzas].datetime }}
		        </td>
		    </tr>
	    </table>
	    <div id="pContentDiv">
	        <div class="post">
	        {{ [stanzas].content }}
	        </div>
	    </div>
        *}
        {*
        <table>
		    <tr>
                <td>
                    <a>${author}</a> 回复日期:{{ [stanzas].datetime }}
		        </td>
		    </tr>
	    </table>
        <div class="post">
        {{ [stanzas].content }}
        </div>
        *}
    """)
    pattern=template.substitute(author=u'flp713')
    pattern=scrapemark.compile(pattern)
    stanzas=scrapemark.scrape(pattern, url=url, encoding=encoding)['stanzas']
    return stanzas