Example #1
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for the oschina.net blog page.

    All arguments are fixed; the function takes no input of its own.
    """
    # NOTE(review): the *_select values look like CSS selectors; how they are
    # applied is defined by basic.hard_crawl -- confirm there.
    settings = {
        'url': 'http://www.oschina.net/blog/more?p=1',
        'list_select': '.BlogList li h3 a',
        'title_select': 'title',
        'body_select': '.BlogContent',
        'list_url_pre': '',
        'remove_tags': [],
    }
    return basic.hard_crawl(**settings)
Example #2
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://div.io/pro/index.

    ``remove_tags`` is deliberately not passed here, so whatever
    ``basic.hard_crawl`` does when it is omitted applies.
    """
    start_url = 'http://div.io/pro/index'
    link_selector = '.hot-topics li span.topic-title a.title'
    link_prefix = ''
    heading_selector = 'title'
    content_selector = '.topic-firstfloor-detail'
    return basic.hard_crawl(url=start_url,
                            list_select=link_selector,
                            list_url_pre=link_prefix,
                            title_select=heading_selector,
                            body_select=content_selector)
Example #3
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.cocoachina.com/ios/.

    All arguments are fixed; the function takes no input of its own.
    """
    site_root = 'http://www.cocoachina.com'
    # presumably list_url_pre is prepended to links found on the list page --
    # verify against basic.hard_crawl.
    settings = {
        'url': 'http://www.cocoachina.com/ios/',
        'list_select': '.article-list ul li .newstitle a',
        'title_select': '.detail-main h2',
        'body_select': '.field_body',
        'list_url_pre': site_root,
        'remove_tags': [],
    }
    return basic.hard_crawl(**settings)
Example #4
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for the devstore.cn essay home page.

    All arguments are fixed; the function takes no input of its own.
    """
    start_url = 'http://www.devstore.cn/essay/essayHome.html'
    link_selector = 'dd.content h3 a'
    heading_selector = 'h1.title'
    content_selector = '.article_content'
    link_prefix = 'http://www.devstore.cn'
    tags_to_remove = []
    return basic.hard_crawl(
        url=start_url,
        list_select=link_selector,
        title_select=heading_selector,
        body_select=content_selector,
        list_url_pre=link_prefix,
        remove_tags=tags_to_remove,
    )
Example #5
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.cnetnews.com.cn/.

    Unlike configurations that pass an empty list, this one passes
    ``remove_tags=['div']``.
    """
    # NOTE(review): what removing 'div' does to the extracted body is decided
    # by basic.hard_crawl -- confirm there.
    settings = {
        'url': 'http://www.cnetnews.com.cn/',
        'list_select': '#tab1 li > .qu_jx > a',
        'title_select': 'h1.qu_ti',
        'body_select': '.qu_content_div',
        'list_url_pre': 'http://www.cnetnews.com.cn',
        'remove_tags': ['div'],
    }
    return basic.hard_crawl(**settings)
def crawl():
    """Return the result of ``basic.hard_crawl`` for the time-weekly.com newmedia page.

    All arguments are fixed; ``list_url_pre`` is the empty string.
    """
    start_url = 'http://www.time-weekly.com/html/newmedia/'
    link_selector = 'ul.sumlist01 li a'
    heading_selector = '.sumlist01 h1'
    content_selector = '.content'
    return basic.hard_crawl(
        url=start_url,
        list_select=link_selector,
        title_select=heading_selector,
        body_select=content_selector,
        list_url_pre='',
        remove_tags=[],
    )
Example #7
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.keke289.com/info.html.

    Passes ``remove_tags=['div']`` and the site root as ``list_url_pre``.
    """
    site_root = 'http://www.keke289.com'
    settings = {
        'url': 'http://www.keke289.com/info.html',
        'list_select': 'article.article h2.article-title a',
        'title_select': 'section.article-hd h2',
        'body_select': 'section.article-bd',
        'list_url_pre': site_root,
        'remove_tags': ['div'],
    }
    return basic.hard_crawl(**settings)
Example #8
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for the guancha.cn Science list page.

    All arguments are fixed; the function takes no input of its own.
    """
    start_url = 'http://www.guancha.cn/Science/list_1.shtml'
    link_prefix = 'http://www.guancha.cn'
    # NOTE(review): selector semantics are defined by basic.hard_crawl.
    selectors = {
        'list_select': '.search_result_item a',
        'title_select': 'h2.content-title1',
        'body_select': '.all-txt',
    }
    return basic.hard_crawl(url=start_url,
                            list_url_pre=link_prefix,
                            remove_tags=[],
                            **selectors)
Example #9
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.ciweek.com/v7/list.jsp.

    All arguments are fixed; the function takes no input of its own.
    """
    settings = {
        'url': 'http://www.ciweek.com/v7/list.jsp',
        'list_select': 'dl h2 a',
        'title_select': 'p.title',
        'body_select': '.text',
        'list_url_pre': 'http://www.ciweek.com',
        'remove_tags': [],
    }
    return basic.hard_crawl(**settings)
Example #10
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.jiemian.com/lists/65.html.

    ``list_url_pre`` is the empty string and ``remove_tags`` is an empty list.
    """
    start_url = 'http://www.jiemian.com/lists/65.html'
    link_selector = '.news-list .news-view.card .news-img a'
    heading_selector = '.article-header h1'
    content_selector = '.article-content'
    return basic.hard_crawl(url=start_url,
                            list_select=link_selector,
                            title_select=heading_selector,
                            body_select=content_selector,
                            list_url_pre='',
                            remove_tags=[])
Example #11
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.w3ctech.com/.

    ``remove_tags`` is not passed here, so whatever ``basic.hard_crawl`` does
    when it is omitted applies.
    """
    settings = {
        'url': 'http://www.w3ctech.com/',
        'list_select': '.topic_list_content h2.topic_title a',
        'list_url_pre': 'http://www.w3ctech.com',
        'title_select': '.topic_info h1',
        'body_select': '.topic_detail .callout',
    }
    return basic.hard_crawl(**settings)
Example #12
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.aliued.cn/.

    ``list_url_pre`` is the empty string and ``remove_tags`` is an empty list.
    """
    return basic.hard_crawl(
        url='http://www.aliued.cn/',
        list_select='.latestpost > .post > .top > a.block',
        title_select='.blog-list-title h2',
        body_select='.entry',
        list_url_pre='',
        remove_tags=[],
    )
Example #13
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://www.aliued.com/.

    ``list_url_pre`` is the empty string and ``remove_tags`` is an empty list.
    """
    settings = {
        'url': 'http://www.aliued.com/',
        'list_select': '.listbox ul li .listbox_img a',
        'title_select': '.title_top',
        'body_select': '.post_content',
        'list_url_pre': '',
        'remove_tags': [],
    }
    return basic.hard_crawl(**settings)
Example #14
0
def crawl():
    """Return the result of ``basic.hard_crawl`` for http://news.ittime.com.cn/.

    ``remove_tags`` is not passed here, so whatever ``basic.hard_crawl`` does
    when it is omitted applies.
    """
    site_root = 'http://news.ittime.com.cn'
    start_url = 'http://news.ittime.com.cn/'
    link_selector = '.left-list a.img_212'
    heading_selector = '.article h1'
    content_selector = '.articlep'
    return basic.hard_crawl(
        url=start_url,
        list_select=link_selector,
        list_url_pre=site_root,
        title_select=heading_selector,
        body_select=content_selector,
    )