def __init__(self):
    """Set up the spider for https://www.androidperformance.com.

    Forwards the start URL and the index / title / content XPath
    expressions to ``Spider.__init__`` as keyword arguments.
    """
    settings = {
        "start_urls": ['https://www.androidperformance.com'],
        "index_xpath": "//article/a/@href",
        "article_title_xpath": "//*[@class='post-title']/a/text()",
        "article_content_xpath": "//*[@class='article-entry']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for http://www.wjdiankong.cn.

    Besides the usual start URL and XPath expressions, this site also
    passes ``article_trim_xpaths``.
    """
    home = 'http://www.wjdiankong.cn'
    link_query = "//header/h2/a/@href"
    title_query = '//header/h1/a/text()'
    body_query = "//div/article"
    # NOTE(review): presumably these nodes are stripped from the extracted
    # article by Spider -- confirm against the base class.
    trimmed = ['//div[@class="article-social"]']
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
        article_trim_xpaths=trimmed,
    )
def __init__(self):
    """Set up the spider for http://fex.baidu.com/weekly/.

    Forwards the start URL and the index / title / content XPath
    expressions to ``Spider.__init__`` as keyword arguments.
    """
    settings = {
        "start_urls": ['http://fex.baidu.com/weekly/'],
        "index_xpath": "//ul[@class='post-list']//a/@href",
        "article_title_xpath": "//h1[@class='title']/text()",
        "article_content_xpath": "//div[@class='content']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://timyang.net.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'https://timyang.net'
    link_query = "//h2/a/@href"
    title_query = "//*[@class='posttitle']/a/text()"
    body_query = "//*[@class='entry']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for https://www.lfhacks.com/.

    Forwards the start URL and the index / title / content XPath
    expressions to ``Spider.__init__`` as keyword arguments.
    """
    settings = {
        "start_urls": ['https://www.lfhacks.com/'],
        "index_xpath": "//li[@class='article']/h4/a/@href",
        "article_title_xpath": "//article[@class='article']/h1/text()",
        "article_content_xpath": "//div[@itemprop='articleBody']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://www.stackoverflow.wiki/blog.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'https://www.stackoverflow.wiki/blog'
    link_query = "//h1/a/@href"
    title_query = "//h1[contains(@class, 'title')]/text()"
    body_query = "//div[@id='post-article']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for https://xingzx.org/.

    Forwards the start URL and the index / title / content XPath
    expressions to ``Spider.__init__`` as keyword arguments.
    """
    settings = {
        "start_urls": ['https://xingzx.org/'],
        "index_xpath": "//h2[@class='post-title']/a/@href",
        "article_title_xpath": "//div[@class='markdown-body editormd-preview-container']/h1/text()",
        "article_content_xpath": "//div[@class='blog-body']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://www.jdon.com/approval.

    The article title is read from the page's ``<title>`` element; the
    other expressions select index links and the post body.
    """
    home = 'https://www.jdon.com/approval'
    link_query = "//h3[@class='vid-name']/a/@href"
    title_query = "//title/text()"
    body_query = "//div[@class='post_body_content']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for the polaris1119/golangweekly GitHub repository.

    Forwards the start URL, the index / title / content XPath expressions
    and ``index_limit_count=3`` to ``Spider.__init__``.
    """
    Spider.__init__(
        self,
        start_urls=['https://github.com/polaris1119/golangweekly'],
        # The link-text filter used to read 'угг', which is the UTF-8
        # encoding of '第' (bytes E7 AC AC) mis-decoded as cp855; with the
        # garbled text the expression could not match the Chinese issue
        # links, so the intended character is restored here.
        index_xpath="//article//a[contains(text(),'第')]/@href",
        article_title_xpath='//article//h1/text()',
        article_content_xpath="//article",
        # NOTE(review): presumably caps how many index links are followed --
        # confirm against Spider.
        index_limit_count=3,
    )
def __init__(self):
    """Set up the spider for https://moelove.info/.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'https://moelove.info/'
    link_query = "//h1[@class='post-title']/a/@href"
    title_query = "//h1[@class='post-title']/text()"
    body_query = "//div[@class='post-content']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for https://yafeilee.com/archives.

    Forwards the start URL, the index / title / content XPath expressions
    and ``index_limit_count=1`` to ``Spider.__init__``.
    """
    settings = {
        "start_urls": ['https://yafeilee.com/archives'],
        "index_xpath": "//a[@class='blog-title']/@href",
        "article_title_xpath": "//h2[@class='blog-title']/text()",
        "article_content_xpath": "//div[@class='content markdown']",
        # NOTE(review): presumably limits the number of index links used --
        # confirm against Spider.
        "index_limit_count": 1,
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for http://www.yinwang.org.

    Passes the start URL, the XPath expressions for index links, title
    and content, and ``index_limit_count=3`` on to ``Spider.__init__``.
    """
    home = 'http://www.yinwang.org'
    link_query = "//ul[@class='list-group']/li/a/@href"
    title_query = '//h2/text()'
    body_query = "//div[@class='inner']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
        index_limit_count=3,
    )
def __init__(self):
    """Set up the spider for https://www.coder4.com/.

    Forwards the start URL and the index / title / content XPath
    expressions to ``Spider.__init__`` as keyword arguments.
    """
    settings = {
        "start_urls": ['https://www.coder4.com/'],
        "index_xpath": "//h1[@class='entry-title']/a/@href",
        "article_title_xpath": "//h1[@class='entry-title']/text()",
        "article_content_xpath": "//div[@class='entry-content']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for http://ued.baidu.com/case.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'http://ued.baidu.com/case'
    link_query = "//ul[@class='case-article-list']/li/a/@href"
    title_query = "//div[@class='article-title']/text()"
    body_query = "//div[@class='article-content']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for the aliyunfe/weekly GitHub repository.

    Index links are anchors whose text contains '》第'.  The call also
    passes ``index_reverse=True``.
    """
    settings = {
        "start_urls": ['https://github.com/aliyunfe/weekly'],
        "index_xpath": "//a[contains(text(),'》第')]/@href",
        "article_title_xpath": "//h2//*[@class='final-path']/text()",
        "article_content_xpath": "//div[@id='readme']",
        # NOTE(review): presumably reverses the order of the collected index
        # links -- confirm against Spider.
        "index_reverse": True,
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://lucifer.ren/blog/.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'https://lucifer.ren/blog/'
    link_query = "//h2[@class='title']/a/@href"
    title_query = "//h1[@class='title']/a/text()"
    body_query = "//div[@class='article-entry']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for GitHub's monthly trending page.

    The title is taken from the ``og:title`` meta tag and the content
    from the element with id ``readme``.
    """
    settings = {
        "start_urls": ['https://github.com/trending?since=monthly'],
        "index_xpath": '//*[@class="Box-row"]/h1/a/@href',
        "article_title_xpath": '//meta[@property="og:title"]/@content',
        "article_content_xpath": '//div[@id="readme"]',
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://hiwannz.com/.

    Passes the start URL, the XPath expressions for index links, title
    and content, and ``index_limit_count=2`` on to ``Spider.__init__``.
    """
    home = 'https://hiwannz.com/'
    link_query = "//h2[@class='post-title']/a/@href"
    title_query = "//h1[@class='post-title']/text()"
    body_query = "//div[@class='post-content']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
        index_limit_count=2,
    )
def __init__(self):
    """Set up the spider for http://www.alloyteam.com/page/0/.

    Both the index and the title expressions pick the second ``<a>``
    child (``a[2]``) of their container.
    """
    settings = {
        "start_urls": ['http://www.alloyteam.com/page/0/'],
        "index_xpath": "//ul[@class='articlemenu']/li/a[2]/@href",
        "article_title_xpath": "//div[@class='title1']/a[2]/text()",
        "article_content_xpath": "//div[@class='content_banner']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://blog.miniasp.com/.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'https://blog.miniasp.com/'
    link_query = "//h2[@class='post-title']/a/@href"
    title_query = "//h2[@class='post-title']/a/text()"
    body_query = "//section[@class='post-body text']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for https://kernel.taobao.org.

    Forwards the start URL and the index / title / content XPath
    expressions to ``Spider.__init__`` as keyword arguments.
    """
    settings = {
        "start_urls": ['https://kernel.taobao.org'],
        "index_xpath": "//div[@class='article-title']/a/@href",
        "article_title_xpath": "//h1[@class='post-title']/text()",
        "article_content_xpath": "//div[@class='post-content']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://teddysun.com.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'https://teddysun.com'
    link_query = "//header/h2/a/@href"
    title_query = "//h1/a/text()"
    body_query = "//*[@class='article-content']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for juejin.im's monthly-hottest listing.

    In addition to the start URL and XPath expressions, this site passes
    ``browser=True`` to ``Spider.__init__``.
    """
    settings = {
        "start_urls": ['https://juejin.im/?sort=monthly_hottest'],
        "index_xpath": "//div[@class='entry-box']//a[@class='entry-link']/@href",
        "article_title_xpath": "//h1[@class='article-title']/text()",
        "article_content_xpath": "//div[@class='article-content']",
        # NOTE(review): presumably makes Spider fetch pages through a real
        # browser -- confirm against the base class.
        "browser": True,
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for http://yalishizhude.com/.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'http://yalishizhude.com/'
    link_query = "//h1[@class='post-title']/a/@href"
    title_query = "//h1[@class='post-title']/text()"
    body_query = "//div[@class='post-body']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for https://blog.huoding.com/.

    Forwards the start URL and the index / title / content XPath
    expressions to ``Spider.__init__`` as keyword arguments.
    """
    settings = {
        "start_urls": ['https://blog.huoding.com/'],
        "index_xpath": "//header/h1/a/@href",
        "article_title_xpath": "//article/header/h1/text()",
        "article_content_xpath": "//article//div[@class='entry-content']",
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for https://sofi.sh/.

    Passes the start URL plus the XPath expressions for index links,
    article title and article content on to ``Spider.__init__``.
    """
    home = 'https://sofi.sh/'
    link_query = "//div[@class='block']/h2/a/@href"
    # NOTE(review): the literal 'page.attributes.title' looks like an
    # unrendered template placeholder; kept as-is -- verify against the site.
    title_query = "//a[@title='page.attributes.title']/h1/text()"
    body_query = "//div[@itemprop='articleBody']"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
    )
def __init__(self):
    """Set up the spider for https://www.cnblogs.com/pick/.

    Forwards the start URL, the id-based index / title / content XPath
    expressions and ``index_limit_count=4`` to ``Spider.__init__``.
    """
    settings = {
        "start_urls": ['https://www.cnblogs.com/pick/'],
        "index_xpath": '//*[@id="post_list"]/div/div/h3/a/@href',
        "article_title_xpath": '//*[@id="cb_post_title_url"]/text()',
        "article_content_xpath": '//*[@id="cnblogs_post_body"]',
        "index_limit_count": 4,
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for the dt-fe/weekly GitHub repository.

    Index links are anchors whose text contains '.md' but not
    'readme.md'.  The call also passes ``index_reverse=True``.
    """
    home = 'https://github.com/dt-fe/weekly'
    link_query = "//td[@class='content']//a[contains(text(),'.md') and not(contains(text(),'readme.md'))]/@href"
    title_query = "//h2//*[@class='final-path']/text()"
    body_query = '//article'
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
        index_reverse=True,
    )
def __init__(self):
    """Set up the spider for http://www.nginx.cn.

    Forwards the start URL, the index / title / content XPath expressions
    and ``index_limit_count=3`` to ``Spider.__init__``.
    """
    settings = {
        "start_urls": ['http://www.nginx.cn'],
        "index_xpath": "//div[@class='post']/h2/a/@href",
        "article_title_xpath": "//div[@class='post']/h1/text()",
        "article_content_xpath": "//div[@class='post']/div[@class='content']",
        "index_limit_count": 3,
    }
    Spider.__init__(self, **settings)
def __init__(self):
    """Set up the spider for http://gityuan.com.

    Passes the start URL, the XPath expressions for index links, title
    and content, and ``index_limit_count=6`` on to ``Spider.__init__``.
    """
    home = 'http://gityuan.com'
    link_query = "//div[@class='post-preview']/a/@href"
    title_query = '//h1/text()'
    body_query = "//div[contains(@class, 'post-container')]"
    Spider.__init__(
        self,
        start_urls=[home],
        index_xpath=link_query,
        article_title_xpath=title_query,
        article_content_xpath=body_query,
        index_limit_count=6,
    )