def __init__(self, name, key_word, max_page, *args, **kwargs):
    """Set up the Wanfang search spider.

    Args:
        name: Key handed to ``get_config`` to look up this spider's
            settings. It is consumed here and not forwarded to the parent.
        key_word: Search keyword, stored as ``self.key_word``.
        max_page: Stored as ``self.my_max_page`` (presumably the upper
            bound on result pages to crawl -- confirm against the caller).
        *args: Forwarded unchanged to the parent initializer.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    self.base_url = 'http://s.wanfangdata.com.cn/Paper.aspx?'

    # Look the configuration up once and reuse the same object below.
    self.config = settings = get_config(name)

    self.key_word = key_word
    self.my_max_page = max_page
    self.allowed_domains = settings.get('allowed_domains')

    super(WanfangSpider, self).__init__(*args, **kwargs)
def __init__(self, name, key_word, min_page, max_page, *args, **kwargs):
    """Set up the CNKI (kns.cnki.net) search spider.

    Args:
        name: Key handed to ``get_config`` to look up this spider's
            settings. It is consumed here and not forwarded to the parent.
        key_word: Search keyword, stored as ``self.key_word``.
        min_page: Accepted for interface compatibility; this constructor
            does not read or store it.
        max_page: Stored as ``self.my_max_page`` (presumably the upper
            bound on result pages to crawl -- confirm against the caller).
        *args: Forwarded unchanged to the parent initializer.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    # NOTE(review): ``min_page`` is never used in this method -- confirm
    # whether the crawl is expected to start from a given page.

    # Fixed CNKI URLs kept as instance attributes.
    self.base_url = 'http://kns.cnki.net'
    self.home_url = (
        'http://kns.cnki.net/kns/request/SearchHandler.ashx'
        '?action=&NaviCode=*&'
    )
    self.list_url = 'http://kns.cnki.net/kns/brief/brief.aspx'
    self.cur_referer = 'http://kns.cnki.net/kns/brief/default_result.aspx'

    # Look the configuration up once and reuse the same object below.
    self.config = settings = get_config(name)

    self.key_word = key_word
    self.my_max_page = max_page
    self.allowed_domains = settings.get('allowed_domains')

    super(CNKISpider, self).__init__(*args, **kwargs)
def __init__(self, name, key_word, max_page, *args, **kwargs):
    """Set up the mobile (wap.cnki.net) CNKI search spider.

    Besides the usual keyword/page attributes, this builds the form
    payload template ``self.myFormData`` for the search endpoint
    stored in ``self.list_url``.

    Args:
        name: Key handed to ``get_config`` to look up this spider's
            settings. It is consumed here and not forwarded to the parent.
        key_word: Search keyword, stored as ``self.key_word`` and placed
            in the ``"keyword"`` field of the form payload.
        max_page: Stored as ``self.my_max_page`` (presumably the upper
            bound on result pages to crawl -- confirm against the caller).
        *args: Forwarded unchanged to the parent initializer.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    results_per_page = 10

    self.list_url = 'http://wap.cnki.net/touch/web/Article/Search'
    self.header = {'Referer': 'http://wap.cnki.net/touch/web'}

    # Look the configuration up once and reuse the same object below.
    self.config = settings = get_config(name)

    self.key_word = key_word
    self.my_max_page = max_page
    self.page_size = results_per_page

    # Data from the last ten years (presumably selected through the
    # starttime_sc / endtime_sc fields at the end of the payload).
    # All values are strings; key order matches the original literal.
    form_template = {
        "searchtype": "0",
        "dbtype": "",
        "pageindex": "1",
        "pagesize": str(results_per_page),
        "theme_kw": "",
        "title_kw": "",
        "full_kw": "",
        "author_kw": "",
        "depart_kw": "",
        "key_kw": "",
        "abstract_kw": "",
        "source_kw": "",
        "teacher_md": "",
        "catalog_md": "",
        "depart_md": "",
        "refer_md": "",
        "name_meet": "",
        "collect_meet": "",
        "keyword": key_word,
        "remark": "",
        "fieldtype": "101",
        "sorttype": "0",
        "articletype": "11",
        "screentype": "0",
        "isscreen": "",
        "subject_sc": "",
        "research_sc": "",
        "depart_sc": "",
        "sponsor_sc": "",
        "author_sc": "",
        "teacher_sc": "",
        "subjectcode_sc": "",
        "researchcode_sc": "",
        "departcode_sc": "",
        "sponsorcode_sc": "",
        "authorcode_sc": "",
        "teachercode_sc": "",
        "starttime_sc": "2007",
        "endtime_sc": "2018",
        "timestate_sc": "1"
    }
    self.myFormData = form_template

    self.allowed_domains = settings.get('allowed_domains')

    super(WAPCNKISpider, self).__init__(*args, **kwargs)