コード例 #1
0
ファイル: jxny.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     The listing page is rendered with PhantomJS instead of being read
     from *response*. A row is emitted when its title contains one of the
     keywords and the first 10 characters of its publish date equal
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper). Every item links back to the listing page itself.
     """
     page_url = 'http://jxndjy.jxau.edu.cn/module/jobfairs?type='
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     driver = webdriver.PhantomJS(service_log_path=r'../watchlog.log')
     try:
         driver.get(page_url)
         html = etree.HTML(driver.page_source)
     finally:
         # Always shut the headless browser down so no PhantomJS process
         # is left running after the page has been captured.
         driver.quit()
     titles = html.xpath('//div[@class="text-eps w240"]/@title')
     print(titles)
     dates = [
         text.strip() for text in html.xpath(
             '//table[@class="tb-pub-list"]/tbody/tr/td[2]/text()')
     ]
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][:10]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i][:10]
             item['holdDate'] = ""
             item['url'] = page_url
             yield item
         else:
             print('没有匹配')
コード例 #2
0
ファイル: srsf.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     The index page is rendered with PhantomJS instead of being read from
     *response*. A row is emitted when its title contains one of the
     keywords and its publish date equals ``getPresentTime()``
     (presumably ``YYYY-MM-DD``; confirm against the helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     driver = webdriver.PhantomJS(service_log_path=r'../watchlog.log')
     try:
         driver.get('http://zsjy.sru.jx.cn/html/srsfzscjyyw/index.html')
         html = etree.HTML(driver.page_source)
     finally:
         # Always shut the headless browser down so no PhantomJS process
         # is left running after the page has been captured.
         driver.quit()
     titles = html.xpath('//span[@class="a-box"]/ul/li/a/text()')
     print(titles)
     dates = html.xpath('//span[@class="a-box"]/ul/li/span/text()')
     urls = html.xpath('//span[@class="a-box"]/ul/li/a/@href')
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://zsjy.sru.jx.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #3
0
ファイル: hnwl.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     The listing page is rendered with PhantomJS instead of being read
     from *response*. A row is emitted when its title contains one of the
     keywords and the first 10 characters of its publish date equal
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     driver = webdriver.PhantomJS(service_log_path=r'../watchlog.log')
     try:
         driver.get('http://jy.huaszj.cn/module/jobfairs')
         html = etree.HTML(driver.page_source)
     finally:
         # Always shut the headless browser down so no PhantomJS process
         # is left running after the page has been captured.
         driver.quit()
     titles = html.xpath(
         '//ul[@id="data_html"]/li/div/div[2]/p[1]/a/@title')
     print(titles)
     dates = html.xpath(
         '//ul[@id="data_html"]/li/div/div[3]/div/p[1]/text()')
     urls = html.xpath('//ul[@id="data_html"]/li/div/div[2]/p[1]/a/@href')
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][:10]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i][:10]
             item['holdDate'] = ""
             item['url'] = 'http://jy.huaszj.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #4
0
ファイル: lczy.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice dated today.

     Titles and links come from the anchors of the selected table cells;
     dates are the first ``NN-NN`` pattern found in each bare text node of
     those cells (text nodes without such a pattern are skipped). A row is
     emitted when its title contains one of the keywords and its date
     equals ``getPresentTime()[5:]`` (assuming the helper returns
     ``YYYY-MM-DD``; confirm). The stored publish date is that ``NN-NN``
     string.
     """
     keywords = ("招聘会", "双选会", "宣讲会", "供需见面会")
     date_pattern = re.compile(r'\d{2}-\d{2}')
     lists = response.xpath(
         '//table[@id="__01"]/tr[2]/td[6]/table/tr[2]/td')
     print(lists)
     titles = lists.xpath('a/text()').extract()
     print(titles)
     texts = lists.xpath('text()').extract()
     # Keep the first date-like match of every text node that has one.
     found = (date_pattern.findall(text) for text in texts)
     dates = [matches[0] for matches in found if matches]
     urls = lists.xpath('a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today[5:] == dates[i]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://zsjyw.lcvtc.edu.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #5
0
ファイル: ahny.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice dated today.

     Titles, dates and links are read from the ``list-unstyled`` lists of
     *response* (titles and dates are whitespace-stripped). A row is
     emitted when its title contains one of the keywords and characters
     1..5 of its date equal ``getPresentTime()[5:]`` (assuming the helper
     returns ``YYYY-MM-DD``; confirm).
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//ul[@class="list-unstyled"]')
     print(lists)
     titles = [t.strip() for t in lists.xpath('li/a/text()').extract()]
     print(titles)
     dates = [d.strip() for d in lists.xpath('li/span/text()').extract()]
     urls = lists.xpath('li/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today[5:] == dates[i][1:6]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://job.ahau.edu.cn/tzgg/' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #6
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``infoList jobfairList``
     lists inside the ``infoBox mt10 b`` div of *response*. A row is
     emitted when its title contains one of the keywords and the first 10
     characters of its stripped date equal ``getPresentTime()``
     (presumably ``YYYY-MM-DD``; confirm against the helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     row = 'ul[@class="infoList jobfairList"]'
     lists = response.xpath('//div[@class="infoBox mt10 b"]')
     print(lists)
     titles = lists.xpath(row + '/li[1]/a/@title').extract()
     print(titles)
     dates = [
         d.strip() for d in lists.xpath(row + '/li[5]/text()').extract()
     ]
     urls = lists.xpath(row + '/li[1]/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][:10]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i][:10]
             item['holdDate'] = ""
             item['url'] = 'http://jzit.91wllm.com' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #7
0
ファイル: yzdx.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Only the first 20 titles, dates and links are considered. A row is
     emitted when its title contains one of the keywords and the first 10
     characters of its date equal ``getPresentTime()`` (presumably
     ``YYYY-MM-DD``; confirm against the helper). The full date text is
     stored on the item.
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//div[@class="infoBox mt10"]/ul')
     print(lists)
     # NOTE(review): these queries start with ``//`` so they search the
     # whole document once per node in ``lists`` rather than inside it;
     # confirm that is intended before narrowing them.
     titles = lists.xpath(
         '//ul[@class="infoList"]/li[@class="span7"]/a/text()'
     ).extract()[:20]
     print(titles)
     dates = lists.xpath(
         '//ul[@class="infoList"]/li[@class="span4"]/text()'
     ).extract()[:20]
     urls = lists.xpath(
         '//ul[@class="infoList"]/li[@class="span7"]/a/@href'
     ).extract()[:20]
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][:10]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://yzu.91job.gov.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #8
0
ファイル: jxnygc.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for every listed job-fair notice.

     Titles and links are read from the ``listA`` anchors with
     ``target="_self"`` in *response*. No publish date is scraped, so
     there is no date filter and ``publishDate`` is left empty; a row is
     emitted whenever its title contains one of the keywords.
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     anchor = '//a[@class="listA" and @target="_self"]'
     titles = response.xpath(anchor + '/@title').extract()
     print(titles)
     urls = response.xpath(anchor + '/@href').extract()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = ''
             item['holdDate'] = ""
             item['url'] = 'http://www.jxaevc.com' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #9
0
ファイル: hzny.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``newsBox`` divs of
     *response*. A row is emitted when its title contains one of the
     keywords and its date equals ``getPresentTime()`` (presumably
     ``YYYY-MM-DD``; confirm against the helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//div[@class="newsBox"]')
     print(lists)
     titles = lists.xpath('ul/li[2]/a/text()').extract()
     print(titles)
     dates = lists.xpath('ul/li[1]/text()').extract()
     urls = lists.xpath('ul/li[2]/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://hzau.91wllm.com' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #10
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice dated today.

     Titles, dates and links are read from the table inside the
     ``wp_news_w71`` div of *response*. A row is emitted when its title
     contains one of the keywords and its date equals
     ``getPresentTime()[5:]`` (assuming the helper returns
     ``YYYY-MM-DD``; confirm).
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//div[@id="wp_news_w71"]/table')
     print(lists)
     titles = lists.xpath('tr/td[2]/a[2]/text()').extract()
     print(titles)
     dates = lists.xpath('tr/td[4]/text()').extract()
     urls = lists.xpath('tr/td[2]/a[2]/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today[5:] == dates[i]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://www.aqvtc.edu.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #11
0
 def parse(self, response):
     """Yield a DoubleItem for each recruitment notice published today.

     Titles, dates and links are read from the ``info_list`` divs of
     *response*. A row is emitted when its title contains one of the
     keywords and its date equals ``getPresentTime()`` (presumably
     ``YYYY-MM-DD``; confirm against the helper).
     """
     keywords = ("招聘会", "双选会", "校园招聘")
     lists = response.xpath("//div[@class='info_list']")
     print(lists)
     # Relative path, like the date and link queries below: a leading
     # "/" would anchor the query at the document root, where no <ol>
     # exists, and produce no titles at all.
     titles = lists.xpath('ol/a/text()').extract()
     print(titles)
     dates = lists.xpath('ol/span/text()').extract()
     urls = lists.xpath('ol/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://jiuye.hebau.edu.cn/news2/' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #12
0
ファイル: ht.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the lists inside the
     ``newsnr`` div of *response*. A row is emitted when its title
     contains one of the keywords and its date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper). The first five characters of each href are dropped before
     the site prefix is prepended.
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//div[@class="newsnr"]/div/ul')
     print(lists)
     titles = lists.xpath('li/a/text()').extract()
     print(titles)
     dates = lists.xpath('li/span/text()').extract()
     urls = lists.xpath('li/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://www.hetaodaxue.com/jyzdzx' + urls[i][5:]
             yield item
         else:
             print('没有匹配')
コード例 #13
0
ファイル: jh.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the lists inside the
     ``wp_news_w6`` div of *response*. A row is emitted when its title
     contains one of the keywords and its date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//div[@id="wp_news_w6"]/ul')
     print(lists)
     titles = lists.xpath('li/div/span/a/text()').extract()
     print(titles)
     dates = lists.xpath('li/div[2]/span/text()').extract()
     urls = lists.xpath('li/div/span/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://news.jhc.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #14
0
ファイル: sxsf.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``height="20"`` rows of
     the table inside the ``list_right fr`` div of *response*. A row is
     emitted when its title contains one of the keywords and its date
     equals ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm
     against the helper). The first two characters of each href are
     dropped before the site prefix is prepended.
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//div[@class="list_right fr"]/table')
     print(lists)
     titles = lists.xpath('tr[@height="20"]/td[2]/a/@title').extract()
     print(titles)
     dates = lists.xpath('tr[@height="20"]/td[3]/span/text()').extract()
     # extract() is required: the hrefs are sliced as strings below, and
     # a raw selector cannot be sliced.
     urls = lists.xpath('tr[@height="20"]/td[2]/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             # NOTE(review): the date is compared in full above but stored
             # without its last character; confirm which form is intended.
             item['publishDate'] = dates[i][:-1]
             item['holdDate'] = ""
             item['url'] = 'http://xsc.sxnu.edu.cn' + urls[i][2:]
             yield item
         else:
             print('没有匹配')
コード例 #15
0
ファイル: bz.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for every listed job-fair notice.

     Titles and links are read from the nested tables inside the
     ``newslist`` div of *response*. No publish date is scraped, so there
     is no date filter and ``publishDate`` is left empty; a row is
     emitted whenever its title contains one of the keywords.
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//div[@id="newslist"]/table')
     print(lists)
     titles = lists.xpath('tr/td[2]/table/tr/td/a/font/text()').extract()
     print(titles)
     urls = lists.xpath('tr/td[2]/table/tr/td/a/@href').extract()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = ''
             item['holdDate'] = ""
             item['url'] = 'http://jyw.bzpt.edu.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #16
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the lists inside the
     ``newlist`` div of *response*. Each date is rebuilt as ``'20'`` plus
     characters 1..8 of the scraped text; a row is emitted when its title
     contains one of the keywords and that rebuilt date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//div[@id="newlist"]/ul')
     print(lists)
     titles = lists.xpath('li/a/@title').extract()
     print(titles)
     dates = lists.xpath('li/span/text()').extract()
     urls = lists.xpath('li/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == '20' + dates[i][1:9]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = '20' + dates[i][1:9]
             item['holdDate'] = ""
             item['url'] = 'http://218.5.241.22:8036' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #17
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``trbg`` rows of the
     ``table_style01`` table in *response*. A row is emitted when its
     title contains one of the keywords and its date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//table[@class="table_style01"]')
     print(lists)
     titles = lists.xpath('tr[@class="trbg"]/td[2]/a/text()').extract()
     print(titles)
     dates = lists.xpath('tr[@class="trbg"]/td[4]/text()').extract()
     urls = lists.xpath('tr[@class="trbg"]/td[2]/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://hnbemc.university-hr.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #18
0
ファイル: nysf.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for every listed job-fair notice.

     Titles, dates and links are read from the ``GridView1`` table of
     *response*. There is no date filter: a row is emitted whenever its
     title contains one of the keywords, and the scraped date at the same
     index is stored as-is.
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//table[@id="GridView1"]')
     print(lists)
     titles = lists.xpath('tr/td/a/span/text()').extract()
     print(titles)
     dates = lists.xpath('tr/td/span/text()').extract()
     urls = lists.xpath('tr/td/a/@href').extract()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://www2.nynu.edu.cn/xzbm/jiuye/' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #19
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the lists inside the
     ``articleList articleList2`` div of *response*. A row is emitted
     when its title contains one of the keywords and its date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper).
     """
     # NOTE(review): "双选周" differs from the "双选会" keyword used by the
     # sibling spiders; kept as written, confirm it is deliberate.
     keywords = ("招聘会", "双选周", "宣讲会", '供需见面会')
     lists = response.xpath('//div[@class="articleList articleList2"]/ul')
     print(lists)
     titles = lists.xpath('li/a/@title').extract()
     print(titles)
     dates = lists.xpath('li/span/text()').extract()
     urls = lists.xpath('li/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = r'http://www.xyafu.edu.cn/jyxxw/tzgg/' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #20
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``txt link_lan`` divs
     inside the ``listbox`` divs of *response*. A row is emitted when its
     title contains one of the keywords and its date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper). Hrefs are stored unchanged.
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     entry = 'div[@class="txt link_lan"]'
     lists = response.xpath('//div[@class="listbox"]')
     print(lists)
     titles = lists.xpath(entry + '/h2/a/text()').extract()
     print(titles)
     dates = lists.xpath(entry + '/em[1]/text()').extract()
     urls = lists.xpath(entry + '/h2/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #21
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``winstyle2456`` table of
     *response*. A row is emitted when its title contains one of the
     keywords and the first 10 characters of its date equal
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//table[@class="winstyle2456"]')
     print(lists)
     titles = lists.xpath('tr/td[1]/a/@title').extract()
     print(titles)
     dates = lists.xpath('tr/td[2]/text()').extract()
     urls = lists.xpath('tr/td[1]/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][:10]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i][:10]
             item['holdDate'] = ""
             item['url'] = 'http://job.sqzy.edu.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #22
0
ファイル: ndsny.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``artileListWraper`` divs
     of *response*. A row is emitted when its title contains one of the
     keywords and its date equals ``getPresentTime()`` (presumably
     ``YYYY-MM-DD``; confirm against the helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//div[@class="artileListWraper"]')
     print(lists)
     titles = lists.xpath('div/h3/a/text()').extract()
     print(titles)
     dates = lists.xpath(
         'div/div[@class="m-news-data"]/span[1]/text()').extract()
     urls = lists.xpath('div/h3/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://www.ndgzy.com' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #23
0
ファイル: dz.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the lists inside the
     ``article-list floatL`` div of *response*. A row is emitted when its
     title contains one of the keywords and the first 10 characters of
     its date equal ``getPresentTime()`` (presumably ``YYYY-MM-DD``;
     confirm against the helper). The full date text is stored on the
     item.
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//div[@class="article-list floatL"]/ul')
     print(lists)
     titles = lists.xpath('li/a/span/text()').extract()
     print(titles)
     dates = lists.xpath('li/a/i/text()').extract()
     urls = lists.xpath('li/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][:10]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://dzujy.dzu.edu.cn' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #24
0
ファイル: zk.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``style=";"`` list items
     inside the ``gov-main`` div of *response*. A row is emitted when its
     title contains one of the keywords and characters 1..10 of its date
     text equal ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm
     against the helper). Hrefs are stored unchanged.
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//div[@class="gov-main"]/div/ul')
     print(lists)
     titles = lists.xpath('li[@style=";"]/a/@title').extract()
     print(titles)
     dates = lists.xpath('li[@style=";"]/span/text()').extract()
     # Same <li> filter as the title and date queries, so the three lists
     # stay index-aligned even if other list items carry links.
     urls = lists.xpath('li[@style=";"]/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][1:11]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i][1:11]
             item['holdDate'] = ""
             item['url'] = urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #25
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the ``trbg`` rows of the table
     inside the ``p2_m`` div of *response*. A row is emitted when its
     title contains one of the keywords and its date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper). Hrefs starting with ``showarticle`` get the site prefix;
     any other href is stored unchanged.
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//div[@class="p2_m"]/div[2]/table')
     print(lists)
     titles = lists.xpath(
         'tr[@class="trbg"]/td[@align="left"]/a/text()').extract()
     print(titles)
     dates = lists.xpath('tr[@class="trbg"]/td[4]/text()').extract()
     urls = lists.xpath('tr[@class="trbg"]/td[2]/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             link = urls[i]
             if link.startswith('showarticle'):
                 link = 'http://sxau.university-hr.com/' + link
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = link
             yield item
         else:
             print('没有匹配')
コード例 #26
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice dated this period.

     Titles, dates and links are read from the ``inf_lc`` lists of
     *response* (dates are whitespace-stripped). A row is emitted when
     its title contains one of the keywords and its date equals the first
     seven characters of ``getPresentTime()`` (``YYYY-MM`` if the helper
     returns ``YYYY-MM-DD``; confirm). Hrefs are stored unchanged.
     """
     keywords = ("招聘会", "双选会", "宣讲会")
     lists = response.xpath('//ul[@class="inf_lc"]')
     print(lists)
     titles = lists.xpath('li/a[1]/@title').extract()
     print(titles)
     dates = [
         d.strip() for d in lists.xpath('li/h4/span/text()').extract()
     ]
     urls = lists.xpath('li/a[1]/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today[:7] == dates[i]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #27
0
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from a nested table located by
     position in *response*. Dates are the stripped bare text nodes of the
     cells with their first character dropped; a row is emitted when its
     title contains one of the keywords and that date equals
     ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm against the
     helper). The first two characters of each href are dropped before
     the site prefix is prepended.
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('body/div[2]/table[2]/tr/td[3]/table[2]')
     print(lists)
     titles = lists.xpath('tr/td/a/text()').extract()
     print(titles)
     dates = [d.strip() for d in lists.xpath('tr/td/text()').extract()]
     urls = lists.xpath('tr/td/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if (any(word in name for word in keywords)
                 and today == dates[i][1:]):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i][1:]
             item['holdDate'] = ""
             item['url'] = 'http://www6.hpu.edu.cn/web5' + urls[i][2:]
             yield item
         else:
             print('没有匹配')
コード例 #28
0
ファイル: lc.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for each job-fair notice published today.

     Titles, dates and links are read from the lists inside the
     ``article cur02`` div of *response* (titles are whitespace-stripped).
     A row is emitted when its title contains one of the keywords and its
     date equals ``getPresentTime()`` (presumably ``YYYY-MM-DD``; confirm
     against the helper).
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     lists = response.xpath('//div[@class="article cur02"]/ul')
     print(lists)
     titles = [t.strip() for t in lists.xpath('li/a/text()').extract()]
     print(titles)
     dates = lists.xpath('li/span/text()').extract()
     urls = lists.xpath('li/a/@href').extract()
     today = getPresentTime()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords) and today == dates[i]:
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = dates[i]
             item['holdDate'] = ""
             item['url'] = 'http://www.lcu.edu.cn/ztzx/ldyw/' + urls[i]
             yield item
         else:
             print('没有匹配')
コード例 #29
0
ファイル: yz.py プロジェクト: yuyi-7/double
 def parse(self, response):
     """Yield a DoubleItem for every listed job-fair notice.

     Titles and links are read from the tables inside the ``tdtext1``
     divs of *response*. No publish date is scraped, so there is no date
     filter and ``publishDate`` is left empty; a row is emitted whenever
     its title contains one of the keywords.
     """
     keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')
     titles = response.xpath(
         '//div[@class="tdtext1"]/table/tr/td/a/font/text()').extract()
     print(titles)
     urls = response.xpath(
         '//div[@class="tdtext1"]/table/tr/td/a/@href').extract()
     for i, name in enumerate(titles):
         if any(word in name for word in keywords):
             print(name)
             # Fresh item per match so already-yielded items are never
             # mutated by later rows.
             item = DoubleItem()
             item['title'] = name
             item['publishDate'] = ''
             item['holdDate'] = ""
             item['url'] = 'http://www.hnyzzy.com' + urls[i]
             yield item
         else:
             print('没有匹配')