Beispiel #1
0
 def parse_doctor_reg_info(self, response):
     """Yield one ``DoctorRegInfoItem`` per scheduled shift in a JSON response.

     ``response.text`` is decoded as JSON.  ``data.doctor[0]`` supplies the
     doctor, hospital and department names (each defaulting to ``''``) and
     ``data.selWork`` supplies the duty dates, each carrying a ``selWorks``
     list of shifts identified by a numeric ``dutytime`` code.

     :param response: response whose body is the schedule JSON document.
     :return: generator of loaded ``DoctorRegInfoItem`` objects.
     :raises KeyError: if an expected key is missing from the payload.
     :raises IndexError: if ``data.doctor`` is empty.
     :raises ValueError: if the body is not valid JSON or a ``dutytime``
         value cannot be converted with ``int``.
     """
     self.logger.info('>>>>>>正在抓取医生排班信息……')
     doctor_reg_info = json.loads(response.text)
     reg_info_list = doctor_reg_info['data']['selWork']
     doctor = doctor_reg_info['data']['doctor'][0]
     doctor_name = doctor.get('doctorName', '')
     hospital_name = doctor.get('hospitalName', '')
     dept_name = doctor.get('deptName', '')
     # Shift code -> label.  The previous implementation labelled both
     # code 1 and code 3 as morning ('上午'), which made the second branch
     # redundant; code 3 is presumably the afternoon shift
     # -- TODO confirm against the upstream API.
     duty_time_labels = {1: '上午', 3: '下午'}
     for each_reg_info in reg_info_list:
         duty_date = each_reg_info['dutydate']
         for each_work_info in each_reg_info['selWorks']:
             # Every other code falls back to evening ('晚上'); code 4 was
             # observed for doctorId 3329 (成都中医药大学附属医院).
             duty_time = duty_time_labels.get(
                 int(each_work_info['dutytime']), '晚上')
             reg_info = '{0}{1}'.format(duty_date, duty_time)
             loader = CommonLoader(item=DoctorRegInfoItem(), response=response)
             loader.add_value('doctor_name', doctor_name)
             loader.add_value('hospital_name', hospital_name)
             loader.add_value('dept_name', dept_name)
             loader.add_value('reg_info', reg_info)
             loader.add_value('update_time', now_day())
             yield loader.load_item()
Beispiel #2
0
 def parse_doctor_reg_info(self, response):
     """Yield a ``DoctorRegInfoItem`` for every marked cell of the schedule table.

     The doctor and department names are read from ``response.meta``.  Each
     table row after the first provides a row label in its first cell; the
     remaining cells are matched by position to morning / afternoon / evening
     and count as scheduled when they contain an ``<img>``.

     :param response: schedule page; ``meta`` must contain ``doctor_name``
         and ``dept_name``.
     :return: generator of loaded ``DoctorRegInfoItem`` objects.
     """
     self.logger.info('>>>>>>正在抓取{}:医生排班信息>>>>>>'.format(
         self.hospital_name))
     doctor_name = response.meta['doctor_name']
     dept_name = response.meta['dept_name']
     schedule_rows = response.xpath('//table/tr[position()>1]')
     marked_cells = response.xpath('//table/tr[position()>1]/td/img')
     # Column labels, in table order: morning, afternoon, evening shift.
     period_labels = ['上午', '下午', '晚班']
     if not marked_cells:
         # No cell is marked anywhere in the table: nothing to emit.
         return
     for row in schedule_rows:
         row_label = row.xpath('td[1]/text()').extract_first('')
         cells = row.xpath('td[position()>1]')
         for column, cell in enumerate(cells):
             # Looked up before the marker test, exactly as before, so a
             # row with more cells than labels still raises IndexError.
             period = period_labels[column]
             if not cell.xpath('img'):
                 continue
             reg_loader = CommonLoader2(item=DoctorRegInfoItem(),
                                        response=response)
             reg_loader.add_value('doctor_name', doctor_name)
             reg_loader.add_value(
                 'dept_name', dept_name,
                 MapCompose(custom_remove_tags, match_special))
             reg_loader.add_value('hospital_name', self.hospital_name)
             reg_loader.add_value('reg_info',
                                  '{0}{1}'.format(row_label, period))
             reg_loader.add_value('update_time', now_day())
             yield reg_loader.load_item()
Beispiel #3
0
    def parse_doctor_info_detail(self, response):
        """Yield the doctor's detail item followed by their schedule items.

        Doctor, department, level and hospital names come from
        ``response.meta``.  The registration fee is taken from the ``title``
        attribute of the first ``doc_yuyue_time`` link (text after
        ``挂号费:``).  Afterwards every ``doc_yuyue_time`` span is scanned for
        a visiting time (text after ``出诊时间:``), which is prefixed with the
        current year and normalised from ``M月D日`` to ``M-D``.

        Spans without a visiting time are skipped; previously they produced
        an item whose ``reg_info`` contained the literal text ``None``.

        Any exception raised while parsing is logged and swallowed, so one
        malformed page does not propagate an error to the caller.

        :param response: doctor detail page with the names in ``meta``.
        :return: generator yielding one ``DoctorInfoItem`` and then zero or
            more ``DoctorRegInfoItem`` objects.
        """
        self.logger.info('>>>>>>正在抓取医生详细信息>>>>>>')
        try:
            doctor_name = response.meta.get('doctor_name')
            dept_name = response.meta.get('dept_name')
            doctor_level = response.meta.get('doctor_level')
            hospital_name = response.meta.get('hospital_name')

            # Registration fee: only the first matching title is inspected.
            diagnosis_amt = None
            fee_titles = response.xpath('//td/span[@class="doc_yuyue_time"]/a/@title').extract()
            if fee_titles:
                fee_match = re.search(r'.*挂号费:(.*?)$', fee_titles[0], S)
                if fee_match:
                    diagnosis_amt = fee_match.group(1)

            loader = CommonLoader2(item=DoctorInfoItem(), response=response)
            loader.add_value('doctor_name', doctor_name, MapCompose(custom_remove_tags))
            loader.add_value('dept_name', dept_name, MapCompose(custom_remove_tags))
            loader.add_value('hospital_name', hospital_name, MapCompose(custom_remove_tags))
            loader.add_value('doctor_level', doctor_level, MapCompose(custom_remove_tags, match_special2))
            loader.add_xpath('doctor_intro',
                             '//div[@class="zrys"]/dl/dd',
                             MapCompose(remove_tags, custom_remove_tags, clean_info2))
            loader.add_value('diagnosis_amt', diagnosis_amt)
            loader.add_value('dataSource_from', self.data_source_from)
            loader.add_value('crawled_url', response.url)
            loader.add_value('update_time', now_day())
            doctor_item = loader.load_item()
            yield doctor_item

            # Doctor schedule information.
            for each_reg_info in response.xpath('//td/span[@class="doc_yuyue_time"]').extract():
                date_match = re.search(r'.*出诊时间:(.*?)\n', each_reg_info, S)
                if not date_match:
                    # No visiting time in this span: emitting an item would
                    # store "<year>-None" as the schedule, so skip it.
                    continue
                reg_info = '{0}-{1}'.format(now_year(), date_match.group(1)).replace('月', '-').replace('日', '')
                reg_loader = CommonLoader2(item=DoctorRegInfoItem(), response=response)
                reg_loader.add_value('doctor_name', doctor_name, MapCompose(custom_remove_tags))
                reg_loader.add_value('dept_name', dept_name, MapCompose(custom_remove_tags))
                # The schedule item takes the hospital name from the page
                # heading rather than from meta.
                reg_loader.add_xpath('hospital_name',
                                     '//div[@class="yy_til"]/h2/text()',
                                     MapCompose(custom_remove_tags))
                reg_loader.add_value('reg_info', reg_info, MapCompose(custom_remove_tags))
                reg_loader.add_value('dataSource_from', self.data_source_from)
                reg_loader.add_value('crawled_url', response.url)
                reg_loader.add_value('update_time', now_day())
                reg_item = reg_loader.load_item()
                yield reg_item
        except Exception as e:
            self.logger.error('在抓取医生详细信息的过程中出错了,原因是:{}'.format(repr(e)))
Beispiel #4
0
 def parse_doctor_info_detail(self, response):
     """Yield the doctor's detail item, then one item per marked schedule cell.

     All doctor fields are extracted from the ``viewexpert_demo`` and
     ``viewexpert_detail`` blocks of the page.  The schedule table lives in
     ``viewexpert_detail``: each row after the first has a label in its
     first cell, and the remaining cells map by position to Monday..Sunday.
     A cell counts as scheduled when it contains an ``<img>``.

     :param response: doctor detail page.
     :return: generator yielding one ``DoctorInfoItem`` followed by zero or
         more ``DoctorRegInfoItem`` objects.
     """
     self.logger.info('>>>>>>正在抓取{}:医生详细信息>>>>>>'.format(
         self.hospital_name))
     name_xpath = '//div[@class="viewexpert_demo"]/p[1]/text()'
     dept_xpath = '//div[@class="viewexpert_demo"]/p[3]/text()'

     info_loader = CommonLoader2(item=DoctorInfoItem(), response=response)
     info_loader.add_xpath('doctor_name', name_xpath,
                           MapCompose(custom_remove_tags))
     info_loader.add_xpath('dept_name', dept_xpath,
                           MapCompose(custom_remove_tags, match_special))
     info_loader.add_value('hospital_name', self.hospital_name)
     info_loader.add_xpath(
         'doctor_level', '//div[@class="viewexpert_demo"]/p[2]/text()',
         MapCompose(custom_remove_tags, match_special, match_special2))
     info_loader.add_xpath('doctor_intro',
                           '//div[@class="viewexpert_detail"]',
                           MapCompose(remove_tags, custom_remove_tags))
     info_loader.add_xpath('doctor_goodAt',
                           '//div[@class="viewexpert_demo"]/p[4]/text()',
                           MapCompose(custom_remove_tags))
     info_loader.add_value('update_time', now_day())
     yield info_loader.load_item()

     # Doctor schedule information.
     schedule_rows = response.xpath(
         '//div[@class="viewexpert_detail"]/table/tr[position()>1]')
     marked_cells = response.xpath(
         '//div[@class="viewexpert_detail"]/table/tr[position()>1]/td/img')
     weekdays = ['星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期日']
     if not marked_cells:
         return
     for row in schedule_rows:
         row_label = row.xpath('td[1]/text()').extract_first('')
         for column, cell in enumerate(row.xpath('td[position()>1]')):
             # Resolved before the marker test so that a row wider than
             # seven day-columns fails the same way it always did.
             weekday = weekdays[column]
             if not cell.xpath('img'):
                 continue
             reg_loader = CommonLoader2(item=DoctorRegInfoItem(),
                                        response=response)
             reg_loader.add_xpath('doctor_name', name_xpath,
                                  MapCompose(custom_remove_tags))
             reg_loader.add_xpath(
                 'dept_name', dept_xpath,
                 MapCompose(custom_remove_tags, match_special))
             reg_loader.add_value('hospital_name', self.hospital_name)
             reg_loader.add_value('reg_info',
                                  '{0}{1}'.format(weekday, row_label))
             reg_loader.add_value('update_time', now_day())
             yield reg_loader.load_item()
Beispiel #5
0
 def parse_doctor_detail(self, response):
     """Yield the doctor's detail item and one item per parsed schedule entry.

     Every field is read from paragraphs inside the ``fleft wd740`` block.
     The text of the fourth paragraph, when non-empty, is handed to
     ``get_reg_info`` and each element it returns becomes the ``reg_info``
     of a separate ``DoctorRegInfoItem``.

     :param response: doctor detail page.
     :return: generator yielding one ``DoctorInfoItem`` followed by zero or
         more ``DoctorRegInfoItem`` objects.
     """
     name_xpath = '//div[@class="fleft wd740"]/div[1]/div[2]/p[2]/text()'
     dept_xpath = '//div[@class="fleft wd740"]/div[1]/div[2]/p[1]/text()'

     info_loader = CommonLoader2(item=DoctorInfoItem(), response=response)
     info_loader.add_xpath('doctor_name', name_xpath,
                           MapCompose(custom_remove_tags, match_special))
     info_loader.add_xpath('dept_name', dept_xpath,
                           MapCompose(custom_remove_tags, match_special))
     info_loader.add_value('hospital_name', self.hospital_name)
     info_loader.add_xpath(
         'doctor_level',
         '//div[@class="fleft wd740"]/div[1]/div[2]/p[3]/text()',
         MapCompose(custom_remove_tags, match_special))
     info_loader.add_xpath(
         'doctor_intro',
         '//div[@class="fleft wd740"]/div[1]/div[2]/div/p[1]',
         MapCompose(remove_tags, custom_remove_tags))
     info_loader.add_value('update_time', now_day())
     yield info_loader.load_item()

     schedule_text = response.xpath(
         '//div[@class="fleft wd740"]/div[1]/div[2]/p[4]/text()'
     ).extract_first('')
     if not schedule_text:
         # The page carries no schedule paragraph.
         return
     for schedule_entry in get_reg_info(schedule_text):
         reg_loader = CommonLoader2(item=DoctorRegInfoItem(),
                                    response=response)
         reg_loader.add_xpath('doctor_name', name_xpath,
                              MapCompose(custom_remove_tags, match_special))
         reg_loader.add_xpath('dept_name', dept_xpath,
                              MapCompose(custom_remove_tags, match_special))
         reg_loader.add_value('hospital_name', self.hospital_name)
         reg_loader.add_value('reg_info', schedule_entry)
         reg_loader.add_value('update_time', now_day())
         yield reg_loader.load_item()
Beispiel #6
0
 def parse_doctor_info_detail(self, response):
     """Yield the doctor's detail item and, when present, one schedule item.

     Doctor and department names come from ``response.meta``; the
     introduction and speciality are both derived from the
     ``right-about clearfix`` block.  The schedule is the first text node
     matched by a union of XPaths that look for ``坐诊时间`` / ``上午`` /
     ``下午`` under several markup variants of that block.

     :param response: doctor detail page; ``meta`` must contain
         ``dept_name`` and ``doctor_name``.
     :return: generator yielding one ``DoctorInfoItem`` and at most one
         ``DoctorRegInfoItem``.
     """
     self.logger.info('>>>>>>正在抓取{}:医生详细信息>>>>>>'.format(
         self.hospital_name))
     dept_name = response.meta['dept_name']
     doctor_name = response.meta['doctor_name']

     info_loader = CommonLoader2(item=DoctorInfoItem(), response=response)
     info_loader.add_value('doctor_name', doctor_name)
     info_loader.add_value('dept_name', dept_name)
     info_loader.add_value('hospital_name', self.hospital_name)
     info_loader.add_xpath('doctor_intro',
                           '//div[@class="right-about clearfix"]',
                           MapCompose(remove_tags, get_doctor_intro2))
     info_loader.add_xpath('doctor_goodAt',
                           '//div[@class="right-about clearfix"]',
                           MapCompose(remove_tags, get_doctor_good_at))
     info_loader.add_value('update_time', now_day())
     yield info_loader.load_item()

     # Schedule information.
     self.logger.info('>>>>>>正在抓取{}:医生排班信息>>>>>>'.format(
         self.hospital_name))
     # The same sentence appears under differently nested tags depending on
     # the page; the alternatives are joined into a single XPath union.
     schedule_xpath = '|'.join([
         '//div[@class="right-about clearfix"]/p[contains(text(),"坐诊时间")]/text()',
         '//div[@class="right-about clearfix"]/p/strong[contains(text(),"坐诊时间")]/text()',
         '//div[@class="right-about clearfix"]/p/span/strong[contains(text(),"坐诊时间")]/text()',
         '//div[@class="right-about clearfix"]/p/strong[contains(text(),"上午")]/text()',
         '//div[@class="right-about clearfix"]/p/strong[contains(text(),"下午")]/text()',
         '//div[@class="right-about clearfix"]/p/strong/span[contains(text(),"坐诊时间")]/text()',
     ])
     schedule_text = response.xpath(schedule_xpath).extract_first('')
     if not schedule_text:
         return
     reg_loader = CommonLoader2(item=DoctorRegInfoItem(), response=response)
     reg_loader.add_value('doctor_name', doctor_name)
     reg_loader.add_value('dept_name', dept_name)
     reg_loader.add_value('hospital_name', self.hospital_name)
     reg_loader.add_value('reg_info', schedule_text,
                          MapCompose(match_special, clean_info))
     reg_loader.add_value('update_time', now_day())
     yield reg_loader.load_item()
Beispiel #7
0
    def parse_doctor_info_detail(self, response):
        """Yield a single ``DoctorRegInfoItem`` built from the detail page.

        The doctor name, department and schedule text are read from the
        ``page_sum2`` table; the schedule additionally pulls in the
        ``listsum_block`` element.

        NOTE: extraction of a ``DoctorInfoItem`` from this same page used to
        live here as commented-out code and is currently disabled; only the
        schedule item is produced.

        :param response: doctor detail page.
        :return: generator yielding exactly one ``DoctorRegInfoItem``.
        """
        # Doctor schedule information.
        self.logger.info('>>>>>>正在抓取{}:医生排班信息>>>>>>'.format(
            self.hospital_name))

        name_xpath = '//div[@class="page_sum2"]/table/tr[1]/td[3]'
        dept_xpath = '//div[@class="page_sum2"]/table/tr[3]/td'
        # Union of the fifth table row and the free-text summary block.
        schedule_xpath = ('//div[@class="page_sum2"]/table/tr[5]/td|'
                          '//div[@class="listsum_block"]')

        schedule_loader = CommonLoader2(item=DoctorRegInfoItem(),
                                        response=response)
        schedule_loader.add_xpath(
            'doctor_name', name_xpath,
            MapCompose(remove_tags, custom_remove_tags, match_special))
        schedule_loader.add_xpath(
            'dept_name', dept_xpath,
            MapCompose(remove_tags, custom_remove_tags, match_special))
        schedule_loader.add_value('hospital_name', self.hospital_name)
        schedule_loader.add_xpath(
            'reg_info', schedule_xpath,
            MapCompose(remove_tags, custom_remove_tags, match_special))
        schedule_loader.add_value('update_time', now_day())
        yield schedule_loader.load_item()
Beispiel #8
0
    def parse_doctor_info_detail(self, response):
        """Yield the doctor's detail item, then one item per scheduled slot.

        ``response.meta`` supplies the department name and a hospital name
        that is used for log messages only; the hospital name stored on the
        items is read from the ``page_position`` links.  When the page has a
        ``doctor-work`` block, the ``workTable`` is walked: header cells give
        the dates (first three characters are kept), the first cell of each
        body row gives the time label, and a body cell containing a ``<div>``
        counts as a scheduled slot.

        Any exception raised while parsing is logged and swallowed.

        :param response: doctor detail page with ``hospital_name`` and
            ``dept_name`` in ``meta``.
        :return: generator yielding one ``DoctorInfoItem`` followed by zero
            or more ``DoctorRegInfoItem`` objects.
        """
        hospital_name = response.meta.get('hospital_name')
        dept_name = response.meta.get('dept_name')
        self.logger.info('>>>>>>正在抓取[{}]医生详细信息>>>>>>'.format(hospital_name))
        name_xpath = '//td/b[contains(text(),"姓名")]/ancestor::td[1]'
        hospital_xpath = '//div[@class="page_position"]/a[last()-1]/text()'
        try:
            # Doctor information.
            info_loader = CommonLoader2(item=DoctorInfoItem(),
                                        response=response)
            info_loader.add_xpath(
                'doctor_name', name_xpath,
                MapCompose(remove_tags, custom_remove_tags, match_special))
            info_loader.add_value('dept_name', dept_name,
                                  MapCompose(custom_remove_tags))
            info_loader.add_xpath('hospital_name', hospital_xpath,
                                  MapCompose(custom_remove_tags))
            info_loader.add_xpath(
                'sex', '//td/b[contains(text(),"性别")]/ancestor::td[1]',
                MapCompose(remove_tags, custom_remove_tags, match_special,
                           clean_info2))
            info_loader.add_xpath(
                'doctor_level',
                '//td/b[contains(text(),"职称")]/ancestor::td[1]',
                MapCompose(remove_tags, custom_remove_tags, match_special,
                           clean_info2))
            info_loader.add_xpath(
                'doctor_intro',
                '//td/b[contains(text(),"医生简介")]/ancestor::td[1]',
                MapCompose(remove_tags, custom_remove_tags, clean_info2))
            info_loader.add_value('dataSource_from', self.data_source_from)
            info_loader.add_value('crawled_url', response.url)
            info_loader.add_value('update_time', now_day())
            yield info_loader.load_item()

            # Doctor schedule information.
            self.logger.info(
                '>>>>>>正在抓取[{}]医生排班信息>>>>>>'.format(hospital_name))
            if not response.xpath('//td/div[@class="doctor-work"]'):
                return
            body_rows = response.xpath('//table[@class="workTable"]/tbody/tr')
            header_cells = response.xpath(
                '//table[@class="workTable"]/thead/tr/td[position()>1]'
            ).extract()
            # Header cells are joined, stripped of markup in one pass, then
            # split back so the list lines up with the body columns.
            header_text = custom_remove_tags(
                remove_tags(','.join(header_cells)))
            column_dates = header_text.split(',')
            for row in body_rows:
                time_label = row.xpath('td[1]/text()').extract_first('')
                for column, cell in enumerate(row.xpath('td[position()>1]')):
                    if not cell.xpath('div'):
                        continue
                    slot_date = column_dates[column][0:3]
                    reg_loader = CommonLoader2(item=DoctorRegInfoItem(),
                                               response=response)
                    reg_loader.add_xpath(
                        'doctor_name', name_xpath,
                        MapCompose(remove_tags, custom_remove_tags,
                                   match_special))
                    reg_loader.add_value('dept_name', dept_name,
                                         MapCompose(custom_remove_tags))
                    reg_loader.add_xpath('hospital_name', hospital_xpath,
                                         MapCompose(custom_remove_tags))
                    reg_loader.add_value(
                        'reg_info', '{0}{1}'.format(slot_date, time_label))
                    reg_loader.add_value('dataSource_from',
                                         self.data_source_from)
                    reg_loader.add_value('crawled_url', response.url)
                    reg_loader.add_value('update_time', now_day())
                    yield reg_loader.load_item()
        except Exception as e:
            self.logger.error('在抓取医生详细信息的过程中出错了,原因是:{}'.format(repr(e)))
Beispiel #9
0
    def parse_doctor_reg_info(self, response):
        """Yield schedule items for every doctor on a listing page, then paginate.

        For each doctor entry in the ``doc-results`` list, every schedule
        ``<li>`` carrying a ``data-arrangeid`` attribute becomes one
        ``DoctorRegInfoItem`` whose ``reg_info`` is the current year, the
        slot date and the slot time, with ``/`` replaced by ``-``.  If the
        page has a "next page" link, the URL quoted inside its ``onclick``
        attribute is requested through Splash with this same callback.

        Exceptions raised while parsing are logged and swallowed.

        :param response: listing page; ``meta`` may carry ``dept_name``.
        :return: generator of ``DoctorRegInfoItem`` objects, optionally
            followed by a ``SplashRequest`` for the next page.
        """
        self.logger.info('>>>>>>正在抓取医生排班信息……>>>>>>')
        dept_name = response.meta.get('dept_name')
        hospital_name = response.xpath(
            '//div[@class="link-555"]/a/text()').extract_first('')
        doctor_nodes = response.xpath(
            '//ul[@class="doc-results clearfix"]/li')
        self.logger.info(
            '>>>>>>当前页共有{}个医生……'.format(len(doctor_nodes)))
        try:
            for doctor_node in doctor_nodes:
                doctor_name = doctor_node.xpath(
                    'div/dl[@class="doctor-info"]/dt/a/text()'
                ).extract_first('')
                slot_nodes = doctor_node.xpath(
                    'div[@class="doc-result-schedule"]/div/div/ul/li[@data-arrangeid]'
                )
                self.logger.info('>>>>>>当前医生[{}]一周内的排班信息有{}条……'.format(
                    doctor_name, len(slot_nodes)))
                for slot_node in slot_nodes:
                    slot_loader = YiHuLoader(item=DoctorRegInfoItem(),
                                             selector=slot_node)
                    slot_date = slot_node.xpath(
                        'a/span/em[1]/text()').extract_first('')
                    slot_time = slot_node.xpath(
                        'a/span/em[2]/text()').extract_first('')
                    schedule_text = '{0}/{1}{2}'.format(
                        now_year(), slot_date, slot_time).replace('/', '-')
                    slot_loader.add_value('doctor_name', doctor_name)
                    slot_loader.add_value('dept_name', dept_name)
                    slot_loader.add_value('hospital_name', hospital_name)
                    slot_loader.add_value(
                        'reg_info', schedule_text,
                        MapCompose(custom_remove_tags, clean_info))
                    slot_loader.add_value('update_time', now_day())
                    yield slot_loader.load_item()

            # Pagination.
            onclick = response.xpath(
                '//a[@class="page-next"]/@onclick').extract_first('')
            # The target URL is the first single-quoted string in the handler.
            link_match = re.search(r'\'(.*?)\'', onclick) if onclick else None
            if link_match:
                next_request = SplashRequest(
                    link_match.group(1),
                    splash_headers=self.headers,
                    callback=self.parse_doctor_reg_info,
                    meta={'dept_name': dept_name},
                    args={'images': 0, 'wait': 5})
                # Kept after the request is built, as in the original: the
                # spider-wide headers mapping is updated in place.
                self.headers['Referer'] = response.url
                yield next_request
        except Exception as e:
            self.logger.error('抓取医生排班信息过程中出现错误,错误的眼因是:{}'.format(repr(e)))
Beispiel #10
0
    def parse_doctor_info_detail(self, response):
        """Yield the doctor's detail item, then one item per bookable slot.

        Hospital, department and doctor names come from ``response.meta``.
        Identifiers are derived by passing the response URL and the last two
        ``position_one`` link targets through ``match_special2``.  When the
        page shows a booking link (text containing ``预约``), the
        ``whliesubscribe`` grid is walked: the first ``<li>`` lists the time
        labels, the ``datetable`` list gives the dates, and a grid cell
        containing ``span/a`` counts as a bookable slot.

        Any exception raised while parsing is logged and swallowed.

        :param response: doctor detail page with ``hospital_name``,
            ``dept_name`` and ``doctor_name`` in ``meta``.
        :return: generator yielding one ``DoctorInfoItem`` followed by zero
            or more ``DoctorRegInfoItem`` objects.
        """
        hospital_name = response.meta.get('hospital_name')
        dept_name = response.meta.get('dept_name')
        doctor_name = response.meta.get('doctor_name')
        self.logger.info('>>>>>>正在抓取[{}]医院-[{}]医生详细信息>>>>>>'.format(
            hospital_name, doctor_name))
        dept_id_xpath = '//div[@class="position_one"]/span/a[last()]/@href'
        hospital_id_xpath = '//div[@class="position_one"]/span/a[last()-1]/@href'
        try:
            # Doctor information.
            photo_src = response.xpath(
                '//div[@class="doctor_Img"]/img/@src').extract_first('')
            info_loader = CommonLoader2(item=DoctorInfoItem(),
                                        response=response)
            info_loader.add_value('doctor_name', doctor_name,
                                  MapCompose(custom_remove_tags))
            info_loader.add_value('dept_name', dept_name,
                                  MapCompose(custom_remove_tags))
            info_loader.add_value('hospital_name', hospital_name,
                                  MapCompose(custom_remove_tags))
            info_loader.add_xpath('sex',
                                  '//span[@class="doctor_grade"]/text()',
                                  MapCompose(custom_remove_tags))
            info_loader.add_xpath('doctor_level',
                                  '//span[@class="object_grade"]/text()',
                                  MapCompose(custom_remove_tags))
            info_loader.add_xpath(
                'doctor_intro', '//div[@class="doctor_Text_Major"]',
                MapCompose(remove_tags, custom_remove_tags, match_special2))
            info_loader.add_value('dataSource_from', self.data_source_from)
            info_loader.add_value('crawled_url', response.url)
            info_loader.add_value('update_time', now_day())
            info_loader.add_value('doctor_id', response.url,
                                  MapCompose(match_special2))
            info_loader.add_xpath('dept_id', dept_id_xpath,
                                  MapCompose(match_special2))
            info_loader.add_xpath('hospital_id', hospital_id_xpath,
                                  MapCompose(match_special2))
            info_loader.add_value('doctor_photo_url',
                                  urljoin(self.host, photo_src))
            info_loader.add_value('gmt_created', now_time())
            info_loader.add_value('gmt_modified', now_time())
            yield info_loader.load_item()

            # Doctor schedule information.
            self.logger.info(
                '>>>>>>正在抓取[{}]医生排班信息>>>>>>'.format(hospital_name))
            if not response.xpath(
                    '//span[@class="yuyue"]/a[contains(text(),"预约")]'):
                return
            time_labels = response.xpath(
                '//div[@class="whliesubscribe"]/ul/li[1]/div/'
                'span/text()').extract()
            date_cells = response.xpath(
                '//div[@class="datetable"]/ul/li[position()>1]/'
                'span[1]/text()').extract()
            # Dates are joined, cleaned in one pass, then split back so the
            # list lines up with the grid columns.
            date_labels = custom_remove_tags(
                remove_tags(','.join(date_cells))).split(',')
            # XPath positions are 1-based, hence start=1.
            for position, time_label in enumerate(time_labels, start=1):
                slot_cells = response.xpath(
                    '//div[@class="whliesubscribe"]/ul/li[position()>1]'
                    '/div[{}]'.format(position))
                for column, cell in enumerate(slot_cells):
                    if not cell.xpath('span/a'):
                        continue
                    schedule_text = '{0}-{1}{2}'.format(
                        now_year(), date_labels[column], time_label)
                    reg_loader = CommonLoader2(item=DoctorRegInfoItem(),
                                               response=response)
                    reg_loader.add_value('doctor_name', doctor_name,
                                         MapCompose(custom_remove_tags))
                    reg_loader.add_value('dept_name', dept_name,
                                         MapCompose(custom_remove_tags))
                    reg_loader.add_value('hospital_name', hospital_name,
                                         MapCompose(custom_remove_tags))
                    reg_loader.add_value('reg_info', schedule_text)
                    reg_loader.add_value('dataSource_from',
                                         self.data_source_from)
                    reg_loader.add_value('crawled_url', response.url)
                    reg_loader.add_value('update_time', now_day())
                    reg_loader.add_value('doctor_id', response.url,
                                         MapCompose(match_special2))
                    reg_loader.add_xpath('dept_id', dept_id_xpath,
                                         MapCompose(match_special2))
                    reg_loader.add_xpath('hospital_id', hospital_id_xpath,
                                         MapCompose(match_special2))
                    reg_loader.add_value('gmt_created', now_time())
                    reg_loader.add_value('gmt_modified', now_time())
                    yield reg_loader.load_item()
        except Exception as e:
            self.logger.error('在抓取医生详细信息的过程中出错了,原因是:{}'.format(repr(e)))