Ejemplo n.º 1
0
    def _crawl_AQI(self):
        """Crawl the air-quality reading for ``self.city`` from a Baidu search page.

        ``self.city`` is split on ``','`` into ``(city_name, city_code)``. The
        search result page for the city name is parsed for the
        ``op_pm25_graexp`` span (AQI index), that span's next sibling (AQI
        grade) and the ``op_pm25_date`` span (AQI time).

        Returns:
            A seven-item list
            ``[city_code, city_name, day, hour, aqi_index, aqi_grade, aqi_time]``
            where ``day``/``hour`` are the local crawl time. The last three
            items stay ``''`` when the request or the parsing fails; such
            failures are logged and recorded with ``ErrorLog.create`` instead
            of being raised.

        Raises:
            ValueError: if ``self.city`` does not split into exactly two
                comma-separated parts (this happens before the ``try``).
        """
        now = datetime.datetime.now()
        day = now.strftime("%Y-%m-%d")
        hour = now.strftime("%H:%M:%S")

        city_name, city_code = self.city.split(',')

        # Pre-filled with blanks so a failed crawl still yields a full-width row.
        result = [city_code, city_name, day, hour, '', '', '']
        url_template = 'http://www.baidu.com/s?wd=%s空气质量指数'
        try:
            logger.info('city %s start crawling'%city_code)
            response = requests.get(url_template % city_name, timeout=self.request_timeout)
            soup = BeautifulSoup(response.text, 'html.parser')

            aqi_index = soup.find('span', class_='op_pm25_graexp')
            if aqi_index is None:
                # Fail with a descriptive cause instead of the incidental
                # AttributeError that `None.next_sibling` would produce; the
                # handler below records it exactly like any other failure.
                raise ValueError('AQI element span.op_pm25_graexp not found in response')
            aqi_grade = aqi_index.next_sibling
            aqi_time = soup.find('span', class_='op_pm25_date')

            # NOTE(review): `.encode('utf8')` yields byte strings; this looks
            # like Python 2 era code -- confirm before running on Python 3.
            result[-3:] = [
                aqi_index.text.encode('utf8'),
                aqi_grade.text.encode('utf8') if aqi_grade else '',
                aqi_time.text.encode('utf8') if aqi_time else '',
            ]
        except Exception as e:
            # Best-effort crawl: never propagate, but leave a trace both in the
            # application log and in the ErrorLog store.
            message = 'city %s failed with cause: %s'%(city_code, str(e))
            logger.exception(message)
            ErrorLog.create(message)
        return result
Ejemplo n.º 2
0
def migrate_file(path, data_type):
    """Import one legacy file into the database.

    Args:
        path: Path of the file to read.
        data_type: ``'data'`` to import a CSV file of records through
            ``CityAQI.create_bulk``, or ``'log'`` to import the error lines
            of a log file through ``ErrorLog.create``. Any other value makes
            the call a no-op.

    For ``'data'`` the first CSV row is skipped and every remaining non-blank
    row is prefixed with the city code looked up in ``city_dict`` by the row's
    first column (``''`` when the key is unknown).

    For ``'log'`` only lines containing the ``'ERROR :'`` separator are
    imported. The first two space-separated tokens before the separator are
    parsed as ``YYYY-MM-DD`` and ``HH:MM:SS`` and combined into a datetime
    with ``GMT8()`` as tzinfo; the text after the separator is the message.

    Raises:
        ValueError: if the text before ``'ERROR :'`` on a log line does not
            start with a parseable date and time.
    """
    if data_type == 'data':
        records = []
        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # confirm the interpreter version before changing it.
        with open(path, 'rb') as f:
            csvfile = csv.reader(f)
            # Skip the first row (presumably a header); tolerate an empty file.
            next(csvfile, None)
            for line in csvfile:
                if not line:
                    # csv.reader yields [] for blank lines; line[0] would fail.
                    continue
                city_code = city_dict.get(line[0], '')
                records.append([city_code] + line)
        CityAQI.create_bulk(records)
    elif data_type == 'log':
        with open(path, 'rb') as f:
            for line in f:
                # partition() splits on the FIRST separator only, so a message
                # that itself contains 'ERROR :' no longer breaks unpacking.
                stamp, separator, message = line.strip().partition('ERROR :')
                if not separator:
                    # Not an error record (or 'ERROR' appears without the
                    # exact separator): nothing parseable to import.
                    continue
                date_part, time_part = stamp.split(' ')[:2]
                year, month, day = (int(part) for part in date_part.split('-'))
                hour, minute, second = (int(part) for part in time_part.split(':'))
                date_time = datetime(year, month, day,
                                     hour, minute, second, tzinfo=GMT8())
                ErrorLog.create(message.strip(), date_time=date_time)
                print('migrate one error log, log time is %s'%date_time)