def parse(self, response): laststation = '' temperture = int() stations = [] hxs = HtmlXPathSelector(response) report = hxs.select('//div[@id="ming"]') # HKO report time, temperture and humidity. time = hk0().gettime(report.extract()[0]) o = hk0().hk0current(report.extract()[0]) station = Hk0WeatherItem() station['time'] = int(time) station['station'] = o['station'] station['temperture'] = o['temperture'] station['humidity'] = o['humidity'] stations.append(station) # Other regional air temperatures. for i in report.select('span/text()').extract(): if i.isdigit(): temperture = int(i) station = Hk0WeatherItem() station['time'] = int(time) station['station'] = laststation station['temperture'] = temperture stations.append(station) else: for k,v in self.stations: if re.sub(' ','',i) == k: laststation = v return stations
def parse(self, response): laststation = '' temperture = int() stations = [] hxs = HtmlXPathSelector(response) report = hxs.select('//pre[@id="ming"]/text()') # HKO report time. time = hk0().gettime2(report[0].extract()) for i in re.split('\n',report[0].extract()): laststation = '' station = Hk0RegionalItem() for k,v in self.stations: if re.sub(' ','',i[:5]) == k: laststation = v station['time'] = time station['station'] = laststation dataline = re.sub('^\s','',i[6:]) dataline = re.sub('\*',' ',dataline) data = re.split('\s+',dataline) if len(data) >= 7: for j in range(0,len(data)): if data[j].isdigit(): station['humidity'] = int(data[j]) else: try: if j == 1: station['temperture'] = float(data[j]) elif j == 3: station['temperturemax'] = float(data[j]) elif j == 5: station['temperturemin'] = float(data[j]) except ValueError: pass stations.append(station) return stations