class ArticleItemLoader(ItemLoader):
    default_item_class = ArticleItem
    default_output_processor = TakeFirst()

    title_out = Compose(TakeFirst(), Net39ArticleTitle())
    content_out = Compose(Join(''), Net39ArticleContent())
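A minimal usage sketch for this loader inside a spider callback, assuming ArticleItem and the project's Net39* processors are importable; the XPath expressions are placeholders, not the project's real ones:

    # Hedged sketch: `response` is a Scrapy response; XPaths are illustrative.
    def parse_article(self, response):
        loader = ArticleItemLoader(response=response)
        loader.add_xpath('title', '//h1/text()')                   # cleaned by title_out
        loader.add_xpath('content', '//div[@id="body"]//text()')   # joined by content_out
        return loader.load_item()  # other fields fall back to TakeFirst()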
class ArticleLoader(XPathItemLoader):
    """Used for easier construction of ArticleItem"""

    # Note: these helpers take no `self` on purpose -- they are referenced
    # directly from the class body below, as plain functions, by MapCompose.
    def is_string(string):
        if isinstance(string, (str, unicode)):
            if string.strip() != "":
                #log.msg("returning string: " + unicode(string.strip()))
                return string.strip()
        #log.msg("returning None for string: " + unicode(string))
        return None

    def separate_tags(tags_string):
        return tags_string.replace(";", ",").split(",")

    default_input_processor = MapCompose(is_string)
    default_output_processor = TakeFirst()

    publishers_in = MapCompose(is_string)
    publishers_out = Identity()

    title_in = MapCompose(is_string, unicode.title)
    title_out = TakeFirst()

    time_published_in = MapCompose(is_string)
    time_published_out = Identity()

    summary_in = MapCompose(is_string)
    summary_out = TakeFirst()

    tags_in = MapCompose(is_string, separate_tags)
    tags_out = Identity()
class CompraLineaItem(Item):
    cantidad = Field(output_processor=TakeFirst())
    unidad_medida = Field(output_processor=TakeFirst())
    importe = Field(output_processor=lambda x: parse_money(x[0]))
    importe_total = Field(output_processor=lambda x: parse_money(x[0]))
    detalle = Field(output_processor=TakeFirst())
    anio = Field(output_processor=TakeFirst())
def process_item(self, task_id):
    report = self.db.loadScrapedFullReport(task_id)
    if report is None:
        return

    text = report['full_report_body']
    # Clamp every character to the 7-bit ASCII range before re-encoding.
    text = "".join(chr(min(ord(c), 127)) for c in text)
    t = TextResponse(url=report['full_report_url'], body=text.encode('utf-8'))  # must have utf-8 here

    l = XPathItemLoader(NrcParsedReport(), response=t)
    l.add_value('reportnum', task_id)

    patterns = self.compile_patterns()
    for p in patterns:
        l.add_value(p[0], text, TakeFirst(), unicode.strip, re=p[1])

    county = l.get_output_value('county')
    pattern = self.get_area_code_pattern(county)
    if pattern:
        l.add_value('areaid', county)
        l.add_value('blockid', text, TakeFirst(), unicode.strip,
                    re="%s[\s]+(?:BLOCK[\s]+)?([\d]+)" % pattern)
    l.add_value('blockid', text, TakeFirst(), unicode.strip, re="BLOCK[\s]+([\d]+)")

    item = l.load_item()
    yield item
    self.item_completed(task_id)
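For reference, the add_value calls above rely on the loader applying the re= pattern first (extracting group matches from the raw text) and only then running the positional processors over the match list. A small sketch of that order, using the loader's public get_value (the input string is illustrative):

    # Hedged sketch: the regex extracts the group matches first, then
    # TakeFirst and unicode.strip run over them, in that order.
    loader = XPathItemLoader(NrcParsedReport())
    loader.get_value(u'... BLOCK  17 ...', TakeFirst(), unicode.strip,
                     re=u"BLOCK[\s]+([\d]+)")  # -> u'17'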
class ZhiHuU_T(Item):
    """Zhihu user-topic relationship."""
    crawled_from = Field(output_processor=TakeFirst())
    user_url = Field(output_processor=TakeFirst())
    topic_url = Field(output_processor=TakeFirst())
def test_get_xpath(self):
    l = TestItemLoader(response=self.response)
    self.assertEqual(l.get_xpath('//p/text()'), [u'paragraph'])
    self.assertEqual(l.get_xpath('//p/text()', TakeFirst()), u'paragraph')
    self.assertEqual(l.get_xpath('//p/text()', TakeFirst(), re='pa'), u'pa')
    self.assertEqual(l.get_xpath(['//p/text()', '//div/text()']), [u'paragraph', u'marta'])
class VkItem(Item):
    id = Field(output_processor=TakeFirst())
    name = Field(output_processor=TakeFirst())
    text = Field(input_processor=MapCompose(remove_tags),
                 output_processor=TakeFirst())
    date = Field(output_processor=TakeFirst())
    words = Field()
def parse(self, response):
    """
    Default callback used by Scrapy to process downloaded responses

    Testing contracts:
    @url http://www.livingsocial.com/cities/15-san-francisco
    @returns items 1
    @scrapes title link

    :param response:
    :return:
    """
    selector = HtmlXPathSelector(response)

    # iterate over deals
    for deal in selector.select(self.deals_list_xpath):
        loader = XPathItemLoader(LivingSocialDeal(), selector=deal)

        # define processors; each default_* attribute holds a single
        # processor, so assign it exactly once (repeated assignments
        # would just overwrite each other)
        loader.default_input_processor = MapCompose(unicode.strip)
        loader.default_output_processor = TakeFirst()

        # iterate over fields and add xpaths to the loader
        for field, xpath in self.item_fields.iteritems():
            loader.add_xpath(field, xpath)
        yield loader.load_item()
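For reference, a quick sketch of what these built-in processors do to raw extracted values (shown with the old scrapy.contrib.loader.processor import path this code targets; newer Scrapy exposes the same classes from itemloaders.processors):

    from scrapy.contrib.loader.processor import Join, MapCompose, TakeFirst

    TakeFirst()([u'', None, u'first', u'second'])  # -> u'first' (skips empty/None)
    Join(u' ')([u'a', u'b'])                       # -> u'a b'
    MapCompose(unicode.strip)([u' a ', u' b '])    # -> [u'a', u'b'] (runs per value)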
class Product(Item):
    id = Field()
    name = Field(input_processor=Compose(TakeFirst(), unicode.strip))
    price = Field(input_processor=Compose(TakeFirst(), unicode.strip, remove_comma, float))
    cat = Field()
    avgStars = Field(input_processor=Compose(only_elem_or_default, float))
    nReviews = Field(input_processor=Compose(only_elem, unicode.strip, remove_comma, int))
    salesRank = Field(input_processor=Compose(unicode.strip, remove_comma, int))
    subCatRank = Field(input_processor=Compose(only_elem_or_default, unicode.strip, remove_comma, int))
    subCat = Field(input_processor=Compose(only_elem_or_default, unicode.strip))
    manufact = Field(input_processor=Compose(only_elem_or_default, unicode.strip))
    referrer = Field()

    @property
    def export_filename(self):
        return 'product'

    @property
    def key(self):
        return self._values['id']
def parse_item(self, response):
    loader = self.get_product_item_loader_with_default_values(response)
    loader.brand_in = lambda x: x[0][14:] if x else 'no brand'
    loader.brand_out = TakeFirst()
    loader.description_out = JoinExcludingEmptyValues('\n')
    loader.sale_price_out = TakeFirst()

    reviews = self.parse_review(response)
    loader.add_value('reviews', reviews)
    loader.add_value('url', response.meta['url'])
    loader.add_value('product_number', response.meta['product_number'])
    loader.add_xpath('brand', '//a[@class="brandstore"]/text()')
    loader.add_xpath('title', '//div[@id="divCaption"]/h1[@class="captionText"]/text()')
    loader.add_xpath('description', '//div[@id="divPromosPDetail"]')
    loader.add_xpath('description', '//div[@id="divingredientsPDetail"]')
    loader.add_xpath('original_price', '//span[@class="rowMSRP"]/s/text()')
    loader.add_xpath('sale_price', '//div[@id="productprice"]/span/text()')
    loader.add_xpath('sizes', '//div[@id="divCaption"]//span[@class="captionSizeText"]/text()')

    # images
    for sel in response.xpath('//div[@id="divPImage"]'):
        image_loader = ProductImageLoader(response=response, selector=sel)
        image_loader.add_value('thumbnail', response.meta['thumbnail'])
        image_loader.add_xpath('normal_size', 'a/img/@src')
        image_loader.add_xpath('zoomed', 'a/img/@src')
        loader.add_value('images', image_loader.load_item())

    yield loader.load_item()
def parse_item(self, response):
    loader = self.get_product_item_loader_with_default_values(response)
    loader.original_price_out = TakeFirst()
    loader.sale_price_out = TakeFirst()

    values_from_list = response.meta.get('values_from_list', {})
    for key, value in values_from_list.iteritems():
        loader.add_value(key, value)

    loader.add_xpath('product_number',
                     '//div[@id="swatchContent"]/div[@id="productNumber"]/text()',
                     re='#(.*)')
    loader.add_xpath('title', '//div[@id="productNameText"]/span[@class="productName"]/text()')
    loader.add_xpath('description', '//div[@id="tabWindow"]//text()')
    loader.add_xpath('original_price', '//div[@id="selectionContent"]/span[@id="priceText"]/strike/text()')
    loader.add_xpath('original_price', '//div[@id="selectionContent"]/span[@id="priceText"]/text()')
    loader.add_xpath('sale_price', '//div[@id="selectionContent"]/span[@id="priceText"]/span[@class="salePrice"]/text()')
    loader.add_xpath('sale_price', '//div[@id="selectionContent"]/span[@id="priceText"]/text()')
    loader.add_xpath('sizes', '//div[@id="productContentRight"]/div[@id="swatchContent"]/div[@id="sizeDimensionSwatchContent"]/div[@id="sizeDimension1SwatchContent"]/div[@id="sizeDimension1Swatches"]/button/text()')
    loader.add_xpath('default_color', '//div[@id="selectionContent"]/span[@id="selectionConfirmText"]/text()')

    # colors
    for selector in response.xpath('//div[@id="swatchContent"]/div[@id="colorSwatchContent"]/input'):
        color_loader = ProductColorLoader(response=response, selector=selector)
        color_loader.add_xpath('name', '@alt', re='(.*) product image$')
        color_loader.add_xpath('swatch_image', '@src')
        loader.add_value('colors', color_loader.load_item())

    # images
    images_data = response.meta.get('images_data', {})
    if images_data.get('P01'):
        image_loader = ProductImageLoader(response=response)
        image_loader.add_value('thumbnail', images_data.get('T'))
        image_loader.add_value('normal_size', images_data.get('P01'))
        image_loader.add_value('zoomed', images_data.get('Z'))
        loader.add_value('images', image_loader.load_item())
    for num in xrange(1, 9):
        av_num = 'AV%s' % num
        if images_data.get(av_num):
            image_loader = ProductImageLoader(response=response)
            image_loader.add_value('thumbnail', images_data.get('%s_T' % av_num))
            image_loader.add_value('normal_size', images_data.get(av_num))
            image_loader.add_value('zoomed', images_data.get('%s_Z' % av_num))
            loader.add_value('images', image_loader.load_item())

    # reviews
    for selector in response.xpath('//div[@id="BVRRContainer"]//ol[contains(@class,"bv-content-list")]/li[contains(@class,"bv-content-item")]'):
        review_loader = ProductReviewLoader(response=response, selector=selector)
        review_loader.body_out = JoinExcludingEmptyValues('\n')
        review_loader.add_xpath('author', 'div[@class="bv-author-profile"]/div[@class="bv-inline-profile"]/div[@class="bv-author-avatar"]/div[@class="bv-author-avatar-nickname"]/div[@class="bv-content-author-name"]/span/h3/text()')
        review_loader.add_xpath('title', 'div/div[@class="bv-content-container"]//h4[@class="bv-content-title"]/text()')
        review_loader.add_xpath('date', 'div/div[@class="bv-content-container"]//div[@class="bv-content-datetime"]/meta[@itemprop="dateCreated"]/@content', MapCompose(Date('%Y-%m-%d')))
        review_loader.add_xpath('body', 'div/div[@class="bv-content-container"]//div[contains(@class,"bv-content-summary-body-text")]/p/text()')
        review_loader.add_xpath('max_stars', 'div/div[@class="bv-content-container"]//span[contains(@class,"bv-content-rating")]/meta[@itemprop="bestRating"]/@content')
        review_loader.add_xpath('stars', 'div/div[@class="bv-content-container"]//span[contains(@class,"bv-content-rating")]/meta[@itemprop="ratingValue"]/@content')
        loader.add_value('reviews', review_loader.load_item())

    yield loader.load_item()
class LazyTweetAnswer(Item):
    question_id = Field(input_processor=MapCompose(lambda x: int(x)),
                        output_processor=TakeFirst())
    answer_content = Field(input_processor=MapCompose(remove_entities, unicode.strip),
                           output_processor=Join())
    answerer = Field(output_processor=TakeFirst())
    answer_id = Field()
class ZhiHuQ(Item):
    title = Field(input_processor=MapCompose(remove_entities, unicode.strip),
                  output_processor=Join())
    content = Field(input_processor=MapCompose(remove_entities, unicode.strip),
                    output_processor=Join())
    id = Field(output_processor=TakeFirst())
    user = Field(output_processor=TakeFirst())
    num = Field(output_processor=TakeFirst())
class RakutenItem(Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    handbag_price = Field(output_processor=TakeFirst())
    handbag_url = Field(output_processor=TakeFirst())
    handbag_brand = Field(output_processor=TakeFirst())
    handbag_image_urls = Field(output_processor=TakeFirst())
def parse_product(self, response):
    hxs = HtmlXPathSelector(response)
    base_url = get_base_url(response)
    name_xpath = '//div[@id="product-details"]/h1/span/text()'  # unused

    names = hxs.select('//h1[@id="product_title"]/text()').extract()
    if names:
        name = names[0].strip()
    else:
        # product not found, skip it
        self.log('WARNING: Product not found => %s' % response.url)
        return

    # None means "in stock / unknown"; 0 flags out of stock below
    quantity = hxs.select('//p[@id="stock_status"]/text()').extract()
    if quantity and "In Stock" in quantity.pop():
        quantity = None
    else:
        quantity = 0

    category = hxs.select('//ul[@id="crumbs"]/li[@class="last"]/a/text()').extract()
    brand = hxs.select('//div[@id="product_title_container"]/span[@class="secondary"]/text()').extract()

    loader = ProductLoader(response=response, item=Product())
    loader.add_value('url', urljoin(base_url, response.url))
    loader.add_value('name', name)
    loader.add_xpath('image_url', '//img[@id="main_image"]/@src',
                     TakeFirst(), Compose(lambda v: urljoin(base_url, v)))
    loader.add_xpath('price', '//div[@class="product_price"]/span[@class="price"]/text()',
                     TakeFirst(), re="([.0-9]+)")
    if not loader.get_output_value('price'):
        loader.add_value('price', 0)
    if category:
        loader.add_value('category', category[0].strip())
    loader.add_value('sku', name, TakeFirst(), re='(\d\d\d+)\s*$')
    if brand:
        loader.add_value('brand', brand[0].strip())

    identifier = hxs.select('//input[@name="ProductID"]/@value').extract()
    if not identifier:
        identifier = hxs.select('//li[@itemprop="id"]/text()').extract()
    loader.add_value('identifier', identifier[0])

    if quantity == 0:
        loader.add_value('stock', 0)

    yield loader.load_item()
class RPostItemsLoader(ItemLoader):
    default_item_class = RpostResultsItem
    default_output_processor = Compose(TakeFirst(), unicode, unicode.strip)

    racename_out = Compose(Join(), unicode, unicode.strip)
    racetime_out = Compose(Join(), unicode, unicode.strip)
    rpOR_out = Compose(TakeFirst(), unicode, unicode.strip, processOR)
    rpTS_out = Compose(TakeFirst(), unicode, unicode.strip, processTS)
    prizemoney_out = Compose(TakeFirst(), unicode, unicode.strip, toascii)
    rphorseurl_out = Compose(TakeFirst(), unicode, unicode.strip)
def test_get_css(self):
    l = TestItemLoader(response=self.response)
    self.assertEqual(l.get_css('p::text'), [u'paragraph'])
    self.assertEqual(l.get_css('p::text', TakeFirst()), u'paragraph')
    self.assertEqual(l.get_css('p::text', TakeFirst(), re='pa'), u'pa')
    self.assertEqual(l.get_css(['p::text', 'div::text']), [u'paragraph', u'marta'])
    self.assertEqual(l.get_css(['a::attr(href)', 'img::attr(src)']),
                     [u'http://www.scrapy.org', u'/images/logo.png'])
class HospitalItem(Item):
    _hospitalName = Field(output_processor=TakeFirst())
    grade = Field(output_processor=TakeFirst())
    feature = Field(
        input_processor=MapCompose(lambda v: v.strip()),
        output_processor=TakeFirst(),
    )
    city = Field(output_processor=TakeFirst())
    area = Field(output_processor=TakeFirst())
class CommentItemLoader(ItemLoader):
    default_item_class = CommentItem
    default_input_processor = MapCompose(lambda x: x.strip())
    default_output_processor = Compose(TakeFirst(), lambda x: x.strip())
    default_selector_class = Selector

    textpost_out = Compose(Join(" "), lambda x: x.strip())
    comments_out = Compose(TakeFirst(), get_comments_count, lambda x: x.strip())
    upvoted_out = Compose(TakeFirst(), get_upvoted, lambda x: x.strip())
    comment_out = Compose(Join(" "), lambda x: x.strip())
class ZhiHuA(Item):
    id = Field(input_processor=MapCompose(lambda x: int(x)),
               output_processor=TakeFirst())
    qid = Field(output_processor=TakeFirst())
    asr = Field(output_processor=TakeFirst())
    content = Field(input_processor=MapCompose(remove_entities, unicode.strip),
                    output_processor=Join())
    score = Field(input_processor=MapCompose(lambda x: int(x)),
                  output_processor=TakeFirst())
def parse_brand_list(self, response):
    hxs = HtmlXPathSelector(response)

    # products
    product_items = hxs.select('//div[@class="productGrid"]/ul/li/div[@class="item"]')
    category_items = hxs.select('//h1[@class="categoryLandingPageTitle_heading"]/a/text()').extract()
    category = category_items[0] if category_items else ''
    brand_name = get_brand_from_url(response.url)

    def get_full_image_url(url):
        return get_full_url(response, url)

    for product_item in product_items:
        image_url = product_item.select(u'div[@class="prodimg"]/a/img/@src').extract()
        if image_url:
            image_url = get_full_url(response, image_url[0])

        ploadr = ProductLoader(item=Product(), selector=product_item, response=response)
        ploadr.add_xpath('name', 'div[@class="prodname"]/a/text()',
                         TakeFirst(), Compose(unicode.strip))
        ploadr.add_xpath('url', 'div[@class="prodname"]/a/@href',
                         TakeFirst(), Compose(unicode.strip), Compose(get_full_image_url))
        ploadr.add_value('category', category)
        ploadr.add_value('image_url', image_url)

        price = ploadr.get_xpath('div[@class="proddetails"]//div[@class="prodnowprice"]/span/text()',
                                 TakeFirst(), Compose(extract_price))
        price_excl_vat = Decimal(price)
        ploadr.add_value('price', price_excl_vat)
        ploadr.add_value('shipping_cost', Decimal('5.00') if price_excl_vat < 50 else Decimal('0.0'))

        ploadr.add_xpath('sku', 'div[@class="proddetails"]//div[@class="proditemcode"]/a/span/following-sibling::text()',
                         TakeFirst(), Compose(unicode.strip))
        ploadr.add_value('identifier', ploadr.get_output_value('sku'))

        stock_info = product_item.select(u'div[@class="proddetails"]/div/div/span[contains(@class, "instock")]/@class').extract()
        buy_button = product_item.select(u'div[@class="proddetails"]/div[@class="prodquickbuy"]/a[@class="primaryBtn"]').extract()
        ploadr.add_value('brand', brand_name)
        ploadr.add_value('stock', 1 if stock_info or buy_button else 0)

        item = ploadr.load_item()
        tmp = ''.join(product_item.select("//div[@class='proditemcode']//text()").extract())
        item['metadata'] = {'product_code': tmp.split(':')[-1].strip()}

        if not ploadr.get_output_value('brand'):
            yield Request(item['url'], meta={'item': item}, callback=self.parse_brand)
        else:
            yield item
def test_get_value(self):
    il = NameItemLoader()
    self.assertEqual(u'FOO', il.get_value([u'foo', u'bar'], TakeFirst(), unicode.upper))
    self.assertEqual([u'foo', u'bar'], il.get_value([u'name:foo', u'name:bar'], re=u'name:(.*)$'))
    self.assertEqual(u'foo', il.get_value([u'name:foo', u'name:bar'], TakeFirst(), re=u'name:(.*)$'))

    il.add_value('name', [u'name:foo', u'name:bar'], TakeFirst(), re=u'name:(.*)$')
    self.assertEqual([u'foo'], il.get_collected_values('name'))
    il.replace_value('name', u'name:bar', re=u'name:(.*)$')
    self.assertEqual([u'bar'], il.get_collected_values('name'))
def parse(self, response):
    for e in response.xpath('//table[@id="tbl_proxy_list"]//tr[count(td)=6]'):
        l = ItemLoader(ProxyHunterItem(), selector=e)
        l.add_value('prot', 'http')
        l.add_xpath('ip', 'td[1]', TakeFirst(), remove_tags, unicode.strip)
        l.add_xpath('port', 'td[2]', TakeFirst(), remove_tags, unicode.strip)
        yield l.load_item()
def _set_loader(self, response, xs, item):
    if not xs:
        self.from_detail_page = True
        item = response.request.meta['item']
        self.loader = XPathItemLoader(item=item, response=response)
    else:
        self.from_detail_page = False
        self.loader = XPathItemLoader(item=item, selector=xs)
    self.loader.default_output_processor = TakeFirst()
class StackOverflowAnswer(Item):
    answer_id = Field(input_processor=MapCompose(lambda x: int(x)),
                      output_processor=TakeFirst())
    answer_content = Field(input_processor=MapCompose(remove_entities, unicode.strip),
                           output_processor=Join())
    answerer = Field(output_processor=TakeFirst())
    marks = Field(input_processor=MapCompose(lambda x: int(x)),
                  output_processor=TakeFirst())
    is_best_answer = Field(output_processor=TakeFirst())
class YelpItem(Item):
    source = Field(output_processor=TakeFirst())
    source_link = Field(output_processor=TakeFirst())
    name = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    rating = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    category = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=Join(','),
    )
    reviews = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    price = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    city = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    address = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=Join(),
    )
    owner_website = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    phone = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    longitude_latitude = Field(
        default='',
        input_processor=MapCompose(unquote_markup, strip_space),
        output_processor=TakeFirst(),
    )
    last_crawl = Field()
def test_replace_css_multi_fields(self):
    l = TestItemLoader(response=self.response)
    l.add_css(None, 'div::text', TakeFirst(), lambda x: {'name': x})
    self.assertEqual(l.get_output_value('name'), [u'Marta'])
    l.replace_css(None, 'p::text', TakeFirst(), lambda x: {'name': x})
    self.assertEqual(l.get_output_value('name'), [u'Paragraph'])

    l.add_css(None, 'a::attr(href)', TakeFirst(), lambda x: {'url': x})
    self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org'])
    l.replace_css(None, 'img::attr(src)', TakeFirst(), lambda x: {'url': x})
    self.assertEqual(l.get_output_value('url'), [u'/images/logo.png'])
def _convert(data):
    # `t` (the conversion type) and `inf` (its options dict) are free
    # variables taken from the enclosing scope.
    if t not in ['join', 'list'] and isinstance(data, list):
        data = TakeFirst()(data)
    if type(data) in [str, unicode]:
        data = data.strip()
    elif type(data) in [int, float, datetime]:
        data = str(data)
    else:
        return data

    if t == 'join':
        sep = inf.get('sep', u' ')
        return Join(sep)(data)
    elif t == 'list':
        sep = inf.get('sep', u' ')
        return remove_tags(Join(sep)(data)).strip()
    elif t == 'text':
        return remove_tags(data).strip()
    elif t == 'clean':
        cleaner = Cleaner(style=True, scripts=True, javascript=True, links=True, meta=True)
        return cleaner.clean_html(data)
    elif t == 'unesc':
        return HTMLParser().unescape(data)
    elif t == 'base64':
        return base64.decodestring(data)
    elif t == 'sub':
        frm = inf.get('from')
        to = inf.get('to')
        return re.sub(frm, to, data)
    elif t == 'jpath':
        qs = inf.get('query')
        return jsonpath.jsonpath(json.loads(data), qs)
    elif t == 'map':
        m = inf.get('map')
        d = inf.get('default')
        return m.get(data, d)
    elif t == 'int':
        return int(float(data))
    elif t == 'float':
        return float(data)
    elif t == 'date':
        fmt = inf.get('fmt', 'auto')
        tz = inf.get('tz', '+00:00')
        return parse_date(data, fmt, tz)
    elif t == 'cst':
        # same as 'date', but fixed to China Standard Time (+08:00)
        fmt = inf.get('fmt', 'auto')
        return parse_date(data, fmt, '+08:00')
    else:
        return data
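Since `_convert` closes over `t` and `inf`, it only makes sense inside some enclosing function that supplies them. A reduced, hypothetical sketch of such a wrapper (make_converter is illustrative, not from the source; only the 'int' path is reproduced here):

    # Hypothetical enclosing scope: `t` names the conversion, `inf` its options.
    def make_converter(t, inf):
        def _convert(data):
            if t not in ['join', 'list'] and isinstance(data, list):
                data = TakeFirst()(data)  # collapse extracted list to first value
            if type(data) in [str, unicode]:
                data = data.strip()
            if t == 'int':
                return int(float(data))
            return data
        return _convert

    convert = make_converter('int', {})
    convert([u' 42.0 '])  # TakeFirst -> strip -> int(float('42.0')) -> 42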
class ActiveDoctorItem(Item):
    _name = Field(output_processor=TakeFirst())
    hospital = Field(output_processor=TakeFirst())
    city = Field(output_processor=TakeFirst())
    area = Field(output_processor=TakeFirst())
    specialty = Field(output_processor=TakeFirst())
    title = Field(output_processor=TakeFirst())
    count_ReplyInTwoWeeks = Field(output_processor=TakeFirst())
    count_ReplyTotal = Field(output_processor=TakeFirst())
    count_Calls = Field(output_processor=TakeFirst())
    external_id = Field(output_processor=TakeFirst())
    comment = Field(output_processor=Join())
class ReviewLoader(XPathItemLoader):
    date_in = MapCompose(unicode, unicode.strip, extract_date, date_format='%d/%m/%Y')
    date_out = TakeFirst()

    rating_in = MapCompose(unicode, extract_rating)
    rating_out = TakeFirst()

    full_text_in = MapCompose(unicode, unicode.strip, remove_entities)
    full_text_out = Join()

    url_in = MapCompose(unicode, unicode.strip)
    url_out = TakeFirst()