def extract(self, target):
     """Collect the sub-category links on the page at ``target``.

     The page is fetched with ``self.get_page(target)``.  Every ``<a>``
     inside every ``<li class="category-item">`` contributes one
     ``(name, url)`` tuple, and the collected tuples are handed to
     ``write_csv`` for ``SUB_CATEGORY_CSV`` using mode ``'ab'``.

     :param target: page location passed straight to ``self.get_page``.
     :returns: ``None``.  Returns early, writing nothing, when
         ``self.get_page`` yields ``None``.
     :raises KeyError: presumably, if an anchor has no ``href`` attribute
         (assuming ``page`` is a BeautifulSoup tree -- TODO confirm).
     """
     page = self.get_page(target)
     if page is None:
         return
     logger.info('start to extract sub category in %s', target)
     category_list = []
     for category_item in page.find_all('li', 'category-item'):
         for anchor in category_item.find_all('a'):
             name = to_unicode(anchor.string)
             if name is None:
                 # ``anchor.string`` gave no usable text (with BeautifulSoup
                 # this happens when the tag has several children), so fall
                 # back to concatenating every text fragment of the anchor.
                 # The fragments must come from ``anchor``; ``name`` is
                 # None on this branch.
                 name = to_unicode(''.join(anchor.strings))
             # Drop everything from the first non-breaking space onwards.
             name = re.sub(r'\xa0.+', '', name)
             url = to_unicode(anchor['href'])
             category_list.append((name, url))
     write_csv(SUB_CATEGORY_CSV, 'ab', category_list)
     logger.info('finished extracting sub category in %s', target)
 def extract_top_category(self):
     """Collect the top-level categories listed on the ``/browse`` page.

     The page is fetched with ``self.get_page('/browse')``.  For each
     ``<li class="category-item">`` only its ``.a`` anchor is inspected;
     items without one are ignored, as are anchors whose converted name
     or URL is falsy.  The resulting ``(name, url)`` tuples are handed to
     ``write_csv`` for ``TOP_CATEGORY_CSV`` using mode ``'wb'``.

     :returns: ``None``.  Nothing is written when the page is
         unavailable or when no category was found (the latter is
         logged as an error).
     """
     browse_path = '/browse'
     soup = self.get_page(browse_path)
     if soup is None:
         return
     logger.info('start to extract top category in %s', browse_path)

     rows = []
     for item in soup.find_all('li', 'category-item'):
         link = item.a
         if link is not None:
             label = to_unicode(link.string)
             href = to_unicode(link['href'])
             # Keep only entries that have both a name and a URL.
             if label and href:
                 rows.append((label, href))

     if not rows:
         logger.error('no category found')
         return

     write_csv(TOP_CATEGORY_CSV, 'wb', rows)
     logger.info("finished extracting top category into '%s'",
                 TOP_CATEGORY_CSV)