Example #1
0
 def handle_data(self, text):
     """Store the ``'net'`` value parsed from ``text`` for a pending product.

     Does nothing when ``self.atag_title_stack`` is empty. Otherwise one
     product id is popped off that stack and ``self.products[id]['net']``
     is set to the ``extract_data(text)`` result converted to a tuple, or
     to the string ``"not found"`` when that result is falsy.
     """
     if not self.atag_title_stack:
         return
     product_id = self.atag_title_stack.pop()
     parsed = extract_data(text)
     net_value = tuple(parsed) if parsed else "not found"
     self.products[product_id]['net'] = net_value
Example #2
0
def test_run():
    """Parse the saved page ``yhd.html`` and print a unit price per product.

    The file's contents are fed to a ``ProdInfoParser`` and the dict
    returned by ``parser.output()`` is printed. Then, for every entry whose
    ``title`` yields a truthy result from ``extract_data``, the key, the
    title and the price scaled to 500 of the extracted unit (computed from
    the entry's ``yhdprice``) are printed. Entries without a result are
    skipped silently.
    """
    parser = ProdInfoParser()
    # The context manager closes the file even if reading raises.
    with open('yhd.html') as f:
        c = f.read()
    parser.feed(c)
    d = parser.output()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(d)
    for k, v in d.items():
        result = extract_data(v['title'])
        if not result:
            continue
        # Reuse the result already computed instead of parsing the title twice.
        net, unit = result
        unit_price = float(v['yhdprice']) / net * 500
        print(k)
        print(v['title'])
        print("Unit price: ¥%.2f/500%s" % (unit_price, unit))
Example #3
0
    def start_a(self, attrs):
        """Handle an opening ``<a>`` tag and record its product's ``'net'`` value.

        Only anchors carrying non-empty ``id``, ``pmid`` and ``title``
        attributes are considered. The product id is the second
        ``'_'``-separated field of ``id``; anchors whose ``id`` contains no
        ``'_'`` are skipped rather than raising ``IndexError``.

        For an accepted anchor, ``self.products[product_id]['net']`` is set to
        the ``extract_data(title)`` result converted to a tuple, or to the
        string ``"not found"`` when that result is falsy.

        Args:
            attrs: iterable of ``(name, value)`` attribute pairs.
        """
        attrs = dict(attrs)
        anchor_id = attrs.get('id')
        title = attrs.get('title')
        if not (anchor_id and attrs.get('pmid') and title):
            return

        id_parts = anchor_id.split('_')
        if len(id_parts) < 2:
            # No '_' separator, so there is no product id field to extract.
            return
        product_id = id_parts[1]

        # Create the product entry on first sight (or when it is still empty).
        if not self.products.get(product_id):
            self.products[product_id] = {}

        result = extract_data(title)
        self.products[product_id]['net'] = tuple(result) if result else "not found"