コード例 #1
0
    def from_html_productlist_common(self, webfragment, grupp, best = 0):
        """Fill in this product from a single product-list entry.

        webfragment -- HTML text of one product-list entry.
        grupp       -- stored as-is in self.grupp.
        best        -- forwarded to Container.from_html_productlist().

        Sets self.state to VALID when both a name and an article number
        were found, otherwise to INVALID, and returns self.
        """
        assert self.state == NEW

        # "varunr2" is extracted but never read below; presumably it is only
        # there to step past that table cell -- TODO confirm.
        entry_matcher = MSet([
            ("varunr", MS(r'p_varunr=([0-9]+)')),
            ("namn", MS(r'(?s)>(.*?)</a>')),
            ("årgång", MSDeH(r'(?s)<td.*?>(.*?)</td>')),
            ("varunr2", MSDeH(r'(?s)<td.*?>(.*?)</td>')),
            ("land", MSDeH(r'(?s)<td.*?>(.*?)</td>')),
            ("förpdata", M()),
        ])
        fields = entry_matcher.get(webfragment)

        self.grupp = grupp
        self.varunr = fields.get("varunr")
        self.namn = fields.get("namn")
        self.ursprung = fields.get("land")
        self.argang = fields.get("årgång", "")

        # A product entry used to carry a whole list of containers. Now every
        # container shows up as its own product entry, so exactly one
        # container is parsed here and the ProductList class merges them.
        self.forpackningar = []
        self.forpackningar.append(
            Container().from_html_productlist(fields["förpdata"], best))

        if not (self.namn and self.varunr):
            self.state = INVALID
        else:
            self.state = VALID

        return self
コード例 #2
0
 def from_html_name(self, webpage):
     """Build self.lista from a search-by-name result page.

     webpage -- HTML text of the whole result page.

     The page is split into type sections, each with a heading
     ("typrubrik") and a list of product fragments ("prodlista"). Every
     fragment is parsed with Product.from_html_name(). Valid products are
     appended to self.lista; invalid ones are treated as carriers of extra
     containers for the most recently appended product.

     Sets self.state to VALID when at least one product was collected,
     otherwise to INVALID, and returns self.
     """
     assert self.state == NEW
     
     typlista = MList(r'<table width="640" border="0" cellspacing="0" cellpadding="0">',
                      MSet([("typrubrik", MSDeH(r'(?s)<font class="rubrik2">(.*?)</font>')),
                            ("prodlista",
                             MList(r'<td width="290" align=',
                                   M())),
                            ])).get(webpage)
     self.lista = []
     for t in typlista:
         grupp = t["typrubrik"]
         
         for p in t["prodlista"]:
             prod = Product().from_html_name(p, grupp)
             
             if prod.valid():
                 # A real product
                 self.lista.append(prod)
             elif self.lista:
                 # This should be a dummy product with a container
                 # to be added to the last real product
                 self.lista[-1].add_containers_from(prod)
             # Otherwise no real product has been seen yet, so there is
             # nothing to attach the containers to; the fragment is dropped
             # instead of failing with IndexError on the empty list.
         
     if self.lista:
         self.state = VALID
     else:
         self.state = INVALID
         
     return self
コード例 #3
0
    def from_html(self, webfragment, lan = None):
        """Fill in this store from one store row of a result page.

        webfragment -- HTML text of a single store row.
        lan         -- stored as-is in self.lan (defaults to None).

        Extracts the store code, town, address and telephone number.
        Asserts that code, town and address were found, then marks the
        object VALID and returns self.
        """
        assert self.state == NEW

        row_matcher = MSet([
            ("kod", MS(r'butiknr=([0-9]+)')),
            ("ort", MS(r'>(.*?)</a>')),
            ("adress", MS(r'<td[^>]*>(.*?)</td>')),
            ("telefon", MS(r'<td[^>]*>(.*?)</td>')),
        ])
        fields = row_matcher.get(webfragment)

        self.lan = lan
        # Each extracted field becomes an attribute of the same name;
        # fields that did not match end up as None.
        for key in ("kod", "ort", "adress", "telefon"):
            setattr(self, key, fields.get(key))

        # The telephone number is the only optional field.
        assert self.kod and self.ort and self.adress
        self.state = VALID

        return self
コード例 #4
0
    def from_html_normal(self, webpage):
        """Fill in this product from its full product page.

        webpage -- HTML text of the product page.

        The field matchers below are run against the page via
        get_into_object(), with this object as the target. When either the
        name or the article number is missing afterwards, the state becomes
        INVALID and self is returned without parsing any containers.
        Otherwise the name is stripped, the state becomes VALID and
        self.forpackningar is filled with one Container per row of the
        "Info" table. Returns self.
        """
        assert self.state == NEW

        field_matcher = MSet([
            ("grupp", MSDeH(r'(?s)<td.*?class="text10pxfetvit">(.*?)</td>')),
            ("namn", MSDeH(r'(?s)<span class="rubrikstor">(.*?)\(nr')),
            ("varunr", MS(r'(?s)\(nr.*?([0-9]+)')),
            ("land", MSDeH(r'ursprung=.*?>(.*?)<')),
            ("distrikt", MSDeH(r'(?s)Distrikt</td>(.*?)</td>')),
            ("alkoholhalt", MSDeH(r'(?s)Alkoholhalt</td>(.*?)</td>')),
            ("farg", MSFargDoftSmak("Färg")),
            ("doft", MSFargDoftSmak("Doft")),
            ("smak", MSFargDoftSmak("Smak")),
            ("anvandning", MSFargDoftSmak("Användning")),
            ("sotma", MSC("Sötma", advance = 0)),
            ("fyllighet", MSC("Fyllighet", advance = 0)),
            ("stravhet", MSC("Strävhet", advance = 0)),
            ("fruktsyra", MSC("Fruktsyra", advance = 0)),
            ("beska", MSC("Beska", advance = 0)),
            ("lagring", MSDeH(r'(?s)llbarhet.*?</td>(.*?)</td>')),
            ("druvsorter", MSDeH(r'(?s)Druvsorter.*?</td>(.*?)</td>')),
            ("argang", MSDeH(r'(?s)Provad.*?årgång.*?</td>(.*?)</td>')),
            ("provningsdatum", MSDeH(r'(?s)Provningsdatum.*?</td>(.*?)</td>')),
            ("producent", MSDeH(r'(?s)Producent.*?</td>(.*?)</td>')),
        ])
        field_matcher.get_into_object(webpage, self)
        # Debug aid:
        # for k,v in sorted(self.__dict__.items()): print "%-16s = %s" % (k,v)

        # Without both a name and an article number this is not a usable
        # product; stop before looking for containers.
        if not (self.namn and self.varunr):
            self.state = INVALID
            return self

        self.namn = self.namn.strip()
        self.state = VALID

        # One container per row of the table that follows the "Info" header.
        containers = self.forpackningar = []
        row_matcher = MLimit(r'(?s)Info</td>(.*?)</table>',
                             MList(r'<td class="text_tabell" valign="Middle"',
                                   M()))
        for row in row_matcher.get(webpage):
            containers.append(Container().from_html_normal(row))

        return self
コード例 #5
0
    def from_html(self, webpage):
        """Collect the grape choices from the p_druva <select> element.

        webpage -- HTML text of the page containing the selector.

        For every <option> inside the select element, a (number, name)
        tuple is appended to self.druvor, which must already exist. Options
        whose name is "-" are skipped. The state is always set to VALID,
        even when no option was found. Returns self.
        """
        assert self.state == NEW

        option_matcher = MLimit(
            r'(?s)<select name="p_druva" class="selectDruva">(.*?)</select>',
            MList("<option",
                  MSet([("nr", M('option value="([0-9]+)"')),
                        ("namn", M(">(.*)</option>")),
                        ])))

        for d in option_matcher.get(webpage):
            # "!=" instead of the obsolete "<>" spelling: same meaning in
            # Python 2, and still valid syntax in Python 3.
            if d["namn"] != "-":
                self.druvor.append((int(d["nr"]), d["namn"]))

        self.state = VALID
        return self
コード例 #6
0
    def from_html_normal(self, webfragment):
        """Fill in this container from one row of a product page table.

        webfragment -- HTML text of a single container row.

        Extracts name, size, price and remark into attributes of this
        object, sets self.sortiment to "?", asserts that name, size and
        price were found, appends " kr" to the price, marks the object
        VALID and returns self.
        """
        assert self.state == NEW

        # A single MSet is used rather than separate
        # Mfoo(...).get(webfragment) calls per field, because these matches
        # are believed to need the sequencing that MSet provides.
        row_matcher = MSet([
            ("namn", MS(r';">(.*?)</td>')),
            ("storlek", MS(r';">(.*?)</td>')),
            ("pris", MS(r'<td class="text10pxfet".*?>([0-9.]+)')),
            ("anm", MSDeH(r';">(.*?)</td>')),
        ])
        row_matcher.get_into_object(webfragment, self)

        self.sortiment = "?"

        assert self.namn and self.storlek and self.pris
        self.pris += " kr"
        self.state = VALID

        return self
コード例 #7
0
 def from_html_stores(self, webpage, lan, ort):
     """Collect the stores on a store-listing page into self.butiker.

     webpage -- HTML text of the store-listing page.
     lan     -- county selector; the value "99" means the page lists
                several counties, anything else means a single county.
     ort     -- passed to Store.matches_ort() to filter the stores.

     For a single-county page the stores are parsed without a county
     (Store.from_html's default of None). For a multi-county page each
     store gets the text of the <H4> heading it is listed under.
     Nothing is returned.
     """
     store_row = r'<tr><td width="200" valign=top>'
     self.butiker = []

     # "!=" replaces the obsolete "<>" spelling: same meaning in Python 2,
     # and still valid syntax in Python 3.
     if lan != "99":
         # A single county
         for b in MList(store_row, M()).get(webpage):
             s = Store().from_html(b)
             if s.matches_ort(ort):
                 self.butiker.append(s)
     else:
         # A list of counties
         lista = MList("<H4>", MSet([("län", MS(r'<H4>(.*?)</H4>')),
                                      ("butikslista",
                                       MList(store_row,
                                             M()))])).get(webpage)
         for l in lista:
             # Kept in its own local so the lan parameter is not overwritten.
             lan_namn = l["län"]
             for b in l["butikslista"]:
                 s = Store().from_html(b, lan_namn)
                 if s.matches_ort(ort):
                     self.butiker.append(s)
コード例 #8
0
    def from_html_productlist(self, webfragment, best = 0):
        """Fill in this container from the tail of a product-list entry.

        webfragment -- HTML text holding the volume and price cells.
        best        -- when true, the container is classified as "best"
                       unless the fragment says it is in all stores.

        Sets self.storlek and self.pris from the fragment, clears
        self.namn, self.anm1 and self.anm2, and sets self.sortiment to
        "alla", "best" or "". Asserts that volume and price were found,
        then marks the object VALID and returns self.
        """
        assert self.state == NEW

        fields = MSet([("volym", MSVolym(r'(?s)<td.*?>(.*?)</td>')),
                       ("pris", MS(r'(?s)<td.*?>(.*?)</td>')),
                       ("allabutiker", MS(r'(Finns i alla butiker)')),
                       ("bestsort", MS(r'(Beställningsvara)')),
                       ]).get(webfragment)
        self.namn = None
        self.storlek = fields.get("volym")
        self.pris = fields.get("pris")
        self.anm1 = None
        self.anm2 = None

        # "key in mapping" replaces has_key(), which no longer exists in
        # Python 3. Assumes MSet.get() returns a regular dict -- TODO confirm.
        if "allabutiker" in fields:
            self.sortiment = "alla"
        elif best or "bestsort" in fields:
            self.sortiment = "best"
        else:
            self.sortiment = ""

        assert self.storlek and self.pris
        self.state = VALID

        return self
コード例 #9
0
ファイル: eniro.py プロジェクト: lysator/lyspython
def split_area_phone(str):
    """Split "<digits>-<digits>" into an (area code, number) tuple.

    Returns a tuple of the two digit strings when the whole argument has
    that form, and None otherwise.
    """
    parts = re.match("^([0-9]+)-([0-9]+)$", str)
    if parts is None:
        return None
    return parts.group(1, 2)


# Search class

# Matcher for a search result page. It yields:
#   "resultat" -- one entry per hit row, each with "name_title", "address"
#                 and "phone"
#   "maxvisas" -- the number following ">1-" on the page
#   "totalt"   -- the number following "visas av totalt:"
search_m = MSet([
    ("resultat",
     MList(
         '<TD align="left" bgcolor="#CCCCCC">',
         MSet([
             # The (?s) flag is written first, as in every other pattern
             # here. A global inline flag in the middle of a pattern is
             # deprecated since Python 3.6 and rejected since 3.11; the
             # flag applies to the whole pattern wherever it is placed, so
             # the match is unchanged.
             ("name_title", MSDeH(r"(?s)<.*<b>(.*?)</b>")),
             ("address", MSDeH(r'(?s)<TD align="left">(.*?)</TD>')),
             ("phone", MSDeH(r'(?s)<TD align="right" width="100">(.*?)</B>')),
         ]))),
    ("maxvisas", MS(r">1-([0-9]+)")),
    ("totalt", MS(r"(?s).*visas av totalt:.*?([0-9]+)")),
])


class Search:
    def __init__(self, webpage):
        """Run search_m over the whole page and keep the resulting dict.

        webpage -- HTML text of the search result page.
        """
        page_end = len(webpage)
        # match() returns a pair; only the first element is kept.
        self.dict, _pos = search_m.match(webpage, 0, page_end)

    def valid(self):
        return self.dict.has_key("resultat")

    def to_string(self):