def get_director(self):
    """Read the director entry from the parsed page into ``self.director``.

    Finds the ``<b>`` element whose text is "Режиссёр" (Russian for
    "Director"), takes the element that immediately follows it and stores a
    dict with the keys ``name``, ``num`` and ``link`` on ``self.director``.

    The lookup indexes the first XPath match directly, so it assumes the
    label is present on the page.
    """
    label_xpath = u"//b[. = 'Режиссёр']"
    person = self.tree.xpath(label_xpath)[0].getnext()
    # Pull the director's number out of the link target.
    person_id = utils.getnum(person.get("href"))
    profile_url = "http://world-art.ru/people.php?id=%s" % person_id
    self.director = {
        "name": person.text,
        "num": person_id,
        "link": profile_url,
    }
def get_studio(self):
    """Collect studio details into ``self.studio`` and download its emblem.

    Uses the first ``<a>`` whose ``href`` starts with ``company_film.php`` to
    obtain the studio number, then fills ``self.studio`` with the keys
    ``num``, ``link``, ``imglink`` and ``fname`` and asks ``self.cache`` to
    download the emblem image.

    The lookup indexes the first XPath match directly, so it assumes such a
    link is present on the page.
    """
    company_links = self.tree.xpath("//a[starts-with(@href, 'company_film.php')]")
    # Pull the studio's number out of the link target.
    studio_id = utils.getnum(company_links[0].get("href"))
    studio = self.studio = {"num": studio_id}

    # Build the studio description URL, the emblem URL and the local file
    # name for the emblem.
    studio["link"] = (
        "http://world-art.ru/animation/company_film.php?id=%s" % studio_id
    )
    studio["imglink"] = "http://www.world-art.ru/img/company/%s.jpg" % studio_id
    studio["fname"] = os.path.join(self.homedir, "studio/%s.jpg" % studio_id)

    # Download the studio emblem.
    # NOTE(review): the download target is self.fnames["studio"], while the
    # path logged below is self.studio["fname"] -- confirm both are meant to
    # name the same file.
    self.cache.dw_img(studio["imglink"], self.fnames["studio"])
    logging.info("Studio emblem: %s", studio["fname"])
def get_actors(self):
    """Build ``self.actors`` from the voice-cast section and return it.

    Starts at the ``<b>`` element whose text is "Роли озвучивали:" (Russian
    for "Roles voiced by:"), climbs four ancestor levels and iterates over
    the children of that ancestor. For each child, the link in the second
    cell and the text of the third cell are turned into a dict with the keys
    ``ac_name``, ``ac_id``, ``ac_role`` and ``ac_link``.

    Children that raise ``IndexError`` or ``IOError`` while being read (for
    example because an expected cell is missing) are skipped.

    Returns:
        The list stored on ``self.actors``.
    """
    cast = self.actors = []
    heading_xpath = u"//b[. = 'Роли озвучивали:']"
    container = self.tree.xpath(heading_xpath)[0]
    # Climb four levels up from the heading to reach the element whose
    # children are walked below.
    for _ in range(4):
        container = container.getparent()

    # Walk through all child (tr) elements.
    for row in container:
        try:
            person_link = row.xpath("td[2]/a")[0]
            name = person_link.text
            person_id = utils.getnum(person_link.get("href"))
            profile_url = "http://world-art.ru/people.php?id=%s" % person_id
            role_cell = row.xpath("td[3]")[0]
            # The first two characters of the cell text are dropped
            # (presumably a separator prefix -- TODO confirm).
            role = role_cell.text[2:]
            cast.append(
                {
                    'ac_name': name,
                    'ac_id': person_id,
                    'ac_role': role,
                    'ac_link': profile_url,
                }
            )
        except (IOError, IndexError):
            continue
    return self.actors
def __init__(self, wa_addr, cachedir=None):
    """Set up the page id, working directory and cache, then parse the cache.

    Args:
        wa_addr: Address from which the page number is extracted with
            ``utils.getnum``; the canonical page URL is rebuilt from that
            number and stored on ``self.wa_addr``.
        cachedir: Directory to use as ``self.homedir``. When ``None``, a
            per-page directory under ``datadir.user_data_dir("necroposter")``
            is used instead.
    """
    logging.debug("Init necroposter class")
    self.parser = etree.HTMLParser()
    self.caching = 1
    self.pagenum = utils.getnum(wa_addr)
    self.wa_addr = (
        "http://www.world-art.ru/animation/animation.php?id=%s" % self.pagenum
    )

    # Identity test: ``None`` is a singleton, and ``==`` could be hijacked by
    # an argument type that overrides ``__eq__``.
    if cachedir is None:
        # Will use local storage.
        self.homedir = os.path.join(
            datadir.user_data_dir("necroposter"), self.pagenum
        )
    else:
        # Will use the provided path.
        self.homedir = cachedir

    self.chkdirs()
    # Pass the argument separately so the message is only formatted when the
    # record is actually emitted.
    logging.info("Datadir is: %s", self.homedir)
    # NOTE(review): self.caching is assigned a second time here; this looks
    # redundant, but chkdirs() is not visible from this block -- confirm it
    # does not touch self.caching before removing either assignment.
    self.caching = 1
    self.cache = cache(self.homedir, referer=self.wa_addr)
    self.parse_cache()