def extract_search_result(self, element):
    """Build the markup description of one search result.

    Each locator is an ``HTMLPath`` whose first argument is
    ``GoogleParser_v2.get_path(element)`` followed by a relative suffix.

    :param element: element whose path anchors all locators.
    :return: a ``SearchMarkupSearchResult`` with ``alignment`` set to
        ``"LEFT"`` and ``page_url``, ``title``, ``snippet`` and
        ``view_url`` assigned.
    """
    def locate(suffix, selector):
        # The element path is requested anew for every locator.
        return HTMLPath(GoogleParser_v2.get_path(element) + suffix, selector)

    result = SearchMarkupSearchResult()
    result.alignment = "LEFT"
    result.page_url = locate("/h3/a", "href")
    result.title = locate("/h3/a", "string")
    result.snippet = locate("/div/span", "strings")
    result.view_url = locate("/div/div/cite", "string")
    return result
def extract_search_result(self, element):
    """Build the markup description of one search result.

    Each field receives an ``HTMLPath`` built from
    ``YandexParser.get_path(element)`` plus a relative suffix.

    :param element: element whose path anchors all locators.
    :return: a ``SearchMarkupSearchResult`` with ``alignment`` set to
        ``"LEFT"`` and ``page_url``, ``title``, ``snippet`` and
        ``view_url`` assigned.
    """
    result = SearchMarkupSearchResult()
    result.alignment = "LEFT"

    # (field name, suffix appended to the element path, HTMLPath 2nd argument)
    locator_specs = (
        ("page_url", "/h2/a", "href"),
        ("title", "/h2/a", "string"),
        ("snippet", "/div[2]/div[1]", "string"),
        ("view_url", "/div/div[1]/a[last()]", "href"),
    )
    for field, suffix, selector in locator_specs:
        locator = HTMLPath(YandexParser.get_path(element) + suffix, selector)
        setattr(result, field, locator)
    return result
def extract_adv(self, element):
    """Build the markup description of one advertisement block.

    Each locator is an ``HTMLPath`` whose first argument is
    ``YandexParser_v3.get_path(element)`` followed by a relative suffix.

    :param element: element whose path anchors all locators.
    :return: a ``SearchMarkupAdv`` with ``alignment`` set to ``"LEFT"``
        and ``page_url``, ``title``, ``snippet`` and ``view_url`` assigned.
    """
    def locate(suffix, selector):
        # The element path is requested anew for every locator.
        return HTMLPath(YandexParser_v3.get_path(element) + suffix, selector)

    advert = SearchMarkupAdv()
    advert.alignment = "LEFT"
    advert.page_url = locate("/h2/a", "href")
    advert.title = locate("/h2/a", "string")
    advert.snippet = locate(
        "/div[@class='organic__content-wrapper clearfix']/div[1]",
        "string",
    )
    advert.view_url = locate(
        "/div[@class='organic__subtitle typo typo_type_greenurl']/div[1]/a",
        "string",
    )
    return advert
def extract_wizard_image(self, element):
    """Build the markup description of an image wizard.

    One ``HTMLPath`` (second argument ``"src"``) is appended to
    ``media_links`` for every node matched by ``./div/a/img`` under
    ``element``; ``page_url`` and ``title`` both point at ``/h3/a``
    below the element's path.

    :param element: element that is queried with ``xpath`` and whose path
        anchors the ``page_url`` / ``title`` locators.
    :return: the populated ``SearchMarkupWizardImage``.
    """
    image_wizard = SearchMarkupWizardImage()
    image_wizard.alignment = "LEFT"

    for thumbnail in element.xpath("./div/a/img"):
        thumbnail_locator = HTMLPath(GoogleParser_v2.get_path(thumbnail), "src")
        image_wizard.media_links.append(thumbnail_locator)

    heading_link = "/h3/a"
    image_wizard.page_url = HTMLPath(
        GoogleParser_v2.get_path(element) + heading_link, "href"
    )
    image_wizard.title = HTMLPath(
        GoogleParser_v2.get_path(element) + heading_link, "string"
    )
    return image_wizard
def extract_wizard_image(self, element):
    """Build the markup description of an image wizard.

    For every node matched by ``./div[2]/div/div/div/a`` under ``element``
    an ``HTMLPath`` pointing at ``/div[1]/div[1]`` below that node (second
    argument ``"style"``) is appended to ``media_links``; ``page_url`` and
    ``title`` both point at ``/div[1]/h2/a`` below the element's path.

    :param element: element that is queried with ``xpath`` and whose path
        anchors the ``page_url`` / ``title`` locators.
    :return: the populated ``SearchMarkupWizardImage``.
    """
    image_wizard = SearchMarkupWizardImage()
    image_wizard.alignment = "LEFT"

    for anchor in element.xpath("./div[2]/div/div/div/a"):
        tile_locator = HTMLPath(
            YandexParser.get_path(anchor) + "/div[1]/div[1]", "style"
        )
        image_wizard.media_links.append(tile_locator)

    heading_link = "/div[1]/h2/a"
    image_wizard.page_url = HTMLPath(
        YandexParser.get_path(element) + heading_link, "href"
    )
    image_wizard.title = HTMLPath(
        YandexParser.get_path(element) + heading_link, "string"
    )
    return image_wizard
def extract_actor(self, element):
    """Build an ``ActorComponent`` describing the given element.

    Each field receives an ``HTMLPath`` built from
    ``KinopoiskParser.get_path(element)`` plus a relative suffix.

    :param element: element whose path anchors all locators.
    :return: an ``ActorComponent`` with ``page_url``, ``title``,
        ``snippet`` and ``image`` assigned.
    """
    actor = ActorComponent()

    # (field name, suffix appended to the element path, HTMLPath 2nd argument)
    locator_specs = (
        ("page_url", "/div[2]/p/a", "href"),
        ("title", "/div[2]/p/a", "string"),
        ("snippet", "/div[2]/span[2]", "string"),
        ("image", "/p/a/img", "src"),
    )
    for field, suffix, selector in locator_specs:
        locator = HTMLPath(KinopoiskParser.get_path(element) + suffix, selector)
        setattr(actor, field, locator)
    return actor
def extract_adv(self, element):
    """Build the markup description of one advertisement block.

    Each field receives an ``HTMLPath`` built from
    ``GoogleParser_v3.get_path(element)`` plus a relative suffix.

    :param element: element whose path anchors all locators.
    :return: a ``SearchMarkupAdv`` with ``alignment`` set to ``"LEFT"``
        and ``page_url``, ``title``, ``snippet`` and ``view_url`` assigned.
    """
    advert = SearchMarkupAdv()
    advert.alignment = "LEFT"

    # (field name, suffix appended to the element path, HTMLPath 2nd argument)
    locator_specs = (
        ("page_url", "/h3/a", "href"),
        ("title", "/h3/a", "string"),
        ("snippet", "/div[2]", "string"),
        ("view_url", "/div/cite", "string"),
    )
    for field, suffix, selector in locator_specs:
        locator = HTMLPath(GoogleParser_v3.get_path(element) + suffix, selector)
        setattr(advert, field, locator)
    return advert
def extract_evaluated_cinema(self, element):
    """Build an ``EvaluatedCinemaComponent`` describing the given element.

    Each locator is an ``HTMLPath`` whose first argument is
    ``KinopoiskParser.get_path(element)`` followed by a relative suffix.

    :param element: element whose path anchors all locators.
    :return: an ``EvaluatedCinemaComponent`` with ``page_url``, ``title``,
        ``snippet``, ``actors``, ``year``, ``value`` and ``image`` assigned.
    """
    def locate(suffix, selector):
        # The element path is requested anew for every locator.
        return HTMLPath(KinopoiskParser.get_path(element) + suffix, selector)

    movie = EvaluatedCinemaComponent()
    movie.page_url = locate("/div[2]/p/a", "href")
    movie.title = locate("/div[2]/p/a", "string")
    movie.snippet = locate("/div[2]/span[2]", "string")
    movie.actors = locate("/div[2]/span[3]", "string")
    movie.year = locate("/div[2]/p/span", "string")
    movie.value = locate("/div[1]/div", "string")
    movie.image = locate("/p/a/img", "src")
    return movie
def extract_wizard_news(self, element):
    """Build the markup description of a news wizard.

    Both locators use the element's own path, exactly as returned by
    ``GoogleParser_v2.get_path(element)``, with no suffix appended.

    :param element: element whose path is used for both locators.
    :return: a ``SearchMarkupWizardNews`` with ``alignment`` set to
        ``"LEFT"`` and ``page_url`` / ``title`` assigned.
    """
    news = SearchMarkupWizardNews()
    news.alignment = "LEFT"

    # (field name, HTMLPath 2nd argument)
    for field, selector in (("page_url", "href"), ("title", "string")):
        setattr(news, field, HTMLPath(GoogleParser_v2.get_path(element), selector))
    return news
def extract_wizard_news(self, element):
    """Build the markup description of a news wizard.

    Both locators point at ``/div[1]/h2/a[2]`` below
    ``YandexParser.get_path(element)``.

    :param element: element whose path anchors both locators.
    :return: a ``SearchMarkupWizardNews`` with ``alignment`` set to
        ``"LEFT"`` and ``page_url`` / ``title`` assigned.
    """
    headline_link = "/div[1]/h2/a[2]"

    def locate(selector):
        # The element path is requested anew for every locator.
        return HTMLPath(YandexParser.get_path(element) + headline_link, selector)

    news = SearchMarkupWizardNews()
    news.alignment = "LEFT"
    news.page_url = locate("href")
    news.title = locate("string")
    return news