def autoscraper(Link=None, Metodo=None):
    """Build an AutoScraper rule for one page, persist it, and return the
    scrape result as pretty-printed JSON.

    Parameters
    ----------
    Link : str, optional
        Page URL to scrape. Falls back to ``request.args["Link"]`` when None.
    Metodo : str, optional
        Sample wanted value used to train the scraper. Falls back to
        ``request.args["Metodo"]`` when None.

    Returns
    -------
    str
        JSON document produced by ``get_pagina_result`` for the page.
    """
    # Honor explicit arguments; only touch the Flask request when they are
    # absent (the original read request.args unconditionally, twice).
    link = Link if Link is not None else request.args["Link"]
    metodo = Metodo if Metodo is not None else request.args["Metodo"]

    scraper = AutoScraper()
    scraper.build(link, [metodo])

    # Grouped results map rule-id -> matched values; keep only the first rule.
    results = scraper.get_result_exact(link, unique=False, grouped=True)
    regla = next(iter(results))
    scraper.set_rule_aliases({regla: 'regla'})
    scraper.keep_rules([regla])

    # Collapse the URL into a bare token used as the model's file prefix.
    # NOTE: the replacement order is significant — "http:" also strips the
    # prefix of "https:", leaving a stray "s"; preserved from the original.
    url = (link.replace("http:", "").replace("//", "").replace(".", "")
               .replace("www", "").replace("https:", "").replace("/", "")
               .replace("\n", "").replace("-", ""))
    scraper.save(url + '-search')

    data = get_pagina_result(url, link)
    return json.dumps(data, indent=4, separators=(',', ': '),
                      sort_keys=True, ensure_ascii=False)
"""Train and persist an AutoScraper rule for rosario3.com article links."""
from autoscraper import AutoScraper

url = 'https://www.rosario3.com/'
wanted_list = [
    "/especiales/Club-de-Lectura-Brandon-Sanderson-es-mejor-que-J.-R.-R.-Tolkien-20200909-0043.html"
]

scraper = AutoScraper()
result = scraper.build(url, wanted_list)

# Grouped exact results map rule-id -> matched values; keep only the first
# rule and alias it. (Replaces the original's builtin-shadowing `dict` and
# the side-effect list comprehension used to pull out the first key.)
grouped = scraper.get_result_exact(url, unique=False, grouped=True)
regla = next(iter(grouped))
scraper.set_rule_aliases({regla: 'regla'})
scraper.keep_rules([regla])
scraper.save('rosario3-search')
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 25 22:30:42 2021

@author: Nikhil Reddy

Train an AutoScraper model on an Amazon.in iPhone search page, alias the
title/price rules, and save the model as 'amazon-search3'.
"""
from autoscraper import AutoScraper

Scrap = AutoScraper()

amzn_url = "https://www.amazon.in/s?k=iphones"
req_list_amzn = ["₹58,400", "New Apple iPhone 11 (128GB) - Black"]

Scrap_amzn = Scrap.build(amzn_url, req_list_amzn)
res_amzn = Scrap.get_result_similar(amzn_url, grouped=True)

dyk = list(res_amzn.keys())
print(dyk)

# Alias the last discovered rule as the Title and the first as the Price,
# then drop every other rule. (dyk[-1] replaces the original dyk[len(dyk)-1].)
Scrap.set_rule_aliases({dyk[-1]: 'Title', dyk[0]: 'Price'})
Scrap.keep_rules([dyk[-1], dyk[0]])
Scrap.save('amazon-search3')
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 24 12:25:41 2021

@author: win10

Train an AutoScraper on an Amazon.in search page, alias its two useful
rules as Title/Price, and persist the model as 'AmazonIn-search'.
"""
from autoscraper import AutoScraper

amazon_url = "https://www.amazon.in/s?k=iphones"
wanted_list = ["₹58,400", "New Apple iPhone 11 (128GB) - Black"]

amazon_scraper = AutoScraper()
build_result = amazon_scraper.build(amazon_url, wanted_list)

# Print every similar-result group so the rule ids below can be verified.
print(amazon_scraper.get_result_similar(amazon_url, grouped=True))

# Keep only the two rules of interest, under readable aliases.
rule_aliases = {'rule_io1c': 'Title', 'rule_hm52': 'Price'}
amazon_scraper.set_rule_aliases(rule_aliases)
amazon_scraper.keep_rules(list(rule_aliases))
amazon_scraper.save('AmazonIn-search')
#!/usr/bin/env python
# coding: utf-8
"""Build, inspect, and save an AutoScraper model for an Amazon.in search."""
from autoscraper import AutoScraper

amazon_url = "https://www.amazon.in/s?i=aps&k=iphone"
wanted_list = [
    "New Apple iPhone 12 Pro Max (128GB) - Pacific Blue",
    "₹1,25,900",
]

scraper = AutoScraper()
result = scraper.build(amazon_url, wanted_list)

# Show the grouped similar results so the hard-coded rule ids can be checked.
similar_results = scraper.get_result_similar(amazon_url, grouped=True)
print(similar_results)

# Alias the title and MRP rules, discard everything else, and persist.
scraper.set_rule_aliases({"rule_1943": "Title", "rule_1gc6": "MRP"})
scraper.keep_rules(["rule_1943", "rule_1gc6"])
scraper.save("amazon_search")
"""# Creating scraper object"""
# NOTE(review): `amazon_url`, `wanted_list`, `AutoScraper`, and `pd` are not
# defined in this fragment — presumably they come from earlier notebook cells;
# confirm before running this standalone.
scraper=AutoScraper()
result=scraper.build(amazon_url,wanted_list)

"""# Finding similar data"""
# Grouped results map rule-id -> list of matched values.
data = scraper.get_result_similar(amazon_url,grouped=True)
print(data)
keys = list(data.keys())
print(keys)

"""# Defining alias"""
# Alias rules by position in the discovered key list (first = image URL,
# third = title, last two = price and reviews), then save the model.
# NOTE(review): positional indexing assumes a stable rule discovery order —
# verify the printed `keys` before trusting these aliases.
scraper.set_rule_aliases({str(keys[0]):'ImageUrl',str(keys[2]):'Title',str(keys[-2]):'Price',str(keys[-1]):'Reviews'})
scraper.save("amazon_in.json")

"""# Testing for other search word"""
# Reload the saved model and run it against a different search query.
amazon_scraper = AutoScraper()
amazon_scraper.load('amazon_in.json')
search = "samsung phones"
amazon_url="https://www.amazon.in/s?k={}&s=price-desc-rank".format(search)
# group_by_alias=True keys the results by the aliases defined above.
data = amazon_scraper.get_result_similar(amazon_url, group_by_alias=True)
# Zip the per-alias columns into per-item rows.
search_data = tuple(zip(data['Title'],data['ImageUrl'],data['Price'],data['Reviews']))
# Empty frame with the target schema; presumably filled by later cells.
df = pd.DataFrame(columns=['Query','Title','Price','Reviews','ImageUrl'])