Exemple #1
0
def autoscraper(Link=None, Metodo=None):
    """Learn one scraping rule from a sample, save it, and return results as JSON.

    The page URL and the sample text are taken from ``request.args["Link"]``
    and ``request.args["Metodo"]``. The ``Link`` and ``Metodo`` parameters are
    kept for signature compatibility; the values passed in are not used.

    Steps:
      1. Build an ``AutoScraper`` on the page using the sample text.
      2. Keep only the first rule reported by ``get_result_exact`` and alias
         it as ``'regla'``.
      3. Save the scraper under a name derived from the URL plus ``-search``.
      4. Call ``get_pagina_result`` with that name and the URL, and serialise
         whatever it returns.

    Returns:
        str: the ``get_pagina_result`` output as an indented, key-sorted JSON
        string with non-ASCII characters left unescaped.

    Raises:
        IndexError: if no rule was produced for the sample.
    """
    link = request.args["Link"]
    sample = request.args["Metodo"]

    scraper = AutoScraper()
    scraper.build(link, [sample])

    # With grouped=True the result is keyed by rule id; only the ids are needed.
    grouped_results = scraper.get_result_exact(link, unique=False, grouped=True)
    rule_ids = list(grouped_results)
    if not rule_ids:
        raise IndexError(
            "no scraping rule could be learned from the requested sample")
    regla = rule_ids[0]
    scraper.set_rule_aliases({regla: 'regla'})
    scraper.keep_rules([regla])

    # Strip scheme, separators and dots so the URL can be used as a file name.
    # The replacements are applied one after another, so their order is kept
    # exactly as it was.
    name = link
    for token in ("http:", "//", ".", "www", "https:", "/", "\n", "-"):
        name = name.replace(token, "")
    scraper.save(name + '-search')

    data = get_pagina_result(name, link)
    return json.dumps(data,
                      indent=4,
                      separators=(',', ': '),
                      sort_keys=True,
                      ensure_ascii=False)
Exemple #2
0
from autoscraper import AutoScraper

url = 'https://www.rosario3.com/'

# A sample link target copied from the page; the scraper learns its rule from
# where this value occurs.
wanted_list = [
    "/especiales/Club-de-Lectura-Brandon-Sanderson-es-mejor-que-J.-R.-R.-Tolkien-20200909-0043.html"
]

scraper = AutoScraper()
result = scraper.build(url, wanted_list)

# With grouped=True the result is keyed by rule id. It is bound to a
# descriptive name so the builtin ``dict`` is not shadowed.
grouped_results = scraper.get_result_exact(url, unique=False, grouped=True)
if not grouped_results:
    raise LookupError(
        "No rule was learned; the sample in wanted_list was not found on the page.")

# Keep only the first learned rule and give it a stable alias.
regla = next(iter(grouped_results))
scraper.set_rule_aliases({regla: 'regla'})

scraper.keep_rules([regla])

scraper.save('rosario3-search')
Exemple #3
0
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 25 22:30:42 2021

@author: Nikhil Reddy
"""

from autoscraper import AutoScraper

# Page to learn from, plus one sample price and one sample title taken from it.
amzn_url = "https://www.amazon.in/s?k=iphones"
req_list_amzn = ["₹58,400", "New Apple iPhone 11 (128GB) - Black"]

Scrap = AutoScraper()
Scrap_amzn = Scrap.build(amzn_url, req_list_amzn)

# Results grouped per rule id; the ids are the dictionary keys.
res_amzn = Scrap.get_result_similar(amzn_url, grouped=True)
dyk = [*res_amzn]
print(dyk)

# The last rule is labelled as the title and the first as the price.
# NOTE(review): this presumes the rule order follows the sample order above.
title_rule, price_rule = dyk[-1], dyk[0]
Scrap.set_rule_aliases({title_rule: 'Title', price_rule: 'Price'})
Scrap.keep_rules([title_rule, price_rule])
Scrap.save('amazon-search3')
Exemple #4
0
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 24 12:25:41 2021

@author: win10
"""

from autoscraper import AutoScraper

amazon_url = "https://www.amazon.in/s?k=iphones"

# One sample price and one sample title, copied verbatim from the search page;
# the scraper learns its rules from where these strings occur.
wanted_list = ["₹58,400", "New Apple iPhone 11 (128GB) - Black"]
price_sample, title_sample = wanted_list

scraper = AutoScraper()
result = scraper.build(amazon_url, wanted_list)

# Rule id -> texts that rule extracts from the page.
grouped = scraper.get_result_similar(amazon_url, grouped=True)
print(grouped)

# Rule ids are generated afresh by every build(), so ids copied from an earlier
# run do not exist in this one. Select the rules from this run's output
# instead: the first rule that reproduces each sample.
title_rule = next(
    (rule_id for rule_id, texts in grouped.items() if title_sample in texts),
    None)
price_rule = next(
    (rule_id for rule_id, texts in grouped.items() if price_sample in texts),
    None)
if title_rule is None or price_rule is None:
    raise LookupError(
        "No learned rule reproduces every sample in wanted_list; "
        "update the samples to text currently shown on the page.")

scraper.set_rule_aliases({title_rule: 'Title', price_rule: 'Price'})
scraper.keep_rules([title_rule, price_rule])
scraper.save('AmazonIn-search')
Exemple #5
0
#!/usr/bin/env python
# coding: utf-8


from autoscraper import AutoScraper
amazon_url = "https://www.amazon.in/s?i=aps&k=iphone"

# One sample title and one sample price, copied verbatim from the search page;
# the scraper learns its rules from where these strings occur.
wanted_list = ["New Apple iPhone 12 Pro Max (128GB) - Pacific Blue", "₹1,25,900"]
title_sample, price_sample = wanted_list

scraper = AutoScraper()

result = scraper.build(amazon_url, wanted_list)

# Rule id -> texts that rule extracts from the page.
grouped = scraper.get_result_similar(amazon_url, grouped=True)
print(grouped)

# Rule ids are generated afresh by every build(), so ids copied from an earlier
# run do not exist in this one. Select the rules from this run's output
# instead: the first rule that reproduces each sample.
title_rule = next(
    (rule_id for rule_id, texts in grouped.items() if title_sample in texts),
    None)
price_rule = next(
    (rule_id for rule_id, texts in grouped.items() if price_sample in texts),
    None)
if title_rule is None or price_rule is None:
    raise LookupError(
        "No learned rule reproduces every sample in wanted_list; "
        "update the samples to text currently shown on the page.")

scraper.set_rule_aliases({title_rule: "Title",
                          price_rule: "MRP"})

scraper.keep_rules([title_rule, price_rule])
scraper.save("amazon_search")






Exemple #6
0
"""# Creating scraper object"""

# NOTE: amazon_url and wanted_list are not defined in this excerpt; they are
# expected to have been set earlier in the original script.
scraper=AutoScraper()
result=scraper.build(amazon_url,wanted_list)

"""# Finding similar data"""

# Results grouped per rule id; the ids are the dictionary keys.
data = scraper.get_result_similar(amazon_url,grouped=True)
print(data)

keys = list(data.keys())
print(keys)

"""# Defining alias"""

# Rules are chosen by their position in the grouped output.
# NOTE(review): which position holds which field presumably came from
# inspecting the printed output of one run - confirm it still holds.
# No keep_rules() call follows, so nothing is filtered out before saving.
scraper.set_rule_aliases({str(keys[0]):'ImageUrl',str(keys[2]):'Title',str(keys[-2]):'Price',str(keys[-1]):'Reviews'})

scraper.save("amazon_in.json")

"""# Testing for other search word"""

# Reload the saved scraper into a fresh instance and apply it to a new query.
amazon_scraper = AutoScraper()
amazon_scraper.load('amazon_in.json')

search = "samsung phones"
# NOTE(review): the search text contains a space and is inserted into the URL
# without URL-encoding - confirm the HTTP layer encodes it as intended.
amazon_url="https://www.amazon.in/s?k={}&s=price-desc-rank".format(search)

# group_by_alias=True keys the results by the aliases set above.
data = amazon_scraper.get_result_similar(amazon_url, group_by_alias=True)
# zip() stops at the shortest list, so rows are truncated to the field with the
# fewest results. Tuple order here is (Title, ImageUrl, Price, Reviews).
search_data = tuple(zip(data['Title'],data['ImageUrl'],data['Price'],data['Reviews']))

# NOTE(review): pd is not imported in this excerpt, and the column order below
# (Query, Title, Price, Reviews, ImageUrl) differs from the tuple order above.
df = pd.DataFrame(columns=['Query','Title','Price','Reviews','ImageUrl'])