def get_pagina_result(url, link):
    """Scrape *link* using the saved per-site search model named by *url*.

    Loads the AutoScraper model stored at ``./<url>-search.json``, runs a
    similar-results scrape grouped by alias, and returns the row-wise
    aggregation produced by ``_aggregate_result``.
    """
    scraper = AutoScraper()
    model_path = f'./{url}-search.json'
    scraper.load(model_path)
    grouped = scraper.get_result_similar(link, unique=True, group_by_alias=True)
    return _aggregate_result(grouped)
def auto(self, url, model_name='1'):
    """Fetch *url* and scrape it with the saved AutoScraper model *model_name*.

    The response encoding is corrected from the apparent encoding before
    scraping. Returns the alias-grouped similar results.
    """
    model = AutoScraper()
    model.load(model_name)
    response = requests.get(url)
    # Let requests guess the real encoding before decoding the body.
    response.encoding = response.apparent_encoding
    page_html = response.text
    return model.get_result_similar(url, html=page_html, group_by_alias=True)
from autoscraper import AutoScraper
import pandas as pd

# Show every row of each ticker table instead of pandas' truncated view.
pd.set_option('display.max_rows', None)

tickers = ['SCHB', 'AMZN', 'AAPL', 'MSFT', 'TSLA', 'AMD', 'NFLX']

scraper = AutoScraper()
scraper.load('../finviz_table')

for ticker in tickers:
    url = f'https://finviz.com/quote.ashx?t={ticker}'
    # First result group holds the flat list of table cells.
    cells = scraper.get_result(url)[0]
    # Pair the tail (from the 'Index' cell onward) with the head, which
    # lines attribute names up against their values.
    split_at = cells.index('Index')
    df = pd.DataFrame(
        zip(cells[split_at:], cells[:split_at]),
        columns=['Attributes', 'Values'],
    )
    print(f'\n{ticker} Data: ')
    print(df.set_index('Attributes'))
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 24 12:29:21 2021

@author: win10
"""
from autoscraper import AutoScraper
from flask import Flask, request

# Pre-trained Amazon search model, loaded once at import time.
amazon_scraper = AutoScraper()
amazon_scraper.load('amazon-search')

app = Flask(__name__)


def get_amazon_result(search_query):
    """Scrape the Amazon.in search page for *search_query*.

    Returns a list of dicts, one per result row, keyed by the model's
    aliases.
    """
    url = 'https://www.amazon.in/s?k=%s' % search_query
    result = amazon_scraper.get_result_similar(url, group_by_alias=True)
    return _aggregate_result(result)


def _aggregate_result(result):
    """Pivot ``{alias: [values, ...]}`` into ``[{alias: value}, ...]``.

    Rows where any alias list is shorter than the first one are skipped
    rather than aborting the whole aggregation. An empty *result* yields
    an empty list (previously crashed with IndexError).
    """
    final_result = []
    if not result:
        return final_result
    # Row count is taken from the first alias list, as before.
    row_count = len(next(iter(result.values())))
    for i in range(row_count):
        try:
            final_result.append({alias: result[alias][i] for alias in result})
        except IndexError:
            # Was a bare `except: pass`, which also swallowed
            # KeyboardInterrupt/SystemExit; only short lists are expected.
            pass
    return final_result
from autoscraper import AutoScraper
from flask import Flask, request

# Saved search models, loaded once at startup.
ebay_scraper = AutoScraper()
etsy_scraper = AutoScraper()
ebay_scraper.load('ebay-search')
etsy_scraper.load('etsy-search')

app = Flask(__name__)


def get_ebay_result(search_query):
    """Scrape eBay search results for *search_query* into a list of dicts."""
    url = 'https://www.ebay.com/sch/i.html?_nkw=%s' % search_query
    grouped = ebay_scraper.get_result_similar(url, group_by_alias=True)
    return _aggregate_result(grouped)


def get_etsy_result(search_query):
    """Scrape Etsy search results for *search_query* into a list of dicts."""
    url = 'https://www.etsy.com/search?q=%s' % search_query
    grouped = etsy_scraper.get_result_similar(url, group_by_alias=True)
    # The model captures listing ids; expand them to full listing URLs.
    grouped['url'] = [f'https://www.etsy.com/listing/{i}' for i in grouped['url']]
    return _aggregate_result(grouped)


def _aggregate_result(result):
    """Pivot ``{alias: [values, ...]}`` into a row-wise list of dicts.

    The row count comes from the first alias list; a shorter list elsewhere
    raises IndexError, as before.
    """
    first_column = list(result.values())[0]
    return [
        {alias: values[i] for alias, values in result.items()}
        for i in range(len(first_column))
    ]
from autoscraper import AutoScraper

# AutoScraper must be installed with
# pip install git+https://github.com/alirezamika/autoscraper.git

question = "france"
time = "year"
url = f"https://www.quora.com/search?q={question}&time={time}"
model_name = "model_quora"

scraper = AutoScraper()
scraper.load(f"./{model_name}")

results = scraper.get_result_similar(url)

# Print each hit, or a notice when the scrape came back empty.
if not results:
    print("No result found")
else:
    for item in results:
        print(item)
# NOTE(review): this chunk is truncated mid `render_template()` call (the
# `data` dict literal is never closed), so it cannot be reformatted or
# restyled safely from this view; kept byte-identical pending the rest of
# the file. Also note `searchquery` is defined elsewhere in the file.
from flask import Flask, render_template, request from autoscraper import AutoScraper import pandas as pd import time app = Flask(__name__) #creating object and loading amazon_scraper = AutoScraper() amazon_scraper.load('amazon_in.json') @app.route("/", methods=['GET']) def home(): #when user search it if request.args.get('search'): #inputs search = request.args.get('search') sortby = request.args.get('sortby', 'relevanceblender') #call function to retrieve data search_data, original_url = searchquery(search, sortby) data_length = len(search_data) #show to user return render_template("index.html", data={ 'original_url': original_url, 'query': search, 'sortby': sortby,
from autoscraper import AutoScraper

model_path = 'scrapers/medium.json'

# Train a model: hand AutoScraper sample texts from the page and let it
# learn the matching rules.
url = 'https://medium.com/@inzaniak'
wanted_list = [
    "Build a Web Scraping Python Project from Start to Finish",
    "5 things you need to learn as a Python beginner",
]
scraper = AutoScraper()
result = scraper.build(url, wanted_list)
print(result)

# Persist the trained rules to disk.
scraper.save(model_path)

# Round-trip check: drop the instance, reload from disk, scrape again.
del scraper
scraper = AutoScraper()
scraper.load(model_path)
scraper.get_result_similar(url)
# NOTE(review): this chunk ends with a dangling `@app.route` decorator
# whose view function lies beyond the visible source, so it cannot be
# reformatted or restyled safely from this view; kept byte-identical.
# Known issues to revisit once the full file is in view: the bare
# `except: pass` in `_aggregate_result` and its three debug `print`s.
from autoscraper import AutoScraper from flask import Flask, request, escape flipkart_scraper = AutoScraper() flipkart_scraper.load('flipkart-search') app = Flask(__name__) def get_flipkart_result(search_query): url = 'https://www.flipkart.com/search?q=%s&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off' % search_query result = flipkart_scraper.get_result_similar(url, group_by_alias=True) return _aggregate_result(result) def _aggregate_result(result): final_result = [] print(list(result.values())[0]) print(list(result.values())[1]) for alias in result: print(alias) for i in range(len(list(result.values())[0])): try: final_result.append({alias: result[alias][i] for alias in result}) except: pass return final_result @app.route('/', methods=['GET'])
# --- Scraper setup (review notes added; code unchanged) --------------

# Working directory for scraper data files.
data_dir = '/home/pi/Desktop/scraper/'
# location to save records of file send attempts for troubleshooting
scrapeRecord = os.path.join(data_dir, 'Results.txt')

# One AutoScraper instance per metric scraped.
scraperitemFavorites = AutoScraper()
scrapershopSales = AutoScraper()
scraperlastSale = AutoScraper()
scraperbestSeller = AutoScraper()
scraperstars = AutoScraper()
scraperitemReviews = AutoScraper()
scrapershopReviews = AutoScraper()
scrapershopAge = AutoScraper()
#scrapertotalItems= AutoScraper()
scraperprice = AutoScraper()

# Load the pre-trained model for each scraper.
# NOTE(review): scraperitemFavorites is instantiated above but never
# load()ed here — confirm its model is loaded elsewhere in the file.
scrapershopSales.load('scrapershopSales')
scraperlastSale.load('scraperlastSale')
scraperbestSeller.load('scraperbestSeller')
scraperstars.load('scraperstars')
scraperitemReviews.load('scraperitemReviews')
scrapershopReviews.load('scrapershopReviews')
scrapershopAge.load('scrapershopAge')
#scrapertotalItems.load('scrapertotalItems')
scraperprice.load('scraperprice')

# Search keywords and which results page to scrape.
keyword1 = '3d'
keyword2 = 'print'
#keyword3 = ' '
pageNum = 1  #search results page number
import json
from autoscraper import AutoScraper
from flask import Flask, request, render_template

# Default model, loaded once at import time.
PAGINA_scraper = AutoScraper()
PAGINA_scraper.load('./rosario3-search')

app = Flask(__name__)


def get_pagina_result(url, link):
    # Scrape *link* with the per-site model named by *url*.
    # NOTE(review): this local PAGINA_scraper shadows the module-level
    # one and reloads the model on every call — confirm that is intended.
    PAGINA_scraper = AutoScraper()
    PAGINA_scraper.load('./' + url + '-search')
    result = PAGINA_scraper.get_result_similar(link, group_by_alias=True)
    return _aggregate_result(result)


def _aggregate_result(result):
    # Pivot {alias: [values, ...]} into a row-wise list of dicts; the row
    # count comes from the first alias list, so a shorter list elsewhere
    # raises IndexError.
    final_result = []
    for i in range(len(list(result.values())[0])):
        final_result.append({alias: result[alias][i] for alias in result})
    return final_result


@app.route('/AutoScraper', methods=['GET'])
def autoscraper(Link=None, Metodo=None):
    # NOTE(review): both url and link are read from the same "Link" query
    # argument — confirm this is intended.
    url = request.args["Link"]
    link = request.args["Link"]
    Metodo = request.args["Metodo"]
    wanted_list = [Metodo]
    scraper = AutoScraper()
    # NOTE(review): chunk is truncated here; the rest of this view
    # function continues beyond the visible source.
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 24 12:29:21 2021

@author: win10
"""
from autoscraper import AutoScraper
from flask import Flask, request

# Pre-trained Amazon.in search model, loaded once at import time.
amazon_scraper = AutoScraper()
amazon_scraper.load('AmazonIn-search')

app = Flask(__name__)


def get_amazon_result(search_query):
    """Scrape the Amazon.in search page for *search_query*.

    Returns a list of dicts, one per result row, keyed by the model's
    aliases.
    """
    url = 'https://www.amazon.in/s?k=%s' % search_query
    result = amazon_scraper.get_result_similar(url, group_by_alias=True)
    return _aggregate_result(result)


def _aggregate_result(result):
    """Pivot ``{alias: [values, ...]}`` into ``[{alias: value}, ...]``.

    Rows where any alias list is shorter than the first one are skipped
    rather than aborting the whole aggregation. An empty *result* yields
    an empty list (previously crashed with IndexError).
    """
    final_result = []
    if not result:
        return final_result
    # Row count is taken from the first alias list, as before.
    row_count = len(next(iter(result.values())))
    for i in range(row_count):
        try:
            final_result.append({alias: result[alias][i] for alias in result})
        except IndexError:
            # Was a bare `except: pass`, which also swallowed
            # KeyboardInterrupt/SystemExit; only short lists are expected.
            pass
    return final_result