コード例 #1
0
def get_pagina_result(url, link):
    """Load the saved '<url>-search.json' scraper model and run it on *link*.

    Returns the aggregated per-item rows produced by _aggregate_result.
    """
    scraper = AutoScraper()
    model_path = './' + url + '-search.json'
    scraper.load(model_path)
    similar = scraper.get_result_similar(link,
                                         unique=True,
                                         group_by_alias=True)
    return _aggregate_result(similar)
コード例 #2
0
 def auto(self, url, model_name='1'):
     """Load the saved model *model_name* and scrape *url* with it.

     Returns the alias-grouped similar results extracted from the page.
     """
     model = AutoScraper()
     model.load(model_name)
     response = requests.get(url)
     # Decode with the encoding detected from the body, not the header
     # default, so non-UTF-8 pages come out right.
     response.encoding = response.apparent_encoding
     page_html = response.text
     return model.get_result_similar(url, html=page_html, group_by_alias=True)
コード例 #3
0
from autoscraper import AutoScraper
import pandas as pd

# Show every row when printing DataFrames (finviz tables are long).
pd.set_option('display.max_rows', None)

tickers = ['SCHB', 'AMZN', 'AAPL', 'MSFT', 'TSLA', 'AMD', 'NFLX']

# One scraper instance, loaded once with the pre-trained finviz table model.
scraper = AutoScraper()
scraper.load('../finviz_table')

for ticker in tickers:
    url = f'https://finviz.com/quote.ashx?t={ticker}'
    result = scraper.get_result(url)[0]

    # The scraped flat list holds the values first, then the attribute
    # names starting at the 'Index' label; split it there and pair them up.
    index = result.index('Index')
    attributes = result[index:]
    values = result[:index]
    df = pd.DataFrame(zip(attributes, values),
                      columns=['Attributes', 'Values'])

    print(f'\n{ticker} Data: ')
    print(df.set_index('Attributes'))
コード例 #4
0
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 24 12:29:21 2021

@author: win10
"""
from autoscraper import AutoScraper
from flask import Flask, request

# Scraper pre-trained for Amazon search pages; the saved model is loaded
# once at import time and reused by every request.
amazon_scraper = AutoScraper()
amazon_scraper.load('amazon-search')
app = Flask(__name__)


def get_amazon_result(search_query):
    """Scrape Amazon.in search results for *search_query* and aggregate them."""
    search_url = 'https://www.amazon.in/s?k=%s' % search_query
    grouped = amazon_scraper.get_result_similar(search_url, group_by_alias=True)
    return _aggregate_result(grouped)


def _aggregate_result(result):
    final_result = []
    print(list(result.values())[0])
    for i in range(len(list(result.values())[0])):
        try:

            final_result.append({alias: result[alias][i] for alias in result})
        except:
            pass
    return final_result
コード例 #5
0
ファイル: api_server.py プロジェクト: umunusb1/tutorials-1
from autoscraper import AutoScraper
from flask import Flask, request

# One pre-trained scraper per marketplace; models are loaded once at
# import time and shared by all request handlers.
ebay_scraper = AutoScraper()
etsy_scraper = AutoScraper()
ebay_scraper.load('ebay-search')
etsy_scraper.load('etsy-search')
app = Flask(__name__)


def get_ebay_result(search_query):
    """Scrape eBay search results for *search_query* and aggregate them."""
    search_url = 'https://www.ebay.com/sch/i.html?_nkw=%s' % search_query
    grouped = ebay_scraper.get_result_similar(search_url, group_by_alias=True)
    return _aggregate_result(grouped)


def get_etsy_result(search_query):
    """Scrape Etsy search results and expand the 'url' alias into full
    listing URLs before aggregating."""
    search_url = 'https://www.etsy.com/search?q=%s' % search_query
    grouped = etsy_scraper.get_result_similar(search_url, group_by_alias=True)
    # The model captures bare listing ids under the 'url' alias;
    # turn each id into a complete listing URL.
    grouped['url'] = [f'https://www.etsy.com/listing/{listing_id}'
                      for listing_id in grouped['url']]
    return _aggregate_result(grouped)


def _aggregate_result(result):
    final_result = []
    for i in range(len(list(result.values())[0])):
        final_result.append({alias: result[alias][i] for alias in result})
    return final_result
コード例 #6
0
ファイル: demo.py プロジェクト: raphaelmansuy/autoscraperdemo
from autoscraper import AutoScraper

# AutoScraper must be installed with
#  pip install git+https://github.com/alirezamika/autoscraper.git

# Search parameters baked into the Quora search URL.
question = "france"
time = "year"  # NOTE(review): shadows the stdlib `time` module name
url = f"https://www.quora.com/search?q={question}&time={time}"
model_name = "model_quora"

# Load the pre-trained model and pull results similar to the training ones.
scraper = AutoScraper()
scraper.load(f"./{model_name}")
results = scraper.get_result_similar(url)

# Guard clause first: report the empty case, otherwise print each hit.
if not results:
    print("No result found")
else:
    for r in results:
        print(r)
コード例 #7
0
ファイル: app.py プロジェクト: vitthalkcontact/WEBSCRAPPING
from flask import Flask, render_template, request
from autoscraper import AutoScraper
import pandas as pd
import time

app = Flask(__name__)

# Create the Amazon scraper and load its saved model (trained offline);
# the instance is shared by every request handler below.
amazon_scraper = AutoScraper()
amazon_scraper.load('amazon_in.json')


@app.route("/", methods=['GET'])
def home():

    #when user search it
    if request.args.get('search'):
        #inputs
        search = request.args.get('search')
        sortby = request.args.get('sortby', 'relevanceblender')

        #call function to retrieve data
        search_data, original_url = searchquery(search, sortby)
        data_length = len(search_data)

        #show to user
        return render_template("index.html",
                               data={
                                   'original_url': original_url,
                                   'query': search,
                                   'sortby': sortby,
コード例 #8
0
from autoscraper import AutoScraper

# Train a model from one sample page plus two items we know appear on it.
url = 'https://medium.com/@inzaniak'
wanted_list = [
    "Build a Web Scraping Python Project from Start to Finish",
    "5 things you need to learn as a Python beginner"
]

scraper = AutoScraper()
result = scraper.build(url, wanted_list)
print(result)

# Persist the trained model to disk.
scraper.save('scrapers/medium.json')

# Prove the model round-trips: drop the instance, reload from disk,
# and scrape the same page again with the restored model.
del scraper
scraper = AutoScraper()
scraper.load('scrapers/medium.json')
scraper.get_result_similar(url)
コード例 #9
0
from autoscraper import AutoScraper
from flask import Flask, request, escape

# Scraper pre-trained for Flipkart search pages; the saved model is
# loaded once at import time and reused by every request.
flipkart_scraper = AutoScraper()
flipkart_scraper.load('flipkart-search')
app = Flask(__name__)


def get_flipkart_result(search_query):
    """Scrape Flipkart search results for *search_query* and aggregate them."""
    search_url = ('https://www.flipkart.com/search?q=%s&otracker=search&otracker1=search'
                  '&marketplace=FLIPKART&as-show=on&as=off') % search_query
    grouped = flipkart_scraper.get_result_similar(search_url, group_by_alias=True)
    return _aggregate_result(grouped)


def _aggregate_result(result):
    final_result = []
    print(list(result.values())[0])
    print(list(result.values())[1])
    for alias in result:
        print(alias)
    for i in range(len(list(result.values())[0])):
        try:

            final_result.append({alias: result[alias][i] for alias in result})
        except:
            pass
    return final_result


@app.route('/', methods=['GET'])
コード例 #10
0
# Working directory for scrape artifacts on the Pi.
data_dir = '/home/pi/Desktop/scraper/'
# Record of file send attempts, kept for troubleshooting.
scrapeRecord = os.path.join(data_dir, 'Results.txt')

# One AutoScraper instance per metric scraped from the site.
scraperitemFavorites = AutoScraper()
scrapershopSales = AutoScraper()
scraperlastSale = AutoScraper()
scraperbestSeller = AutoScraper()
scraperstars = AutoScraper()
scraperitemReviews = AutoScraper()
scrapershopReviews = AutoScraper()
scrapershopAge = AutoScraper()
#scrapertotalItems = AutoScraper()
scraperprice = AutoScraper()

# Each saved model file is named after its scraper variable; load them
# table-driven instead of one call per line.
# NOTE(review): scraperitemFavorites is created but never loaded here —
# confirm it is trained or loaded elsewhere in the file.
for _scraper, _model in [
    (scrapershopSales, 'scrapershopSales'),
    (scraperlastSale, 'scraperlastSale'),
    (scraperbestSeller, 'scraperbestSeller'),
    (scraperstars, 'scraperstars'),
    (scraperitemReviews, 'scraperitemReviews'),
    (scrapershopReviews, 'scrapershopReviews'),
    (scrapershopAge, 'scrapershopAge'),
    #(scrapertotalItems, 'scrapertotalItems'),
    (scraperprice, 'scraperprice'),
]:
    _scraper.load(_model)


# Search terms and which results page to scrape.
keyword1 = '3d'
keyword2 = 'print'
#keyword3 = ' '
pageNum  = 1 #search results page number
コード例 #11
0
ファイル: api_server.py プロジェクト: ezeellena/AutoScraper
import json

from autoscraper import AutoScraper
from flask import Flask, request, render_template

# Default scraper pre-loaded with the rosario3 search model; request
# handlers may also build per-site scrapers on the fly.
PAGINA_scraper = AutoScraper()
PAGINA_scraper.load('./rosario3-search')
app = Flask(__name__)


def get_pagina_result(url, link):
    """Load the saved '<url>-search' scraper model and run it on *link*.

    Returns the aggregated per-item rows produced by _aggregate_result.
    """
    scraper = AutoScraper()
    scraper.load('./' + url + '-search')
    grouped = scraper.get_result_similar(link, group_by_alias=True)
    return _aggregate_result(grouped)


def _aggregate_result(result):
    final_result = []
    for i in range(len(list(result.values())[0])):
        final_result.append({alias: result[alias][i] for alias in result})
    return final_result


@app.route('/AutoScraper', methods=['GET'])
def autoscraper(Link=None, Metodo=None):
    url = request.args["Link"]
    link = request.args["Link"]
    Metodo = request.args["Metodo"]
    wanted_list = [Metodo]
    scraper = AutoScraper()
コード例 #12
0
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 24 12:29:21 2021

@author: win10
"""
from autoscraper import AutoScraper
from flask import Flask, request

# Scraper pre-trained for Amazon.in search pages; the saved model is
# loaded once at import time and reused by every request.
amazon_scraper = AutoScraper()
amazon_scraper.load('AmazonIn-search')
app = Flask(__name__)


def get_amazon_result(search_query):
    """Run the pre-loaded Amazon scraper for *search_query* and aggregate rows."""
    search_url = 'https://www.amazon.in/s?k=%s' % search_query
    grouped = amazon_scraper.get_result_similar(search_url, group_by_alias=True)
    return _aggregate_result(grouped)


def _aggregate_result(result):
    final_result = []
    print(list(result.values())[0])
    for i in range(len(list(result.values())[0])):
        try:

            final_result.append({alias: result[alias][i] for alias in result})
        except:
            pass
    return final_result