def test_amazonscraper_csv_header():
    """The default CSV export must contain the expected header row."""
    products = amazonscraper.search(keywords="Python", max_product_nb=1)
    expected_header = ("Product title,Rating,Number of customer reviews,"
                       "Product URL,ASIN\n")
    assert expected_header in str(products.csv())
# Example #2
def main():
    """Search Amazon for a product name and write the top-rated results,
    together with their scraped reviews, to result.txt.

    Usage: script.py <product name> <number of items>
    """
    product = sys.argv[1]   # product name to search for
    choice = sys.argv[2]
    num = int(choice)  # how many items you want

    results = amazonscraper.search(product, max_product_nb=num)
    ranked = sorted(results, key=lambda item: item.rating, reverse=True)
    # BUG FIX: lst[0:9] returned only 9 items; take the first 10.
    top_ten = ranked[:10]

    # BUG FIX: the file was re-opened with 'w+' inside the loop, so every
    # iteration truncated it and only the last product survived.  Open it
    # once and write all products sequentially.
    with open("result.txt", 'w+') as file:
        for result in top_ten:
            file.write("{}\n".format(result.title))
            file.write("  - ASIN : {}\n".format(result.asin))
            file.write("  - {} out of 5 stars, {} customer reviews\n".format(
                result.rating, result.review_nb))
            file.write("- Reviews:\n")
            # Scrape the review page for this product.
            data = getReviews(result.url)
            headerlists = data['headerlists']
            reviewlists = data['reviewlists']
            for i in range(len(headerlists)):
                print("hey:" + headerlists[i])
                file.write("Review " + str(i) + "{}".format(headerlists[i]))
                file.write("\n + {}\n\n\n".format(reviewlists[i]))
def test_amazonscraper_not_satisfied_url():
    """A results page with no matching products yields an empty result set."""
    fixture_url = ("https://raw.githack.com/tducret/"
                   "amazon-scraper-python/master/test/not_satisfied.html")
    products = amazonscraper.search(
        search_url=fixture_url, max_product_nb=_MAX_PRODUCT_NB)
    assert len(products) == 0
def test_amazonscraper_get_100_products():
    """Requesting 100 products returns exactly 100 (pagination works)."""
    products = amazonscraper.search(keywords="Python", max_product_nb=100)
    assert len(products) == 100
def test_amazonscraper_get_products_with_keywords():
    """A keyword search honours the requested maximum product count."""
    products = amazonscraper.search(
        keywords="Python", max_product_nb=_MAX_PRODUCT_NB)
    assert len(products) == _MAX_PRODUCT_NB
# Example #6
def main(keywords, url, csvseparator, maxproductnb, outputhtml):
    """ Search for products on Amazon, and extract it as CSV """
    products = amazonscraper.search(keywords=keywords,
                                    search_url=url,
                                    max_product_nb=maxproductnb)

    print(products.csv(separator=csvseparator))

    # Optionally dump the last fetched HTML page for debugging.
    if outputhtml != "":
        with open(outputhtml, "w") as f:
            f.write(products.last_html_page)
# Example #7
def test_amazonscraper_get_products_with_url():
    """Searching by a full Amazon URL returns well-formed Product objects."""
    url = "https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=python"
    products = amazonscraper.search(search_url=url,
                                    max_product_nb=_MAX_PRODUCT_NB)

    assert isinstance(products, amazonscraper.Products)
    assert len(products) == _MAX_PRODUCT_NB

    first = products[0]
    assert isinstance(first, amazonscraper.Product)
    # Every scraped field should be populated.
    assert first.title != ""
    assert first.review_nb != ""
    assert first.rating != ""
    assert first.url != ""
# Example #8
def retrieveContent(keyword):
    """Search Amazon for *keyword* and print a summary of every result."""
    results = amazonscraper.search(keyword)

    for item in results:
        print("{}".format(item.title))
        print("  - ASIN : {}".format(item.asin))
        print("  - {} out of 5 stars, {} customer reviews".format(item.rating, item.review_nb))
        print("  - {}".format(item.url))
        print("  - Image : {}".format(item.img))
        print()

    print("Number of results : %d" % (len(results)))
# Example #9
def posting():
    """Handle the search-form POST: scrape Amazon for the requested product,
    push each result into MySQL, then render the ten most recent rows.

    Returns the rendered template, or 0 when the scrape produced nothing.
    """
    if request.method == 'POST':
        component = str(request.form['component'])
        product = str(request.form['product'])
        search = int(request.form['max'])

        print(component)
        print(product)
        print(search)

        # BUG FIX: `results` was referenced after the try block even when
        # the search raised (the except only printed), causing a NameError;
        # initialise it before the attempt.
        results = None
        try:
            results = amazonscraper.search(product, max_product_nb=search)
        except Exception as e:
            print(e)

        if results is None:
            print("over")
            return 0

        for result in results:
            # Fall back to a placeholder code when the ASIN is missing.
            code_no = '123456' if result.asin is None else result.asin

            supply_rate1 = result.rating
            component_rate1 = ((result.rating) /
                               (result.rating + 1)) * supply_rate1
            price1 = result.prices_main
            supply1 = get_title(result.url)
            print(supply_rate1, component_rate1, price1, supply1, component,
                  code_no)
            pipe_to_db(component, supply1, code_no, price1, supply_rate1,
                       component_rate1)

        # BUG FIX: the SELECT ran (and reconnected) once per result inside
        # the loop, and `nested_data` was unbound when the result set was
        # empty.  Query once after all rows are stored; the connection is
        # closed even if the query fails.
        nested_data = ()
        db = pymysql.connect("localhost", "root", "password", "DELL")
        try:
            cursor = db.cursor()
            cursor.execute('SELECT * FROM DELLDB ORDER BY SNO DESC LIMIT 10')
            nested_data = cursor.fetchall()
        finally:
            db.close()

        return render_template('index2.html', nested_data=nested_data)
# Example #10
def get_rss_amazon(key_words):
    """Scrape Amazon for each keyword, build an RSS feed from the results,
    upload the feed file to Google Drive, and save a prettified copy under
    templates/.

    key_words: iterable of search terms (one Amazon search per term).
    """
    # Load cached OAuth credentials; run the browser flow when missing or
    # expired, then build a Drive v3 client.
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))
    # Accumulate one entry per scraped product, across all keywords.
    title_list = []
    list_rating = []
    list_review = []
    list_url = []
    for i in key_words:
        results = list(amazonscraper.search(i))
        print(i)
        print(results)
        for result in results:
            title_list.append(result.title)
            list_rating.append(result.rating)
            list_review.append(result.review_nb)
            list_url.append(result.url)

    result = zip(title_list, list_rating, list_review, list_url)
    feed = feedgenerator.Rss201rev2Feed(title="all events",
                                        link="https://www.amazon.com/",
                                        description="New in amazon",
                                        language="en")
    # NOTE(review): info[1] (the rating) is used as the item description,
    # while review_nb (info[2]) is collected but never used — confirm this
    # is intentional.
    for info in result:
        feed.add_item(title=info[0],
                      link=info[3],
                      description=info[1],
                      unique_id='no')
    with open('rss_by_keywords_amazon.rss', 'w') as fp:
        feed.write(fp, 'utf-8')
    # Upload the generated feed file to Google Drive.
    file_metadata = {'name': 'rss_by_keywords_amazon.rss'}
    media = MediaFileUpload('rss_by_keywords_amazon.rss',
                            mimetype='text/plain',
                            resumable=True)
    fili = service.files().create(body=file_metadata,
                                  media_body=media,
                                  fields='id').execute()
    # Pretty-print the feed XML and save a second copy for the templates dir.
    o = feed.writeString('utf-8')
    soup = BeautifulSoup(o, "xml")
    soup = soup.prettify()
    with open('templates/rss_by_keywords_amazon.rss', 'w') as fp:
        fp.write(str(soup))
def main(keywords, url, csvseparator, maxproductnb, outputhtml):
    """ Search for products on Amazon, and extract it as CSV """
    products = amazonscraper.search(keywords=keywords,
                                    search_url=url,
                                    max_product_nb=maxproductnb)

    # Write the CSV to "estrazione.csv" and echo the call's return value.
    print(products.csv("estrazione.csv", separator=csvseparator))

    # Optionally keep the raw HTML of the last page fetched.
    if outputhtml != "":
        with open(outputhtml, "w") as f:
            f.write(products.last_html_page)
def test_amazonscraper_sign_in_suggestion_url():
    """A sign-in redirect page (no product listing) yields zero results."""
    # Alternative fixture: https://www.amazon.com/ref=assoc_res_sw_logo
    sign_in_url = "https://www.amazon.com/gp/aw/ref=mw_access"
    products = amazonscraper.search(
        search_url=sign_in_url, max_product_nb=_MAX_PRODUCT_NB)
    assert len(products) == 0
def test_amazonscraper_invalid_url():
    """An unreachable host surfaces as an exception, not empty results."""
    bad_url = "https://0.0.0.0"
    with pytest.raises(Exception):
        amazonscraper.search(search_url=bad_url, max_product_nb=_MAX_PRODUCT_NB)
def test_amazonscraper_csv_header():
    """Writing a CSV file produces the expected column header line."""
    products = amazonscraper.search(keywords="Python", max_product_nb=1)
    products.csv('test.csv')
    with open('test.csv') as f:
        contents = f.read()
    expected = "title,rating,review_nb,img,url,asin,prices_per_unit,units,prices_main"
    assert expected in contents
# Example #15
import amazonscraper
# Empty-keyword search capped at two products — presumably exercises the
# scraper's default results page; TODO confirm this is intentional rather
# than a placeholder keyword.
results = amazonscraper.search("", max_product_nb=2)

def get_title(url):
    """Extract the product-title slug from an Amazon product URL.

    For "https://host/Product-Name/dp/ASIN" this returns "Product Name":
    the path segment immediately after the third '/', with hyphens turned
    into spaces.

    Raises IndexError when the URL has no path segment (the original
    raised UnboundLocalError in that case).
    """
    # The first path segment is the text between the 3rd and 4th slash
    # (or to the end of the string when there is no 4th slash).
    slug = url.split("/")[3]
    # BUG FIX: the original called str.replace without using its return
    # value (strings are immutable), so hyphens were never converted.
    return slug.replace("-", " ")


# Save the raw HTML of the last results page fetched for offline inspection.
with open("test.html", "w") as f:
    f.write(results.last_html_page)
# Print a summary of every scraped product.
for result in results:
    print("{}".format(result.title))
    print("  - ASIN : {}".format(result.asin))
    print("  - {} out of 5 stars, {} customer reviews".format(
        result.rating, result.review_nb))
    print(" link - {}".format(result.url))
    print("  - Image : {}".format(result.img))
    print("  - prices : {}".format(result.prices_main))
    print("  - title : {}".format(result.title))
# Example #16
import amazonscraper

results = amazonscraper.search("Python programming", max_product_nb=2)

# Print a human-readable summary for each product found.
for product in results:
    print("{}".format(product.title))
    print("  - ASIN : {}".format(product.asin))
    print("  - {} out of 5 stars, {} customer reviews".format(
        product.rating, product.review_nb))
    print("  - {}".format(product.url))
    print("  - Image : {}".format(product.img))
    print()

print("Number of results : %d" % (len(results)))
# Example #17
def main(category_list):
    """Scrape Amazon for each category and write one JSON file per product,
    downloading each product image alongside it.

    category_list: iterable of dicts with "category_name" and "id" keys.
    Returns the most recently created per-category output directory.

    NOTE(review): if category_list is empty, `new_output_dir` is never
    bound and the final return raises UnboundLocalError — confirm callers
    always pass at least one category.
    """

    logger = logging.getLogger(__name__)
    # Timestamp only used to prefix console progress messages.
    time = str(strftime("%Y-%m-%d %H:%M:%S", localtime()))
    output_dir = "C:/Users/Tan Ye Kai/Documents/Uni work/Y1S2/HackNroll/Scraper/output"

    # category_list_path = "C:/Users/Tan Ye Kai/Documents/Uni work/Y1S2/HackNroll/Scraper/data/categories.csv"
    # category_list = global_func.read_csv(category_list_path)

    # category_list = ['Sweatpants']

    for dic in category_list:

        # "files" and "images" live directly under output_dir; a
        # per-category subdirectory is created under "files" below.
        new_output_dir = global_func.make_directory(output_dir, "files")
        image_dir = global_func.make_directory(output_dir, "images")

        category = dic["category_name"]
        category_id = dic["id"]
        new_output_dir = global_func.make_directory(new_output_dir, category)

        # NOTE(review): this check sits inside the loop, so it can never
        # see an empty list; it just reprints the list size per category.
        if (len(category_list) > 0):
            print(time + ": Number of categories in list: {}".format(
                len(category_list)))
        else:
            logger.error("Category list not extracted")

        print(time + ": Scraping.....")
        results = amazonscraper.search(category, max_product_nb=50)

        # Placeholder description used for every product record.
        lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."

        for result in results:
            # Start from the project's JSON template and fill in fields.
            json_object = json_format.initialise_job_roles()
            json_object["name"] = result.title
            json_object["description"] = lorem
            json_object["category_id"] = category_id
            json_object["provider"] = "Amazon"

            price = cleanup_price(result.price)
            json_object["price"] = price

            image = cleanup_img(result.img)
            json_object["image"] = image

            json_object["url"] = result.url

            # Rating is optional on a listing; convert the 0-5 score to a
            # percentage when present.
            if (result.rating):
                rating = convert_to_percentage(float(result.rating))
                json_object["rating"] = rating

            # Build a filesystem-safe filename from the title.
            # NOTE(review): titles longer than 50 chars are cut to 40 and
            # the `+ ''` is a no-op — the 40/50 mismatch looks unintended;
            # confirm the intended truncation length.
            title = (result.title[:40] +
                     '') if len(result.title) > 50 else result.title
            title = title.replace(" ", "_")
            # re.match yields a Match only when the name is purely
            # alphanumeric/underscore; otherwise the product is skipped.
            title = re.match("^[a-zA-Z0-9_]*$", title)

            if (title):
                filename, file_extension = os.path.splitext(image)
                img_path = global_func.make_filepath(image_dir, filename,
                                                     file_extension)
                # Only write the JSON record if the image download succeeds.
                if (global_func.download_img(result.img, img_path)):
                    title = title.group(0)
                    file_path = global_func.make_filepath(
                        new_output_dir, title, ".json")
                    global_func.make_json(file_path, json_object)

            else:
                pass

    return new_output_dir
# Example #18
import amazonscraper

results = amazonscraper.search("corsair")

# Summarise each scraped product on stdout.
for product in results:
    print("{}".format(product.title))
    print("  - ASIN : {}".format(product.asin))
    print("  - {} out of 5 stars, {} customer reviews".format(
        product.rating, product.review_nb))
    print("  - {}".format(product.url))
    print("  - Image : {}".format(product.img))
    print()

print("Number of results : %d" % (len(results)))