def setUpClass(cls):
    """Read AWS credentials from the environment and build the shared scraper.

    Stores the configured ``AmazonScraper`` on the class as ``cls.amzn``
    (MaxQPS=0.5 to stay under the Product Advertising API rate limit).

    Raises:
        AssertionError: when any required environment variable is missing,
            so the test framework reports a clear setup failure.
    """
    config = {}
    try:
        config['access_key'] = os.environ['AWS_ACCESS_KEY_ID']
        config['secret_key'] = os.environ['AWS_SECRET_ACCESS_KEY']
        config['associate_tag'] = os.environ['AWS_ASSOCIATE_TAG']
    except KeyError:
        # Was a bare `except:`, which would also mask unrelated errors
        # (KeyboardInterrupt, typos raising NameError, ...). Only a missing
        # environment variable belongs here.
        raise AssertionError('''
            The following environment variables must be set:
                "AWS_ACCESS_KEY_ID"
                "AWS_SECRET_ACCESS_KEY"
                "AWS_ASSOCIATE_TAG"
            ''')
    cls.amzn = AmazonScraper(MaxQPS=0.5, **config)
def initialize(prodId):
    """Fetch an Amazon product and all of its full reviews.

    Args:
        prodId: the Amazon item id (ASIN) to look up.

    Returns:
        (reviews, reviews_title, prodName) — review bodies, review titles,
        and the product title with '.' and '/' replaced by '-' (so it is
        safe to use as a filename).
    """
    # NOTE(review): 'acess_key' looks like a typo for 'access_key' — it must
    # match a module-level name defined outside this view; confirm before
    # renaming.
    amzn = AmazonScraper(acess_key, secret_key, customer_tag, Region='IN')
    p = amzn.lookup(ItemId=prodId)
    rs = amzn.reviews(ItemId=prodId)
    reviews, reviews_title = [], []
    # enumerate replaces the hand-maintained counter `i = 1 ... i += 1`.
    for i, r in enumerate(rs, start=1):
        fr = r.full_review()
        print_review(fr.title, fr.text, i)
        reviews.append(fr.text)
        reviews_title.append(fr.title)
    # Same net effect as the original per-character list-rebuild loop
    # (which re-copied the whole title for every character), in two
    # linear passes.
    prodName = p.title.replace('.', '-').replace('/', '-')
    return reviews, reviews_title, prodName
# NOTE(review): this chunk begins mid-function — the `return True` below
# closes a definition whose header is outside the visible region.
return True

# Positional credentials shared by all three Amazon clients below.
auth_args = [AMZ_ACCESS_KEY, AMZ_SECRET_KEY, AMZ_ASSOC_TAG]
# Shared keyword configuration: China region, rate limit, timeout, error hook.
auth_kwargs = {
    'Region': 'CN',
    'MaxQPS': 0.9,
    'Timeout': 5.0,
    'ErrorHandler': error_handler}
# region_options = bottlenose.api.SERVICE_DOMAINS.keys()
amz_product = AmazonAPI(*auth_args, **auth_kwargs)
amz_scraper = AmazonScraper(*auth_args, **auth_kwargs)
# Raw bottlenose client; responses are parsed as XML soup.
amz_nose = bottlenose.Amazon(
    Parser=lambda text: BeautifulSoup(text, 'xml'),
    *auth_args, **auth_kwargs)


def print_products(products):
    # product.features: list of product details (translated from Chinese).
    # Prints each numbered product title and mirrors the same line into
    # result.txt (truncating any previous contents).
    with open('result.txt', 'w') as f:
        for i, product in enumerate(products):
            line = "{0}. '{1}'".format(i, product.title.encode('utf8'))
            print(line)
            f.write(line + '\n')
def main(num_items, heading_level, args):
    """Search Amazon Books for each quoted term list and print markdown links.

    NOTE(review): Python 2 code (print statement, ``urllib.quote_plus``,
    ``str.encode`` producing bytes) — do not run under Python 3 as-is.

    Args:
        num_items: maximum number of results to emit per search-term group.
        heading_level: markdown heading depth (1-6) printed before each
            group; values outside that range suppress the heading.
        args: command-line arguments; each is one quoted search string.
    """
    # Retrieve the contents of the API key file
    apikey = get_api_config('.amznrc')
    # Create AmazonScraper object using API key
    amznscpr = AmazonScraper(*apikey)
    # Check keyword list entered on the command line
    if len(args) < 1:
        print('Missing search terms. For usage help: python amznsrch.py -h')
        sys.exit(1)
    # Loop through quoted lists of search terms from command line arguments
    for arg in args:
        # Print search terms as a markdown heading
        srch_terms = str(arg)
        if heading_level > 0 and heading_level < 7:
            print '\n' + '#' * heading_level + ' ' + srch_terms + '\n'
        # Fetch and return results
        for item in itertools.islice(
                amznscpr.search(Keywords=srch_terms, SearchIndex='Books'),
                num_items):
            # Skip if no title, else encode, remove parenthetical text, & quote
            if not item.title:
                continue
            else:
                bktitle = item.title.encode('utf8')
                # Strip a trailing "(...)" or "[...]" qualifier from the title.
                bktitle = re.sub('\s*[(\[].*[)\]]', '', bktitle)
                bktitlesrch = urllib.quote_plus('"' + bktitle + '"')
            # Encode author, if present, and format for printing
            if not item.author:
                bkauthor = ''
            else:
                bkauthor = 'by ' + item.author.encode('utf8')
            # Add associate tag to item URL
            # (apikey[2] is presumably the associate tag — confirm the field
            # order returned by get_api_config).
            bkurl = str(item.url) + '/?tag=' + apikey[2]
            # Construct links as desired
            amzn = '[AMZN](' + bkurl + ')'
            goog = ('[GOOG]' + '(https://www.google.com/' +
                    'search?tbo=p&tbm=bks&q=intitle:' + bktitlesrch +
                    '&num=10&gws_rd=ssl)')
            spl = ('[SPL](https://seattle.bibliocommons.com/search?'
                   + 't=title&search_category=title&q=' + bktitlesrch +
                   '&commit=Search)')
            uwl = ('[UW](http://alliance-primo.hosted.exlibrisgroup.com/' +
                   'primo_library/libweb/action/search.do?fn=search&' +
                   'ct=search&vid=UW&vl%28753972432UI0%29=title&' +
                   'vl%281UIStartWith0%29=starts+with&vl%28freeText0%29=' +
                   bktitlesrch + '&Submit=Search)')
            # Searching UW Libraries through WorldCat to be deprecated 2015-09
            #uwl = ('[UW](http://uwashington.worldcat.org' +
            #       '/search?q=ti%3A' + bktitlesrch + '&qt=advanced)')
            # Print markdown for title, author, and links as bulleted list item
            print('- _' + bktitle + '_ ' + bkauthor + ' ( ' + goog +
                  ' | ' + amzn + ' | ' + spl + ' | ' + uwl + ' )')
# NOTE(review): this chunk begins mid-function — the randomized sleep and
# `return True` below close a definition whose header is outside the
# visible region.
time.sleep(random.expovariate(0.1))
return True


# Amazon API credentials (comment translated from Chinese).
AUTH_ARGS = [AMZ_ACCESS_KEY, AMZ_SECRET_KEY, AMZ_ASSOC_TAG]
# Amazon API request settings (comment translated from Chinese).
AUTH_KWARGS = {
    'Region': 'CN',
    'MaxQPS': 0.9,
    'Timeout': 5.0,
    'ErrorHandler': error_handler
}
amz_product = AmazonAPI(*AUTH_ARGS, **AUTH_KWARGS)
amz_scraper = AmazonScraper(*AUTH_ARGS, **AUTH_KWARGS)
# Raw bottlenose client; responses parsed as XML soup.
amz_nose = bottlenose.Amazon(Parser=lambda text: BeautifulSoup(text, 'xml'),
                             *AUTH_ARGS, **AUTH_KWARGS)


class AmazonLookupItem(object):
    # Wrap all the useful api from AmazonAPI and add some new.

    def __init__(self, asin):
        # A fresh API client per item; the lookup is performed eagerly here
        # and cached on the instance as `item_api`.
        amz = AmazonAPI(*AUTH_ARGS, **AUTH_KWARGS)
        print('\n>>> Parsing item %s from api...' % asin)
        self.item_api = amz.lookup(ItemId=asin)
        print('Done.\n')

    @property
    def is_prime(self):
        # NOTE(review): property body truncated — continues beyond this view.
def main():
    """Entry point: validate CLI arguments, then run the product search
    between INICIO/FIN banner lines."""
    validate_args()
    scraper = AmazonScraper('products.yml')
    print('===============================INICIO===============================')
    process_search(scraper)
    print('===============================FIN===============================')
import requests, time, bottlenose, math, urllib, csv
from bs4 import BeautifulSoup
from amazon_scraper import AmazonScraper  # https://github.com/adamlwgriffiths/amazon_scraper
from decimal import Decimal
from app.categories import *
from app_config import *

# Amazon scraper + Amazon API wrapper
amazon = AmazonScraper(app.config['AMZ_API_KEY'], app.config['AMZ_API_SECRET'],
                       app.config['AMZ_ASSOCIATE'])

# Access Raw Amazon XML Response
amazon_raw = bottlenose.Amazon(app.config['AMZ_API_KEY'],
                               app.config['AMZ_API_SECRET'],
                               app.config['AMZ_ASSOCIATE'])


def upc_to_asin(upc):
    """Resolve a UPC to its Amazon ASIN(s).

    Args:
        upc: the UPC code to look up.

    Returns:
        A list of ASIN strings — one element when the lookup yields a
        single product, one per product otherwise.
    """
    time.sleep(1)  # crude throttle to stay under the API rate limit
    p = amazon.lookup(ItemId=upc, IdType='UPC', SearchIndex='All')
    # isinstance replaces the fragile `type(p) != list` comparison, and a
    # comprehension replaces the hand-rolled while/counter loop.
    if not isinstance(p, list):
        return [p.asin]
    return [product.asin for product in p]
import requests
import re
import json
from textblob import TextBlob
import itertools
import pickle

# Disable request warning
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

from amazon_scraper import AmazonScraper

# SECURITY(review): live-looking AWS credentials are hard-coded below; they
# should be moved to environment variables / config and the exposed keys
# rotated.
amzn = AmazonScraper("AKIAJ5G4TDSHO2D54APQ",
                     "DkMW4edxLB91MGcnDhChkciqj2XumqlySi9yOhT6",
                     "beproject0d-20",
                     Region='IN', MaxQPS=0.9, Timeout=5.0)

# NOTE(review): Flask and MySQL are not imported in this view — presumably
# imported elsewhere in the file; confirm.
app = Flask(__name__)
mysql = MySQL()
app.config['MYSQL_DATABASE_USER'] = '******'
app.config['MYSQL_DATABASE_PASSWORD'] = '******'
app.config['MYSQL_DATABASE_DB'] = 'review_data'
app.config['MYSQL_DATABASE_HOST'] = 'localhost'
mysql.init_app(app)
conn = mysql.connect()
cursor = conn.cursor()

# Capture a 10-character uppercase-alphanumeric ASIN / 10-digit ISBN from a
# URL path segment.
asin_regex = r'/([A-Z0-9]{10})'
isbn_regex = r'/([0-9]{10})'


def get_amazon_item_id(url):
    # NOTE(review): function body truncated — continues beyond this view.
def setUp(self):
    """Build the AmazonScraper fixture under test with a dummy ASIN."""
    # Passing the keyword directly is equivalent to the original's
    # build-a-dict-then-unpack (`AmazonScraper(**{'asin': 'a'})`).
    self.amazon_scraper = AmazonScraper(asin='a')
# NOTE(review): Python 2 script fragment (print statements, csv file opened
# in 'rb' mode). The trailing `for` loop is truncated at the end of this view.

# Echo the current public IP before switching to the proxy.
resp = requests.get('http://icanhazip.com')
print "My current IP address:", resp.content.strip()

# SECURITY(review): ProxyMesh username/password are hard-coded; move them to
# config and rotate these credentials.
AUTH = requests.auth.HTTPProxyAuth('manutd0707', 'manutd0707')
PROXIES = {'http': 'http://us-dc.proxymesh.com:31280'}
# verify=False disables TLS certificate checks for this request.
resp = requests.get('http://icanhazip.com', proxies=PROXIES, auth=AUTH,
                    verify=False)
print "My new IP address via ProxyMesh:", resp.content.strip()

# Placeholder credentials — replace with real keys before running.
AMAZON_ACCESS_KEY = "AMAZON_ACCESS_KEY"
AMAZON_SECRET_KEY = "AMAZON_SECRET_KEY"
AMAZON_ASSOCIATE_TAG = "AMAZON_ASSOCIATE_TAG"
amzn = AmazonScraper(AMAZON_ACCESS_KEY, AMAZON_SECRET_KEY,
                     AMAZON_ASSOCIATE_TAG)
# You need 3 things for the above keys: AWS account (first two codes above),
# Amazon Associates account (final code), and then you need to sign up to use
# the Product Advertising API within the Associates account

filename = "reviews_allinfo.csv"
filename2 = "reviews_notext.csv"
save_path = 'c:/output/'

# Read one product id per row from the first column of the CSV.
with open('product_ids.csv', 'rb') as f:
    csv_f = csv.reader(f)
    items = [row[0].strip() for row in csv_f]

for number in items:
    # NOTE(review): loop body truncated — continues beyond this view.