def __init__(self, sqs_queue, batch_size=1):
    """Connect to SQS and attach this object to a raw-message queue.

    Args:
        sqs_queue: Key into the project settings; the value stored under
            that key is handed to ``create_queue`` as the queue name.
        batch_size: Stored unchanged on ``self.batch_size`` (default 1).
    """
    # Plain instance state first; none of it depends on the connection.
    self.batch_size = batch_size
    self.local_cache = []

    cfg = common.get_settings()
    sqs = boto.sqs.connect_to_region(cfg['region'])

    queue = sqs.create_queue(cfg[sqs_queue])
    # RawMessage is set as the queue's message class.
    queue.set_message_class(boto.sqs.message.RawMessage)
    self.queue = queue
import logging
import random
import time

import requests
from retrying import retry

from jetcomcrawl.libs import common

USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
# Upper bound, in seconds, of the random pause taken before each request.
DELAY = common.get_settings()["request_delay"]


@retry(stop_max_attempt_number=5, wait_exponential_multiplier=2000, wait_exponential_max=20000)
def get(url):
    """Fetch *url* with a browser-like user agent after a random pause.

    The ``retry`` decorator re-invokes this function when it raises; it is
    configured for at most 5 attempts with exponential wait settings
    (multiplier 2000, maximum 20000 -- milliseconds per the ``retrying``
    documentation).

    Args:
        url: Address passed straight to ``requests.get``.

    Returns:
        The ``requests`` response object, guaranteed to have status 200.

    Raises:
        AssertionError: If the response status code is not 200.
    """
    delay()
    headers = {"user-agent": USER_AGENT}
    resp = requests.get(url, headers=headers, timeout=30)
    # Raised explicitly rather than via ``assert`` so the check (and with it
    # the retry behaviour) survives ``python -O``; AssertionError is kept so
    # existing callers that catch it keep working.
    if resp.status_code != 200:
        raise AssertionError(
            "Unexpected status code {} for {}".format(resp.status_code, url)
        )
    return resp


def delay():
    """Sleep for a random duration drawn uniformly from [0, DELAY] seconds."""
    val = random.uniform(0, DELAY)
    logging.info("Sleeping {}s".format(val))
    time.sleep(val)


class Session(object):
    """Thin holder for a ``requests.Session`` kept on ``self.s``."""

    def __init__(self):
        self.s = requests.Session()
def __init__(self, region='us-west-2'):
    """Open the DynamoDB table named in the project settings.

    Args:
        region: AWS region name passed to
            ``boto.dynamodb2.connect_to_region``. Defaults to
            ``'us-west-2'``, the value that used to be hard-coded here,
            so callers that pass nothing behave exactly as before.
    """
    settings = common.get_settings()
    conn = boto.dynamodb2.connect_to_region(region)
    # The table name comes from the 'dynamodb_table' settings entry.
    self.table = boto.dynamodb2.table.Table(settings['dynamodb_table'], connection=conn)