예제 #1
0
 def __init__(self, sqs_queue, batch_size=1):
     """Connect to SQS and attach this object to the configured queue.

     Args:
         sqs_queue: Key into the project settings; the value stored under
             that key is passed to ``create_queue`` as the queue name.
         batch_size: Stored on the instance as ``batch_size`` (default 1).
     """
     config = common.get_settings()
     self.batch_size = batch_size
     sqs = boto.sqs.connect_to_region(config['region'])
     queue_name = config[sqs_queue]
     # NOTE(review): presumably returns the existing queue when one with
     # this name already exists -- confirm against the boto documentation.
     self.queue = sqs.create_queue(queue_name)
     # Messages are handled as RawMessage instead of the queue's default class.
     self.queue.set_message_class(boto.sqs.message.RawMessage)
     self.local_cache = []
예제 #2
0
import requests
import time
import random
from retrying import retry
import logging

from jetcomcrawl.libs import common


# User-Agent header value sent by get() with every request.
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
# Upper bound, in seconds, of the random sleep performed by delay().
# Read from the project settings once, at import time.
DELAY = common.get_settings()["request_delay"]


@retry(stop_max_attempt_number=5, wait_exponential_multiplier=2000, wait_exponential_max=20000)
def get(url):
    """Fetch ``url`` with a browser-like User-Agent, retrying on failure.

    ``delay()`` is called before every attempt. Any exception raised here
    (network error, timeout, or a non-200 status) is handed to the ``retry``
    decorator, which is configured for at most 5 attempts with exponential
    backoff.

    Args:
        url: The URL to request.

    Returns:
        The ``requests`` response object; its status code is always 200.

    Raises:
        AssertionError: If the response status code is not 200.
    """
    delay()
    headers = {"user-agent": USER_AGENT}
    resp = requests.get(url, headers=headers, timeout=30)
    # Raise explicitly instead of using ``assert``: assert statements are
    # stripped under ``python -O``, which would silently return non-200
    # responses and skip the retry. AssertionError is kept so existing
    # callers that catch it continue to work.
    if resp.status_code != 200:
        raise AssertionError(
            "Unexpected status code {} for {}".format(resp.status_code, url))
    return resp


def delay():
    """Sleep for a random duration between 0 and ``DELAY`` seconds.

    The chosen duration is logged at INFO level before sleeping.
    """
    pause = random.uniform(0, DELAY)
    message = "Sleeping {}s".format(pause)
    logging.info(message)
    time.sleep(pause)


class Session(object):
    """Wrapper that keeps a ``requests.Session`` instance on ``self.s``."""

    def __init__(self):
        """Create a new ``requests.Session`` and store it as ``self.s``."""
        http_session = requests.Session()
        self.s = http_session
예제 #3
0
 def __init__(self):
     """Open a DynamoDB connection and bind ``self.table`` to the configured table.

     The table name is read from the ``dynamodb_table`` entry of the
     project settings.
     """
     config = common.get_settings()
     # NOTE(review): the region is hard-coded here rather than read from
     # the settings -- confirm that this is intended.
     dynamo = boto.dynamodb2.connect_to_region('us-west-2')
     table_name = config['dynamodb_table']
     self.table = boto.dynamodb2.table.Table(table_name, connection=dynamo)