Example #1
 def wait_all_requests_completed(self):
     if len(self.requests_to_send) == 0:
         return
     try:
         for resp, result in zip(erequests.map(self.requests_to_send), self.results):
             result.response = resp
     finally:
         self.requests_to_send = []
         self.results = []
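
Example #1 assumes a surrounding class that queues unsent requests in self.requests_to_send and keeps a matching placeholder in self.results for each one. A minimal sketch of that surrounding class (everything beyond those two attributes is hypothetical, and it assumes the requests are queued unsent via erequests.get_async, as in the other examples on this page):

    import erequests

    class PendingResult(object):
        """Placeholder that wait_all_requests_completed() fills in later."""
        def __init__(self):
            self.response = None

    class RequestBatch(object):
        # wait_all_requests_completed() from the example above would be a
        # method of this class.
        def __init__(self):
            self.requests_to_send = []
            self.results = []

        def queue_get(self, url, **kwargs):
            # Build the request without sending it; erequests.map() sends the
            # whole batch at once inside wait_all_requests_completed().
            self.requests_to_send.append(erequests.get_async(url, **kwargs))
            result = PendingResult()
            self.results.append(result)
            return result

Pairing responses with placeholders by position works because map() returns responses in the same order as the requests it was given, which is what the test_map examples further down assert.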
Example #2
    def test_hooks(self):
        result = {}

        def hook(r, **kwargs):
            result[r.url] = True
            return r

        reqs = [erequests.get_async(url, hooks={'response': [hook]}) for url in URLS]
        resp = list(erequests.map(reqs, size=N))
        self.assertEqual(sorted(result.keys()), sorted(URLS))
Example #3
    def test_hooks(self):
        result = {}

        def hook(r, **kwargs):
            result[r.url] = True
            return r

        reqs = [erequests.get(url, hooks={'response': [hook]}) for url in URLS]
        resp = erequests.map(reqs, size=N)
        self.assertEqual(sorted(result.keys()), sorted(URLS))
Example #4
 def wait_all_requests_completed(self):
     if len(self.requests_to_send) == 0:
         return
     try:
         for resp, result in zip(erequests.map(self.requests_to_send),
                                 self.results):
             result.response = resp
     finally:
         self.requests_to_send = []
         self.results = []
Example #5
    def test_timeout(self):
        n = 5
        reqs = [erequests.get(httpbin('delay/3'), timeout=(i+1),
                              exception_handler=mark_exception)
                for i in range(n)]

        resp = erequests.map(reqs, size=n)
        self.assertListEqual(resp[0:3], [erequests.Timeout,
                                         erequests.Timeout,
                                         erequests.Timeout],
                             "First three requests should have timeout")
Example #6
def api_async(params, token=None):
    """
    Takes the uris of the API to call
    """
    headers = {}
    keys = []
    urls = []

    for key, value in params.items():
        method, uri, get_vars = value
        keys.append(key)
        urls.append((method, build_url(uri, get_vars)))

    if token is not None:
        headers['Authorization'] = 'Bearer %s' % token
    else:
        encoded_consumer = b64encode('%s:%s' % (settings.MASTER_OAUTH_KEY, settings.MASTER_OAUTH_SECRET))
        headers['Authorization'] = 'Basic %s' % encoded_consumer

    reqs = (erequests.get(url, headers=headers) for method, url in urls)
    reqs_map = erequests.map(reqs)
    return dict(zip(keys, reqs_map))
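
Here params maps an arbitrary key to a (method, uri, get_vars) tuple; build_url and the settings constants come from the surrounding project and are not shown on this page. An illustrative call with placeholder endpoints (note that the method element is unpacked but never used, so every request is issued as a GET):

    # Hypothetical input: key -> (HTTP method, API path, query parameters).
    params = {
        'profile': ('GET', '/v1/me', {'fields': 'name,email'}),
        'friends': ('GET', '/v1/me/friends', {'limit': 10}),
    }
    responses = api_async(params, token='user-access-token')
    # responses['profile'] and responses['friends'] hold the mapped response
    # objects, matched to their keys by position via dict(zip(keys, reqs_map)).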
Example #7
def api_async(params, token=None):
    """
    Takes the uris of the API to call
    """
    headers = {}
    keys = []
    urls = []

    for key, value in params.items():
        method, uri, get_vars = value
        keys.append(key)
        urls.append((method, build_url(uri, get_vars)))

    if token is not None:
        headers['Authorization'] = 'Bearer %s' % token
    else:
        encoded_consumer = b64encode(
            '%s:%s' %
            (settings.MASTER_OAUTH_KEY, settings.MASTER_OAUTH_SECRET))
        headers['Authorization'] = 'Basic %s' % encoded_consumer

    reqs = (erequests.get(url, headers=headers) for method, url in urls)
    reqs_map = erequests.map(reqs)
    return dict(zip(keys, reqs_map))
Example #8
 def test_concurrency_with_delayed_url(self):
     t = time.time()
     n = 10
     reqs = [erequests.get(httpbin('delay/1')) for _ in range(n)]
     resp = erequests.map(reqs, size=n)
     self.assertLess((time.time() - t), n)
Example #9
 def get(self, url, **kwargs):
     return list(erequests.map([erequests.get_async(url, **kwargs)]))[0]
Example #10
 def test_concurrency_with_delayed_url(self):
     t = time.time()
     n = 10
     reqs = [erequests.get_async(httpbin('delay/1')) for _ in range(n)]
     resp = list(erequests.map(reqs, size=n))
     self.assertLess((time.time() - t), n)
Example #11
HTTPBIN_URL = os.environ.get('HTTPBIN_URL', 'http://httpbin.org/')

def httpbin(*suffix):
    """Returns url for HTTPBIN resource."""
    return HTTPBIN_URL + '/'.join(suffix)


N = 5
URLS = [httpbin('get?p=%s' % i) for i in range(N)]


class GrequestsCase(unittest.TestCase):

    def test_map(self):
        reqs = [erequests.async.get(url) for url in URLS]
        resp = erequests.map(reqs, size=N)
        self.assertEqual([r.url for r in resp], URLS)

    def test_imap(self):
        reqs = (erequests.async.get(url) for url in URLS)
        i = 0
        for i, r in enumerate(erequests.imap(reqs, size=N)):
            self.assertTrue(r.url in URLS)
        self.assertEqual(i, N - 1)

    def test_hooks(self):
        result = {}

        def hook(r, **kwargs):
            result[r.url] = True
            return r
Example #12
            genotypes.append(individual_genotype)

    return genotypes


def get_list_of_rs(list_of_rs):
    """Return a list with all the Ensembl responses for a list of rs."""
    ensembl_uri = \
        "http://rest.ensembl.org/variation/human/{}" +\
        "?content-type=application/json;genotypes=1"

    # Create all the URL request strings
    list_of_urls = [ensembl_uri.format(_) for _ in list_of_rs]

    # Put every request in a pool (unsent)
    responses = (erequests.async.get(_) for _ in list_of_urls)

    # Send every request and get the JSON from each one.
    return [_.json() for _ in erequests.map(responses)]


def get_rs(rs_id):
    """Return the JSON for a given rs_id."""
    ensembl_uri = \
        "http://rest.ensembl.org/variation/human/{}".format(rs_id) +\
        "?content-type=application/json;genotypes=1"

    json_response = requests.get(ensembl_uri).json()

    return json_response
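
An illustrative call to get_list_of_rs() (the identifiers are placeholders, not a real lookup): one request per rs id is queued and sent concurrently through erequests.map, and the decoded JSON payloads come back in the same order as the input list.

    records = get_list_of_rs(["rs0000001", "rs0000002"])
    # records[0] is the decoded Ensembl JSON for the first identifier,
    # records[1] for the second, and so on.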
Example #13
 def test_map(self):
     reqs = [erequests.get_async(url) for url in URLS]
     resp = erequests.map(reqs, size=N)
     self.assertEqual([r.url for r in resp], URLS)
Example #14
db = Database()

#General logging stuff so it doesn't break
logger = logging.getLogger(__name__)	
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
#fh.setFormatter(formatter)	
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)
logger.info("Logger created")


url = 'http://www.bbc.co.uk/1/hi/england/hampshire/6245861.stm'
referer = 'http://www.bbc.co.uk/news/health-22855670'
urls = [url]

rs = (erequests.async.get(u) for u in urls)
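# The generator above only builds the requests; the erequests.map(rs) call
# below is what actually sends them and yields the responses.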



for u in erequests.map(rs):	
	print '\n\n\n\n%s' % u.url
	#print db.singleValueSelectQuery('SELECT rowid FROM BBCPages WHERE Url = ?', [u.url])
		
	id = BBC.getIdByUrl(u.url)
	print "id: %s" % str(id)
	bbc = BBC(u, id, logger, referer)

Example #15
			randy = random.randint(4,8)	
			logger.info("Processing links from %s.  About to sleep for %ds...." % (referer, randy))
			
			time.sleep(randy)
		else:
			logger.info("Processing links from %s.  Not sleeping because the last list was small/empty" % referer)
			#time.sleep(1)
		header={'Accept-Encoding':'gzip, deflate', 'User-Agent':'Scraper', 'Referer' : referer}
		
		print url_list[0]
		#print url_list[0]["%d|%s" % (id,referer)]		
		#list_not_empty = bool(url_list[0]["%d|%s" % (id,referer)])
		length = len(url_list[0]["%d|%s" % (id,referer)])
		print "List length: %d" % len(url_list[0]["%d|%s" % (id,referer)])
		rs = (erequests.async.get(u.split('|')[1], headers=header) for u in url_list[0]["%d|%s" % (id,referer)])		
		for u in erequests.map(rs):
			try:
				logger.info('Starting to process %s' % u.url)
				id = BBC.getIdByUrl(u.url)
				bbc = BBC(u, id, logger, referer)

				url_list.append({"%d|%s" % (id, bbc.url) : bbc.urls})		
			except Exception, e:
				#Just die and we can deal with it later
				logger.error(str(e))
		logger.info("Finished processing remaining URLs for %s" % referer)
		del url_list[0]		
		save_todo_list(url_list)


Example #16
 def get(self, url, **kwargs):
     return erequests.map([erequests.get(url, **kwargs)])[0]
Example #17
 def test_stream_enabled(self):
     r = erequests.map([erequests.get(httpbin('stream/10'))],
                       size=2, stream=True)[0]
     self.assertFalse(r._content_consumed)
Example #18
 def test_map(self):
     reqs = [erequests.get(url) for url in URLS]
     resp = erequests.map(reqs, size=N)
     self.assertEqual([r.url for r in resp], URLS)
Example #19
 def get(self, url, **kwargs):
     return list(erequests.map([erequests.async.get(url, **kwargs)]))[0]
Example #20
 def test_calling_request(self):
     reqs = [erequests.request('POST', httpbin('post'), data={'p': i})
             for i in range(N)]
     resp = erequests.map(reqs, size=N)
     self.assertEqual([int(r.json()['form']['p']) for r in resp], list(range(N)))
Example #21
 def test_calling_request(self):
     reqs = [erequests.request_async('POST', httpbin('post'), data={'p': i})
             for i in range(N)]
     resp = erequests.map(reqs, size=N)
     self.assertEqual([int(r.json()['form']['p']) for r in resp], list(range(N)))
Example #22
 def test_stream_enabled(self):
     r = list(erequests.map([erequests.get_async(httpbin('stream/10'), stream=True)],
                       size=2))[0]
     self.assertFalse(r._content_consumed)
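
Because stream=True defers reading the body, _content_consumed is still False right after map() returns. A short sketch of draining the response afterwards, assuming (as the attribute above suggests) that the mapped objects are ordinary requests.Response instances:

    # Read the streamed body lazily, then release the connection.
    for chunk in r.iter_content(chunk_size=1024):
        pass  # process each chunk here
    r.close()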
Example #23
HTTPBIN_URL = os.environ.get('HTTPBIN_URL', 'http://httpbin.org/')


def httpbin(*suffix):
    """Returns url for HTTPBIN resource."""
    return HTTPBIN_URL + '/'.join(suffix)


N = 5
URLS = [httpbin('get?p=%s' % i) for i in range(N)]


class GrequestsCase(unittest.TestCase):
    def test_map(self):
        reqs = [erequests.async.get(url) for url in URLS]
        resp = erequests.map(reqs, size=N)
        self.assertEqual([r.url for r in resp], URLS)

    def test_imap(self):
        reqs = (erequests.async.get(url) for url in URLS)
        i = 0
        for i, r in enumerate(erequests.imap(reqs, size=N)):
            self.assertTrue(r.url in URLS)
        self.assertEqual(i, N - 1)

    def test_hooks(self):
        result = {}

        def hook(r, **kwargs):
            result[r.url] = True
            return r