Example #1
    def _get_raw_data(self):
        docktor_config = providers_config.providers['docktor']
        apps = []
        session = FuturesSession(max_workers=CONCURRENT_JOBS_LIMIT)
        session.mount('https://', self.__requests_http_adapter)
        session.mount('http://', self.__requests_http_adapter)
        for stage in docktor_config:
            for zone in docktor_config[stage]:
                apps_uri = '{uri}/apps/'.format(uri=docktor_config[stage][zone]['uri'])
                try:
                    r = session.get(apps_uri, timeout=REQUEST_TIMEOUT).result()
                    r.raise_for_status()
                    apps_list = r.json()
                except ValueError as e:
                    logger.error("Non json response {} from {}-{} docktor".format(r.content, stage, zone))
                    raise e
                except Exception as e:
                    logger.error("Exception raised on {}-{} docktor".format(stage, zone))
                    raise e

                future_apps_details = [session.get('{apps_uri}{app}'.format(apps_uri=apps_uri, app=app), timeout=REQUEST_TIMEOUT) for app in apps_list]

                try:
                    apps_details = [a.result() for a in future_apps_details]
                except Exception as e:
                    logger.error("Exception raised on {}-{} docktor".format(stage, zone))
                    raise e

                partial_get_app_info = partial(self.get_app_info, stage, zone)

                apps.extend(map(partial_get_app_info, apps_details))
        return apps
Example #2
class CustomStreamListener(tweepy.StreamListener):
  def __init__(self, socketio, track):
    super(CustomStreamListener, self).__init__()
    self.socketio = socketio
    self.room = track
    self.session = FuturesSession()

  def get_geonames_username(self):
    return "yasyf{}".format(random.randint(1,5))

  def on_status(self, status):
    if status.coordinates or status.author.location:
      data = {'text': status.text.encode('utf-8')}
      data.update({k:getattr(status.author, k) for k in ['time_zone', 'location']})
      data.update({k:getattr(status, k) for k in ['lang', 'coordinates']})

      def add_sentiment(session, response):
        data['sentiment'] = response.json()['results']
        self.socketio.emit('status', data, room=self.room)

      def add_country_code(session, response):
        try:
          json = response.json()
          if json['totalResultsCount'] > 0:
            result = json['geonames'][0]
            data['country'] = result['countryCode']
            data['coordinates'] = {'coordinates': [float(result['lng']), float(result['lat'])]}
          else:
            return
        except:
          data['country'] = response.text.strip()

        if TEST_MODE:
          data['sentiment'] = random.random()
          self.socketio.emit('status', data, room=self.room)
        else:
          url = "http://apiv2.indico.io/sentiment"
          args = {'key': os.getenv('INDICOIO_API_KEY')}
          self.session.post(url, data={'data': data['text']}, params=args, background_callback=add_sentiment)

      if status.coordinates:
        url = "http://ws.geonames.org/countryCode"
        args = {'lat': status.coordinates['coordinates'][1], 'lng': status.coordinates['coordinates'][0],
               'username': self.get_geonames_username()}
        self.session.get(url, params=args, background_callback=add_country_code)
      else:
        url = "http://api.geonames.org/search"
        args = {'q': status.author.location, 'username': self.get_geonames_username(),
                'maxRows': 1, 'type': 'json'}
        self.session.get(url, params=args, background_callback=add_country_code)
    return True

  def on_error(self, status_code):
    print('Encountered error with status code:', status_code)
    self.socketio.emit('error', {'status_code': status_code}, room=self.room)
    return True

  def on_timeout(self):
    print('Timeout...')
    return True
Example #3
def add_list_new() -> None:
    requester = FuturesSession(executor=ProcessPoolExecutor(30), session=requests.session())
    api_key = settings.TBA_API_HEADERS

    team_list_get = lambda p: requester.get(team_by_page_url_template(page=p), headers=api_key)
    team_participation_get = lambda tn: requester.get(team_participation_url_template(team=tn), headers=api_key)

    page_range = get_page_range()

    print("\nStarting %d HTTP requests for team lists, split between %d processes..." % (
        page_range[1] - page_range[0], requester.executor._max_workers))
    team_list_futures = [team_list_get(p) for p in range(*page_range)]
    print("Waiting...")
    wait(team_list_futures)
    print("Done!\n")

    teams_lists = map(lambda f: f.result().json(), team_list_futures)
    teams_data = [item for page_data in teams_lists for item in page_data]
    team_numbers = [*map(lambda t: t['team_number'], teams_data)]

    print("Starting %d HTTP requests for team participation data, split between %d processes..." % (
        len(team_numbers), requester.executor._max_workers))
    team_participation_futures = [team_participation_get(tn) for tn in team_numbers]
    print("Waiting...")
    wait(team_participation_futures)
    print("Done!\n")

    team_participations = map(lambda f: f.result().json(), team_participation_futures)
    arg_list = zip(team_numbers, teams_data, team_participations)

    for args in arg_list:
        add_team(*args)
Example #4
    def test_futures_session(self):
        # basic futures get
        sess = FuturesSession()
        future = sess.get(httpbin('get'))
        self.assertIsInstance(future, Future)
        resp = future.result()
        self.assertIsInstance(resp, Response)
        self.assertEqual(200, resp.status_code)

        # non-200, 404
        future = sess.get(httpbin('status/404'))
        resp = future.result()
        self.assertEqual(404, resp.status_code)

        def cb(s, r):
            self.assertIsInstance(s, FuturesSession)
            self.assertIsInstance(r, Response)
            # add the parsed json data to the response
            r.data = r.json()

        future = sess.get(httpbin('get'), background_callback=cb)
        # this should block until complete
        resp = future.result()
        self.assertEqual(200, resp.status_code)
        # make sure the callback was invoked
        self.assertTrue(hasattr(resp, 'data'))

        def rasing_cb(s, r):
            raise Exception('boom')

        future = sess.get(httpbin('get'), background_callback=rasing_cb)
        with self.assertRaises(Exception) as cm:
            resp = future.result()
        self.assertEqual('boom', cm.exception.args[0])
Example #5
def bench_requests_futures_async(number_reqs, nb_worker):
    # https://pypi.python.org/pypi/requests-futures
    l=[]

    start = datetime.datetime.now()
    print('Start : ', start)

    def bg_cb(sess, resp):
        # resp.text
        if resp.status_code != requests.codes.ok:
            print(resp.status_code)
            resp.raise_for_status()
        #print(dir(resp))
        l.append(1)
        l_size = len(l)
        print(l_size)
        #print(len(response.body))
        if l_size == number_reqs:
            tornado.ioloop.IOLoop.instance().stop()
        if (datetime.datetime.now() - start).total_seconds() >= 60:
            tornado.ioloop.IOLoop.instance().stop()

    session = FuturesSession( max_workers=10 )
    for elem in range(int(number_reqs/nb_worker)):
        for e in range(nb_worker):
            session.get(
                        "http://www.leboncoin.fr/",
                        background_callback = bg_cb
                        )
        time.sleep(1)
    print('[Rq TURFU] Done :', datetime.datetime.now() - start)
Example #6
def fetchReviews(unique_id):

	s = FuturesSession()
	
	# Hand shake proc. to figure out how many calls we send to server
	api_format = 'https://watcha.net/comment/list?unique_id={unique_id}&start_index={start_index}&count=10&type=like'
	handshake = api_format.format(unique_id=unique_id, start_index=str(0))
	hs = s.get(handshake).result().content
	json_hs = json.loads(hs)
	total_count = int(json_hs['meta']['total_count'])
	how_many_queries = total_count / 10 + 1

	query_urls = [api_format.format(unique_id=unique_id, start_index=str(i * 10)) for i in xrange(0, how_many_queries, 1)]
	reviews = [
		{
			'movie_title': r['movie_title'],
			'rating': r['rating'],
			'text': r['text'],
			'updated_at': time.mktime(dateutil.parser.parse(r['updated_at']).timetuple()),
			'comment_id': r['comment_id']
		}
		for qu in query_urls
		for r in json.loads(s.get(qu).result().content)['data']
	]
	return reviews
Example #7
    def test_redirect(self):
        """ Tests for the ability to cleanly handle redirects. """
        sess = FuturesSession()
        future = sess.get(httpbin('redirect-to?url=get'))
        self.assertIsInstance(future, Future)
        resp = future.result()
        self.assertIsInstance(resp, Response)
        self.assertEqual(200, resp.status_code)

        future = sess.get(httpbin('redirect-to?url=status/404'))
        resp = future.result()
        self.assertEqual(404, resp.status_code)
Example #8
    def asyncDepartureBoards(self, request_list):
        header = {"Authorization": self.auth.token}
        url = "https://api.vasttrafik.se/bin/rest.exe/v2/departureBoard"

        # Start a session for the async requests
        session = FuturesSession()
        reqs = []
        for req in request_list:
            # Send the requests
            req["format"] = "json"
            future = session.get(url, headers=header, params=req)
            reqs.append(future)
            time.sleep(0.02)  # Without this everything breaks

        responses = []
        for req in reqs:
            # Get the results
            r = req.result()
            responses.append(r)

        # Check for errors
        resp = self.auth.checkResponses(responses)

        output = []
        for response in resp:
            output.append(response.json())

        return output
Example #9
    def _chapter_pages(self, soup, html):
        # For webtoons, all pages are shown in a single page.
        # When that's the case, there's this element that asks if you want to
        # view page-by-page instead. Let's use this element to check if we're
        # parsing a webtoon chapter.
        webtoon = soup.find('a', href='?supress_webtoon=t')
        if webtoon is not None:
            img_tags = soup.find_all(_page_img_tag)
            return [
                tag['src']
                for tag in img_tags
                ]

        # a <select> tag has options that each points to a page
        opts = soup.find('select', id='page_select').find_all('option')
        urls = [opt['value'] for opt in opts]

        # Page 1 has already been fetched (stored in this html param, duh!)
        # so let's save ourselves an http request
        pages_htmls = [html]
        urls = urls[1:]
        session = FuturesSession()

        for order, url in enumerate(urls):
            res = session.get(url).result()
            if res.status_code != 200:
                raise HtmlError('cannot fetch')
            pages_htmls.append(res.content)

        returns = []
        for page_html in pages_htmls:
            soup = BeautifulSoup(page_html)
            img_url = soup.find('img', id='comic_page')['src']
            returns.append(img_url)
        return returns
Example #10
def parse_feeds():
    session = FuturesSession(max_workers=256)
    feeds = RssFeed.query.all()
    urls = []
    for feed in feeds:
        urls.append(feed.link)
    responses = []
    delete_entries()
    for url in urls:
        responses.append(session.get(url))
    for i in range(len(urls)):
        try:
            feed = feeds[i]
            data = responses[i].result()
            data = feedparser.parse(data.text)
            try:
                base_site = get_site_from_link(data.feed.link)
                favicon = base_site + "/favicon.ico"
                feed.favicon = favicon
                db.session.commit()
            except:
                db.session.rollback()
            add_new_entries(data, feed, base_site)
            # logging.info(str(time.time()) + " " + str(i))
        except Exception as e:
            print(e)
            logging.info(e)
    # delete_entries() uncomment this and comment the previous to delete every
    # entry not in the date, also the ones in the feed file
    logging.info("feeds parsed")
    logging.info(datetime.now())
Example #11
    def getVenue(self, venueId):
        dictres = None

        if check_cache('venue_' + venueId + '.json', False):
            cache = retrieve_cache('venue_' + venueId + '.json', False)
            dictres = json.loads(cache)
        else:
            session = FuturesSession(executor=ThreadPoolExecutor(
                max_workers=1))
            rawtime = date.today() - timedelta(days=1)
            parsedtime = rawtime.strftime('%Y%m%d')

            params = dict(client_id=config.FOURSQUARE_CLIENT_ID,
                          client_secret=config.FOURSQUARE_CLIENT_SECRET,
                          v=parsedtime)
            future = session.get('https://api.foursquare.com/v2/venues/' + venueId,
                                 params=params)
            response = future.result()

            if response.status_code != 200:
                return None

            content = response.text
            dictres = json.loads(content)
            vid = dictres['response']['venue']['id']
            store_cache(content, 'venue_' + vid + '.json')

        if dictres is None:
            return None

        return mp.f_location(dictres)
Example #12
    def _chapter_pages(self, soup, html):
        # For webtoons, all pages are shown in a single page.
        # When that's the case, there's this element that asks if you want to
        # view page-by-page instead. Let's use this element to check if we're
        # parsing a webtoon chapter.
        webtoon = soup.find("a", href="?supress_webtoon=t")
        if webtoon is not None:
            img_tags = soup.find_all(_page_img_tag)
            return [tag["src"] for tag in img_tags]

        # a <select> tag has options that each points to a page
        opts = soup.find("select", id="page_select").find_all("option")
        urls = [opt["value"] for opt in opts]

        # Page 1 has already been fetched (stored in this html param, duh!)
        # so let's save ourselves an http request
        pages_htmls = [html]
        urls = urls[1:]
        session = FuturesSession()

        for order, url in enumerate(urls):
            res = session.get(url).result()
            if res.status_code != 200:
                raise HtmlError("cannot fetch")
            pages_htmls.append(res.content)

        returns = []
        for page_html in pages_htmls:
            soup = BeautifulSoup(page_html)
            img_url = soup.find("img", id="comic_page")["src"]
            returns.append(img_url)
        return returns
Example #13
def load_services(bootstrap_url):

    session = FuturesSession(max_workers=10)

    jobs = {
        url: session.get(url,
                         headers={"Accept": "application/json"},
                         params={"type": "service"})
        for url in load_sls_hosts(bootstrap_url)
    }

    all_responses = {}
    for url, job in jobs.items():

        try:
            rsp = job.result()
        except requests.ConnectionError as e:
            logger.error(str(e))
            continue

        if rsp.status_code == 200:
            all_responses[url] = rsp.json()
        else:
            logger.error("'%s' returned status code %d" %
                         (url, rsp.status_code))
            all_responses[url] = []
    return all_responses
Example #14
    def asyncDepartureBoards(self, stops, **kwargs):
        token, scope = self.auth.get_token()
        header = {"Authorization": token}
        url = "https://api.vasttrafik.se/bin/rest.exe/v2/departureBoard"
        kwargs["format"] = "json"

        # Start a session for the async requests
        session = FuturesSession()
        reqs = []
        for stop in stops:
            # Send the requests
            params = kwargs
            params["id"] = stop
            future = session.get(url, headers=header, params=params)
            reqs.append(future)
            time.sleep(0.01) # Without this everything breaks

        responses = []
        for req in reqs:
            # Get the results
            r = req.result()
            responses.append(r)

        # Check for errors
        resp = self.auth.check_responses(responses, scope)

        output = []
        for response in resp:
            output.append(response.json())

        return output
Example #15
def get_blocks(*heights):
    urls = [get_block_coinsecrets_url(h) for h in heights]
    session = FuturesSession()
    reqs = [session.get(url) for url in urls]
    responses = [r.result() for r in reqs]
    resps_json = [json.loads(r.content.decode()) for r in responses]
    return resps_json
Example #16
    def check_responses(self, response_list, scope):
        fine = True
        for resp in response_list:
            # Check for any errors
            if resp.status_code != 200:
                fine = False

        if fine:
            return response_list
        else:
            print("Renewing token " + str(scope))
            token = self.__renew_token(scope)
            header = {"Authorization": token}

            # Retry!
            session = FuturesSession()
            reqs = []
            for resp in response_list:
                # Send the new requests
                url = resp.url
                reqs.append(session.get(url, headers=header))
                time.sleep(0.01)

            # Get the results
            resps = []
            for req in reqs:
                resps.append(req.result())

            if resps[0].status_code != 200:
                raise requests.exceptions.HTTPError(f'{resps[0].status_code} {resps[0].reason}')

            return resps
Example #17
    def _chapter_pages(self, soup, html):

        # a <select> tag has options that each points to a page
        neighbour = soup.find('select', id='combobox').find_next_sibling('select')
        opts = neighbour.find_all('option')
        urls = [opt['value'] for opt in opts]

        # Page 1 has already been fetched (stored in this html param, duh!)
        # so let's save ourselves an http request
        pages_htmls = [html]
        urls = urls[1:]
        session = FuturesSession()

        for order, url in enumerate(urls):
            uri = self.netlocs[2] + url
            print(uri)
            res = session.get(uri).result()
            if res.status_code != 200:
                raise HtmlError('cannot fetch')
            pages_htmls.append(res.content)

        returns = []
        for page_html in pages_htmls:
            soup = BeautifulSoup(page_html)
            img_url = soup.find('img', id='mainImg')['src']
            returns.append(img_url)
        return returns
Example #18
    def get_frames(self, count):
        """Get a list of images from Environment Canada."""
        soup = BeautifulSoup(requests.get(self.IMAGES_URL.format(self.station_code)).text, 'html.parser')
        image_links = [tag['href'] for tag in soup.find_all('a') if '.gif' in tag['href']]

        if len([i for i in image_links[:8] if 'COMP' in i]) > 4:
            image_string = '_'.join([self.station_code, 'COMP_PRECIPET', self.get_precip_type() + '.gif'])
        else:
            image_string = '_'.join([self.station_code, 'PRECIPET', self.get_precip_type() + '.gif'])

        images = [tag['href'] for tag in soup.find_all('a') if image_string in tag['href']]

        futures = []
        session = FuturesSession(max_workers=count)

        for i in reversed(images[:count]):
            url = self.FRAME_URL.format(self.station_code, i)
            futures.append(session.get(url=url).result().content)

        def add_layers(frame):
            frame_bytesio = BytesIO()
            base = Image.open(BytesIO(frame)).convert('RGBA')
            base.alpha_composite(self.roads)
            base.alpha_composite(self.cities)
            base.save(frame_bytesio, 'GIF')
            frame_bytesio.seek(0)
            return frame_bytesio.read()

        frames = [add_layers(f) for f in futures if f[0:3] == b'GIF']

        """Repeat last frame."""
        for i in range(0, 2):  # pylint: disable=unused-variable
            frames.append(frames[count - 1])

        return frames
Example #19
class BlueFloodMetricsClient(object):

    def __init__(self, token, project_id, executors):
        self.token = token
        self.project_id = project_id
        self.session = FuturesSession(max_workers=executors)
        self.headers = {
            'X-Project-ID': self.project_id
        }
        if self.token:
            self.headers.update({
                'X-Auth-Token': self.token
            })
        self.session.headers.update(self.headers)

    def async_requests(self, urls):
        futures_results = []
        for url in urls:
            LOG.info("Request made to URL: {0}".format(url))
            futures_results.append(self.session.get(url))

        responses = []

        for future in futures.as_completed(fs=futures_results):
            resp = future.result()
            LOG.info("Request completed to URL: {0}".format(resp.url))
            responses.append((resp))

        return responses
Example #20
 def get_usgs_nearby_cities(self, earthquake):
     """
     performs request on local earthquake nearby cities url and returns the data
     """
     try:
         nearest_cities_object = earthquake[
             "properties"]["products"]["nearby-cities"]
         nearest_cities_url = nearest_cities_object[0][
             "contents"]["nearby-cities.json"]["url"]
     except:
         nearest_cities_url = None
     if nearest_cities_url:
         session = FuturesSession(max_workers=1)
         nearest_cities_response = session.get(
             nearest_cities_url, headers=app.config["API_MANAGER_HEADERS"])
         nearest_cities_details = nearest_cities_response.result().json()
         list_of_nearby_cities = []
         for item in nearest_cities_details:
             city = NearestCity(
                 id=None,
                 distance=item["distance"],
                 direction=item["direction"],
                 name=item["name"],
                 latitude=item["latitude"],
                 longitude=item["longitude"],
                 population=item["population"],
                 earthquake_id=None
             )
             list_of_nearby_cities.append(city)
         earthquake["properties"]["nearest_cities_url"] = nearest_cities_url
         earthquake["properties"]["nearest_cities"] = list_of_nearby_cities
     else:
         earthquake["properties"]["nearest_cities_url"] = None
         earthquake["properties"]["nearest_cities"] = []
     return earthquake
Example #21
    def search(self, q='', cat='', indexer='all', **kwargs):
        self.logger.debug("Searching for %s category %s on indexer %s" % (q, cat, indexer))
        if cat:
            cat = '&cat=' + cat

        sess = FuturesSession(max_workers=8)
        job_list = []

        if indexer == 'all':
            for i in NewznabIndexers.select():
                cmd = 'search&q=' + urllib2.quote(q.encode(encoding="UTF-8")) + cat + '&extended=1'
                u = i.apiurl
                u += cmd
                u = u.replace('o=json', 'o=xml')
                job_list.append(u)
        else:
            for i in NewznabIndexers.select():
                if i.name == indexer:
                    cmd = 'search&q=' + urllib2.quote(q.encode(encoding="UTF-8")) + cat + '&extended=1'
                    u = i.apiurl
                    u += cmd
                    u = u.replace('o=json', 'o=xml')
                    job_list.append(u)

        result = []
        futures = []

        for url in job_list:
            try:
                self.logger.debug('Fetching search results from %s' % url)
                t = sess.get(url, timeout=60, headers=self.headers)
            except Exception as e:
                self.logger.error('%s when fetching %s' % (e, url))
                continue

            futures.append(t)

        for future in cf.as_completed(futures):
            if future.exception() is not None:
                self.logger.error('Failed to fetch results %s' % (future.exception()))
            else:
                f = []
                res = future.result()
                try:
                    provider_res = xmltodict.parse(res.content, attr_prefix='')
                    if provider_res:
                        if 'rss' in provider_res:
                            if 'channel' in provider_res['rss']:
                                if 'item' in provider_res['rss']['channel']:
                                    f.append(provider_res['rss']['channel'])

                        if 'error' in provider_res:
                            self.logger.debug('%s %s' % (provider_res['rss']['channel']['title'], provider_res['error']['description']))

                except Exception as e:
                    self.logger.error(res.url, e, exc_info=True)

                result.append(f)

        return result
Example #22
class RemoteTTS(TTS):
    """
    Abstract class for a Remote TTS engine implementation.

    It provides a common logic to perform multiple requests by splitting the
    whole sentence into small ones.
    """

    def __init__(self, lang, voice, url, api_path, validator):
        super(RemoteTTS, self).__init__(lang, voice, validator)
        self.api_path = api_path
        self.url = remove_last_slash(url)
        self.session = FuturesSession()

    def execute(self, sentence):
        phrases = self.__get_phrases(sentence)

        if len(phrases) > 0:
            for req in self.__requests(phrases):
                try:
                    self.__play(req)
                except Exception as e:
                    LOGGER.error(e)

    @staticmethod
    def __get_phrases(sentence):
        phrases = re.split(r'\.+[\s+|\n]', sentence)
        phrases = [p.replace('\n', '').strip() for p in phrases]
        phrases = [p for p in phrases if len(p) > 0]
        return phrases

    def __requests(self, phrases):
        reqs = []
        for p in phrases:
            reqs.append(self.__request(p))
        return reqs

    def __request(self, p):
        return self.session.get(
            self.url + self.api_path, params=self.build_request_params(p),
            timeout=10, verify=False)

    @abc.abstractmethod
    def build_request_params(self, sentence):
        pass

    def __play(self, req):
        resp = req.result()
        if resp.status_code == 200:
            self.__save(resp.content)
            play_wav(self.filename).communicate()
        else:
            LOGGER.error(
                '%s Http Error: %s for url: %s' %
                (resp.status_code, resp.reason, resp.url))

    def __save(self, data):
        with open(self.filename, 'wb') as f:
            f.write(data)
Example #23
def request_product(auth_token, duns, product_code, version='3.1'):
    session = FuturesSession()
    url = 'https://maxcvservices.dnb.com/V' + version + '/organizations/' + duns + '/products/' + product_code
    url += "?OrderReasonCode=6332"
    print(url)
    headers = {'Authorization': auth_token}
    future = session.get(url, headers=headers)
    return future
Example #24
def async_requests(locations, site=None):
    session = FuturesSession()
    check_date = datetime.now() + timedelta(hours=-4)
    for location in locations:
        gig = Gigs.select().where(Gigs.location.contains(location)).order_by(Gigs.datetime.desc()).first()
        if (gig is None) or ((datetime.strptime(gig.datetime, '%Y-%m-%d %H:%M') < check_date)):
            url = "https://{}.craigslist.org/search/{}/".format(location, (site or CRAIGSLIST_SITE))
            future = session.get(url, background_callback=insert_callback)
Example #25
def get_games_chessdotcom(username: str, limit: PositiveInt = 10) -> dict:
    """
    Get all the games from a chess.com user.
    """
    session = FuturesSession()

    # get the list of monthly archives
    url = urljoin(CHESSDOTCOM_API_HOST,
                  f"pub/player/{username}/games/archives")
    archives = session.get(url).result()
    archives.raise_for_status()

    # fetch all the games
    games = []
    with FuturesSession() as session:
        futures = [
            session.get(url) for url in reversed(archives.json()["archives"])
        ]
        for future in futures:
            monthly_games = future.result()
            sleep = 1
            while monthly_games.status_code == 429:  # pragma: no cover
                msg = (f"Sleeping for {sleep}s while "
                       f"getting chess.com games for {username}.")
                logger.debug(msg)
                time.sleep(2 * sleep)
                monthly_games = session.get(monthly_games.url).result()
            monthly_games.raise_for_status()
            for game in monthly_games.json()["games"]:
                game["white"]["name"] = game["white"].pop("username")
                game["white"]["url"] = game["white"].pop("@id")
                game["black"]["name"] = game["black"].pop("username")
                game["black"]["url"] = game["black"].pop("@id")
                game["eco_url"] = game.pop("eco", None)
                game["tournament_url"] = game.pop("tournament", None)
                game["match_url"] = game.pop("match", None)
                games.append(game)
                if len(games) >= limit:
                    break

            if len(games) >= limit:
                break

    [Game(**game)
     for game in games[:limit]]  # just to push through model validation
    return make_response(200, games[:limit])
Example #26
def get_subscriptions(feeds, workers):
    session = FuturesSession(max_workers=workers)
    futures = [session.get(f, hooks={'response': feed_to_dicts})
               for f in feeds]
    entry_lists = [f.result().data for f in futures]
    subscriptions = sorted([i for s in entry_lists for i in s],
                           key=lambda x: x['published'], reverse=True)
    return subscriptions
Example #27
	def requestPool(parameters, url):
		"""
		Generator that asynchronously processes profile requests and yields profile futures.
		"""
		session = FuturesSession(max_workers=10)
		for parameter in parameters:
			future = session.get(url, params=parameter)
			yield future
Example #28
 def async_next(self, list_url):
     '''utility to dowload like async.io multiple url
     and send them to extract_nexts
     '''
     session = FuturesSession(max_workers=5)
     for url in list_url:
         future = session.get(url)
         future.add_done_callback(self.extract_nexts)
Example #29
def requestsAsync(urls, header, pageSize):
    '''helper function to make async HTTP requests'''
    session = FuturesSession(executor=cf.ThreadPoolExecutor(max_workers=10))
    responses = {}
    for url in urls:
        request = session.get(url['url'], headers=header)
        responses[request] = {'cHash': url['cHash']}
    return responses
Example #30
 def fetch(self):
     session = FuturesSession()
     dat = session.get(self.url).result().json()
     if self.field:
         dat = dat[self.field]
     if self.records is False:
         dat = [dat]
     raise Return(dat)
Example #31
 def send_requests(self, urls):
     session = FuturesSession()
     futures = [session.get(u, headers=self.headers) for u in urls]
     result = []
     for f in futures:
         res = json.loads(f.result().text)['result']['match']  # [{},{},...]
         result += res
     return result
Example #32
def download_all_sites(sites):
    session = FuturesSession()
    req_list = []
    for url in sites:
        req_list.append(session.get(url))
    for req, url in zip(req_list, sites):
        response = req.result()
        print(f"Read {len(response.content)} from {url}")
Example #33
    def asyncRequest(self, reqUrl=None, urlParams=None, targetArr=None):
        taskUrlname = reqUrl.split("/")[3]
        t1 = time.time()
        while True:
            if self.future_req:
                session = FuturesSession(max_workers=self.chunksize)
                if urlParams == None:
                    rs = (session.get(reqUrl.format(i)) for i in targetArr)
                else:
                    rs = (session.get(reqUrl.format(i, *urlParams))
                          for i in targetArr)
                try:
                    myresponse = list(map(lambda x: x.result(), rs))
                except:
                    continue
            else:
                if not self.grequests_imported:
                    import grequests
                    self.grequests_imported = True
                if urlParams == None:
                    rs = (grequests.get(reqUrl.format(i),
                                        proxies=self.proxy,
                                        timeout=10) for i in targetArr)
                else:
                    rs = (grequests.get(reqUrl.format(i, *urlParams),
                                        proxies=self.proxy,
                                        timeout=10) for i in targetArr)
                try:
                    myresponse = grequests.map(rs)
                except:
                    continue
            status = [
                int(i.status_code == 200) for i in myresponse if i != None
            ]
            httpstatus = sum(status)
            print("sum of http200Status {0} : {1}".format(
                taskUrlname, httpstatus))
            if taskUrlname == "ISteamUserStats":
                httpstatus += 1
            self.proxySetting()

            if len(status) != 0 and httpstatus != 0:
                break
        #print("t1",time.time()-t1)
        return myresponse, httpstatus
Example #34
def get(request):
    session = FuturesSession(max_workers=1)
    future = next(as_completed([session.get(
        request.url, headers=request.headers, timeout=request.timeout)]))
    if future.exception() is not None:
        return DownloadError(request, future.exception())
    else:
        resp = future.result()
        return HtmlDocument(resp.url, resp.content)
Example #35
 def fetch_pe_ratios(self):
     self.pe_ratios = MSNMoney(self.ticker_symbol)
     session = FuturesSession()
     rpc = session.get(self.pe_ratios.url,
                       allow_redirects=True,
                       hooks={
                           'response': self.parse_pe_ratios,
                       })
     self.rpcs.append(rpc)
Example #36
def search_restaurants(loc_id,
                       res_names,
                       cat_ids,
                       cu_ids,
                       establ_ids,
                       connection_session=None):
    url = 'https://developers.zomato.com/api/v2.1/search'

    start = 0
    headers = {'user_key': '85955a2247d2beb1f5ecadf80fbc4666'}

    session = FuturesSession()
    futures = []
    ids = []

    for res_name in res_names:
        params = {
            'entity_id': loc_id,
            'q': res_name,
            'cuisine': list_to_string(cu_ids),
            'establishment_type': list_to_string(establ_ids),
            'category': list_to_string(cat_ids),
            'entity_type': 'city',
            'start': start
        }
        response = connection_session.get(
            url, headers=headers,
            params=params) if connection_session else requests.get(
                url, headers=headers, params=params)

        if response:
            body = response.json()
            while start < body['results_found']:
                params = {
                    'entity_id': loc_id,
                    'q': res_name,
                    'cuisine': list_to_string(cu_ids),
                    'establishment_type': list_to_string(establ_ids),
                    'category': list_to_string(cat_ids),
                    'entity_type': 'city',
                    'start': start
                }
                futures.append(session.get(url, headers=headers,
                                           params=params))
                start += 20

        start = 0

    for future in cf.as_completed(futures):
        response = future.result()

        if response:
            body = response.json()
            for restaurant in body['restaurants']:
                ids.append(restaurant['restaurant']['R']['res_id'])

    return ids
Example #37
def check_on_site(entries):
    """ Checks those entries on gepetto's website """
    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=40))
    for future in [
            session.get(GEPETTO_URL % entry['ID']) for entry in entries
    ]:
        response = future.result()
        if b'Invalid bibtex entry' in response.content:
            print('INVALID', response.url)
Example #38
 def fetch_yahoo_finance_quote(self):
     self.yahoo_finance_quote = YahooFinanceQuote(self.ticker_symbol)
     session = FuturesSession()
     rpc = session.get(self.yahoo_finance_quote.url,
                       allow_redirects=True,
                       hooks={
                           'response': self.parse_yahoo_finance_quote,
                       })
     self.rpcs.append(rpc)
Example #39
def shopee(keyword):
    url = "https://shopee.tw/api/v2/search_items/?by=ctime&keyword={}&limit=50&newest=0&order=desc&page_type=search".format(
        urllib.parse.quote_plus(keyword))

    title = "蝦皮搜尋 - {}".format(keyword)

    feed = feedgen.feed.FeedGenerator()
    feed.author({"name": "Feed Generator"})
    feed.id(url)
    feed.link(href=url, rel="alternate")
    feed.title(title)

    r = requests.get(url, headers={"User-agent": user_agent}, timeout=5)
    body = json.loads(r.text)

    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))
    futures = []

    for item in body["items"]:
        itemid = item["itemid"]
        name = item["name"]
        shopid = item["shopid"]

        itemapi_url = "https://shopee.tw/api/v2/item/get?itemid=%d&shopid=%d" % (
            itemid,
            shopid,
        )
        futures.append(
            session.get(itemapi_url,
                        headers={"User-agent": user_agent},
                        timeout=5))

    for f in futures:
        r = f.result()
        item = json.loads(r.text)["item"]

        itemid = item["itemid"]
        name = item["name"]
        shopid = item["shopid"]

        prod_url = "https://shopee.tw/product/%d/%d" % (shopid, itemid)
        img_url = "https://cf.shopee.tw/file/%s" % (item["image"])

        content = '{}<br/><img alt="{}" src="{}"/>'.format(
            html.escape(name), html.escape(name), html.escape(img_url))

        entry = feed.add_entry()
        entry.content(content, type="xhtml")
        entry.id(prod_url)
        entry.link(href=prod_url)
        entry.title(name)

    bottle.response.set_header("Cache-Control", "max-age=300,public")
    bottle.response.set_header("Content-Type", "application/atom+xml")

    return feed.atom_str()
Example #40
class HFStory(Story):
    publisher = "hentai-foundry.com"
    author = chapters = title = None

    def __init__(self, url):
        self.url = url

        # need to be slightly careful around cookies/etc
        self.session = requests.Session()
        self.session.headers["User-Agent"] = "Mozilla/5"
        self.futures = FuturesSession(session=self.session, max_workers=5)

        r = self.session.get(self.url)
        if not r.ok:
            raise IOError("Error: {}".format(r.status_code))
        self.soup = soupify(r.content)

        a = self.soup.find(id="frontPage_link")
        if a:
            r = self.session.get(urljoin(self.url, a["href"] + "&size=1000"),
                                 cookies=r.cookies)
            if not r.ok:
                raise IOError("Error: {}".format(r.status_code))
            self.soup = soupify(r.content)

        if self.soup.find(id="viewChapter"):
            self.url = url = urljoin(
                url,
                self.soup.select_one(".storyRead a:not(.pdfLink)")["href"])
            self.soup = soupify_request(self.futures.get(url))

        self.author = self.soup.select_one(
            ".storyInfo a[href^='/user']").text.strip()
        self.title = self.soup.select_one(
            ".titlebar a[href^='/stories']").text.strip()

        box = self.soup.find("h2",
                             text="Chapters").parent.find(class_="boxbody")
        self.chapters = [
            HFChapter(self.futures.get(urljoin(self.url,
                                               p.find("a")["href"])))
            for p in box.find_all("p")
        ]
Example #41
def fetch_raw_habr_pages_requests_futures(pages=10):
    ''' Получить сырые данные с хабра '''
    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=20),
                             session=Session())
    pages_habr = []
    for page_number in range(1, pages + 1):
        r = session.get('https://habr.com/all/page%d/' % page_number)
        pages_habr.append(r)
    pages_habr = [r.result().text for r in pages_habr]
    return pages_habr
Example #42
def collect_html(btags):
    session = FuturesSession()

    url_prefix = 'http://playoverwatch.com/en-us/career/pc/'
    btag_urls = [url_prefix + quote(btag) for btag in btags]
    btag_htmls = [
        session.get(url).result().text[241780:241850] for url in btag_urls
    ]

    return btag_htmls[0].find('u-align-center h5">3685<')
Example #43
def load_formats():
    session = FuturesSession()

    def bg_cb(sess, resp):
        resp.data = utils.load_js_obj_literal(resp.text)

    future = session.get('https://raw.githubusercontent.com/Zarel/Pokemon-Showdown/master/data/formats-data.js',
                         background_callback=bg_cb)
    r = future.result()
    return r.data
Example #44
def retrieve_users_status(contest_id, handles):
    session = FuturesSession(max_workers=1)
    futures = {}
    for handle in handles:
        futures[handle] = session.get("http://codeforces.com/api/contest.status?contestId=%d&handle=%s" % (contest_id, handle))
    ret = {}
    for handle, future in futures.items():
        response = future.result()
        ret[handle] = response.json()['result']
    return ret
Example #45
def create_html_request(word: str, old_id: int, session: FuturesSession):
    """Create a request to get the HTML of a word definition page."""
    print(f"Creating request for '{word}-{old_id}' HTML.")

    def on_load(response, *args, **kwargs):
        save_html_test_file(word, old_id, response)

    return session.get(wiktionary_base_url + word,
                       params={'oldid': old_id},
                       hooks={'response': on_load})
Example #46
def get_url_batch(url_list, use_ssl=False, callback='', threads=5):
    """
    Processes a list of URLs, sending the results back to the calling
    function in real-time via the `callback` parameter
    """

    # Start a counter for a status message
    tick = {}
    tick['total'] = len(url_list)
    tick['current'] = 0

    # Break the url list into smaller lists based on thread size
    queue = [url_list[x:x + threads] for x in range(0, len(url_list), threads)]

    # Define the protocol
    if use_ssl:
        proto = 'https://'
    else:
        proto = 'http://'

    # Start a requests object
    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=threads))

    # Using the async requests-futures module, work in batches based on
    # the 'queue' list created above. Call each URL, sending the results
    # back to the callback function.
    for batch in queue:
        batch_pending = {}
        batch_results = {}

        # First, grab the pending async request and store it in a dict
        for url in batch:
            batch_pending[url] = session.get(proto + url)

        # Then, grab all the results from the queue
        for url in batch_pending:
            batch_results[url] = batch_pending[url].result()

        # Now, send all the results to the callback function for analysis
        # We need a way to stop processing unnecessary brute-forces, so the
        # callback may tell us to bail out.
        for url in batch_results:
            check = callback(batch_results[url])
            if check == 'breakout':
                return

        # Refresh a status message
        tick['current'] += threads
        sys.stdout.flush()
        sys.stdout.write("    {}/{} complete...".format(
            tick['current'], tick['total']))
        sys.stdout.write('\r')

    # Clear the status message
    sys.stdout.write('                            \r')
Example #47
class Fetcher:
    def __init__(self, pages_packet):
        """
        Pages structure:
        {
            URL: (String base_url->[String individual_url]),
            ...
        }
        """

        # Singleton
        self.session = FuturesSession()
        self.pages_packet = pages_packet

    def _format_urls(self):
        """
        Using the mapping functions to create all variants of the urls, create a list of all url variants.
        :return: all url variants.
        :rtype: list
        """

        output = list()

        for url in Urls:
            mapper = self.pages_packet.get(url)
            pages = mapper()
            output += pages

        return output

    def get_results(self):
        """
        Fetch all HTML pages asynchronously. Return object structure:

        {
            "URL_NAME": [HTML(page)],
            ...
        }

        Use get() to get the HTML of a given page.

        :return: each page name with its corresponding result.
        :rtype: dict
        """

        results = list()
        urls = self._format_urls()

        for url in urls:
            request = self.session.get(url)
            results.append(request.result())

        output = dict(zip([url.name for url in Urls], results))

        return output
Example #48
def add_all_matches(*years: Iterable[int], new: bool) -> None:
    """
    Given a list of years, analyzes all matches from those years.

    :param years:
        Sequence of one or more years
    :param new:
        Whether or not to only add new matches
    """

    years = [*years]
    print("Executing for years: %s" % years)

    requester = FuturesSession(executor=ProcessPoolExecutor(30), session=requests.Session())

    event_get = lambda e: requester.get(event_url_template(event=e.key), headers=__api_key)
    teams_get = lambda e: requester.get(event_teams_url_template(event=e.key), headers=__api_key)
    matches_get = lambda e: requester.get(event_matches_url_template(event=e.key), headers=__api_key)

    if new:
        events = Event.objects.prefetch_related('alliances').filter(
            year__in=years, end_date__lt=datetime.now()).annotate(match_count=Count('match')).filter(
            match_count=0).order_by('end_date').all()
    else:
        events = Event.objects.prefetch_related('alliances').filter(
            year__in=years, end_date__lt=datetime.now()).order_by('end_date').all()

    print("Starting {} HTTP requests split between {} processes.".format(3 * len(events),
                                                                         requester.executor._max_workers))
    matches_futures = [matches_get(e) for e in events]  # type: List[Future]
    event_futures = [event_get(e) for e in events]  # type: List[Future]
    event_teams_futures = [teams_get(e) for e in events]  # type: List[Future]

    print("Waiting on HTTP requests.")
    wait(matches_futures + event_futures + event_teams_futures)
    requester.executor.shutdown(wait=True)

    arg_list = zip(events, [list_of_matches_json_converter(f.result().json()) for f in matches_futures], event_futures,
                   event_teams_futures)

    for args in arg_list:
        _add_matches_from_event(*args)
Example #49
def _async_requests(urls):
    """
    Sends multiple non-blocking requests. Returns
    a list of responses.

    :param urls:
        List of urls
    """
    session = FuturesSession(max_workers=30)
    futures = [session.get(url) for url in urls]
    return [future.result() for future in futures]
Example #50
 def test_supplied_session(self):
     """ Tests the `session` keyword argument. """
     requests_session = session()
     requests_session.headers['Foo'] = 'bar'
     sess = FuturesSession(session=requests_session)
     future = sess.get(httpbin('headers'))
     self.assertIsInstance(future, Future)
     resp = future.result()
     self.assertIsInstance(resp, Response)
     self.assertEqual(200, resp.status_code)
     self.assertEqual(resp.json()['headers']['Foo'], 'bar')
Example #52
def main():
    urls = {}
    requests = []
    session = FuturesSession(max_workers=10)
    for year in YEARS_TO_PARSE:
        landing_page = SEARCH_LANDING + '&year=' + str(year)
        landing_res = session.get(landing_page).result()
        landing_bs = BS(landing_res.content, 'html5lib')
        number_span = landing_bs.select('li.ep_tag_selected span')[0].text
        number_of_question = int(re.findall(r'\d+', number_span)[0])
        number_of_pages = math.ceil(
            number_of_question / 10)  # change to per page
        for page_num in range(1, number_of_pages + 1):
            res = session.get(landing_page + '&currentPage=' + str(page_num))
            requests.append(res)
        for request in tqdm(requests):
            try:
                request_result = request.result()
            except ConnectionError:
                print(
                    'Due to the ConnectionError page {} hasn\'t been parsed'.format(page_num))
                continue
            page = BS(request_result.content, "html5lib")
            if page:
                for notice in page.select('.results div.notice'):
                    for url in notice.select('ul.documents li a'):
                        title_text = notice.select(
                            'p.title a.result_details_link')[0].text
                        title_date = notice.select(
                            'div.date_reference span.date')[0].text
                        question_format = url.get('href').split('.')[-1]
                        title = '{} ({}).{}'.format(
                            title_text, title_date, question_format)
                        title = re.sub(r'[\n\r\t]', '', title)
                        title = title.replace('/', '-')
                        urls[url.get('href')] = title
            else:
                break
    if not os.path.exists(FOLDER_TO_DOWNLOAD):
        os.mkdir(FOLDER_TO_DOWNLOAD)
    download(urls, FOLDER_TO_DOWNLOAD)
Example #53
def _async_requests(urls):
    """
    Sends multiple non-blocking requests. Returns
    a list of responses.

    :param urls:
        List of urls
    """

    session = FuturesSession(max_workers=30)
    futures = [session.get(url) for url in urls]
    return [future.result() for future in futures]
Example #54
 def testMakePosts(self):
     s = FuturesSession()
     new_post = json.dumps({'content': 'testing'})
     print(new_post)
     p = s.post('https://cs242project.herokuapp.com/submitPost', data=new_post)
     res = p.result()
     print(res)
     print(res.content)
     r = s.get('https://cs242project.herokuapp.com/getPosts')
     res2 = r.result()
     print(res2.content)
     self.assertEqual("test", "test")
Example #55
def add_all(*years: Iterable[int]):
    years = [*years]

    requester = FuturesSession(executor=ProcessPoolExecutor(MAX_WORKERS), session=requests.Session())
    api_key = settings.TBA_API_HEADERS

    event_list_get = lambda y: requester.get(event_by_year_url_template(year=y), headers=api_key)
    event_get = lambda key: requester.get(event_url_template(event=key), headers=api_key)
    event_teams_get = lambda key: requester.get(event_teams_url_template(event=key), headers=api_key)

    print("Getting event lists for years: %s" % years)
    event_list_futures = [event_list_get(y) for y in years]
    print("Waiting on %d requests..." % len(years))
    wait(event_list_futures)
    print("Done!\n")

    event_lists = [f.result().json() for f in event_list_futures]
    event_data_jsons = [item for year_data in event_lists for item in year_data]

    print("Grabbing event keys...")
    event_keys = [event_data['key'] for event_data in event_data_jsons]
    print("Starting {} requests for event data and teams-by-event data, split between {} processes...".format(
        2 * len(event_keys), MAX_WORKERS))
    event_json_futures = [event_get(key) for key in event_keys]
    event_team_json_futures = [event_teams_get(key) for key in event_keys]

    print("Waiting...")
    wait(event_json_futures + event_team_json_futures)
    requester.executor.shutdown()
    print("Done!\n")

    event_jsons = [f.result().json() for f in event_json_futures]
    event_team_json = [f.result().json() for f in event_team_json_futures]

    print("Adding teams data to event data under 'teams' field...")
    event_jsons = [dict(e, teams=t) for e, t in zip(event_jsons, event_team_json)]
    arg_list = zip(event_keys, event_jsons)

    for args in arg_list:
        add_event(*args)
Example #56
def listings(base_url, needles):
    """ takes the needles as a || seperated list of needles and
    returns a map of neeldes to a list of dictionaries for matches """
    needles = [kw.strip() for kw in needles.split("||")]

    # Prepare the URL for requests
    url = base_url + "/tv/getProgInfo?major={}"
    session = FuturesSession(max_workers=30)

    # initialize our matches
    matches = {}
    for needle in needles:
        matches[needle] = []

    # Check each channel concurrently
    responses = {}
    for i in SCAN_RANGE:
        responses[i] = session.get(url.format(i))

    # Wait on all responses
    for i in SCAN_RANGE:
        responses[i] = responses[i].result()
        log.debug("channel {} has responded".format(i))

    # Filter out non-200 responses
    responses_200 = []
    for i in SCAN_RANGE:
        if responses[i].status_code == 200:
            responses_200.append(responses[i].text)

    # Make nice JSON of listings
    listings = []
    for response in responses_200:
        tmp = json.loads(response)
        tmp = {
            "title": tmp["title"],
            "major": tmp["major"],
            "callsign": tmp["callsign"],
            "duration": tmp["duration"],
            "startTime": tmp["startTime"],
            "isRecording": tmp["isRecording"],
        }
        listings.append(tmp)

    # Map listings to matching needles
    for listing in listings:
        for needle in needles:
            if needle.lower() in listing["title"].lower():
                log.info("Match for {} with {}".format(needle, listing["title"]))
                matches[needle].append(listing)

    return matches
Example #57
def extract_all(name):

    spider = SPIDERS[name]


    session = FuturesSession(max_workers=10)

    futures=[(pid, session.get(url)) for pid, url in get_urls()]

    future_responses = [(pid, future.result()) for pid, future in futures]

    for pid,response in future_responses:
        spider.extract(pid,response)
Example #58
 def get_usgs_details_response(self, url):
     """
     performs request on local earthquake details url and returns the data
     """
     session = FuturesSession(max_workers=1)
     usgs_api_details = session.get(
         url, headers=app.config["API_MANAGER_HEADERS"])
     try:
         earthquake_details = usgs_api_details.result().json()
         return earthquake_details
     except requests.exceptions.RequestException as exception:
         logger.error("%s" % exception)
         return False
Example #59
def resps_from_urls(potential_urls):
    """
    Gather valid responses from the list of potential urls
    """
    resps = []
    session = FuturesSession(max_workers=30)
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0"}

    for url in potential_urls:
        resp = session.get(url, timeout=25, verify=False, headers=headers)
        resps.append(resp)

    return resps