Example #1
    def update_courses(self, max_retries=5, timeout=20):
        """Extract courses from the index page.

        :return: list of Course objects
        """
        session = requests.Session()
        retry_adapter = HTTPAdapter(max_retries=max_retries)
        session.mount(self._course_list_url, retry_adapter)
        html = session.get(self._course_list_url).text
        links, names = self._course_link_filter(html)
        links = list(
            map(lambda l: parse.urljoin(self._course_list_url, l), links))

        session = FuturesSession()
        # we assume the individual courses have the same prefix as the course
        # list
        parsed_url = parse.urlparse(self._course_list_url)
        prefix = parsed_url.scheme + '://' + parsed_url.netloc
        session.mount(prefix, retry_adapter)
        futures = []
        for i, l in enumerate(links):
            print('Processing {} ({} of {})'.format(l, i + 1, len(links)))
            futures.append((l, session.get(l, timeout=timeout)))

        self._courses = [
            course for lst in (self._course_filter(f.result().text, page_url=l)
                               for l, f in futures
                               if not f.exception()) for course in lst
        ]
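The comment above assumes the individual course pages share the course list's scheme and host; below is a minimal, self-contained sketch of that prefix-mount pattern (the URL is a placeholder, not from the original project).

from urllib import parse

from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession

course_list_url = 'https://example.org/courses/index.html'  # hypothetical URL
parsed_url = parse.urlparse(course_list_url)
prefix = parsed_url.scheme + '://' + parsed_url.netloc  # 'https://example.org'

session = FuturesSession()
session.mount(prefix, HTTPAdapter(max_retries=3))  # retries apply to every URL under the prefix

future = session.get(parse.urljoin(course_list_url, 'course-1.html'), timeout=20)
response = future.result()  # blocks until the request finishes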
Example #2
def AsyncSession(
    retries: int = 10,
    backoff_factor: float = 0.3,
    allowed_methods: Iterable[str] = ('HEAD', 'TRACE', 'GET', 'POST', 'PUT', 'OPTIONS', 'DELETE'),
    status_forcelist: Iterable[int] = (408, 429, 500, 502, 503, 504),
) -> FuturesSession:
    """Return a Session object with full retry capabilities.

    Args:
        retries (int): number of retries
        backoff_factor (float): speed factor for retries (in seconds)
        allowed_methods (iterable): http methods to retry on
        status_forcelist (iterable): http status codes to retry on

    Returns:
        :py:class:`requests.Session`: session object
    """
    session = FuturesSession()
    retry = Retry(
        total=retries,
        connect=retries,
        read=retries,
        redirect=retries,
        # status=retries,
        allowed_methods=allowed_methods,
        status_forcelist=status_forcelist,
        backoff_factor=backoff_factor,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
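A minimal usage sketch of the factory above; the URLs are placeholders, and the Retry/HTTPAdapter/FuturesSession imports are assumed to be in scope as in the snippet.

urls = ['https://example.org/a', 'https://example.org/b']  # placeholder URLs

session = AsyncSession(retries=5, backoff_factor=0.5)
futures = [session.get(u, timeout=10) for u in urls]   # requests run concurrently
responses = [f.result() for f in futures]              # collect in submission order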
Example #3
def _init_session(session, **kwargs):
    if session is None:
        if kwargs.get('asynchronous'):
            session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
        else:
            session = Session()
        if kwargs.get('proxies'):
            session.proxies = kwargs.get('proxies')
        retries = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            method_whitelist=["HEAD", "GET", "OPTIONS", "POST", "TRACE"])
        session.mount('https://', TimeoutHTTPAdapter(
            max_retries=retries,
            timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)))
        # TODO: Figure out how to utilize this within the validate_response
        # TODO: This will be a much better way of handling bad requests than
        # TODO: what I'm currently doing.
        # session.hooks['response'] = \
        #     [lambda response, *args, **kwargs: response.raise_for_status()]
        session.headers.update({
            "User-Agent": random.choice(USER_AGENT_LIST)
        })
    return session
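TimeoutHTTPAdapter is not defined in this snippet. A common way to build such an adapter is to subclass HTTPAdapter and inject a default timeout when the caller does not pass one; the sketch below is that assumption, not the project's actual class.

from requests.adapters import HTTPAdapter

class TimeoutHTTPAdapter(HTTPAdapter):
    """HTTPAdapter that applies a default timeout when no timeout is given."""

    def __init__(self, *args, timeout=None, **kwargs):
        self.timeout = timeout
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        # fall back to the adapter's default timeout if the caller gave none
        if kwargs.get('timeout') is None:
            kwargs['timeout'] = self.timeout
        return super().send(request, **kwargs)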
Example #4
def get_events_from_icalendars():
    global now, midnight

    now = localtz.localize(datetime.datetime.now())
    midnight = localtz.localize(datetime.datetime.combine(now, datetime.time(0,0,0)))

    cz = Calzone()

    session = FuturesSession()
    session.mount('https://', CacheControlAdapter(cache=FileCache('.webcache'), heuristic=ForceCacheHeuristic()))

    cals = {k: session.get(u) for k,u in calendars.items()}

    concurrent.futures.wait(cals.values())

    for k, req in cals.items():
        try:
            cz.load(req.result().text)
        except Exception as err:
            print("Failed to load calendar '{}'".format(k))
            print(err)

    events = []
    try:
        events = cz.get_events(midnight, midnight + datetime.timedelta(days=90))
    except Exception as e:
        print(e)

    events.sort(key=lambda e: e.start)

    return events
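ForceCacheHeuristic is not shown in this snippet. A comparable heuristic can be built on cachecontrol's BaseHeuristic; the sketch below is an assumption (it caches every response for one hour), not the original class.

from cachecontrol.heuristics import BaseHeuristic

class OneHourHeuristic(BaseHeuristic):
    """Force responses to be cacheable for one hour, regardless of their headers."""

    def update_headers(self, response):
        # rewrite the caching headers the cache layer sees
        return {'cache-control': 'max-age=3600'}

    def warning(self, response):
        return '110 - "response heuristically cached for one hour"'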
Example #5
class APNsClient(object):
    def __init__(self, cert_file, use_sandbox=False, use_alternative_port=False):
        server = 'api.development.push.apple.com' if use_sandbox else 'api.push.apple.com'
        port = 2197 if use_alternative_port else 443
        self.cert = cert_file
        self.server = "https://{}:{}".format(server, port)
        self.__connection = FuturesSession()
        self.__connection.mount('https://', HTTP20Adapter())

    def send_notification(self, tokens, notification, priority=NotificationPriority.Immediate, topic=None):
        # print(notification.dict())
        json_payload = json.dumps(notification.dict(), ensure_ascii=False, separators=(',', ':')).encode('utf-8')

        headers = {
            'apns-priority': priority.value
        }
        if topic:
            headers['apns-topic'] = topic

        if not isinstance(tokens, list):
            tokens = [tokens]

        for token in tokens:
            url = '{}/3/device/{}'.format(self.server, token)
            self.__connection.post(url, json_payload, headers=headers, cert=self.cert, background_callback=req_callback)
Example #6
def _init_session(session=None, **kwargs):
    session_headers = headers
    if session is None:
        if kwargs.get("asynchronous"):
            session = FuturesSession(max_workers=kwargs.get("max_workers", 8))
        else:
            session = Session()
        if kwargs.get("proxies"):
            session.proxies = kwargs.get("proxies")
        retries = Retry(
            total=kwargs.get("retry", 5),
            backoff_factor=kwargs.get("backoff_factor", 0.3),
            status_forcelist=kwargs.get("status_forcelist",
                                        [429, 500, 502, 503, 504]),
            method_whitelist=["HEAD", "GET", "OPTIONS", "POST", "TRACE"],
        )
        if kwargs.get("verify"):
            session.verify = kwargs.get("verify")
        session.mount(
            "https://",
            TimeoutHTTPAdapter(max_retries=retries,
                               timeout=kwargs.get("timeout", DEFAULT_TIMEOUT)),
        )
        # TODO: Figure out how to utilize this within the validate_response
        # TODO: This will be a much better way of handling bad requests than
        # TODO: what I'm currently doing.
        # session.hooks['response'] = \
        #     [lambda response, *args, **kwargs: response.raise_for_status()]
        user_agent = kwargs.get("user_agent", random.choice(USER_AGENT_LIST))
        session_headers["User-Agent"] = user_agent
        if kwargs.get("headers") and isinstance(kwargs.get("headers"), dict):
            session_headers.update(**headers)
        session.headers.update(**session_headers)
    return session
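The TODO above points at a response hook; a minimal sketch of that idea with plain requests follows (raising on 4xx/5xx is the assumed desired behaviour, not what the module currently does).

from requests import Session

def raise_on_error(response, *args, **kwargs):
    # called for every response; turns HTTP errors into exceptions
    response.raise_for_status()

session = Session()
session.hooks['response'].append(raise_on_error)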
Example #7
    def _get_raw_data(self):
        docktor_config = providers_config.providers['docktor']
        apps = []
        session = FuturesSession(max_workers=CONCURRENT_JOBS_LIMIT)
        session.mount('https://', self.__requests_http_adapter)
        session.mount('http://', self.__requests_http_adapter)
        for stage in docktor_config:
            for zone in docktor_config[stage]:
                apps_uri = '{uri}/apps/'.format(uri=docktor_config[stage][zone]['uri'])
                try:
                    r = session.get(apps_uri, timeout=REQUEST_TIMEOUT).result()
                    r.raise_for_status()
                    apps_list = r.json()
                except ValueError as e:
                    logger.error("Non json response {} from {}-{} docktor".format(r.content, stage, zone))
                    raise e
                except Exception as e:
                    logger.error("Exception raised on {}-{} docktor".format(stage, zone))
                    raise e

                future_apps_details = [session.get('{apps_uri}{app}'.format(apps_uri=apps_uri, app=app), timeout=REQUEST_TIMEOUT) for app in apps_list]

                try:
                    apps_details = [a.result() for a in future_apps_details]
                except Exception as e:
                    logger.error("Exception raised on {}-{} docktor".format(stage, zone))
                    raise e

                partial_get_app_info = partial(self.get_app_info, stage, zone)

                apps.extend(map(partial_get_app_info, apps_details))
        return apps
Example #8
def get_async_requests_session(num_retries,
                               backoff_factor,
                               pool_size,
                               status_forcelist=None):
    # Use requests & urllib3 to auto-retry.
    # If the backoff_factor is 0.1, then sleep() will sleep for [0.1s, 0.2s,
    # 0.4s, ...] between retries. It will also force a retry if the status
    # code returned is in status_forcelist.
    if status_forcelist is None:
        status_forcelist = [500, 502, 503, 504]
    session = FuturesSession(max_workers=pool_size)

    # If any regular response is generated, no retry is done. Without using
    # the status_forcelist, even a response with status 500 will not be
    # retried.
    retries = Retry(total=num_retries,
                    backoff_factor=backoff_factor,
                    status_forcelist=status_forcelist)

    # Mount handler on both HTTP & HTTPS.
    session.mount(
        'http://',
        HTTPAdapter(max_retries=retries,
                    pool_connections=pool_size,
                    pool_maxsize=pool_size))
    session.mount(
        'https://',
        HTTPAdapter(max_retries=retries,
                    pool_connections=pool_size,
                    pool_maxsize=pool_size))

    return session
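A minimal usage sketch of the helper above, with assumed parameter values and a placeholder URL.

session = get_async_requests_session(num_retries=3,
                                     backoff_factor=0.1,
                                     pool_size=10)
future = session.get('https://example.org/api')  # placeholder URL
response = future.result()  # retried automatically on 500/502/503/504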
Example #9
def compile_data(
    api_url,
    last_audit_year,
    last_opinion_year,
    last_uifw_year,
    last_audit_quarter,
):
    # Setup the client
    http_client = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))
    http_client.mount(
        f'{urlparse(api_url).scheme}://',
        HTTPAdapter(max_retries=Retry(total=5,
                                      backoff_factor=1,
                                      status_forcelist=[500]), ),
    )

    def get(url, params):
        return http_client.get(url, params=params, verify=False)

    api_client = ApiClient(get, api_url)
    # Compile data
    compile_profiles(
        api_client,
        last_audit_year,
        last_opinion_year,
        last_uifw_year,
        last_audit_quarter,
    )
    compile_medians(api_client)
    compile_rating_counts(api_client)
Example #10
class Scraper(object):
    def __init__(self, url):
        self.url = url
        self.session = FuturesSession(max_workers=100)
        adapter = requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)

    def _extract_image_urls(self, soup):
        for img in soup.findAll("img", src=True):
            yield urljoin(self.url, img["src"])

    def _find_thumbnail_image(self):
        content_type, content = _fetch_url(self.url, session=self.session)
        soup = BeautifulSoup(content, "html.parser")
        image_urls = self._extract_image_urls(soup)
        image_urls = list(set(image_urls))  # materialize and drop duplicates
        image_sizes = _parallel_get_sizes(image_urls, self.session)
        logging.debug('got sizes for {} images'.format(len(image_sizes)))
        # find biggest
        max_area = 0
        max_url = None
        for image_url in image_urls:
            size = image_sizes[image_url]
            if not size:
                continue

            # ignore little images
            area = size[0] * size[1]
            if area < 5000:
                logging.debug('ignore little {}'.format(image_url))
                continue

            # ignore excessively long/wide images
            if max(size) / min(size) > 1.5:
                logging.debug('ignore dimensions {}'.format(image_url))
                continue

            # penalize images with "sprite" in their name
            if 'sprite' in image_url.lower():
                logging.debug('penalizing sprite {}'.format(image_url))
                area /= 10

            if area > max_area:
                max_area = area
                max_url = image_url
        return max_url


    def scrape(self):
        thumbnail_url = self._find_thumbnail_image()
        #thumbnail = _make_thumbnail_from_url(thumbnail_url, referer=self.url)
        return thumbnail_url
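Hypothetical usage of the class above; _fetch_url and _parallel_get_sizes are module-level helpers assumed to exist alongside it.

scraper = Scraper('https://example.org/article')  # placeholder URL
print(scraper.scrape())  # URL of the largest suitable image, or None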
Example #11
    def make(cls, future=False, timeout=5, max_workers=10):
        if future:
            executor = ThreadPoolExecutor(max_workers=max_workers)
            session = FuturesSession(executor=executor)
        else:
            session = StandardSession()

        adapter = TimeoutHTTPAdapter(timeout=timeout)
        session.mount(prefix="http://", adapter=adapter)
        session.mount(prefix="https://", adapter=adapter)

        return session
Example #12
def _init_session(session, **kwargs):
    if session is None:
        if kwargs.get('asynchronous'):
            session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
        else:
            session = Session()
        if kwargs.get('proxies'):
            session.proxies = kwargs.get('proxies')
    retries = \
        Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504])
    session.mount('http://', HTTPAdapter(max_retries=retries))
    return session
Example #13
class CrashInfo(object):

    # TODO: count is probably erroneous since there is a range by default in supersearch...
    CRASH_STATS_URL = 'https://crash-stats.mozilla.com'
    SUPERSEARCH_URL = CRASH_STATS_URL + '/api/SuperSearch'
    TIMEOUT = 5
    MAX_RETRIES = 5
    MAX_WORKERS = multiprocessing.cpu_count()

    def __init__(self, paths, credentials = None):
        self.results = [ ]
        self.credentials = credentials
        self.info = { }
        self.paths = [paths] if type(paths) == str else paths 
        for path in self.paths:
            self.info[path] = { 'crashes': -1 }
                                
        self.session = FuturesSession(max_workers = self.MAX_WORKERS)
        self.session.mount(self.CRASH_STATS_URL, HTTPAdapter(max_retries = self.MAX_RETRIES))
        self.__get_info()

    def get(self):
        for r in self.results:
            r.result()
        return self.info

    def __get_apikey(self):
        if self.credentials:
            return self.credentials['tokens'][self.CRASH_STATS_URL]
        else:
            return ''
    
    def __info_cb(self, path):
        def cb(sess, res):
            self.info[path]['crashes'] = res.json()['total']

        return cb

    def __get_info(self):
        header = { 'Auth-Token': self.__get_apikey() }
        for path in self.paths:
            filename = os.path.basename(path)
            self.results.append(self.session.get(self.SUPERSEARCH_URL,
                                                 params = { 'product': 'Firefox',
                                                            'topmost_filenames': filename, 
                                                            '_results_number': 0,
                                                            '_facets': 'product',
                                                            '_facets_size': 1 },
                                                 headers = header,
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__info_cb(path)))
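The background_callback argument used above comes from older requests-futures releases (newer versions replace it with standard requests response hooks). A minimal sketch of the pattern, with a placeholder URL:

from requests_futures.sessions import FuturesSession

def on_response(session, response):
    # runs in a worker thread as soon as the response arrives
    response.parsed = response.json()

session = FuturesSession()
future = session.get('https://example.org/api', background_callback=on_response)  # placeholder URL
print(future.result().parsed)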
Example #14
def async_retryable_session(executor: ThreadPoolExecutor) -> FuturesSession:
    session = FuturesSession(executor)
    retries = 3
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=0.5,
        status_forcelist=(SC_TOO_MANY_REQUESTS, ),
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
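A minimal usage sketch; SC_TOO_MANY_REQUESTS (HTTP 429) is assumed to be defined in the same module as the function above, and the URLs are placeholders.

from concurrent.futures import ThreadPoolExecutor

session = async_retryable_session(ThreadPoolExecutor(max_workers=4))
futures = [session.get('https://example.org/item/{}'.format(i)) for i in range(10)]
results = [f.result() for f in futures if not f.exception()]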
Example #15
class FXRevision(object):

    ARCHIVES_URL = 'http://archive.mozilla.org'
    NIGHTLY_URL = ARCHIVES_URL + '/pub/firefox/nightly/'
    TIMEOUT = 5
    MAX_RETRIES = 5

    def __init__(self, versions, fx_version, os):
        self.results = [ ]
        self.dates = { }
        self.fx_version = fx_version
        self.os = os
        self.info = { }
        pattern = re.compile('([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})')
        for version in versions:
            m = pattern.search(version)
            self.dates[version] = [m.group(i) for i in range(1, 7)]

        self.session = FuturesSession()
        self.session.mount(self.ARCHIVES_URL, HTTPAdapter(max_retries = self.MAX_RETRIES))
        self.__get_info()

    def get(self):
        for r in self.results:
            r.result()
        return self.info
        
    def __make_url(self, l):
        return self.NIGHTLY_URL + l[0] + '/' + l[1] + '/' + '-'.join(l) + '-mozilla-central/firefox-' + self.fx_version + '.en-US.' + self.os + '.json'

    def __info_cb(self, sess, res):
        json = res.json()
        self.info[json['buildid']] = json['moz_source_stamp']
    
    def __get_info(self):
        for date in self.dates.values():
            self.results.append(self.session.get(self.__make_url(date),
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__info_cb))
Example #16
    def concurrent_post(self,
                        parents: list,
                        threads=CONCURRENT_POST_DEFAULT_THREADS,
                        raise_http_error=False,
                        log=False) -> list:
        request_list = []
        session = FuturesSession(executor=ThreadPoolExecutor(
            max_workers=threads))
        session.mount('http://', HTTPAdapter(max_retries=self.RETRIES_CONFIG))

        for parent in parents:
            processed_parent = self.remove_empty_fields(parent)
            item_type = processed_parent['item_type']

            future_request = session.post(
                f'{self.BASE_URL}{POST_ENDPOINTS[item_type]}',
                **self._prepare_post_kwargs(processed_parent))
            future_request.parent = processed_parent
            request_list.append(future_request)

        post_results = []
        estimator = Estimate()
        for index, completed_request in enumerate(as_completed(request_list)):
            res = completed_request.result()

            if log:
                progress, eta = estimator.get(index + 1, len(request_list))
                log_message = f'POST {res.status_code} variant({completed_request.parent["rid"]}) processed {index+1}/{len(request_list)}({progress}%) requests ETA {eta}'
                if res.status_code < 400:
                    self.logger.info(log_message)
                else:
                    self.logger.error(log_message)

            post_results.append(
                self._handle_post_response(
                    res, completed_request.parent,
                    completed_request.parent['item_type'], raise_http_error))

        return post_results
Example #17
def _init_session(session, **kwargs):
    session_headers = headers
    if session is None:
        if kwargs.get('asynchronous'):
            session = FuturesSession(max_workers=kwargs.get('max_workers', 8))
        else:
            session = Session()
        if kwargs.get('proxies'):
            session.proxies = kwargs.get('proxies')
        retries = Retry(
            total=kwargs.get('retry', 5),
            backoff_factor=kwargs.get('backoff_factor', .3),
            status_forcelist=kwargs.get('status_forcelist',
                                        [429, 500, 502, 503, 504]),
            method_whitelist=['HEAD', 'GET', 'OPTIONS', 'POST', 'TRACE'])
        if kwargs.get('verify'):
            session.verify = kwargs.get('verify')
        session.mount(
            'https://',
            TimeoutHTTPAdapter(max_retries=retries,
                               timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)))
        # TODO: Figure out how to utilize this within the validate_response
        # TODO: This will be a much better way of handling bad requests than
        # TODO: what I'm currently doing.
        # session.hooks['response'] = \
        #     [lambda response, *args, **kwargs: response.raise_for_status()]
        user_agent = kwargs.get('user_agent', random.choice(USER_AGENT_LIST))
        # if kwargs.get('cookies'):
        #     cookies = get_cookies(user_agent)
        #     [session.cookies.set(c['name'], c['value']) for c in cookies]
        #     session_headers.update({
        #         'cookie': '; '.join([
        #             item['name'] + "=" + item['value'] for item in cookies
        #         ])
        #     })
        session_headers['User-Agent'] = user_agent
        session.headers.update(**session_headers)
    return session
Example #18
class Backtrace(object):

    CRASH_STATS_URL = 'https://crash-stats.mozilla.com'
    PROCESSED_URL = CRASH_STATS_URL + '/api/ProcessedCrash/'
    TIMEOUT = 5
    MAX_RETRIES = 5
    MAX_WORKERS = multiprocessing.cpu_count()
    
    def __init__(self, uuids, fraction = 0.2, just_hg = False, credentials = None):
        self.just_hg = just_hg
        self.results = [ ]
        self.credentials = credentials
        self.uuids = uuids
        self.fraction = max(0., min(fraction, 1.))
        self.info = { }
        self.session = FuturesSession(max_workers = self.MAX_WORKERS)
        self.session.mount(self.CRASH_STATS_URL, HTTPAdapter(max_retries = self.MAX_RETRIES))
        self.__get_info()

    def get(self):
        for r in self.results:
            r.result()
        return self.info

    def __get_apikey(self):
        if self.credentials:
            return self.credentials['tokens'][self.CRASH_STATS_URL]
        else:
            return ''
    
    @staticmethod
    def __cycles_detection(funs):
        # TODO: improve this algorithm (not sure that's a good one)
        positions = { }
        # we get the function positions in the trace
        for i in range(len(funs)):
            fun = funs[i]
            if fun in positions:
                positions[fun].append(i)
            else:
                positions[fun] = [ i ]

        lengths = { }
        for k, v in positions.items():
            if len(v) >= 2:
                l = v[1] - v[0]
                good = True
                for i in range(2, len(v)):
                    if v[i] - v[i - 1] != l:
                        good = False
                        break
                if good:
                    if l in lengths:
                        lengths[l].append((k, v))
                    else:
                        lengths[l] = [ (k, v) ]

        cycles = [ ]
        for k, v in lengths.items():
            l = sorted(v, key=lambda x: x[1][0])
            pat = [ ]
            container = [ l[0][0] ]
            pos = l[0][1][0]
            for i in range(1, len(l)):
                _pos = l[i][1][0]
                if _pos == pos + 1:
                    container.append(l[i][0])
                    pos = _pos
                else:
                    pat.append(tuple(container))
                    container = [ l[i][0] ]
                    pos = _pos

            pat.append(tuple(container))
            cycles += pat

        cycles = tuple(cycles)
        
        return cycles
    
    def __info_cb(self, sess, res):
        json = res.json()
        if 'json_dump' in json:
            uuid = json['uuid']
            jd = json['json_dump']
            if 'crashedThread' in json and 'threads' in jd:
                ct = json['crashedThread']
                ct = jd['threads'][ct]
                self.info[uuid] = { 'cycles': [ ],
                                        'address': '',
                                        'cpu_name': json['cpu_name'],
                                        'cpu_info': json['cpu_info'],
                                        'reason': json['reason'],
                                        'os': json['os_pretty_version'] }
                if 'frames' in ct:
                    frames = ct['frames']
                    functions = [ ]
                    # we get the functions in the backtrace (to check if there is a recursion)
                    for frame in frames:
                        if 'function' in frame:
                            functions.append(frame['function'])
                    # check for duplicated entries in function
                    self.info[uuid]['cycles'] = Backtrace.__cycles_detection(functions)
                if 'crash_info' in jd:
                    addr = jd['crash_info']['address']
                    self.info[uuid]['address'] = addr


    def __hginfo_cb(self, sess, res):
        json = res.json()
        if 'json_dump' in json:
            uuid = json['uuid']
            jd = json['json_dump']
            if 'crashedThread' in json and 'threads' in jd:
                ct = json['crashedThread']
                ct = jd['threads'][ct]
                
                self.info[uuid] = { 'hgfiles': [ ] }
                if 'frames' in ct:
                    frames = ct['frames']
                    files = [ ]
                    # _files is just used to avoid duplicated in files
                    _files = set()
                    for frame in frames:
                        if 'file' in frame:
                            f = frame['file']
                            if f not in _files:
                                files.append(f)
                                _files.add(f)
                                
                    self.info[uuid] = files
        
    def __get_info(self):
        header = { 'Auth-Token': self.__get_apikey() }
        if self.just_hg:
            self.results.append(self.session.get(self.PROCESSED_URL,
                                                 params = { 'crash_id': self.uuids[0] },
                                                 headers = header,
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__hginfo_cb))
            return
        
        if self.fraction != 1:
            L = len(self.uuids)
            indices = set()
            ratio = self.fraction if self.fraction <= 0.5 else 1 - self.fraction 
            N = int(float(L) * ratio)
            # we analyze only a fraction of all the uuids
            while len(indices) != N:
                indices.add(randint(0, L - 1))
            if self.fraction <= 0.5:
                uuids = [self.uuids[i] for i in indices]
            else:
                uuids = [ ]
                for i in range(L):
                    if i not in indices:
                        uuids.append(self.uuids[i])
        else:
            uuids = self.uuids
            
        for uuid in uuids:
            self.results.append(self.session.get(self.PROCESSED_URL,
                                                 params = { 'crash_id': uuid },
                                                 headers = header,
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__info_cb))
Example #19
    def addManyMementos(self, urims):

        module_logger.info("started with {} URI-Ms for processing...".format(len(urims)))

        # protect the function from duplicates in the urims list
        urims = list(set(urims))

        module_logger.info("found duplicates, now using {} URI-Ms for processing...".format(len(urims)))

        futuressession = FuturesSession(session=self.session)

        retry = Retry(
            total=10,
            read=10,
            connect=10,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504)
        )
        adapter = HTTPAdapter(max_retries=retry)
        futuressession.mount('http://', adapter)
        futuressession.mount('https://', adapter)

        futures = {}
        raw_futures = {}

        working_urim_list = []
        
        raw_urims = []

        for uri in urims:

            # raw_urim = otmt.generate_raw_urim(uri)
            working_urim_list.append(uri)
            futures[uri] = futuressession.get(uri)
            # futures[raw_urim] = futuressession.get(raw_urim)

        working_starting_size = len(working_urim_list)

        def uri_generator(urilist):

            while len(urilist) > 0:

                uchoice = random.choice(urilist)

                yield uchoice

        for uri in uri_generator(working_urim_list):

            if futures[uri].done():

                module_logger.debug("URI-M {} is done, processing...".format(uri))

                if len(working_urim_list) % 100 == 0:
                    module_logger.info("{}/{} mementos left to process".format(len(working_urim_list), working_starting_size))

                try:
                    r = futures[uri].result()

                    if len(r.history) == 0:
                        raw_urim = otmt.generate_raw_urim(uri)
                    else:
                        raw_urim = otmt.generate_raw_urim(r.url)

                    raw_urims.append( raw_urim )

                    if 'memento-datetime' not in r.headers:
                        self.addMementoError(uri, "URI-M {} does not produce a memento".format(uri))
                    else:
                        # the content should be cached by the session
                        # we just need to keep track of the URI-Ms for this run
                        self.urimlist.append(uri)

                except Exception as e:
                    self.addMementoError(uri, repr(e))

                working_urim_list.remove(uri)
                del futures[uri]

        module_logger.info("done adding {} mementos, now adding corresponding {} raw mementos...".format( len(urims), len(raw_urims) ))

        working_raw_urim_list = []

        for raw_urim in list(set(raw_urims)):

            working_raw_urim_list.append(raw_urim)
            raw_futures[raw_urim] = futuressession.get(raw_urim)

        working_rawurims_starting_size = len(working_raw_urim_list)

        # for raw_urim in uri_generator(working_raw_urim_list):

        while len(working_raw_urim_list) > 0:

            raw_urim = random.choice(working_raw_urim_list)

            module_logger.debug("fetching results for raw URI-M {}".format(raw_urim))
            # module_logger.debug("are the keys the same as the working list: {}".format( set(working_raw_urim_list) == set(list(raw_futures.keys())) ) )
            module_logger.debug("raw mementos working list size: {}".format(len(working_raw_urim_list)))
            module_logger.debug("raw mementos futures keys size: {}".format(len(raw_futures)))

            # try:
            #     raw_futures[raw_urim]
            # except KeyError:
            #     module_logger.error("{} is not in futures".format(raw_urim))
            #     module_logger.error("is it: {}".format( raw_urim in raw_futures ))
            #     module_logger.error("")
            #     module_logger.error("working list follows:")
            #     module_logger.error(pp.pformat(working_raw_urim_list))
            #     module_logger.error("")
            #     module_logger.error("raw_futures keys follows:")
            #     module_logger.error(pp.pformat(list(raw_futures.keys())))
                

            if raw_futures[raw_urim].done():
                module_logger.debug("raw URI-M {} is done, processing...".format(raw_urim))

                if len(working_raw_urim_list) % 100 == 0:
                    module_logger.info("{}/{} raw mementos left to process".format(len(working_raw_urim_list), working_rawurims_starting_size))

                try:
                    r = raw_futures[raw_urim].result()

                    if 'memento-datetime' not in r.headers:
                        self.addMementoError(uri, "raw URI-M {} does not produce a memento".format(raw_urim))
                    else:
                        # the content should be cached by the session
                        # we just need to keep track of the raw URI-Ms for this run
                        self.urimlist.append(raw_urim)

                except Exception as e:
                    self.addMementoError(raw_urim, repr(e))

                # module_logger.debug("removing {} from working raw URI-M list and raw futures keys".format(raw_urim))
                working_raw_urim_list.remove(raw_urim)
                del raw_futures[raw_urim]
                # module_logger.debug("raw URI-M {} in working raw URI-M list still? {}".format( raw_urim, raw_urim in working_raw_urim_list ))
                time.sleep(1)
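requests-futures returns standard concurrent.futures futures, so the random-polling loop above could also be written with as_completed; the sketch below shows that alternative with placeholder URLs, not the original URI-Ms.

from concurrent.futures import as_completed
from requests_futures.sessions import FuturesSession

session = FuturesSession()
urims = ['https://example.org/memento/a', 'https://example.org/memento/b']  # placeholders
futures = {session.get(u): u for u in urims}

for future in as_completed(futures):
    urim = futures[future]
    try:
        response = future.result()
        print(urim, 'memento-datetime' in response.headers)
    except Exception as exc:
        print(urim, 'failed:', repr(exc))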
Example #20
class Connection(object):
    """Represents a connection to a server
    """

    TIMEOUT = 30
    MAX_RETRIES = 256
    MAX_WORKERS = multiprocessing.cpu_count()
    CHUNK_SIZE = 32
    TOKEN = ''
    USER_AGENT = config.get('User-Agent', 'name', 'libmozdata')
    X_FORWARDED_FOR = utils.get_x_fwed_for_str(
        config.get('X-Forwarded-For', 'data', ''))

    # Error 429 is for 'Too many requests' => we retry
    STATUS_FORCELIST = [429]

    def __init__(self, base_url, queries=None, **kwargs):
        """Constructor

        Args:
            base_url (str): the server's url
            queries (Optional[Query]): the queries
        """

        self.session = FuturesSession(max_workers=self.MAX_WORKERS)
        retries = Retry(total=Connection.MAX_RETRIES,
                        backoff_factor=1,
                        status_forcelist=Connection.STATUS_FORCELIST)
        self.session.mount(base_url, HTTPAdapter(max_retries=retries))
        self.results = []
        self.queries = queries

        if kwargs:
            if 'timeout' in kwargs:
                self.TIMEOUT = kwargs['timeout']
            if 'max_retries' in kwargs:
                self.MAX_RETRIES = kwargs['max_retries']
            if 'max_workers' in kwargs:
                self.MAX_WORKERS = kwargs['max_workers']
            if 'user_agent' in kwargs:
                self.USER_AGENT = kwargs['user_agent']
            if 'x_forwarded_for' in kwargs:
                self.X_FORWARDED_FOR = utils.get_x_fwded_for_str(
                    kwargs['x_forwarded_for'])

        self.exec_queries()

    def __get_cb(self, query):
        """Get the callback to use when data have been retrieved

        Args:
            query (Query): the query

        Returns:
            function: the callback for the query
        """
        def cb(sess, res):
            if res.status_code == 200:
                try:
                    response = res.json()
                except:
                    response = res.text

                if query.handlerdata is not None:
                    query.handler(response, query.handlerdata)
                else:
                    query.handler(response)
            else:
                print('Connection error:')
                print('   url: ', res.url)
                print('   text: ', res.text)

        return cb

    def wait(self):
        """Just wait that all the queries have been treated
        """
        for r in self.results:
            r.result()

    def get_apikey(self):
        """Get the api key

        Returns:
            str: the api key
        """
        return self.TOKEN

    def get_header(self):
        """Get the header to use each query

        Returns:
            dict: the header
        """
        if self.X_FORWARDED_FOR:
            return {
                'User-Agent': self.USER_AGENT,
                'X-Forwarded-For': self.X_FORWARDED_FOR,
                'Connection': 'close'
            }
        else:
            return {'User-Agent': self.USER_AGENT, 'Connection': 'close'}

    def get_auth(self):
        """Get the auth to use each query

        Returns:
            dict: the auth
        """
        return None

    def exec_queries(self, queries=None):
        """Set and exec some queries

        Args:
            queries (Optional[Query]): the queries to exec
        """
        if queries:
            self.queries = queries

        if self.queries:
            if isinstance(self.queries, Query):
                self.queries = [self.queries]

            header = self.get_header()
            auth = self.get_auth()

            for query in self.queries:
                cb = self.__get_cb(query)
                if query.params:
                    if isinstance(query.params, dict):
                        self.results.append(
                            self.session.get(query.url,
                                             params=query.params,
                                             headers=header,
                                             auth=auth,
                                             verify=True,
                                             timeout=self.TIMEOUT,
                                             background_callback=cb))
                    else:
                        for p in query.params:
                            self.results.append(
                                self.session.get(query.url,
                                                 params=p,
                                                 headers=header,
                                                 auth=auth,
                                                 verify=True,
                                                 timeout=self.TIMEOUT,
                                                 background_callback=cb))
                else:
                    self.results.append(
                        self.session.get(query.url,
                                         headers=header,
                                         auth=auth,
                                         verify=True,
                                         timeout=self.TIMEOUT,
                                         background_callback=cb))

    @staticmethod
    def chunks(l, chunk_size=CHUNK_SIZE):
        """Get chunk from a list

        Args:
            l (List): data to chunkify
            chunk_size (Optional[int]): the size of each chunk

        Yields:
            a chunk from the data
        """
        for i in range(0, len(l), chunk_size):
            yield l[i:(i + chunk_size)]
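Hypothetical use of the chunks helper above, batching a list of ids so that at most CHUNK_SIZE items are handled per iteration; the data is a placeholder.

ids = list(range(100))  # placeholder data
for batch in Connection.chunks(ids, chunk_size=32):
    print(len(batch))  # 32, 32, 32, 4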
Example #21
class HTTPDriver(BaseDriver):
    """HTTPDriver

    The :class:`HTTPDriver` class reads SBP messages from an HTTP
    service for a device and writes out to a stream. This driver is like
    a file handle with reads and writes over two separate HTTP
    connections, but it can also be enabled and disabled by its consumer.

    Parameters
    ----------
    device_uid : uid
      Device unique id
    url : str
      HTTP endpoint
    retries : tuple
      Configure connect and read retry count. Defaults to
      (MAX_CONNECT_RETRIES, MAX_READ_RETRIES).
    timeout : tuple
      Configure connect and read timeouts. Defaults to
      (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT).

    """
    def __init__(
        self,
        device_uid=None,
        url="https://broker.staging.skylark.swiftnav.com",
        retries=DEFAULT_RETRIES,
        timeout=DEFAULT_TIMEOUT,
    ):
        self._retry = Retry(connect=retries[0],
                            read=retries[1],
                            redirect=MAX_REDIRECTS,
                            status_forcelist=[500],
                            backoff_factor=DEFAULT_BACKOFF_FACTOR)
        self.url = url
        self.read_session = requests.Session()
        self.read_session.mount(
            "http://",
            HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                        pool_maxsize=DEFAULT_POOLSIZE,
                        pool_block=DEFAULT_POOLBLOCK,
                        max_retries=self._retry))
        self.read_session.mount(
            "https://",
            HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                        pool_maxsize=DEFAULT_POOLSIZE,
                        pool_block=DEFAULT_POOLBLOCK,
                        max_retries=self._retry))
        self.write_session = None
        self.device_uid = device_uid
        self.timeout = timeout
        self.read_response = None
        self.write_response = None
        self.source = None

    def flush(self):
        """File-flush wrapper (noop).

        """
        pass

    def close(self):
        """File-handle close wrapper (noop).

        """
        try:
            self.read_close()
            self.write_close()
        except:
            pass

    @property
    def write_ok(self):
        """
        Are we connected for writes?
        """
        # Note that self.write_response is either None or a Response
        # object, which cast to False for 4xx and 5xx HTTP codes.
        return bool(self.write_response)

    def connect_write(self, source, whitelist, device_uid=None, pragma=None):
        """Initialize a streaming write HTTP response. Manually connects the
        underlying file-handle. In the event of a network disconnection,
        use to manually reinitiate an HTTP session.

        Parameters
        ----------
        source : sbp.client.handler.Handler
          Iterable source of SBP messages.
        whitelist : [int]
          Whitelist of messages to write

        """
        header_device_uid = device_uid or self.device_uid
        headers = {
            'Device-Uid': header_device_uid,
            'Content-Type': BROKER_SBP_TYPE,
            'Pragma': pragma
        }
        if not pragma:
            del headers['Pragma']
        try:
            self.executor = ThreadPoolExecutor(max_workers=DEFAULT_POOLSIZE)
            self.write_session = FuturesSession(executor=self.executor)
            self.write_session.mount(
                "http://",
                HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                            pool_maxsize=DEFAULT_POOLSIZE,
                            pool_block=DEFAULT_POOLBLOCK,
                            max_retries=self._retry))
            self.write_session.mount(
                "https://",
                HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                            pool_maxsize=DEFAULT_POOLSIZE,
                            pool_block=DEFAULT_POOLBLOCK,
                            max_retries=self._retry))
            self.source = source.filter(whitelist)
            gen = (msg.pack() for msg, _ in self.source)
            self.write_session.put(self.url, data=gen, headers=headers)
            self.write_response = True
        except requests.exceptions.ConnectionError:
            msg = "Client connection error to %s with [PUT] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        except requests.exceptions.ConnectTimeout:
            msg = "Client connection timeout to %s with [PUT] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        except requests.exceptions.RetryError:
            msg = "Client retry error to %s with [PUT] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        except requests.exceptions.ReadTimeout:
            msg = "Client read timeout to %s with [PUT] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        return self.write_ok

    def write(self, data):
        """Write wrapper (noop). Actual stream is initiated by the write
        connection.

        Parameters
        ----------
        data : object
          Data to write.

        """
        pass

    def write_close(self):
        """File-handle close wrapper (noop).

        """
        try:
            self.write_session.close()
            self.executor.shutdown(wait=False)
            self.source.breakiter()
            self.source = None
            self.executor = None
            self.write_session = None
        except:
            pass

    @property
    def read_ok(self):
        """
        Are we connected for reads?
        """
        return bool(self.read_response)

    def connect_read(self, device_uid=None, pragma=None):
        """Initialize a streaming read/write HTTP response. Manually connects
        the underlying file-handle. In the event of a network
        disconnection, use to manually reinitiate an HTTP session.

        """
        header_device_uid = device_uid or self.device_uid
        headers = {
            'Device-Uid': header_device_uid,
            'Accept': BROKER_SBP_TYPE,
            'Pragma': pragma
        }
        if not pragma:
            del headers['Pragma']
        try:
            self.read_response = self.read_session.get(self.url,
                                                       stream=True,
                                                       headers=headers,
                                                       timeout=self.timeout)
        except requests.exceptions.ConnectionError:
            msg = "Client connection error to %s with [GET] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        except requests.exceptions.ConnectTimeout:
            msg = "Client connection timeout to %s with [GET] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        except requests.exceptions.RetryError:
            msg = "Client retry error to %s with [GET] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        except requests.exceptions.ReadTimeout:
            msg = "Client read timeout to %s with [GET] headers %s" \
                  % (self.url, headers)
            warnings.warn(msg)
        return self.read_ok

    def read(self, size):
        """Read wrapper. If the client connection is closed or some other
        exception is thrown, raises an IOError.

        Parameters
        ----------
        size : int
          Size to read (in bytes).

        Returns
        ----------
        bytearray, or None

        """
        if self.read_response is None or not self.device_uid:
            raise ValueError("Invalid/insufficient HTTP request parameters!")
        elif not self.read_ok or self.read_response.raw.closed:
            raise IOError("HTTP read closed?!")
        try:
            return self.read_response.raw.read(size)
        except:
            raise IOError("HTTP read error!")

    def read_close(self):
        """File-handle close wrapper (noop).

        """
        try:
            self.read_response.close()
            self.read_response = None
        except:
            pass
Example #22
class Connection(object):
    """Represents a connection to a server
    """

    TIMEOUT = 30
    MAX_RETRIES = 256
    MAX_WORKERS = multiprocessing.cpu_count()
    CHUNK_SIZE = 32
    TOKEN = ''

    # Error 429 is for 'Too many requests' => we retry
    STATUS_FORCELIST = [429]

    def __init__(self, base_url, queries=None, **kwargs):
        """Constructor

        Args:
            base_url (str): the server's url
            queries (Optional[Query]): the queries
        """

        self.session = FuturesSession(max_workers=self.MAX_WORKERS)
        retries = Retry(total=Connection.MAX_RETRIES, backoff_factor=1, status_forcelist=Connection.STATUS_FORCELIST)
        self.session.mount(base_url, HTTPAdapter(max_retries=retries))
        self.results = []
        self.queries = queries

        if kwargs:
            if 'timeout' in kwargs:
                self.TIMEOUT = kwargs['timeout']
            if 'max_retries' in kwargs:
                self.MAX_RETRIES = kwargs['max_retries']
            if 'max_workers' in kwargs:
                self.MAX_WORKERS = kwargs['max_workers']

        self.exec_queries()

    def __get_cb(self, query):
        """Get the callback to use when data have been retrieved

        Args:
            query (Query): the query

        Returns:
            function: the callback for the query
        """
        def cb(sess, res):
            if res.status_code == 200:
                try:
                    response = res.json()
                except:
                    response = res.text

                if query.handlerdata is not None:
                    query.handler(response, query.handlerdata)
                else:
                    query.handler(response)
            else:
                print('Connection error:')
                print('   url: ', res.url)
                print('   text: ', res.text)

        return cb

    def wait(self):
        """Just wait that all the queries have been treated
        """
        for r in self.results:
            r.result()

    def get_apikey(self):
        """Get the api key

        Returns:
            str: the api key
        """
        return self.TOKEN

    def get_header(self):
        """Get the header to use each query

        Returns:
            dict: the header
        """
        return {'User-Agent': 'clouseau', 'Connection': 'close'}

    def get_auth(self):
        """Get the auth to use each query

        Returns:
            dict: the auth
        """
        return None

    def exec_queries(self, queries=None):
        """Set and exec some queries

        Args:
            queries (Optional[Query]): the queries to exec
        """
        if queries:
            self.queries = queries

        if self.queries:
            if isinstance(self.queries, Query):
                self.queries = [self.queries]

            header = self.get_header()
            auth = self.get_auth()

            for query in self.queries:
                cb = self.__get_cb(query)
                if query.params:
                    if isinstance(query.params, dict):
                        self.results.append(self.session.get(query.url,
                                                             params=query.params,
                                                             headers=header,
                                                             auth=auth,
                                                             verify=True,
                                                             timeout=self.TIMEOUT,
                                                             background_callback=cb))
                    else:
                        for p in query.params:
                            self.results.append(self.session.get(query.url,
                                                                 params=p,
                                                                 headers=header,
                                                                 auth=auth,
                                                                 verify=True,
                                                                 timeout=self.TIMEOUT,
                                                                 background_callback=cb))
                else:
                    self.results.append(self.session.get(query.url,
                                                         headers=header,
                                                         auth=auth,
                                                         verify=True,
                                                         timeout=self.TIMEOUT,
                                                         background_callback=cb))

    @staticmethod
    def chunks(l, chunk_size=CHUNK_SIZE):
        """Get chunk from a list

        Args:
            l (List): data to chunkify
            chunk_size (Optional[int]): the size of each chunk

        Yields:
            a chunk from the data
        """
        for i in range(0, len(l), chunk_size):
            yield l[i:(i + chunk_size)]
Example #23
            day.update({'type_id': resp.type_id, 'region_id': resp.region_id})
            writer.writerow(day)
    except Exception as e:
        logger.exception(e)


if __name__ == '__main__':
    session = FuturesSession(max_workers=50)
    session.headers.update({'User-Agent': 'Fuzzwork Market Monitor'})
    # https://stackoverflow.com/questions/40417503/applying-retry-on-grequests-in-python
    retries = Retry(total=50,
                    backoff_factor=0.5,
                    status_forcelist=[500, 502, 503, 504],
                    raise_on_redirect=True,
                    raise_on_status=True)
    session.mount('http://', HTTPAdapter(max_retries=retries, pool_maxsize=50))
    session.mount('https://', HTTPAdapter(max_retries=retries,
                                          pool_maxsize=50))

    regions = pd.read_csv('data/mapRegions.csv.bz2')
    types = pd.read_csv('data/invTypes.csv.bz2',
                        usecols=['typeID', 'published', 'marketGroupID'])
    types = types.query('published == 1 and marketGroupID != "None"')

    with gzip.open('data/history-latest.csv.gz', 'wt') as csvfile:
        fieldnames = [
            'type_id', 'region_id', 'date', 'lowest', 'highest', 'average',
            'volume', 'order_count'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
Example #24
	addressEndpoint = '/api/Wallet/addresses'
	walletEndpoint = '/api/Wallet/files'
	buildTxEndpoint= '/api/Wallet/build-transaction'
	sendTxEndpoint= '/api/Wallet/send-transaction'
	mnemonicEndpoint='/api/Wallet/mnemonic'
	createWalletEndpoint='/api/Wallet/create'
	restoreWalletEndpoint='/api/Wallet/recover'
	nodeStatusEndpoint='/api/Dashboard/Stats'

	apiSession = requests.session()
	futuresSession=FuturesSession()

	retryCount=Retry(total=3,backoff_factor=0.1,status_forcelist=(400, 500, 502, 504))
	apiSession.mount('http://', HTTPAdapter(max_retries=retryCount))
	apiSession.mount('https://', HTTPAdapter(max_retries=retryCount))
	futuresSession.mount('http://', HTTPAdapter(max_retries=retryCount))
	futuresSession.mount('https://', HTTPAdapter(max_retries=retryCount))

	#refresh interval
	secToRefresh=int(xConfig['SETTINGS']['REFRESH_INTERVAL'])
	secCounter=0
	
	#GUI	
	QFontDatabase.addApplicationFont(":/base/Roboto-Regular.ttf")
	app.setFont(QFont("Roboto"))
	mainWin=QStackedWidget()
	walletPage=QWidget()
	dashboardPage=QWidget()
	sendPage=QWidget()
	settingsPage=QWidget()
	createRestorePage=QWidget()
Example #25
class Dota_API():
    api_keys = [
        'FE70CE9FC0D6D99279498CE852587F59', '2FEC67172AAC0C393EC209A225A7E51E'
    ]
    api_key_num = 1
    api_key = api_keys[api_key_num]

    ips = ['162.213.199.143', '162.213.199.31']
    ip_num = 0

    data_source = 4

    headers = {'User-Agent': 'Script by Grue'}

    errors = 0

    session = FuturesSession()
    session.mount('http://', source.SourceAddressAdapter(ips[ip_num]))

    def matches_get(self, req_type=1, n_id='', **kwargs):
        if (req_type < 4):
            url = 'https://api.steampowered.com/IDOTA2Match_570/'
            url += 'GetMatchHistoryBySequenceNum' if req_type == 1 else 'GetMatchHistory'
            url += '/V001/?key=' + self.api_key + '&min_players=10&'
            if (req_type == 1):
                url += 'start_at_match_seq_num'
            elif (req_type == 2):
                url += 'start_at_match_id'
            elif (req_type == 3):
                url += 'account_id'
            url += '=' + str(n_id)
            if req_type != 1:
                url += '&skill=3'
        elif req_type == 4:
            url = 'http://www.dotabuff.com/matches/' + str(n_id)
        elif req_type == 5:
            url = 'http://dotamax.com/match/detail/' + str(n_id)
        elif req_type == 6:
            url = 'http://api.opendota.com/api/matches/' + str(n_id)
        return dict(req=self.session.get(url, timeout=7, headers=self.headers),
                    req_type=req_type,
                    n_id=n_id,
                    url=url,
                    ip_num=self.ip_num)

    def matches_result(self, request):
        req = request['req']
        try:
            res = req.result()
        except (requests.ConnectionError, requests.Timeout,
                socket.timeout) as e:
            return self.retry_request(request)
        if (res.status_code != 200):
            if (res.status_code == 404 and request['req_type'] == 6):
                #not found
                return None
            self.session = FuturesSession()
            # if last IP cycle through data sources
            if self.ip_num == len(self.ips) - 1:
                if request['req_type'] == 4 or request['req_type'] == 6:
                    request['req_type'] = (4 if
                                           (request['req_type'] == 6) else 6)
                    self.data_source = request['req_type']
            self.ip_num = (request['ip_num'] + 1) % len(self.ips)
            self.session.mount(
                'http://', source.SourceAddressAdapter(self.ips[self.ip_num]))
            return self.retry_request(request, sleep=1)
        if request['req_type'] == 4:
            return self.parse_skill(res)
        if request['req_type'] == 5:
            return self.parse_dota_max(res)
        if request['req_type'] == 6:
            #switch IPs and wait 0.5 seconds so that it is 1 request per second per IP
            time.sleep(1 / len(self.ips))
            self.ip_num = (request['ip_num'] + 1) % len(self.ips)
            self.session.mount(
                'http://', source.SourceAddressAdapter(self.ips[self.ip_num]))
            return self.parse_opendota_skill(res)
        try:
            matches = res.json()['result']['matches']
        except Exception:
            # Malformed or incomplete JSON payload; account queries return an
            # empty list, everything else is retried.
            if request['req_type'] == 3:
                return []
            return self.retry_request(request)
        if len(matches) == 0:
            return self.retry_request(request)
        return matches

    def retry_request(self, request, sleep=7):
        #print(request)
        self.errors += 1
        time.sleep(sleep)
        return self.matches_result(self.matches_get(**request))

    def parse_skill(self, response):
        html = response.text
        end_index = html.find(' Skill</dd>')
        if end_index > -1:
            html = html[:end_index]
        else:
            return None
        start_index = html.rfind('<dd>')
        if start_index > -1:
            html = html[start_index + 4:]
        else:
            return None
        return html

    def parse_dota_max(self, response):
        html = response.text
        html_split = html.split('<td><font style="color: #f0a868;">')
        if len(html_split) > 1:
            html = html_split[1]
        else:
            return None
        html_split = html.split('</font></td>')
        if len(html_split) > 1:
            html = html_split[0]
        else:
            return None
        return html

    def parse_opendota_skill(self, response):
        m = response.json()
        if 'skill' not in m:
            return None
        if m['skill'] == 3:
            return 'Very High'
        return m['skill']
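A rough usage sketch (my own illustration, not part of the original class; the match field names follow the public Steam Web API):

api = Dota_API()
# Fetch a batch of matches by sequence number (req_type=1), blocking on the future.
matches = api.matches_result(api.matches_get(req_type=1, n_id=0))
if matches:
    # Ask OpenDota (req_type=6) for the skill bracket of the first match.
    skill = api.matches_result(api.matches_get(req_type=6, n_id=matches[0]['match_id']))
    print(skill)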
Ejemplo n.º 26
0
class AsyncConnection(AbstractConnection):
    def __init__(self,
                 *,
                 base_url,
                 disable_ssl_certificate,
                 token_manager,
                 retries,
                 max_requests_workers=6,
                 proxy_url=None):
        super().__init__(base_url=base_url,
                         disable_ssl_certificate=disable_ssl_certificate,
                         token_manager=token_manager,
                         retries=retries)

        executor = cf.ThreadPoolExecutor(max_workers=max_requests_workers)
        adapter_kwargs = {
            'pool_connections': max_requests_workers,
            'pool_maxsize': max_requests_workers,
            'max_retries': self._retries,
            'pool_block': True
        }
        self._asession = FuturesSession(executor=executor)
        self._asession.mount('https://', HTTPAdapter(**adapter_kwargs))
        self._asession.mount('http://', HTTPAdapter(**adapter_kwargs))
        if proxy_url is not None:
            self._asession.proxies = {
                'http': proxy_url,
                'https': proxy_url,
            }
        self._access_token_lock = Lock()
        self._max_requests_workers = max_requests_workers

    @property
    def executor(self):
        return self._asession.executor

    @property
    def max_request_workers(self):
        return self._max_requests_workers

    def _add_authorization_maybe(self, headers: dict, url: str):
        with self._access_token_lock:
            super()._add_authorization_maybe(headers, url)

    def post(self, path, headers=None, callback=None, data=None, timeout=30.0):
        url = urljoin(self._base_url, path)
        params = {
            'method': 'POST',
            'url': url,
            'headers': headers,
            'data': data,
            'verify': (not self._disable_ssl_certificate),
            'timeout': timeout
        }
        return self._send_request(params, on_finish_callback=callback)

    def put(self, path, headers=None, callback=None, files=None, timeout=30.0):
        url = urljoin(self._base_url, self._encode_spaces(path))
        params = {
            'method': 'PUT',
            'url': url,
            'headers': headers,
            'files': files,
            'verify': (not self._disable_ssl_certificate),
            'timeout': timeout
        }
        return self._send_request(params=params, on_finish_callback=callback)

    def _send_request(self, params, on_finish_callback):
        params['headers'] = params['headers'] or {}
        self._add_authorization_maybe(params['headers'], params['url'])
        self._add_user_agent(params['headers'])
        try:
            token = params['headers']['Authorization'].split(
                'Bearer')[1].strip()
        except KeyError:
            token = None

        def extended_callback(response, *args, **kwargs):
            if response.status_code == 401:
                LOGGER.debug('Got a 401 status')
                skip = self._skip_token_renewal(params['url'])
                if not skip:
                    with self._access_token_lock:  # block concurrent send requests
                        renewed = (token !=
                                   self._token_manager.token.access_token)
                        if renewed:
                            LOGGER.debug('Token already renewed')
                        else:
                            self._renew_token()

            if on_finish_callback:
                on_finish_callback(response)

        c_params = params
        c_params['hooks'] = {'response': extended_callback}
        LOGGER.debug('Making request {} to {}'.format(params['method'],
                                                      params['url']))
        return self._asession.request(**c_params)
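Roughly how a caller might drive this connection (illustrative only; conn is assumed to be an AsyncConnection built elsewhere, since base_url, token_manager and retries come from the surrounding SDK):

def on_done(response):
    # Runs in a worker thread once the response arrives (via the response hook).
    print(response.status_code)

future = conn.post('/some/endpoint',
                   headers={'Content-Type': 'application/json'},
                   data='{"key": "value"}',
                   callback=on_done)
response = future.result()  # blocks until the request completes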
Ejemplo n.º 27
0
class MultiRequest(object):
    """Wraps requests-futures to make simultaneous HTTP requests.

    Can use a RateLimiter to limit # of outstanding requests.
    Can also use an AvailabilityLimiter to cap the total number of request retries issued over the session.
    `multi_get` and `multi_post` try to be smart about how many requests to issue:

    * One url & one param - One request will be made.
    * Multiple url & one query param - Multiple requests will be made, with differing urls and the same query param.
    * Multiple url & multiple query params - Multiple requests will be made, with the same url and differing query params.
    """

    _VERB_GET = 'GET'
    _VERB_POST = 'POST'

    def __init__(
        self,
        default_headers=None,
        max_requests=10,
        rate_limit=0,
        req_timeout=None,
        max_retry=10,
        total_retry=100,
        drop_404s=False,
    ):
        """Create the MultiRequest.

        Args:
            default_headers - A dict of headers which will be added to every request
            max_requests - Maximum number of requests to issue at once
            rate_limit - Maximum number of requests to issue per second
            req_timeout - Maximum number of seconds to wait without reading a response byte before deciding an error has occurred
            max_retry - The total number of attempts to retry a single batch of requests
            total_retry - The total number of request retries that can be made through the entire session
            drop_404s - If True, responses with a 404 status are logged and dropped rather than retried
        Note there is a difference between `max_retry` and `total_retry`:
            - `max_retry` refers to how many times a batch of requests will be re-issued collectively
            - `total_retry` refers to a limit on the total number of outstanding requests made
            Once the latter is exhausted, no failed request within the whole session will be retried.
        """
        self._default_headers = default_headers
        self._max_requests = max_requests
        self._req_timeout = req_timeout or 25.0
        self._max_retry = max_retry
        self._drop_404s = drop_404s
        self._rate_limiter = RateLimiter(rate_limit) if rate_limit else None
        self._availability_limiter = AvailabilityLimiter(
            total_retry) if total_retry else None
        self._session = FuturesSession(max_workers=max_requests)
        retries = Retry(total=0,
                        status_forcelist=[500, 502, 503, 504],
                        raise_on_status=True)
        self._session.mount(
            'https://',
            SSLAdapter(
                max_retries=retries,
                pool_maxsize=max_requests,
                pool_connections=max_requests,
            ),
        )

    def multi_get(self,
                  urls,
                  query_params=None,
                  to_json=True,
                  file_download=False):
        """Issue multiple GET requests.

        Args:
            urls - A string URL or list of string URLs
            query_params - None, a dict, or a list of dicts representing the query params
            to_json - A boolean, should the responses be returned as JSON blobs
            file_download - A boolean, whether a file download is expected

        Returns:
            a list of dicts if to_json is set, a list of requests.Response objects otherwise.
        Raises:
            InvalidRequestError - Can not decide how many requests to issue.
        """
        return self._multi_request(
            MultiRequest._VERB_GET,
            urls,
            query_params,
            data=None,
            to_json=to_json,
            file_download=file_download,
        )

    def multi_post(self,
                   urls,
                   query_params=None,
                   data=None,
                   to_json=True,
                   send_as_file=False):
        """Issue multiple POST requests.

        Args:
            urls - A string URL or list of string URLs
            query_params - None, a dict, or a list of dicts representing the query params
            data - None, a dict or string, or a list of dicts and strings representing the data body.
            to_json - A boolean, should the responses be returned as JSON blobs
            send_as_file - A boolean, should the data be sent as a file.
        Returns:
            a list of dicts if to_json is set, a list of requests.Response objects otherwise.
        Raises:
            InvalidRequestError - Can not decide how many requests to issue.
        """
        return self._multi_request(
            MultiRequest._VERB_POST,
            urls,
            query_params,
            data,
            to_json=to_json,
            send_as_file=send_as_file,
        )

    def _create_request(self,
                        verb,
                        url,
                        query_params=None,
                        data=None,
                        send_as_file=False):
        """Helper method to create a single post/get requests.

        Args:
            verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET
            url - A string URL
            query_params - None or a dict
            data - None or a string or a dict
            send_as_file - A boolean, should the data be sent as a file.
        Returns:
            requests.PreparedRequest
        Raises:
            InvalidRequestError - if an invalid verb is passed in.
        """

        # Prepare a set of kwargs to make it easier to avoid missing default params.
        kwargs = {
            'headers': self._default_headers,
            'params': query_params,
            'timeout': self._req_timeout,
        }

        if MultiRequest._VERB_POST == verb:
            if send_as_file:
                kwargs['files'] = {'file': data}
            else:
                kwargs['data'] = data
            return PreparedRequest(partial(self._session.post, url, **kwargs),
                                   url)
        elif MultiRequest._VERB_GET == verb:
            return PreparedRequest(partial(self._session.get, url, **kwargs),
                                   url)
        else:
            raise InvalidRequestError('Invalid verb {0}'.format(verb))

    def _zip_request_params(self, urls, query_params, data):
        """Massages inputs and returns a list of 3-tuples zipping them up.

        This is all the smarts behind deciding how many requests to issue.
        It's fine for an input to have 0, 1, or a list of values.
        If there are two inputs each with a list of values, the cardinality of those lists must match.

        Args:
            urls - 1 string URL or a list of URLs
            query_params - None, 1 dict, or a list of dicts
            data - None, 1 dict or string, or a list of dicts or strings
        Returns:
            A list of 3-tuples (url, query_param, data)
        Raises:
            InvalidRequestError - if cardinality of lists does not match
        """

        # Everybody gets to be a list
        if not isinstance(urls, list):
            urls = [urls]
        if not isinstance(query_params, list):
            query_params = [query_params]
        if not isinstance(data, list):
            data = [data]

        # Counts must not mismatch
        url_count = len(urls)
        query_param_count = len(query_params)
        data_count = len(data)

        max_count = max(url_count, query_param_count, data_count)

        if (max_count > url_count > 1 or max_count > query_param_count > 1
                or max_count > data_count > 1):
            raise InvalidRequestError(
                'Mismatched parameter count url_count:{0} query_param_count:{1} data_count:{2} max_count:{3}',
                url_count,
                query_param_count,
                data_count,
                max_count,
            )

        # Pad out lists
        if url_count < max_count:
            urls = urls * max_count
        if query_param_count < max_count:
            query_params = query_params * max_count
        if data_count < max_count:
            data = data * max_count

        return list(zip(urls, query_params, data))

    def _wait_for_response(self, requests):
        """Issues a batch of requests and waits for the responses.
        If some of the requests fail it will retry the failed ones up to `_max_retry` times.

        Args:
            requests - A list of requests
        Returns:
            A list of `requests.models.Response` objects
        Raises:
            InvalidRequestError - if any of the requests returns "403 Forbidden" response
        """
        failed_requests = []
        responses_for_requests = OrderedDict.fromkeys(requests)

        for retry in range(self._max_retry):
            try:
                logging.debug('Try #{0}'.format(retry + 1))
                self._availability_limiter.map_with_retries(
                    requests, responses_for_requests)

                failed_requests = []
                for request, response in responses_for_requests.items():
                    if self._drop_404s and response is not None and response.status_code == 404:
                        logging.warning(
                            'Request to {0} failed with status code 404, dropping.'
                            .format(request.url))
                    elif not response:
                        failed_requests.append((request, response))

                if not failed_requests:
                    break

                logging.warning(
                    'Try #{0}. Expected {1} successful response(s) but only got {2}.'
                    .format(
                        retry + 1,
                        len(requests),
                        len(requests) - len(failed_requests),
                    ))

                # retry only for the failed requests
                requests = [fr[0] for fr in failed_requests]
            except InvalidRequestError:
                raise
            except Exception as e:
                # log the exception for informative purposes and move on to the next iteration
                logging.exception(
                    'Try #{0}. Exception occurred: {1}. Retrying.'.format(
                        retry + 1, e))

        if failed_requests:
            logging.warning(
                'Still {0} failed request(s) after {1} retries:'.format(
                    len(failed_requests),
                    self._max_retry,
                ))
            for failed_request, failed_response in failed_requests:
                if failed_response is not None:
                    # in case the response text contains non-ASCII characters
                    failed_response_text = failed_response.text.encode(
                        'ascii', 'xmlcharrefreplace')
                    logging.warning(
                        'Request to {0} failed with status code {1}. Response text: {2}'
                        .format(
                            failed_request.url,
                            failed_response.status_code,
                            failed_response_text,
                        ))
                else:
                    logging.warning(
                        'Request to {0} failed with None response.'.format(
                            failed_request.url))

        return list(responses_for_requests.values())

    def _handle_file_download(self, response):
        name = None
        data = None
        try:
            name = re.findall('filename=(.+)',
                              response.headers['content-disposition'])[0]
            data = urlsafe_b64encode(
                response.text.encode('utf-8')).decode('utf-8')
        except Exception:
            logging.exception('Unable to extract download data for {} '.format(
                response.request.url))
        return {'data': {'id': name, 'text': data}}

    def _convert_to_json(self, response):
        """Converts response to JSON.
        If the response cannot be converted to JSON then `None` is returned.

        Args:
            response - An object of type `requests.models.Response`
        Returns:
            Response in JSON format if the response can be converted to JSON. `None` otherwise.
        """
        try:
            return response.json()
        except ValueError:
            logging.warning(
                'Expected response in JSON format from {0} but the actual response text is: {1}'
                .format(
                    response.request.url,
                    response.text,
                ))
        return None

    def _multi_request(self,
                       verb,
                       urls,
                       query_params,
                       data,
                       to_json=True,
                       send_as_file=False,
                       file_download=False):
        """Issues multiple batches of simultaneous HTTP requests and waits for responses.

        Args:
            verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET
            urls - A string URL or list of string URLs
            query_params - None, a dict, or a list of dicts representing the query params
            data - None, a dict or string, or a list of dicts and strings representing the data body.
            to_json - A boolean, should the responses be returned as JSON blobs
        Returns:
            If multiple requests are made - a list of dicts if to_json, a list of requests responses otherwise
            If a single request is made, the return is not a list
        Raises:
            InvalidRequestError - if no URL is supplied or if any of the requests returns 403 Access Forbidden response
        """
        if not urls:
            raise InvalidRequestError('No URL supplied')

        # Break the params into batches of request_params
        request_params = self._zip_request_params(urls, query_params, data)
        batch_of_params = [
            request_params[pos:pos + self._max_requests]
            for pos in range(0, len(request_params), self._max_requests)
        ]

        # Iteratively issue each batch, applying the rate limiter if necessary
        all_responses = []
        for param_batch in batch_of_params:
            if self._rate_limiter:
                self._rate_limiter.make_calls(num_calls=len(param_batch))

            prepared_requests = [
                self._create_request(
                    verb,
                    url,
                    query_params=query_param,
                    data=datum,
                    send_as_file=send_as_file,
                ) for url, query_param, datum in param_batch
            ]

            responses = self._wait_for_response(prepared_requests)
            for response in responses:
                if response and not file_download:
                    all_responses.append(
                        self._convert_to_json(response
                                              ) if to_json else response)
                elif file_download:
                    all_responses.append(self._handle_file_download(response))
                else:
                    all_responses.append(None)

        return all_responses

    def post_file(self, url, file, to_json=True):
        request = self._create_request(MultiRequest._VERB_POST, url)
        return request

    @classmethod
    def error_handling(cls, fn):
        """Decorator to handle errors"""
        def wrapper(*args, **kwargs):
            try:
                result = fn(*args, **kwargs)
                return result
            except InvalidRequestError as e:
                write_exception(e)

                if hasattr(e, 'request'):
                    write_error_message('request {0}'.format(repr(e.request)))
                if hasattr(e, 'response'):
                    write_error_message('response {0}'.format(repr(
                        e.response)))

                raise e

        return wrapper
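To make the request-counting rules from the class docstring concrete, a short illustrative sketch (the URLs are placeholders):

mr = MultiRequest(default_headers={'User-Agent': 'example'}, max_requests=5, rate_limit=10)

# One URL, several query params -> one request per param dict against the same URL.
lookups = mr.multi_get(
    'https://api.example.com/lookup',
    query_params=[{'q': 'alpha'}, {'q': 'beta'}, {'q': 'gamma'}],
)

# Several URLs, one shared param dict -> one request per URL.
pages = mr.multi_get(
    ['https://api.example.com/a', 'https://api.example.com/b'],
    query_params={'verbose': '1'},
)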
Ejemplo n.º 28
0
class Track(object):

    CRASH_STATS_URL = 'https://crash-stats.mozilla.com'
    SUPERSEARCH_URL = CRASH_STATS_URL + '/api/SuperSearch'
    TIMEOUT = 5
    MAX_RETRIES = 5
    MAX_WORKERS = multiprocessing.cpu_count()
    HG_PATTERN = re.compile('hg:hg.mozilla.org/mozilla-central:([^:]*):([a-z0-9]+)')
    
    def __init__(self, signature, day, day_delta = 1, credentials = None):
        self.results = [ ]
        self.credentials = credentials
        self.has_results = False
        self.day_delta = day_delta
        self.signature = signature
        self.info = { }
        self.date = utils.get_date_ymd(day)
        self.session = FuturesSession(max_workers = self.MAX_WORKERS)
        self.session.mount(self.CRASH_STATS_URL, HTTPAdapter(max_retries = self.MAX_RETRIES))
        self.__get_info()

    def get(self):
        if not self.has_results:
            for r in self.results:
                r.result()
            self.has_results = True
        return self.info

    def has_addons(self):
        return len(self.get()['addons']) != 0

    def __get_apikey(self):
        if self.credentials:
            return self.credentials['tokens'][self.CRASH_STATS_URL]
        else:
            return ''

    @staticmethod
    def __get_stats(info, field):
        l = info[field]
        total = float(info['total'])
        stats = { }
        for e in l:
            stats[e['term']] = utils.percent(float(e['count']) / total)
        return stats
    
    @staticmethod
    def __get_system_memory_use_mean(info):
        l = info['system_memory_use_percentage']
        total = float(info['total'])
        l = [(float(e['count']) / total, float(e['term'])) for e in l]  
        m = 0.
        for e in l:
            m += e[0] * e[1]

        v = 0.
        for e in l:
            v += e[0] * (m - e[1]) ** 2

        return {'mean': utils.simple_percent(round(m, 0)), 'stddev': utils.simple_percent(round(math.sqrt(v), 0))}

    @staticmethod
    def __is_weird_address(addr, cpu_name):
        if addr == '0x0':
            return True
        if utils.is64(cpu_name):
            if len(addr) <= 10:
                val = long(addr, 16)
                if val <= 1L << 16: # val <= 0xffff (ie: first 64k)
                    return True
            elif addr.startswith('0xffffffff'):
                addr = addr[10:] # 10 == len('0xffffffff')
                val = long(addr, 16)
                if val >= ((1L << 32) - (1L << 16)): # val >= 0xfffffffffff0000 (ie: last 64k)
                    return True
Ejemplo n.º 29
0
    it is simply not checked
    
    Generously provided by Juan Luis Boya
    """
    def cert_verify(self, conn, *args, **kwargs):
        """
        Avoids the verification of the SSL Hostname field

        :param Connection conn: The connection object
        """
        super(NotCheckingHostnameHTTPAdapter, self).cert_verify(conn, *args, **kwargs)
        conn.assert_hostname = False

# By changing the adapter no hostname is checked
futures_session = FuturesSession()
futures_session.mount('https://', NotCheckingHostnameHTTPAdapter())

# Create the temporary directory if it does not exist
if not os.path.exists(conf.TMPDIR):
    os.makedirs(conf.TMPDIR)

__UPLOADS__ = conf.TMPDIR  # temporary directory where uploaded files will be stored

open_ws = set()  # set of the currently alive websockets

class BaseHandler(RequestHandler):
    """
    The base class which the rest of HTTP handlers extends.
    Provides secure cookie decryption and error handling
    """
    def get_current_user(self):
Ejemplo n.º 30
0
class BaseConnection(object):
    """Base Connection Class."""
    def __init__(self,
                 debug=False,
                 method='GET',
                 proxy_host=None,
                 timeout=20,
                 proxy_port=80,
                 parallel=None,
                 escape_xml=False,
                 **kwargs):

        if debug:
            set_stream_logger()

        self.response = None
        self.request = None
        self.verb = None
        self.config = None
        self.debug = debug
        self.method = method
        self.timeout = timeout
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.escape_xml = escape_xml
        self.datetime_nodes = []
        self._list_nodes = []

        self.proxies = dict()
        if self.proxy_host:
            proxy = 'http://%s:%s' % (self.proxy_host, self.proxy_port)
            self.proxies = {'http': proxy, 'https': proxy}

        self.session = FuturesSession()
        self.session.mount('http://', HTTPAdapter(max_retries=3))
        self.session.mount('https://', HTTPAdapter(max_retries=3))

        self.parallel = parallel

        self.base_list_nodes = []
        self.datetime_nodes = []

        self._reset()

    def debug_callback(self, debug_type, debug_message):
        log.debug('type: ' + str(debug_type) + ' message: ' + str(debug_message))

    def v(self, *args, **kwargs):
        return getValue(self.response.dict(), *args, **kwargs)

    def getNodeText(self, nodelist):
        return getNodeTextUtils(nodelist)

    def _reset(self):
        self.response = None
        self.request = None
        self.verb = None
        self._list_nodes = []
        self._request_id = None
        self._request_dict = {}
        self._time = time.time()
        self._response_content = None
        self._response_dom = None
        self._response_obj = None
        self._response_soup = None
        self._response_dict = None
        self._response_error = None
        self._resp_body_errors = []
        self._resp_body_warnings = []
        self._resp_codes = []

    def _add_prefix(self, nodes, verb):
        if verb:
            for i, v in enumerate(nodes):
                if not nodes[i].startswith(verb.lower()):
                    nodes[i] = "%sresponse.%s" % (verb.lower(),
                                                  nodes[i].lower())

    def execute(self,
                verb,
                data=None,
                list_nodes=[],
                verb_attrs=None,
                files=None):
        "Executes the HTTP request."
        log.debug('execute: verb=%s data=%s' % (verb, data))

        self._reset()

        self._list_nodes += list_nodes
        self._add_prefix(self._list_nodes, verb)

        if hasattr(self, 'base_list_nodes'):
            self._list_nodes += self.base_list_nodes

        self.build_request(verb, data, verb_attrs, files)
        self.execute_request()

        if hasattr(self.response, 'content'):
            self.process_response()
            self.error_check()

        log.debug('total time=%s' % (time.time() - self._time))

        return self.response

    def build_request(self, verb, data, verb_attrs, files=None):

        self.verb = verb
        self._request_dict = data
        self._request_id = uuid.uuid4()

        url = self.build_request_url(verb)

        headers = self.build_request_headers(verb)
        headers.update({
            'User-Agent': UserAgent,
            'X-EBAY-SDK-REQUEST-ID': str(self._request_id)
        })

        # if we are adding files, we ensure there is no Content-Type header already defined
        # otherwise Request will use the existing one which is likely not to be multipart/form-data
        # data must also be a dict so we make it so if needed

        requestData = self.build_request_data(verb, data, verb_attrs)
        if files:
            headers.pop('Content-Type', None)
            if isinstance(requestData, str):  # pylint: disable-msg=E0602
                requestData = {'XMLPayload': requestData}

        request = Request(
            self.method,
            url,
            data=smart_encode_request_data(requestData),
            headers=headers,
            files=files,
        )

        self.request = request.prepare()

    def build_request_headers(self, verb):
        return {}

    def build_request_data(self, verb, data, verb_attrs):
        return ""

    def build_request_url(self, verb):
        url = "%s://%s%s" % (HTTP_SSL[self.config.get('https', False)],
                             self.config.get('domain'), self.config.get('uri'))
        return url

    def execute_request(self):

        log.debug("REQUEST (%s): %s %s" %
                  (self._request_id, self.request.method, self.request.url))
        log.debug('headers=%s' % self.request.headers)
        log.debug('body=%s' % self.request.body)

        if self.parallel:
            self.parallel._add_request(self)
            return None

        self.response = self.session.send(self.request,
                                          verify=True,
                                          proxies=self.proxies,
                                          timeout=self.timeout,
                                          allow_redirects=True)

        log.debug('RESPONSE (%s):' % self._request_id)
        log.debug('elapsed time=%s' % self.response.elapsed)
        log.debug('status code=%s' % self.response.status_code)
        log.debug('headers=%s' % self.response.headers)
        log.debug('content=%s' % self.response.text)

    def process_response(self, parse_response=True):
        """Post processing of the response"""

        self.response = Response(self.response,
                                 verb=self.verb,
                                 list_nodes=self._list_nodes,
                                 datetime_nodes=self.datetime_nodes,
                                 parse_response=parse_response)

        self.session.close()
        # set for backward compatibility
        self._response_content = self.response.content

        if self.response.status_code != 200:
            self._response_error = self.response.reason

    def error_check(self):
        estr = self.error()

        if estr and self.config.get('errors', True):
            log.error(estr)
            raise ConnectionError(estr, self.response)

    def response_codes(self):
        return self._resp_codes

    def response_status(self):
        "Returns the HTTP response status string."

        return self.response.reason

    def response_code(self):
        "Returns the HTTP response status code."

        return self.response.status_code

    def response_content(self):
        return self.response.content

    def response_soup(self):
        "Returns a BeautifulSoup object of the response."

        if not self._response_soup:
            try:
                from bs4 import BeautifulStoneSoup
            except ImportError:
                from BeautifulSoup import BeautifulStoneSoup
                log.warn(
                    'DeprecationWarning: BeautifulSoup 3 or earlier is deprecated; install bs4 instead\n'
                )

            self._response_soup = BeautifulStoneSoup(
                smart_decode(self.response_content()))

        return self._response_soup

    def response_obj(self):
        log.warn('response_obj() DEPRECATED, use response.reply instead')
        return self.response.reply

    def response_dom(self):
        """ Deprecated: use self.response.dom() instead
        Returns the response DOM (xml.dom.minidom).
        """
        log.warn('response_dom() DEPRECATED, use response.dom instead')

        if not self._response_dom:
            dom = None
            content = None

            try:
                if self.response.content:
                    regex = re.compile(b'xmlns="[^"]+"')
                    content = regex.sub(b'', self.response.content)
                else:
                    content = "<%sResponse></%sResponse>" % (self.verb,
                                                             self.verb)

                dom = parseString(content)
                self._response_dom = dom.getElementsByTagName(self.verb +
                                                              'Response')[0]

            except ExpatError as e:
                raise ConnectionResponseError(
                    "Invalid Verb: %s (%s)" % (self.verb, e), self.response)
            except IndexError:
                self._response_dom = dom

        return self._response_dom

    def response_dict(self):
        "Returns the response dictionary."
        log.warn(
            'response_dict() DEPRECATED, use response.dict() or response.reply instead'
        )

        return self.response.reply

    def response_json(self):
        "Returns the response JSON."
        log.warn('response_json() DEPRECATED, use response.json() instead')

        return self.response.json()

    def _get_resp_body_errors(self):
        """Parses the response content to pull errors.

        Child classes should override this method based on what the errors in the
        XML response body look like. They can choose to look at the 'ack',
        'Errors', 'errorMessage' or whatever other fields the service returns.
        the implementation below is the original code that was part of error()
        """

        if self._resp_body_errors and len(self._resp_body_errors) > 0:
            return self._resp_body_errors

        errors = []

        if self.verb is None:
            return errors

        dom = self.response.dom()
        if dom is None:
            return errors

        return []

    def error(self):
        "Builds and returns the api error message."

        error_array = []
        if self._response_error:
            error_array.append(self._response_error)

        error_array.extend(self._get_resp_body_errors())

        if len(error_array) > 0:
            # Force all errors to be unicode in a proper way
            error_array = [smart_decode(smart_encode(e)) for e in error_array]
            error_string = u"{verb}: {message}".format(
                verb=self.verb, message=u", ".join(error_array))

            return error_string

        return None

    def opendoc(self):
        webbrowser.open(self.config.get('doc_url'))
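For orientation, a very rough sketch of how a subclass typically specializes the build_* hooks (the header names and XML payload below are assumptions, not the library's actual protocol):

class MyConnection(BaseConnection):
    def build_request_headers(self, verb):
        # Hypothetical headers; a real service defines its own.
        return {'Content-Type': 'text/xml', 'X-API-CALL-NAME': verb}

    def build_request_data(self, verb, data, verb_attrs):
        # Serialize the request dict into whatever XML body the service expects.
        return '<?xml version="1.0" encoding="utf-8"?><%sRequest/>' % verb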
Ejemplo n.º 31
0
class BZInfo(object):

    BZ_URL = 'https://bugzilla.mozilla.org'
    API_URL = BZ_URL + '/rest/bug'
    TIMEOUT = 60
    MAX_RETRIES = 5
    MAX_WORKERS = multiprocessing.cpu_count()
    CHUNK_SIZE = 8
    
    def __init__(self, bugids, credentials = None):
        self.results = [ ]
        self.credentials = credentials
        self.bugids = map(str, bugids)
        self.info = { }
        for bugid in self.bugids:
            self.info[bugid] = { 'ownership': [],
                                 'reviewers': set(),
                                 'commenters': { },
                                 'authorized': False }
        self.session = FuturesSession(max_workers = self.MAX_WORKERS)
        self.session.mount(self.BZ_URL, HTTPAdapter(max_retries = self.MAX_RETRIES))
        self.reply_pattern = re.compile(r'^\(In reply to .* comment #([0-9]+)\)')
        self.dupbug_pattern = re.compile(r'\*\*\* Bug [0-9]+ has been marked as a duplicate of this bug. \*\*\*')
        self.review_pattern = re.compile(r'review\?\(([^\)]+)\)')
        self.needinfo_pattern = re.compile(r'needinfo\?\(([^\)]+)\)')
        self.feedback_pattern = re.compile(r'feedback\?\(([^\)]+)\)')
        self.__get_info()
        self.__analyze_history()
        self.__analyze_comment()

    def get(self):
        for r in self.results:
            r.result()
        return self.info

    def get_best_collaborator(self):
        # A collaboration between A & B is when A reviews a patch of B (or vice versa).
        # In terms of a graph:
        #   - each node represents a reviewer or a writer (owner)
        #   - each edge represents a collaboration
        # Here we count the degree of each node and find out who the best collaborator is.
        # TODO: use this graph to get other metrics (??)

        # It could be interesting to weight each contribution according to its date:
        # someone who made 20 contribs recently is probably better than someone who made
        # 50 contribs two years ago...
        # TODO: We could weight a contrib with a gaussian which depends on the time
        
        collaborations = { }
        for info in self.get().itervalues():
            if info['authorized']:
                owner = info['owner']
                if owner not in collaborations:
                    collaborations[owner] = 0
                reviewers = info['reviewers']
                feedbacks = info['feedbacks']
                collabs = set()
                if reviewers and owner in reviewers:
                    collabs |= reviewers[owner]
                if feedbacks and owner in feedbacks:
                    collabs |= feedbacks[owner]
                if collabs:
                    collaborations[owner] += len(collabs)
                    for person in collabs:
                        collaborations[person] = collaborations[person] + 1 if person in collaborations else 1
 
        # maybe we should compute the percentage of collaborations just to give an idea
 
        return utils.get_best(collaborations)

    def get_best_component_product(self):
        # Just get stats on components and products
        comps_prods = { }
        for info in self.get().itervalues():
            if info['authorized']:
                comp_prod = (info['component'], info['product'])
                comps_prods[comp_prod] = comps_prods[comp_prod] + 1 if comp_prod in comps_prods else 1

        if comps_prods:
            return utils.get_best(comps_prods)
        else:        
            return None

    def __get_apikey(self):
        if self.credentials:
            return self.credentials['tokens'][self.BZ_URL]
        else:
            return ''

    def __info_cb(self, sess, res):
        bugs = res.json()['bugs']
        for bug in bugs:
            self.info[str(bug['id'])].update({ 'authorized': True,
                                               'severity': bug['severity'],
                                               'votes': bug['votes'],
                                               'component': bug['component'],
                                               'product': bug['product'],
                                               'nbcc': len(bug['cc']),
                                               'reporter': bug['creator'],
                                               'owner': bug['assigned_to_detail']['email']})

    def __get_info(self):
        def chunks():
            for i in range(0, len(self.bugids), self.CHUNK_SIZE):
                yield self.bugids[i:(i + self.CHUNK_SIZE)]
                
        for bugids in chunks():
            bugids = ','.join(map(str, bugids))
            self.results.append(self.session.get(self.API_URL,
                                                 params = {'api_key': self.__get_apikey(),
                                                           'id': bugids},
                                                 verify = True,
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__info_cb))

    def __history_cb(self, sess, res):
        if res.status_code == 200:
            json = res.json()
            ownership = []
            reviewers = { }
            feedbacks = { }
            if 'bugs' in json and json['bugs']: 
                bug = json['bugs'][0]
                bugid = str(bug['id'])
                history = bug['history']
                for h in history:
                    who = h['who']
                    owner = None
                    changes = h['changes']
                    for change in changes:
                        nam = change['field_name']
                        rem = change['removed']
                        add = change['added']

                        if nam == 'status':
                            if rem == 'NEW' and add == 'ASSIGNED':
                                owner = who
                        elif nam == 'assigned_to':
                            owner = add
                        elif nam == 'flagtypes.name':
                            # Get the reviewers
                            for m in self.review_pattern.finditer(add):
                                if who in reviewers:
                                    reviewers[who].add(m.group(1))
                                else:
                                    reviewers[who] = set([m.group(1)])
 
                            # Get people pinged for feedback
                            for m in self.feedback_pattern.finditer(add):
                                if who in feedbacks:
                                    feedbacks[who].add(m.group(1))
                                else:
                                    feedbacks[who] = set([m.group(1)])

                    if owner and (not ownership or ownership[-1]['owner'] != owner):
                        ownership.append({ 'owner': owner,
                                           'touch_by': who,
                                           'touch_when': h['when']} )

                self.info[bugid].update({ 'ownership': ownership,
                                          'reviewers': reviewers,
                                          'feedbacks': feedbacks})
                
    def __analyze_history(self):
        for bugid in self.bugids:
            self.results.append(self.session.get(self.API_URL + '/' + bugid + '/history',
                                                 params = { 'api_key': self.__get_apikey() },
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__history_cb))

    def __comment_cb(self, sess, res):
        if res.status_code == 200:
            json = res.json()
            if 'bugs' in json:
                bugs = json['bugs']
                keys = bugs.keys()
                bugid = ''
                for key in keys:
                    if isinstance(key, basestring) and key.isdigit():
                        bugid = key
                        break
                if bugid:
                    commenters = { }
                    bug = bugs[bugid]
                    if 'comments' in bug:
                        comments = bug['comments']
                        authors = []
                        for comment in comments:
                            text = comment['text']
                            if not self.dupbug_pattern.match(text):
                                author = comment['author']
                                authors.append(author)
                                if author not in commenters:
                                    commenters[author] = []

                                for m in self.reply_pattern.finditer(comment['raw_text']):
                                    n = int(m.group(1))
                                    if n >= 1 and n <= len(authors):
                                        commenters[authors[n - 1]].append(author)

                        self.info[bugid].update({'commenters': commenters})

    def __analyze_comment(self):
        for bugid in self.bugids:
            self.results.append(self.session.get(self.API_URL + '/' + bugid + '/comment',
                                                 params = {'api_key': self.__get_apikey()},
                                                 timeout = self.TIMEOUT,
                                                 background_callback = self.__comment_cb))
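A minimal usage sketch (the bug IDs are placeholders): constructing the object fires off the batched requests, and get() blocks on the outstanding futures:

bz = BZInfo([1094814, 1094815])
info = bz.get()  # waits for all info/history/comment futures
print(bz.get_best_collaborator())
print(bz.get_best_component_product())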
Ejemplo n.º 32
0
    def retrieve(self, catalog, *, dry_run=False, media_type=''):
        if not dry_run:
            distributions = Distribution.objects.filter(
                division_id=catalog.division_id, http_status_code__isnull=True)

            if media_type:
                distributions = distributions.filter(mediaType=media_type)

            if not distributions.exists():
                return

            # Collect the distribution-response pairs.
            def callback(distribution, response):
                results.append([distribution, response])

            # Create a closure.
            def factory(distribution):
                return lambda session, response: callback(
                    distribution, response)

            # @see http://docs.python-requests.org/en/latest/api/#requests.adapters.HTTPAdapter
            # @see https://github.com/ross/requests-futures/blob/master/requests_futures/sessions.py
            session = FuturesSession()
            # Avoids "Connection pool is full, discarding connection" warnings.
            adapter_kwargs = {'pool_maxsize': 10}
            session.mount('https://',
                          requests.adapters.HTTPAdapter(**adapter_kwargs))
            session.mount('http://',
                          requests.adapters.HTTPAdapter(**adapter_kwargs))

            # @see https://djangosnippets.org/snippets/1949/
            pk = 0
            last_pk = distributions.order_by('-pk')[0].pk
            distributions = distributions.order_by('pk')
            while pk < last_pk:
                # @see https://github.com/ross/requests-futures/issues/18
                # @see https://github.com/ross/requests-futures/issues/5
                futures = []
                results = []

                # If an exception occurs, we lose progress on at most 100 requests.
                for distribution in distributions.filter(pk__gt=pk)[:100]:
                    pk = distribution.pk

                    # @see http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
                    # @see http://stackoverflow.com/a/845595/244258
                    futures.append(
                        session.get(quote(distribution.accessURL,
                                          safe="%/:=&?~#+!$,;'@()*[]"),
                                    stream=True,
                                    verify=False,
                                    background_callback=factory(distribution)))

                for future in futures:
                    try:
                        future.result()
                    except (requests.exceptions.ConnectionError,
                            requests.exceptions.InvalidSchema,
                            requests.exceptions.InvalidURL,
                            requests.exceptions.MissingSchema,
                            requests.exceptions.ReadTimeout,
                            requests.exceptions.SSLError,
                            requests.exceptions.TooManyRedirects,
                            requests.packages.urllib3.exceptions.ProtocolError
                            ):
                        self.exception('')

                for distribution, response in results:
                    status_code = response.status_code
                    charset = ''

                    content_length = response.headers.get('content-length')
                    if content_length:
                        content_length = int(content_length)

                    # @see https://github.com/kennethreitz/requests/blob/b137472936cbe6a6acabab538c1d05ed4c7da638/requests/utils.py#L308
                    content_type = response.headers.get('content-type', '')
                    if content_type:
                        content_type, params = cgi.parse_header(content_type)
                        if 'charset' in params:
                            charset = params['charset'].strip("'\"")

                    distribution.http_headers = dict(response.headers)
                    distribution.http_status_code = status_code
                    distribution.http_content_length = content_length
                    distribution.http_content_type = content_type
                    distribution.http_charset = charset
                    distribution.save()

                    self.debug('{} {} {}'.format(
                        status_code, number_to_human_size(content_length),
                        content_type))

                    response.close()
        self.info('{} done'.format(catalog))
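The factory closure above exists because requests-futures background callbacks only receive (session, response); a stand-alone sketch of the same pattern, assuming a plain FuturesSession and a placeholder URL:

results = []

def make_callback(item):
    # Bind `item` so the two-argument background callback can pair the
    # response with the object that triggered the request.
    return lambda session, response: results.append((item, response))

session = FuturesSession()
future = session.get('http://example.com',
                     background_callback=make_callback('some-item'))
future.result()  # after this, results holds ('some-item', <Response>)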
Ejemplo n.º 33
0
)
# Enable For Debugging:
logging.disable(logging.INFO)

DELAY = .05  # Second delay between calculating trades.
RGAP = .005 # Max gap before cancelling a robux split trade
TGAP = .0025 # Max gap before cancelling a tix split trade
TRADE_LAG_TIME = 1.25 # Estimate of how long it takes for Roblox to process our requests
RESET_TIME = 240 # Number of seconds the bot goes without trading before resetting last rates to be able to trade again (might result in loss)
DEQUE_SIZE = 15 # Max number of past trade rates to keep track of to prevent money loss
NUM_TRADES = 19 # Number of trades that display on the trade currency page
# Initializing requests.Session for frozen application
os.environ["REQUESTS_CA_BUNDLE"] = find_data_file('cacert.pem')
session = FuturesSession(max_workers=15)
adapter = requests.adapters.HTTPAdapter(max_retries=Retry(total=20,connect=10,read=10,backoff_factor=.5))
session.mount("http://", adapter)
session.mount("https://", adapter)
# Storing variables since they can't be stored in QObject
rates = DottedDict(
    dict(
        last_tix_rate = 0,
        last_robux_rate = 0,
        current_tix_rate = 0,
        current_robux_rate = 0,
        past_tix_rates = deque(maxlen=DEQUE_SIZE),
        past_robux_rates = deque(maxlen=DEQUE_SIZE),
    )
)

class Trader(QtCore.QObject):
Ejemplo n.º 34
0
# Enable For Debugging:
logging.disable(logging.INFO)

DELAY = .05  # Second delay between calculating trades.
RGAP = .005  # Max gap before cancelling a robux split trade
TGAP = .0025  # Max gap before cancelling a tix split trade
TRADE_LAG_TIME = 1.25  # Estimate of how long it takes for Roblox to process our requests
RESET_TIME = 240  # Number of seconds the bot goes without trading before resetting last rates to be able to trade again (might result in loss)
DEQUE_SIZE = 15  # Max number of past trade rates to keep track of to prevent money loss
NUM_TRADES = 19  # Number of trades that display on the trade currency page
# Initializing requests.Session for frozen application
os.environ["REQUESTS_CA_BUNDLE"] = find_data_file('cacert.pem')
session = FuturesSession(max_workers=15)
adapter = requests.adapters.HTTPAdapter(
    max_retries=Retry(total=20, connect=10, read=10, backoff_factor=.5))
session.mount("http://", adapter)
session.mount("https://", adapter)
# Storing variables since they can't be stored in QObject
rates = DottedDict(
    dict(
        last_tix_rate=0,
        last_robux_rate=0,
        current_tix_rate=0,
        current_robux_rate=0,
        past_tix_rates=deque(maxlen=DEQUE_SIZE),
        past_robux_rates=deque(maxlen=DEQUE_SIZE),
    ))


class Trader(QtCore.QObject):
    def __init__(self, currency):
Ejemplo n.º 35
0
class ResourceSyncPuSH(object):
    """
    The base class for the publisher, hub and resource. Contains
    methods for reading config files, making http requests, error handling,
    etc.
    """
    def __init__(self):
        """
        Initializes the requests-futures session with the
        max number of workers and retries.
        """

        # max workers and retries should be configurable?
        self.session = FuturesSession(max_workers=10)
        adapter = HTTPAdapter(max_retries=3)
        self.session.mount("http://", adapter)
        self._start_response = None

        # config parameters
        self.config = {}
        self.config['log_mode'] = ""
        self.config['mimetypes'] = []
        self.config['trusted_publishers'] = []
        self.config['trusted_topics'] = []
        self.config['my_url'] = ""
        self.config['hub_url'] = ""
        self.config['topic_url'] = ""
        self.config['subscribers_file'] = ""
        self.config['server_path'] = ""

        # logging messages
        self.log_msg = {}
        self.log_msg['payload'] = ""
        self.log_msg['msg'] = []
        self.log_msg['link_header'] = ""
        self.log_msg['module'] = ""

    def get_config(self, classname=None):
        """
        Finds and reads the config file. Reads the appropriate config values
        for the classname provided. For example, if the classname is hub, it will
        read from the [hub] section in the config file.
        """

        if not classname:
            classname = self.__class__.__name__.lower()

        self.log_msg['module'] = classname

        # NOTE: more paths can be added to look for the config files.
        # order of files matter, the config in the first file
        # will be overwritten by the values in the next file.
        cnf_file = []
        cnf_file.extend([
            os.path.join(os.path.dirname(__file__),
                         "../conf/resourcesync_push.ini"),
            "/etc/resourcesync_push.ini",
            "/etc/resourcesync_push/resourcesync_push.ini",
        ])

        # loading values from configuration file
        conf = ConfigParser.ConfigParser()
        read_files = conf.read(cnf_file)
        if not read_files:
            raise IOError("Unable to read config file")

        if classname == "hub":
            self.get_hub_config(conf)
        elif classname == "publisher":
            self.get_publisher_config(conf)
        elif classname == "subscriber":
            try:
                self.config['my_url'] = conf.get("subscriber", "url")
            except (NoSectionError, NoOptionError):
                print("The url value for subscriber is required "
                      "in the config file.")
                raise

        self.get_demo_config(conf)

    def get_demo_config(self, conf):
        """
        Reads the [demo_hub] section from the config file if the
        log mode is set to 'demo'.
        """
        try:
            self.config['log_mode'] = conf.get("general", "log_mode")
        except (NoSectionError, NoOptionError):
            pass

        if not self.config['log_mode'] == "demo":
            return

        try:
            self.config['demo_hub_url'] = conf.get("demo_mode", "hub_url")
        except (NoSectionError, NoOptionError):
            print("Demo log mode requires a hub_url in the \
                  [demo_mode] section")
            raise

        try:
            self.config['demo_topic_url'] = conf.get("demo_mode", "topic_url")
        except (NoSectionError, NoOptionError):
            print("Demo log mode requires a topic_url in the \
                  [demo_mode] section")
            raise
        return

    def get_hub_config(self, conf):
        """
        Reads the [hub] section from the config file.
        """

        try:
            self.config['mimetypes'] = conf.get("hub", "mimetypes")
        except (NoSectionError, NoOptionError):
            # resourcesync hub by default
            self.config['mimetypes'] = "application/xml"

        try:
            self.config['trusted_publishers'] = conf.get(
                "hub", "trusted_publishers")
        except (NoSectionError, NoOptionError):
            # will allow any publisher
            self.config['trusted_publishers'] = []

        try:
            self.config['trusted_topics'] = conf.get("hub", "trusted_topics")
        except (NoSectionError, NoOptionError):
            # will accept any topic
            self.config['trusted_topics'] = []

        try:
            self.config['my_url'] = conf.get("hub", "url")
        except (NoSectionError, NoOptionError):
            print("The url value for hub is required in the config file.")
            raise

        self.config['subscribers_file'] = os.path.join(
            os.path.dirname(__file__), "../db/subscriptions.pk")
        try:
            self.config['subscribers_file'] = conf.get("hub",
                                                       "subscribers_file")
        except (NoSectionError, NoOptionError):
            pass

        if not os.path.isfile(self.config['subscribers_file']):
            open(self.config['subscribers_file'], 'a').close()

        return

    def get_publisher_config(self, conf):
        """
        Reads the [publisher] section in the config file.
        """

        try:
            self.config['my_url'] = conf.get("publisher", "url")
        except (NoSectionError, NoOptionError):
            print("The url value for publisher is required \
                  in the config file.")
            raise

        try:
            self.config['server_path'] = conf.get("publisher", "server_path")
        except (NoSectionError, NoOptionError):
            pass

        try:
            self.config['hub_url'] = conf.get("publisher", "hub_url")
        except (NoSectionError, NoOptionError):
            print("The hub_url value for publisher is required \
                  in the config file.")
            raise

        try:
            self.config['topic_url'] = conf.get("publisher", "topic_url")
        except (NoSectionError, NoOptionError):
            print("The topic_url value for publisher is required \
                  in the config file.")
            raise

    def send(self, url, method='POST', data=None, callback=None, headers=None):
        """
        Performs http post and get requests. Uses futures-requests
        to make (threaded) async requests.
        """

        if method == 'POST':
            return self.session.post(url,
                                     data=data,
                                     background_callback=callback,
                                     headers=headers)
        elif method == 'GET':
            return self.session.get(url, headers=headers)
        elif method == 'HEAD':
            return self.session.head(url, headers=headers)
        else:
            return

    def respond(self, code=200, msg="OK", headers=None):
        """
        Sends the appropriate http status code with an
        error message.
        """

        print("HTTP %s: %s" % (code, msg))

        if not headers:
            headers = []
        if not str(code) == "204":
            headers.append(("Content-Type", "text/html"))

        code = str(code) + " " + HTTP_STATUS_CODE[code]

        self._start_response(code, headers)
        return [msg]

    @staticmethod
    def get_topic_hub_url(link_header):
        """
        Uses the parse_header_links method in requests to parse link
        headers and return the topic and hub urls.
        """

        links = parse_header_links(link_header)
        topic = ""
        hub_url = ""
        for link in links:
            if link.get('rel') == 'self':
                topic = link.get('url')
            elif link.get('rel') == 'hub':
                hub_url = link.get('url')
        return (topic, hub_url)

    def make_link_header(self, hub_url=None, topic_url=None):
        """
        Constructs the resourcesync link header.
        """

        if not hub_url or not topic_url:
            return self.respond(code=400,
                                msg="hub and topic urls are not set \
                                in config file.")
        link_header = []
        link_header.extend(["<", topic_url, ">;rel=", "self", ","])
        link_header.extend([" <", hub_url, ">;rel=", "hub"])
        return "".join(link_header)

    def log(self):
        """
        Log handler. Will send the log info as json to the
        demo hub if log_mode value is set to demo in the config file.
        """
        if self.config['log_mode'] == 'demo':
            headers = {}
            headers['Link'] = self.make_link_header(
                hub_url=self.config['demo_hub_url'],
                topic_url=self.config['demo_topic_url'])
            self.send(self.config['demo_hub_url'],
                      data=json.dumps(self.log_msg),
                      headers=headers)
        else:
            print(self.log_msg)
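
For context, a minimal usage sketch of the send() helper above, assuming the class is importable with its module-level imports and an older requests-futures release that still supports background_callback; the hub URL, payload, and callback are placeholders, not part of the original example:

# Hedged usage sketch: publish a change notification through ResourceSyncPuSH.send().
def on_response(session, response):
    # background_callback runs on a worker thread once the response arrives
    print("hub answered with HTTP", response.status_code)

pusher = ResourceSyncPuSH()
future = pusher.send("http://localhost:8080/hub",          # placeholder hub endpoint
                     method='POST',
                     data="<urlset>...</urlset>",           # placeholder payload
                     callback=on_response,
                     headers={'Content-Type': 'application/xml'})
future.result()  # block until the async POST completes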
Ejemplo n.º 36
0
class HTTPDriver(BaseDriver):
  """HTTPDriver

  The :class:`HTTPDriver` class reads SBP messages from an HTTP
  service for a device and writes out to a stream. This driver is like
  a file-handle with reads and writes over two separate HTTP
  connections, but can also be enabled and disabled by its consumer.

  Parameters
  ----------
  device_uid : uid
    Device unique id
  url : str
    HTTP endpoint
  retries : tuple
    Configure connect and read retry count. Defaults to
    (MAX_CONNECT_RETRIES, MAX_READ_RETRIES).
  timeout : tuple
    Configure connect and read timeouts. Defaults to
    (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT).

  """

  def __init__(self,
               device_uid=None,
               url="https://broker.staging.skylark.swiftnav.com",
               retries=DEFAULT_RETRIES,
               timeout=DEFAULT_TIMEOUT,):
    self._retry = Retry(connect=DEFAULT_RETRIES[0],
                        read=DEFAULT_RETRIES[1],
                        redirect=MAX_REDIRECTS,
                        status_forcelist=[500],
                        backoff_factor=DEFAULT_BACKOFF_FACTOR)
    self.url = url
    self.read_session = requests.Session()
    self.read_session.mount("http://",
                            HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                                        pool_maxsize=DEFAULT_POOLSIZE,
                                        pool_block=DEFAULT_POOLBLOCK,
                                        max_retries=self._retry))
    self.read_session.mount("https://",
                            HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                                        pool_maxsize=DEFAULT_POOLSIZE,
                                        pool_block=DEFAULT_POOLBLOCK,
                                        max_retries=self._retry))
    self.write_session = None
    self.device_uid = device_uid
    self.timeout = timeout
    self.read_response = None
    self.write_response = None
    self.source = None

  def flush(self):
    """File-flush wrapper (noop).

    """
    pass

  def close(self):
    """File-handle close wrapper (noop).

    """
    try:
      self.read_close()
      self.write_close()
    except:
      pass

  @property
  def write_ok(self):
    """
    Are we connected for writes?
    """
    # Note that self.write_response is either None or a Response
    # object, which cast to False for 4xx and 5xx HTTP codes.
    return bool(self.write_response)

  def connect_write(self, source, whitelist, device_uid=None, pragma=None):
    """Initialize a streaming write HTTP response. Manually connects the
    underlying file-handle. In the event of a network disconnection,
    use to manually reinitiate an HTTP session.

    Parameters
    ----------
    source : sbp.client.handler.Handler
      Iterable source of SBP messages.
    whitelist : [int]
      Whitelist of messages to write

    """
    header_device_uid = device_uid or self.device_uid
    headers = {'Device-Uid': header_device_uid, 'Content-Type': BROKER_SBP_TYPE, 'Pragma': pragma}
    if not pragma:
      del headers['Pragma']
    try:
      self.executor = ThreadPoolExecutor(max_workers=DEFAULT_POOLSIZE)
      self.write_session = FuturesSession(executor=self.executor)
      self.write_session.mount("http://",
                               HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                                           pool_maxsize=DEFAULT_POOLSIZE,
                                           pool_block=DEFAULT_POOLBLOCK,
                                           max_retries=self._retry))
      self.write_session.mount("https://",
                               HTTPAdapter(pool_connections=DEFAULT_POOLSIZE,
                                           pool_maxsize=DEFAULT_POOLSIZE,
                                           pool_block=DEFAULT_POOLBLOCK,
                                           max_retries=self._retry))
      self.source = source.filter(whitelist)
      gen = (msg.pack() for msg, _ in self.source)
      self.write_session.put(self.url, data=gen, headers=headers)
      self.write_response = True
    except requests.exceptions.ConnectionError as err:
      msg = "Client connection error to %s with [PUT] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    except requests.exceptions.ConnectTimeout as err:
      msg = "Client connection timeout to %s with [PUT] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    except requests.exceptions.RetryError as err:
      msg = "Client retry error to %s with [PUT] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    except requests.exceptions.ReadTimeout as err:
      msg = "Client read timeout to %s with [PUT] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    return self.write_ok

  def write(self, data):
    """Write wrapper (noop). Actual stream is initiated by the write
    connection.

    Parameters
    ----------
    data : object
      Data to write.

    """
    pass

  def write_close(self):
    """File-handle close wrapper (noop).

    """
    try:
      self.write_session.close()
      self.executor.shutdown(wait=False)
      self.source.breakiter()
      self.source = None
      self.executor = None
      self.write_session = None
    except:
      pass

  @property
  def read_ok(self):
    """
    Are we connected for reads?
    """
    return bool(self.read_response)

  def connect_read(self, device_uid=None, pragma=None):
    """Initialize a streaming read/write HTTP response. Manually connects
    the underlying file-handle. In the event of a network
    disconnection, use to manually reinitiate an HTTP session.

    """
    header_device_uid = device_uid or self.device_uid
    headers = {'Device-Uid': header_device_uid, 'Accept': BROKER_SBP_TYPE, 'Pragma': pragma}
    if not pragma:
      del headers['Pragma']
    try:
      self.read_response = self.read_session.get(self.url,
                                                 stream=True,
                                                 headers=headers,
                                                 timeout=self.timeout)
    except requests.exceptions.ConnectionError as err:
      msg = "Client connection error to %s with [GET] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    except requests.exceptions.ConnectTimeout as err:
      msg = "Client connection timeout to %s with [GET] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    except requests.exceptions.RetryError as err:
      msg = "Client retry error to %s with [GET] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    except requests.exceptions.ReadTimeout as err:
      msg = "Client read timeout to %s with [GET] headers %s: msg=%s" \
            % (self.url, headers, err.message)
      warnings.warn(msg)
    return self.read_ok

  def read(self, size):
    """Read wrapper. If the client connection is closed or some other
    exception is thrown, raises an IOError.

    Parameters
    ----------
    size : int
      Size to read (in bytes).

    Returns
    ----------
    bytearray, or None

    """
    if self.read_response is None or not self.device_uid:
      raise ValueError("Invalid/insufficient HTTP request parameters!")
    elif not self.read_ok or self.read_response.raw.closed:
      raise IOError("HTTP read closed?!")
    try:
      return self.read_response.raw.read(size)
    except:
      raise IOError("HTTP read error!")

  def read_close(self):
    """File-handle close wrapper (noop).

    """
    try:
      self.read_response.close()
      self.read_response = None
    except:
      pass
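
A rough sketch of how HTTPDriver might be driven for streaming reads, assuming the module's constants (DEFAULT_RETRIES, DEFAULT_TIMEOUT, and so on) are defined as in the original project; the device UID below is a placeholder:

# Hedged usage sketch: open a read stream and pull raw SBP bytes from the broker.
driver = HTTPDriver(device_uid="00000000-0000-0000-0000-000000000000")
if driver.connect_read(pragma="no-cache"):
  try:
    while True:
      chunk = driver.read(4096)   # raises IOError once the stream closes
      if not chunk:
        break
      # hand the bytes to an SBP framer/dispatcher here
  finally:
    driver.close()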
Ejemplo n.º 37
0
import us
import logging
import requests
import datetime
import lxml.html
import itertools
import urllib.parse as urlparse

from nameparser import HumanName
from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession
from concurrent.futures import as_completed

session = FuturesSession(max_workers=10)
session.mount('https://classic.nga.org', HTTPAdapter(max_retries=15))

KNOWN_EXCEPTIONS = {
    'https://classic.nga.org/cms/sam-brownback':
        [{'start': '2011-01-10', 'end': '2015-01-12'},
         {'start': '2015-01-12', 'end': '2018-01-31'}]
}

def parse_date(text):
    if not text: return None
    return datetime.datetime.strptime(text, '%b %d, %Y').date().isoformat()

def parse_term(text):
    start, end = [parse_date(x.strip()) for x in text.split('-')]
    return {'start': start, 'end': end}
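
The imports above pull in as_completed, but the snippet is cut off before it is used; the sketch below shows one plausible way to fan the detail-page requests out through this session and parse them as they finish (the fetch_terms helper and its URLs are illustrative, not taken from the original scraper):

# Hedged sketch: issue governor-page requests concurrently and parse each as it completes.
def fetch_terms(urls):
    futures = {session.get(url): url for url in urls}
    results = {}
    for future in as_completed(futures):
        url = futures[future]
        response = future.result()
        doc = lxml.html.fromstring(response.text)
        # a real parser would extract term start/end dates from `doc` here
        results[url] = doc
    return results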
Ejemplo n.º 38
0
class CallHub:
    API_LIMIT = {
        "GENERAL": {"calls": 13, "period": 1},
        "BULK_CREATE": {"calls": 1, "period": 70},
    }

    def __init__(self, api_domain, api_key=None, rate_limit=API_LIMIT):
        """
        Instantiates a new CallHub instance
        >>> callhub = CallHub("https://api-na1.callhub.io")
        With built-in rate limiting disabled:
        >>> callhub = CallHub(rate_limit=False)
        Args:
            api_domain (``str``): Domain to access API (eg: api.callhub.io, api-na1.callhub.io), this varies by account
        Keyword Args:
            api_key (``str``, optional): Optional API key. If not provided,
                it will attempt to use ``os.environ['CALLHUB_API_KEY']``
            rate_limit (``dict``, optional): Enabled by default with settings that respect callhub's API limits.
                Setting this to false disables ratelimiting, or you can set your own limits by following the example
                below. Please don't abuse! :)
                >>> callhub = CallHub(rate_limit={"GENERAL": {"calls": 13, "period": 1},
                >>>                               "BULK_CREATE": {"calls": 1, "period": 70}})
                - Default limits bulk_create to 1 per 70 seconds (CallHub states their limit is every 60s, but in
                  practice a delay of exactly 60s can trip their rate limiter anyway)
                - Default limits all other API requests to 13 per second (CallHub support states their limit is 20/s,
                  but this plays it on the safe side because the rate limiters seem a little sensitive)
        """
        self.session = FuturesSession(max_workers=43)

        # Attempt 3 retries for failed connections
        adapter = requests.adapters.HTTPAdapter(max_retries=3)
        self.session.mount('https://', adapter)
        self.session.mount('http://', adapter)

        # Truncate final '/' off of API domain if it was provided
        if api_domain[-1] == "/":
            self.api_domain = api_domain[:-1]
        else:
            self.api_domain = api_domain

        if rate_limit:
            # Apply general rate limit to self.session.get
            rate_limited_get = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.get))
            self.session.get = types.MethodType(rate_limited_get, self.session)
            
            # Apply general rate limit to self.session.post
            rate_limited_post = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.post))
            self.session.post = types.MethodType(rate_limited_post, self.session)
            
            # Apply bulk rate limit to self.bulk_create
            self.bulk_create = sleep_and_retry(limits(**rate_limit["BULK_CREATE"])(self.bulk_create))

        self.session.auth = CallHubAuth(api_key=api_key)

        # validate_api_key returns administrator email on success
        self.admin_email = self.validate_api_key()

        # cache for do-not-contact number/list to id mapping
        self.dnc_cache = {}

    def __repr__(self):
        return "<CallHub admin: {}>".format(self.admin_email)

    def _collect_fields(self, contacts):
        """ Internal Function to get all fields used in a list of contacts """
        fields = set()
        for contact in contacts:
            for key in contact:
                fields.add(key)
        return fields

    def _assert_fields_exist(self, contacts):
        """
        Internal function to check if fields in a list of contacts exist in CallHub account
        If fields do not exist, raises LookupError.
        """
        # Note: CallHub fields are implemented funkily. They can contain capitalization but "CUSTOM_FIELD"
        # and "custom_field" cannot exist together in the same account. For that reason, for the purposes of API work,
        # fields are treated as case insensitive despite capitalization being allowed. Attempting to upload a contact
        # with "CUSTOM_FIELD" will match to "custom_field" in a CallHub account.
        fields_in_contacts = self._collect_fields(contacts)
        fields_in_callhub = self.fields()

        # Ensure case insensitivity and convert to set
        fields_in_contact = set([field.lower() for field in fields_in_contacts])
        fields_in_callhub = set([field.lower() for field in fields_in_callhub.keys()])

        if fields_in_contact.issubset(fields_in_callhub):
            return True
        else:
            raise LookupError("Attempted to upload contact (s) that contain fields that haven't been "
                              "created in CallHub. Fields present in upload: {} Fields present in "
                              "account: {}".format(fields_in_contact, fields_in_callhub))

    def validate_api_key(self):
        """
        Returns the admin email address if the API key is valid. In rare cases it may be unable to find the admin
        email address and returns a warning string in that case. If the API key is invalid, raises ValueError. If the
        CallHub API returns unexpected information, raises RuntimeError.
        Returns:
            username (``str``): Email of administrator account
        """
        response = self.session.get("{}/v1/agents/".format(self.api_domain)).result()
        if response.json().get("detail") in ['User inactive or deleted.', 'Invalid token.']:
            raise ValueError("Bad API Key")
        elif "count" in response.json():
            if response.json()["count"]:
                return response.json()["results"][0]["owner"][0]["username"]
            else:
                return "Cannot deduce admin account. No agent accounts (not even the default account) exist."
        else:
            raise RuntimeError("CallHub API is not returning expected values, but your api_key is fine. Their API "
                               "specifies that https://callhub-api-domain/v1/agents returns a 'count' field, but this was "
                               "not returned. Please file an issue on GitHub for this project, if an issue for this not "
                               "already exist.")

    def agent_leaderboard(self, start, end):
        params = {"start_date": start, "end_date": end}
        response = self.session.get("{}/v1/analytics/agent-leaderboard/".format(self.api_domain), params=params).result()
        return response.json().get("plot_data")

    def fields(self):
        """
        Returns a list of fields configured in the CallHub account and their ids
        Returns:
            fields (``dict``): dictionary of fields and ids
            >>> {"first name": 0, "last name": 1}
        """
        response = self.session.get('{}/v1/contacts/fields/'.format(self.api_domain)).result()
        return {field['name']: field["id"] for field in response.json()["results"]}

    def bulk_create(self, phonebook_id, contacts, country_iso):
        """
        Leverages CallHub's bulk-upload feature to create many contacts. Supports custom fields.
        >>> contacts = [{'first name': 'Sumiya', 'phone number':'5555555555', 'mobile number': '5555555555'},
        >>>             {'first name': 'Joe', 'phone number':'5555555555', 'mobile number':'5555555555'}]
        >>> callhub.bulk_create(885473, contacts, 'CA')
        Args:
            phonebook_id(``int``): ID of phonebank to insert contacts into.
            contacts(``list``): Contacts to insert (phone number is a MANDATORY field in all contacts)
            country_iso(``str``): ISO 3166 two-char country code,
                see https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
        """
        # Step 1. Get all fields from CallHub account
        # Step 2. Check if all fields provided for contacts exist in CallHub account
        # Step 3. Turn list of dictionaries into a CSV file and create a column mapping for the file
        # Step 4. Upload the CSV and column mapping to CallHub

        contacts = [CaseInsensitiveDict(contact) for contact in contacts]

        if self._assert_fields_exist(contacts):
            # Create CSV file in memory in a way that pleases CallHub and generate column mapping
            csv_file, mapping = csv_and_mapping_create(contacts, self.fields())

            # Upload CSV
            data = {
                'phonebook_id': phonebook_id,
                'country_choice': 'custom',
                'country_ISO': country_iso,
                'mapping': mapping
            }

            response = self.session.post('{}/v1/contacts/bulk_create/'.format(self.api_domain), data=data,
                                         files={'contacts_csv': csv_file}).result()
            if "Import in progress" in response.json().get("message", ""):
                return True
            elif 'Request was throttled' in response.json().get("detail", ""):
                raise RuntimeError("Bulk_create request was throttled because rate limit was exceeded.",
                                   response.json())
            else:
                raise RuntimeError("CallHub did not report that import was successful: ", response.json())

    def create_contact(self, contact):
        """
        Creates single contact. Supports custom fields.
        >>> contact = {'first name': 'Sumiya', 'phone number':'5555555555', 'mobile number': '5555555555'}
        >>> callhub.create_contact(contact)
        Args:
            contact(``dict``): Contact to insert
            Note that country_code and phone_number are MANDATORY
        Returns:
            (``str``): ID of created contact or None if contact not created
        """
        if self._assert_fields_exist([contact]):
            url = "{}/v1/contacts/".format(self.api_domain)
            responses, errors = self._handle_requests([{
                "func": self.session.post,
                "func_params": {"url": url, "data": {"name": contact}},
                "expected_status": 201
            }])
            if errors:
                raise RuntimeError(errors)
            return responses[0].json().get("id")

    def get_contacts(self, limit):
        """
        Gets all contacts.
        Args:
            limit (``int``): Limit of number of contacts to get. If limit not provided, will
                return first 100 contacts.
        Returns:
            contact_list (``list``): List of contacts, where each contact is a dict of key value pairs.
        """
        contacts_url = "{}/v1/contacts/".format(self.api_domain)
        return self._get_paged_data(contacts_url, limit)

    def _get_paged_data(self, url, limit=float(math.inf)):
        """
        Internal function. Leverages _handle_requests to aggregate paged data and return it quickly.
        Args:
            url (``str``): API endpoint to get paged data from.
        Keyword Args:
            limit (``float or int``): Limit of paged data to get. Default is infinity.
        Returns:
            paged_data (``list``) All of the paged data as a single list of dicts, where each dict contains key value
                pairs that represent each individual item in a page.
        """
        first_page = self.session.get(url).result()
        if first_page.status_code != 200:
            raise RuntimeError("Status code {} when making request to: "
                                "{}, expected 200. Details: {})".format(first_page.status_code,
                                                                        url,
                                                                        first_page.text))
        first_page = first_page.json()

        # Handle either limit of 0 or no results
        if first_page["count"] == 0 or limit == 0:
            return []

        # Set limit to the smallest of either the count or the limit
        limit = min(first_page["count"], limit)

        # Calculate number of pages
        page_size = len(first_page["results"])
        num_pages = math.ceil(limit/page_size)

        requests = []
        for i in range(1, num_pages+1):
            requests.append({"func": self.session.get,
                             "func_params": {"url": url, "params": {"page": i}},
                             "expected_status": 200})
        responses_list, errors = self._handle_requests(requests)
        if errors:
            raise RuntimeError(errors)

        # Turn list of responses into aggregated data from all pages
        paged_data = []
        for response in responses_list:
            paged_data += response.json()["results"]
        paged_data = paged_data[:limit]
        return paged_data

    def _handle_requests(self, requests_list, aggregate_json_value=None, retry=False, current_retry_count=0):
        """
        Internal function. Executes a list of requests in batches, asynchronously. Allows fast execution of many reqs.
        >>> requests_list = [{"func": session.get,
        >>>                   "func_params": {"url":"https://callhub-api-domain/v1/contacts/", "params":{"page":"1"}}}
        >>>                   "expected_status": 200]
        >>> _bulk_request(requests_list)
        Args:
            requests_list (``list``): List of dicts that each include a request function, its parameters, and an
                optional expected status. These will be executed in batches.
        """
        # Send bulk requests in batches of at most 500
        batch_size = 500
        requests_awaiting_response = []
        responses = []
        errors = []
        for i, request in enumerate(requests_list):
            # Execute request asynchronously, remembering which request each future belongs to
            requests_awaiting_response.append((request, request["func"](**request["func_params"])))
            # Every time we execute batch_size requests OR we have made our last request, wait for all requests
            # to have received responses before continuing. This batching prevents us from having tens or hundreds of
            # thousands of pending requests with CallHub
            if i % batch_size == 0 or i == (len(requests_list)-1):
                for source_request, req_awaiting_response in requests_awaiting_response:
                    response = req_awaiting_response.result()
                    try:
                        if source_request.get("expected_status") and response.status_code != int(source_request["expected_status"]):
                            raise RuntimeError("Status code {} when making request to: "
                                               "{}, expected {}. Details: {})".format(response.status_code,
                                                                                      source_request["func_params"]["url"],
                                                                                      source_request["expected_status"],
                                                                                      response.text))
                        responses.append(response)

                    except RuntimeError as api_except:
                        errors.append((source_request, api_except))

                requests_awaiting_response = []

        if errors and retry and current_retry_count < 1:
            failed_requests = [error[0] for error in errors]
            new_responses, errors = self._handle_requests(failed_requests, retry=True, current_retry_count=current_retry_count+1)
            responses = responses + new_responses

        return responses, errors

    def get_dnc_lists(self):
        """
        Returns ids and names of all do-not-contact lists
        Returns:
            dnc_lists (``dict``): Dictionary of dnc lists where the key is the id and the value is the name
        """
        dnc_lists = self._get_paged_data("{}/v1/dnc_lists/".format(self.api_domain))
        return {dnc_list['url'].split("/")[-2]: dnc_list["name"] for dnc_list in dnc_lists}

    def pretty_format_dnc_data(self, dnc_contacts):
        dnc_lists = self.get_dnc_lists()
        dnc_phones = defaultdict(list)
        for dnc_contact in dnc_contacts:
            phone = dnc_contact["phone_number"]
            dnc_list_id = dnc_contact["dnc"].split("/")[-2]
            dnc_contact_id = dnc_contact["url"].split("/")[-2]
            dnc_list = {"list_id": dnc_list_id, "name": dnc_lists[dnc_list_id], "dnc_contact_id": dnc_contact_id}
            dnc_phones[phone].append(dnc_list)
        return dict(dnc_phones)

    def get_dnc_phones(self):
        """
        Returns all phone numbers in all DNC lists
        Returns:
            dnc_phones (``dict``): Dictionary of all phone numbers in all dnc lists. A phone number may be associated
                with multiple dnc lists. Note that each phone number on each dnc list has a unique dnc_contact_id that
                has NOTHING to do with the contact_id of the actual contacts related to those phone numbers. Schema:
                >>> dnc_contacts = {"16135554432": [
                >>>                                    {"list_id": 5543, "name": "Default DNC List", "dnc_contact_id": 1234}
                >>>                                    {"list_id": 8794, "name": "SMS Campaign", "dnc_contact_id": 4567}
                >>>                                 ]}}
        """
        dnc_contacts = self._get_paged_data("{}/v1/dnc_contacts/".format(self.api_domain))
        return self.pretty_format_dnc_data(dnc_contacts)


    def add_dnc(self, phone_numbers, dnc_list_id):
        """
        Adds phone numbers to a DNC list of choice
        Args:
            phone_numbers (``list``): Phone numbers to add to DNC
            dnc_list_id (``str``): DNC list id to add contact(s) to
        Returns:
            results (``dict``): Dict of phone numbers and DNC lists added to
            errors (``list``): List of errors and failures
        """
        if not isinstance(phone_numbers, list):
            raise TypeError("add_dnc expects a list of phone numbers. If you intend to only add one number to the "
                            "do-not-contact list, add a list of length 1")

        url = "{}/v1/dnc_contacts/".format(self.api_domain)
        requests = []
        for number in phone_numbers:
            data = {"dnc": "{}/v1/dnc_lists/{}/".format(self.api_domain, dnc_list_id), 'phone_number': number}
            requests.append({"func": self.session.post,
                             "func_params": {"url": url, "data":data},
                             "expected_status": 201})

        responses, errors = self._handle_requests(requests, retry=True)
        dnc_records = [response.json() for response in responses]
        results = self.pretty_format_dnc_data(dnc_records)
        return results, errors


    def remove_dnc(self, numbers, dnc_list=None):
        """
        Removes phone numbers from a do-not-contact list. CallHub's API does not support this directly; it only
        supports removing phone numbers by their internal do-not-contact ID. This function abstracts away from that,
        but doing so requires building a table mapping phone numbers to their dnc ids, which can slow this function
        down, especially on an account with many numbers already marked do-not-contact. To get around this, a CallHub
        instance keeps a cache of numbers and dnc lists -> dnc_contact ids available for use. This cache is refreshed
        if a number is requested to be removed from the DNC list that does not appear in the cache.
        Args:
            numbers (``list``): Phone numbers to remove from DNC
        Keyword Args:
            dnc_list (``str``, optional): DNC list id to remove numbers from. If not specified, will remove number from
                all dnc lists.
        Returns:
            errors (``list``): List of errors
        """
        # Check if we need to refresh DNC phone numbers cache
        if not set(numbers).issubset(set(self.dnc_cache.keys())):
            self.dnc_cache = self.get_dnc_phones()

        dnc_ids_to_purge = []
        for number in numbers:
            for dnc_entry in self.dnc_cache[number]:
                if dnc_list and (dnc_entry["list_id"] == dnc_list):
                    dnc_ids_to_purge.append(dnc_entry["dnc_contact_id"])
                elif not dnc_list:
                    dnc_ids_to_purge.append(dnc_entry["dnc_contact_id"])

        url = "{}/v1/dnc_contacts/{}/"
        requests = []
        for dnc_id in dnc_ids_to_purge:
            requests.append({"func": self.session.delete,
                             "func_params": {"url": url.format(self.api_domain, dnc_id)},
                             "expected_status": 204})
        responses, errors = self._handle_requests(requests)
        return errors

    def create_dnc_list(self, name):
        """
        Creates a new DNC list
        Args:
            name (``str``): Name to assign to DNC list
        Returns:
            id (``str``): ID of created dnc list
        """
        url = "{}/v1/dnc_lists/".format(self.api_domain)
        responses, errors = self._handle_requests([{
            "func": self.session.post,
            "func_params": {"url": url, "data": {"name": name}},
            "expected_status": 201
        }])
        if errors:
            raise RuntimeError(errors)
        return responses[0].json()["url"].split("/")[-2]

    def remove_dnc_list(self, id):
        """
        Deletes an existing DNC list
        Args:
            id (``str``): ID of DNC list to delete
        """
        url = "{}/v1/dnc_lists/{}/"
        responses, errors = self._handle_requests([{
            "func": self.session.delete,
            "func_params": {"url": url.format(self.api_domain, id)},
            "expected_status": 204
        }])
        if errors:
            raise RuntimeError(errors)


    def get_campaigns(self):
        """
        Get call campaigns
        Returns:
            campaigns (``list``): list of campaigns
        """
        url = "{}/v1/callcenter_campaigns/".format(self.api_domain)
        campaigns = self._get_paged_data(url)
        # Extract campaign id from url
        for i, campaign in enumerate(campaigns):
            id = campaign["url"].split("/")[-2]
            campaigns[i]["id"] = id
        return campaigns

    def create_phonebook(self, name, description=""):
        """
        Create a phonebook
        Args:
            name (``str``): Name of phonebook
        Keyword Args:
            description (``str``, optional): Description of phonebook
        Returns:
            id (``str``): id of phonebook
        """
        url = "{}/v1/phonebooks/".format(self.api_domain)
        responses, errors = self._handle_requests([{
            "func": self.session.post,
            "func_params": {"url": url, "data": {"name": name, "description": description}},
            "expected_status": 201
        }])
        if errors:
            raise RuntimeError(errors)
        id = responses[0].json()["url"].split("/")[-2]
        return id

    def create_webhook(self, target, event="cc.notes"):
        """
        Creates a webhook on a particular target
        Args:
            target (``str``): URL for CallHub to send webhook to
        Keyword Args:
            event (``str``, optional): Event which triggers webhook. Default: When an agent completes a call (cc.notes)
        Returns:
            id (``str``): id of created webhook
        """
        url = "{}/v1/webhooks/".format(self.api_domain)
        responses, errors = self._handle_requests([{
            "func": self.session.post,
            "func_params": {"url": url, "data": {"target": target, "event": event}},
            "expected_status": 201
        }])
        if errors:
            raise RuntimeError(errors)
        return responses[0].json()["id"]

    def get_webhooks(self):
        """
        Fetches webhooks created by a CallHub account
        Returns:
            webhooks (``list``): list of webhooks
        """
        url = "{}/v1/webhooks/".format(self.api_domain)
        webhooks = self._get_paged_data(url)
        return webhooks

    def remove_webhook(self, id):
        """
        Deletes a webhook with a given id
        Args:
            id (``str``): id of webhook to delete
        """
        url = "{}/v1/webhooks/{}/".format(self.api_domain, id)
        responses, errors = self._handle_requests([{
            "func": self.session.delete,
            "func_params": {"url": url},
            "expected_status": 204
        }])
        if errors:
            raise RuntimeError(errors)

    def export_campaign(self, id):
        """
        Triggers an export from CallHub's campaign export API. Note that the returned download link only works in an
        authenticated USER session for the CallHub account in question. There is no way to download call campaign
        results directly through the API; you can only trigger exports. Because of this, there is a very limited
        use case for this function.
        Args:
            id (``str``): id of campaign to export
        Returns:
            url (``str``): download link for campaign
        """
        # Step 1: Request export of campaign
        url = "{}/v1/power_campaign/{}/export/".format(self.api_domain, id)
        responses, errors = self._handle_requests([{
            "func": self.session.post,
            "func_params": {"url": url},
            "expected_status": 202
        }])
        if errors:
            raise RuntimeError(errors)
        polling_url = responses[0].json()["polling_url"]

        # Step 2: Continuously check if export is complete - 5 min maximum
        num_attempts_made = 0
        state = "PENDING"
        while state == "PENDING" or state == "PROGRESS":
            time.sleep(1)
            responses, errors = self._handle_requests([{
                "func": self.session.get,
                "func_params": {"url": polling_url},
                "expected_status": 200
            }])
            if errors:
                raise RuntimeError(errors)
            state = responses[0].json()["state"]

            num_attempts_made += 1
            if num_attempts_made == 300:
                state = "TIMEOUT"

        if state != "SUCCESS":
            raise RuntimeError("CallHub reported an error trying to export the campaign. State: {}. "
                               "Full Response: {}".format(state, responses[0].text))

        if responses[0].json()["data"]["code"] != 200:
            raise RuntimeError("CallHub reported an error trying to export the campaign. "
                               "Full Response: {}".format(responses[0].text))

        return responses[0].json()["data"]["url"]
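
A brief usage sketch of the class above; the domain, API key, phonebook id, and contact data are all placeholders, not values from the original project:

# Hedged usage sketch with placeholder credentials and data.
callhub = CallHub("https://api-na1.callhub.io", api_key="YOUR_API_KEY")

contacts = [
    {"first name": "Sumiya", "phone number": "5555555555", "mobile number": "5555555555"},
    {"first name": "Joe", "phone number": "5555555555", "mobile number": "5555555555"},
]
callhub.bulk_create(885473, contacts, "CA")        # phonebook id is illustrative

# Mark a number as do-not-contact on a fresh list, then remove it again.
dnc_list_id = callhub.create_dnc_list("SMS opt-outs")
results, errors = callhub.add_dnc(["16135554432"], dnc_list_id)
callhub.remove_dnc(["16135554432"], dnc_list_id)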
Ejemplo n.º 39
0
import logging
import concurrent.futures
import pandas as pd
from datetime import datetime
from functools import reduce

from bs4 import BeautifulSoup, SoupStrainer
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession
from website.client import exceptions

logger = logging.getLogger(__name__)

session = FuturesSession(max_workers=6)
retries = Retry(total=3, backoff_factor=1.0, status_forcelist=[500, 502, 503, 504])
session.mount("http://", HTTPAdapter(max_retries=retries))

payloads = []


def set_credentials(username, password):
    try:
        logger.info("Setting credentials")
        r = session.get(
            "https://clairvia.texaschildrens.org/ClairviaWeb/Login.aspx"
        ).result()
        soup = BeautifulSoup(r.text, "lxml")

        login_form_inputs = [
            ((input.get("name"), input.get("value")))
            for input in soup.find_all("input")
Ejemplo n.º 40
0
            templateData = json.load(f)

    for definition in templateData:
        currentSeriesNames.append(definition["name"])
        currentSeries.append(ValueTemplate(definition["name"], definition["min"], definition["max"]))

else:
    currentSeries.append(ValueTemplate("temperature", 10, 40))
    currentSeries.append(ValueTemplate("pressure", 950, 1100))
    currentSeries.append(ValueTemplate("humidity", 20, 90))


m = Measurement(unide.process.local_now(), dimensions=currentSeriesNames)
a = requests.adapters.HTTPAdapter(max_retries=maxRetries)
session = FuturesSession()
session.mount('http://', a)
session.headers = {
    "Content-Type": "application/json",
    "Authorization": authHeader
}


def bg_cb(sess, resp):
    # parse the json storing the result on the response object
    resp.data = resp.json()
    print(resp)


while True:
    lastMeasurement = datetime.datetime.utcnow()
    newMetrics = dict()
Ejemplo n.º 41
0
class ResourceSyncPuSH(object):
    """
    The base class for the publisher, hub and resource. Contains
    methods for reading config files, making http requests, error handling,
    etc.
    """

    def __init__(self):
        """
        Initializes the Futures-Requests session with the
        max number of workers and retries.
        """

        # max workers and retries should be configurable?
        self.session = FuturesSession(max_workers=10)
        adapter = HTTPAdapter(max_retries=3)
        self.session.mount("http://", adapter)
        self._start_response = None

        # config parameters
        self.config = {}
        self.config['log_mode'] = ""
        self.config['mimetypes'] = []
        self.config['trusted_publishers'] = []
        self.config['trusted_topics'] = []
        self.config['my_url'] = ""
        self.config['hub_url'] = ""
        self.config['topic_url'] = ""
        self.config['subscribers_file'] = ""
        self.config['server_path'] = ""

        # logging messages
        self.log_msg = {}
        self.log_msg['payload'] = ""
        self.log_msg['msg'] = []
        self.log_msg['link_header'] = ""
        self.log_msg['module'] = ""

    def get_config(self, classname=None):
        """
        Finds and reads the config file. Reads the appropriate config values
        for the classname provided. For eg: if the classname is hub, it will
        read from the [hub] section in the config file.
        """

        if not classname:
            classname = self.__class__.__name__.lower()

        self.log_msg['module'] = classname

        # NOTE: more paths can be added to look for the config files.
        # the order of the files matters: config values in the first file
        # will be overwritten by the values in the next file.
        cnf_file = []
        cnf_file.extend([
            os.path.join(os.path.dirname(__file__),
                         "../conf/resourcesync_push.ini"),
            "/etc/resourcesync_push.ini",
            "/etc/resourcesync_push/resourcesync_push.ini",
        ])

        # loading values from configuration file
        conf = ConfigParser.ConfigParser()
        files_read = conf.read(cnf_file)
        if not files_read:
            raise IOError("Unable to read config file")

        if classname == "hub":
            self.get_hub_config(conf)
        elif classname == "publisher":
            self.get_publisher_config(conf)
        elif classname == "subscriber":
            try:
                self.config['my_url'] = conf.get("subscriber", "url")
            except (NoSectionError, NoOptionError):
                print("The url value for subscriber is required \
                      in the config file.")
                raise

        self.get_demo_config(conf)

    def get_demo_config(self, conf):
        """
        Reads the [demo_mode] section from the config file if the
        log mode is set to 'demo'.
        """
        try:
            self.config['log_mode'] = conf.get("general", "log_mode")
        except (NoSectionError, NoOptionError):
            pass

        if not self.config['log_mode'] == "demo":
            return

        try:
            self.config['demo_hub_url'] = conf.get("demo_mode", "hub_url")
        except (NoSectionError, NoOptionError):
            print("Demo log mode requires a hub_url in the \
                  [demo_mode] section")
            raise

        try:
            self.config['demo_topic_url'] = conf.get("demo_mode", "topic_url")
        except (NoSectionError, NoOptionError):
            print("Demo log mode requires a topic_url in the \
                  [demo_mode] section")
            raise
        return

    def get_hub_config(self, conf):
        """
        Reads the [hub] section from the config file.
        """

        try:
            self.config['mimetypes'] = conf.get("hub", "mimetypes")
        except (NoSectionError, NoOptionError):
            # resourcesync hub by default
            self.config['mimetypes'] = "application/xml"

        try:
            self.config['trusted_publishers'] = conf.get("hub",
                                                         "trusted_publishers")
        except (NoSectionError, NoOptionError):
            # will allow any publisher
            self.config['trusted_publishers'] = []

        try:
            self.config['trusted_topics'] = conf.get("hub", "trusted_topics")
        except (NoSectionError, NoOptionError):
            # will accept any topic
            self.config['trusted_topics'] = []

        try:
            self.config['my_url'] = conf.get("hub", "url")
        except (NoSectionError, NoOptionError):
            print("The url value for hub is required in the config file.")
            raise

        self.config['subscribers_file'] = os.path.join(
            os.path.dirname(__file__),
            "../db/subscriptions.pk"
        )
        try:
            self.config['subscribers_file'] = conf.get("hub",
                                                       "subscribers_file")
        except (NoSectionError, NoOptionError):
            pass

        if not os.path.isfile(self.config['subscribers_file']):
            open(self.config['subscribers_file'], 'a').close()

        return

    def get_publisher_config(self, conf):
        """
        Reads the [publisher] section in the config file.
        """

        try:
            self.config['my_url'] = conf.get("publisher", "url")
        except (NoSectionError, NoOptionError):
            print("The url value for publisher is required \
                  in the config file.")
            raise

        try:
            self.config['server_path'] = conf.get("publisher", "server_path")
        except (NoSectionError, NoOptionError):
            pass

        try:
            self.config['hub_url'] = conf.get("publisher", "hub_url")
        except (NoSectionError, NoOptionError):
            print("The hub_url value for publisher is required \
                  in the config file.")
            raise

        try:
            self.config['topic_url'] = conf.get("publisher", "topic_url")
        except (NoSectionError, NoOptionError):
            print("The topic_url value for publisher is required \
                  in the config file.")
            raise

    def send(self, url, method='POST',
             data=None,
             callback=None,
             headers=None):
        """
        Performs http post and get requests. Uses futures-requests
        to make (threaded) async requests.
        """

        if method == 'POST':
            return self.session.post(url,
                                     data=data,
                                     background_callback=callback,
                                     headers=headers)
        elif method == 'GET':
            return self.session.get(url,
                                    headers=headers)
        elif method == 'HEAD':
            return self.session.head(url,
                                     headers=headers)
        else:
            return

    def respond(self, code=200, msg="OK", headers=None):
        """
        Sends the appropriate http status code with an
        error message.
        """

        print("HTTP %s: %s" % (code, msg))

        if not headers:
            headers = []
        if not str(code) == "204":
            headers.append(("Content-Type", "text/html"))

        code = str(code) + " " + HTTP_STATUS_CODE[code]

        self._start_response(code, headers)
        return [msg]

    @staticmethod
    def get_topic_hub_url(link_header):
        """
        Uses the parse_header_links method in requests to parse link
        headers and return the topic and hub urls.
        """

        links = parse_header_links(link_header)
        topic = ""
        hub_url = ""
        for link in links:
            if link.get('rel') == 'self':
                topic = link.get('url')
            elif link.get('rel') == 'hub':
                hub_url = link.get('url')
        return (topic, hub_url)

    def make_link_header(self, hub_url=None, topic_url=None):
        """
        Constructs the resourcesync link header.
        """

        if not hub_url or not topic_url:
            return self.respond(code=400,
                                msg="hub and topic urls are not set \
                                in config file.")
        link_header = []
        link_header.extend(["<", topic_url, ">;rel=", "self", ","])
        link_header.extend([" <", hub_url, ">;rel=", "hub"])
        return "".join(link_header)

    def log(self):
        """
        Log handler. Will send the log info as json to the
        demo hub if log_mode value is set to demo in the config file.
        """
        if self.config['log_mode'] == 'demo':
            headers = {}
            headers['Link'] = self.make_link_header(
                hub_url=self.config['demo_hub_url'],
                topic_url=self.config['demo_topic_url']
            )
            self.send(self.config['demo_hub_url'],
                      data=json.dumps(self.log_msg),
                      headers=headers)
        else:
            print(self.log_msg)
Ejemplo n.º 42
0
import os, shutil, time, concurrent.futures, ctypes
import requests
from requests_futures.sessions import FuturesSession
from bs4 import BeautifulSoup

os.system('title nHentai Downloader v1.0 By NekoChan')

session = FuturesSession()
session.mount('https://', requests.adapters.HTTPAdapter(max_retries=3))
kernel32 = ctypes.windll.kernel32

while True:
    kernel32.SetConsoleMode(
        kernel32.GetStdHandle(-10),
        (0x4 | 0x80 | 0x20 | 0x2 | 0x10 | 0x1 | 0x40 | 0x100))

    bookId = input('請輸入ID:')  # prompt (Chinese): "Please enter the gallery ID:"
    nBook = requests.get(f'https://nhentai.net/g/{bookId}/')

    if nBook.status_code == 200:
        kernel32.SetConsoleMode(
            kernel32.GetStdHandle(-10),
            (0x4 | 0x80 | 0x20 | 0x2 | 0x10 | 0x1 | 0x00 | 0x100))

        begin = time.time()
        html = BeautifulSoup(nBook.text, 'html.parser')
        coverImgHtml = html.select('#cover > a > img')[0].get('data-src')

        imgType = coverImgHtml.split('.')[-1]
        galleriesId = coverImgHtml.split('/')[-2]
        pages = int(
Ejemplo n.º 43
0
    def retrieve(self, catalog, *, dry_run=False, media_type=''):
        if not dry_run:
            distributions = Distribution.objects.filter(
                division_id=catalog.division_id, http_status_code__isnull=True)

            if media_type:
                distributions = distributions.filter(mediaType=media_type)

            if not distributions.exists():
                return

            # Collect the distribution-response pairs.
            def callback(distribution, response):
                results.append([distribution, response])

            # Create a closure.
            def factory(distribution):
                return lambda session, response: callback(distribution, response)

            # @see http://docs.python-requests.org/en/latest/api/#requests.adapters.HTTPAdapter
            # @see https://github.com/ross/requests-futures/blob/master/requests_futures/sessions.py
            session = FuturesSession()
            # Avoids "Connection pool is full, discarding connection" warnings.
            adapter_kwargs = {'pool_maxsize': 10}
            session.mount('https://',
                          requests.adapters.HTTPAdapter(**adapter_kwargs))
            session.mount('http://',
                          requests.adapters.HTTPAdapter(**adapter_kwargs))

            # @see https://djangosnippets.org/snippets/1949/
            pk = 0
            last_pk = distributions.order_by('-pk')[0].pk
            distributions = distributions.order_by('pk')
            while pk < last_pk:
                # @see https://github.com/ross/requests-futures/issues/18
                # @see https://github.com/ross/requests-futures/issues/5
                futures = []
                results = []

                # If an exception occurs, we lose progress on at most 100 requests.
                for distribution in distributions.filter(pk__gt=pk)[:100]:
                    pk = distribution.pk

                    # @see http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow
                    # @see http://stackoverflow.com/a/845595/244258
                    futures.append(
                        session.get(
                            quote(
                                distribution.accessURL,
                                safe="%/:=&?~#+!$,;'@()*[]"),
                            stream=True,
                            verify=False,
                            background_callback=factory(distribution)))

                for future in futures:
                    try:
                        future.result()
                    except (requests.exceptions.ConnectionError,
                            requests.exceptions.InvalidSchema,
                            requests.exceptions.InvalidURL,
                            requests.exceptions.MissingSchema,
                            requests.exceptions.ReadTimeout,
                            requests.exceptions.SSLError,
                            requests.exceptions.TooManyRedirects,
                            requests.packages.urllib3.exceptions.ProtocolError
                            ):
                        self.exception('')

                for distribution, response in results:
                    status_code = response.status_code
                    charset = ''

                    content_length = response.headers.get('content-length')
                    if content_length:
                        content_length = int(content_length)

                    # @see https://github.com/kennethreitz/requests/blob/b137472936cbe6a6acabab538c1d05ed4c7da638/requests/utils.py#L308
                    content_type = response.headers.get('content-type', '')
                    if content_type:
                        content_type, params = cgi.parse_header(content_type)
                        if 'charset' in params:
                            charset = params['charset'].strip("'\"")

                    distribution.http_headers = dict(response.headers)
                    distribution.http_status_code = status_code
                    distribution.http_content_length = content_length
                    distribution.http_content_type = content_type
                    distribution.http_charset = charset
                    distribution.save()

                    self.debug('{} {} {}'.format(
                        status_code, number_to_human_size(content_length),
                        content_type))

                    response.close()
        self.info('{} done'.format(catalog))
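
The callback factory above exists to avoid Python's late-binding closure pitfall: a lambda created directly inside the loop would otherwise see only the last distribution. Below is a distilled, framework-free illustration of the same pattern, assuming an older requests-futures release that still supports background_callback; the URLs are placeholders:

# Hedged sketch: bind each item to its own callback via a factory function.
from requests_futures.sessions import FuturesSession

def factory(item):
    # `item` is captured now, so every callback keeps its own copy
    return lambda session, response: print(item, response.status_code)

session = FuturesSession()
futures = [session.get(url, background_callback=factory(url))
           for url in ("https://example.com/a", "https://example.com/b")]
for future in futures:
    future.result()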