Example #1
def loginToWiki(wiki):
    if wiki in WIKILOGINS:
        return WIKILOGINS[wiki]
    desktop = XSCRIPTCONTEXT.getDesktop()
    model = desktop.getCurrentComponent()
    sheet = config("Wikipedia")
    if not model.Sheets.hasByName(sheet):
        model.Sheets.insertNewByName(sheet, model.Sheets.getCount() + 1)
        sheet = model.Sheets.getByName(sheet)
        sheet.getCellRangeByName("A1").setString("URL")
        sheet.getCellRangeByName("A2").setString("Username")
        sheet.getCellRangeByName("A3").setString("Password")
    else:
        sheet = model.Sheets.getByName(sheet)
    urlColumn = getColumn("URL")
    userColumn = getColumn("Username")
    passwordColumn = getColumn("Password")
    index = 2
    levelCell = sheet.getCellRangeByName(urlColumn + str(index))
    while (levelCell.getType().value != "EMPTY"):
        if (levelCell.String == wiki):
            break
        index = index + 1
        levelCell = sheet.getCellRangeByName(urlColumn + str(index))
    # Read the credential strings, not the UNO cell objects
    username = sheet.getCellRangeByName(userColumn + str(index)).getString()
    password = sheet.getCellRangeByName(passwordColumn + str(index)).getString()
    session = CacheControl(requests.session())
    session.headers.update(USERAGENT)
    logintokens = session.get(url=wiki, params={
        "action": "query", "meta": "tokens", "type": "login",
        "maxlag": 5, "format": "json",
    }).json()["query"]["tokens"]["logintoken"]
    session.post(wiki, data={
        "action": "login", "lgname": username,
        "lgpassword": password, "lgtoken": logintokens,
    })
    CSRF = session.get(url=wiki, params={
        "action": "query", "meta": "tokens", "maxlag": 5, "format": "json",
    }).json()["query"]["tokens"]["csrftoken"]
    WIKILOGINS[wiki] = CSRF  # update the module-level cache instead of shadowing it with a local
    return CSRF
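Example #1 calls a getColumn() helper that the snippet does not define. A minimal stand-in consistent with the header layout above (the fixed mapping is an assumption, not the original helper):

COLUMNS = {"URL": "A", "Username": "B", "Password": "C"}

def getColumn(name):
    # Hypothetical stand-in for the undefined helper: map a header name
    # to the spreadsheet column letter that getCellRangeByName() expects.
    return COLUMNS[name]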
Example #2
class CacheControlHelper(object):
    def __init__(self):
        self.sess = CacheControl(requests.session(),
                                 heuristic=CustomHeuristic(days=30),
                                 cache=FileCache('.web_cache'))
        self.exceptions = requests.exceptions

    def get(self,
            url,
            params=None,
            timeout=120,
            cookies=None,
            headers=None):
        # None instead of a mutable default dict; requests accepts
        # cookies=None, so the previous if/else duplication is unnecessary
        headers = headers or {'Accept': 'application/json'}
        return self.sess.get(url,
                             params=params,
                             timeout=timeout,
                             cookies=cookies,
                             headers=headers)

    def post(self,
             url,
             data,
             timeout=120,
             headers=None):
        headers = headers or {'Accept': 'application/json'}
        return self.sess.post(url, data=data, timeout=timeout, headers=headers)
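The CustomHeuristic(days=30) above is not defined in the snippet. A minimal sketch of what it might look like, modelled on cachecontrol's BaseHeuristic interface (the class body is an assumption):

import calendar
from datetime import datetime, timedelta
from email.utils import formatdate, parsedate

from cachecontrol.heuristics import BaseHeuristic


class CustomHeuristic(BaseHeuristic):
    """Sketch: treat every response as fresh for a fixed number of days."""

    def __init__(self, days=30):
        self.days = days

    def update_headers(self, response):
        # Recompute the freshness headers so CacheControl stores the response
        date = parsedate(response.headers['date'])
        expires = datetime(*date[:6]) + timedelta(days=self.days)
        return {
            'expires': formatdate(calendar.timegm(expires.timetuple())),
            'cache-control': 'public',
        }

    def warning(self, response):
        return '110 - "response is stale, served from heuristic cache"'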
Example #3
def Request(url,
            method="GET",
            headers=DEFAULT_HEADERS,
            additional_headers=None,
            data=None,
            session=None,
            allow_redirects=True,
            timeout=10,
            load_cookies=True,
            mobile=False):
    headers = dict(headers)  # copy so the shared DEFAULT_HEADERS dict is never mutated
    if additional_headers:
        headers.update(additional_headers)
    try:
        session = CacheControl(session)
    except Exception as e:
        pass
        # Error("Init web cache failed!!!", e)
    if mobile:
        headers["User-Agent"] = MOBILE_IOS_AGENTS  # header name fixed from "User-Agents"
    xbmc.log("Requests headers: {0}".format(json.dumps(headers)), 1)
    if session:
        session.headers.update(headers)
        domain = re.search(r"https?://(.+?)($|/)", url).group(1)
        if load_cookies:
            LoadCookies(session, cookies_name=domain)
        if data:
            response = session.post(url,
                                    data=data,
                                    allow_redirects=allow_redirects,
                                    timeout=timeout,
                                    verify=False)
        else:
            if method == "HEAD":
                response = session.head(url,
                                        allow_redirects=allow_redirects,
                                        timeout=timeout,
                                        verify=False)
            else:
                response = session.get(url,
                                       allow_redirects=allow_redirects,
                                       timeout=timeout,
                                       verify=False)
        response.encoding = "utf8"
        SaveCookies(session, cookies_name=domain)
        return response
    else:
        if method == "HEAD":
            return requests.head(url,
                                 headers=headers,
                                 allow_redirects=allow_redirects,
                                 timeout=timeout,
                                 verify=False)
        else:
            return requests.get(url,
                                headers=headers,
                                allow_redirects=allow_redirects,
                                timeout=timeout,
                                verify=False)
Example #4
def overpass_query(query):
    """Query the overpass servers. This may block for extended periods of time, depending upon the query"""
    session = requests.session()
    session.headers.update({"User-Agent": USER_AGENT})
    cached_session = CacheControl(session)
    response = cached_session.post(
        "http://overpass-api.de/api/interpreter", data={"data": query}
    )
    wait_time = overpass_status()
    loop = 0
    while wait_time > 0:
        time.sleep(wait_time)
        wait_time = overpass_status()
        loop += 1
    while response.status_code == requests.codes.too_many_requests:
        time.sleep(10)
        response = cached_session.post(
            "http://overpass-api.de/api/interpreter", data={"data": query}
        )
    if response.status_code != requests.codes.ok:
        print("Bad request")
        print(response.text)
        print(response.status_code)
        # The original repeated this check after reading the body; the second
        # check was unreachable, so both are merged into one informative error.
        raise ValueError(
            "We got a bad response code of {} for {} which resulted in:\r\n{}".format(
                response.status_code, query, response.text
            )
        )

    xml = response.text
    content_type = response.headers.get("content-type")
    if content_type == "application/osm3s+xml":
        return ET.ElementTree(ET.fromstring(xml))  # use the same ET alias throughout
    elif content_type == "application/json":
        return response.json()
    else:
        raise ValueError(
            "Unexpected content type ({}) from the query: {}".format(
                content_type, query
            )
        )
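overpass_status() is referenced but not defined in the snippet. A plausible sketch that polls the public status endpoint (the parsing is an assumption about the status page format, not the original helper):

import re
import requests

def overpass_status():
    # Hypothetical helper: return how many seconds to wait until the
    # Overpass API has a free slot (0 when one is available now).
    text = requests.get("http://overpass-api.de/api/status").text
    if "slots available now" in text:
        return 0
    waits = [int(m.group(1)) for m in re.finditer(r"in (-?\d+) seconds", text)]
    return max(min(waits), 0) if waits else 0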
Example #5
def getTracks(area, bboxInformation):
    apis = getApis()
    session = requests.session()
    session.headers.update({"User-Agent": USER_AGENT})
    cached_session = CacheControl(session)
    for api in apis:
        if "data" in api:
            data = api["data"]
            if "bbTopLeft" in data:
                data["bbTopLeft"] = "{lat},{lon}".format(
                    lat=bboxInformation[3], lon=bboxInformation[0])
            if "bbBottomRight" in data:
                data["bbBottomRight"] = "{lat},{lon}".format(
                    lat=bboxInformation[1], lon=bboxInformation[2])
            response = cached_session.post(api["api"] + api["tracks"],
                                           data=data)
            tJson = response.json()
        else:
            turl = api["api"] + api["tracks"]
            params = api["params"]
            params["bbox"] = params["bbox"].format(
                minx=bboxInformation[0],
                miny=bboxInformation[1],
                maxx=bboxInformation[2],
                maxy=bboxInformation[3],
            )
            response = cached_session.get(turl, params=params)
            print(response.url)
            while response.status_code != requests.codes.ok:
                time.sleep(1)
                response = cached_session.get(turl, params=params)
            try:
                tJson = response.json()
            except json.decoder.JSONDecodeError as e:
                print(response.url)
                print(response.text)
                raise e
            while "next" in response.links:
                next_url = response.links["next"]["url"]
                response = cached_session.get(next_url)
                while response.status_code != requests.codes.ok:
                    time.sleep(1)
                    response = cached_session.get(next_url)
                try:
                    tJson["features"] = (tJson["features"] +
                                         response.json()["features"])
                except json.decoder.JSONDecodeError as e:
                    print(response.url)
                    print(response.text)
                    raise e
        if response.status_code != requests.codes.ok:
            raise ValueError("{} gave us a status code of {}".format(
                response.url, response.status_code))
        if api["name"] == "openstreetcam":
            tJson = convertJson(tJson)
        save(area, api["name"], tJson)
Example #6
def getURL(url,
           post_data=None,
           params=None,
           headers=None,
           timeout=30,
           session=None,
           json=False):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session,
                           cache=caches.FileCache(
                               os.path.join(cache_dir, 'sessions')))

    # request session headers
    session.headers.update({
        'User-Agent': USER_AGENT,
        'Accept-Encoding': 'gzip,deflate'
    })
    session.headers.update(headers or {})

    # request session ssl verify
    session.verify = False

    # request session parameters
    session.params = params

    try:
        # request session proxies
        if sickbeard.PROXY_SETTING:
            logger.log("Using proxy for url: " + url, logger.DEBUG)
            session.proxies = {
                "http": sickbeard.PROXY_SETTING,
                "https": sickbeard.PROXY_SETTING,
            }

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout)
        else:
            resp = session.get(url, timeout=timeout)

        if not resp.ok:
            logger.log(
                u"Requested url " + url + " returned status code is " +
                str(resp.status_code) + ': ' +
                clients.http_error_code[resp.status_code], logger.DEBUG)
            return

    except requests.exceptions.HTTPError as e:
        logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url,
                   logger.WARNING)
        return
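Examples #6, #7 and #10 rely on a _getTempDir() helper that is not shown. A minimal sketch under the assumption that it only needs to produce a per-user temp directory for the session cache:

import os
import getpass
import tempfile

def _getTempDir():
    # Hypothetical helper: a per-user directory under the system temp dir,
    # so different users do not share one HTTP session cache.
    return os.path.join(tempfile.gettempdir(),
                        "sickbeard-{}".format(getpass.getuser()))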
Example #7
def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=None, json=False):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session, cache=caches.FileCache(os.path.join(cache_dir, "sessions")))

    # request session headers
    req_headers = {"User-Agent": USER_AGENT, "Accept-Encoding": "gzip,deflate"}
    if headers:
        req_headers.update(headers)
    session.headers.update(req_headers)

    # request session ssl verify
    session.verify = False

    # request session parameters
    session.params = params

    try:
        # Remove double-slashes from url
        parsed = list(urlparse.urlparse(url))
        parsed[2] = re.sub("/{2,}", "/", parsed[2])  # replace two or more / with one
        url = urlparse.urlunparse(parsed)

        # request session proxies
        if sickbeard.PROXY_SETTING:
            logger.log("Using proxy for url: " + url, logger.DEBUG)
            session.proxies = {"http": sickbeard.PROXY_SETTING, "https": sickbeard.PROXY_SETTING}

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout)
        else:
            resp = session.get(url, timeout=timeout)

        if not resp.ok:
            logger.log(
                u"Requested url "
                + url
                + " returned status code is "
                + str(resp.status_code)
                + ": "
                + clients.http_error_code[resp.status_code],
                logger.DEBUG,
            )
            return

    except requests.exceptions.HTTPError as e:
        logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING)
        return
Example #8
    def send_answer(self, answer, key):
        s = requests.session()
        s = CacheControl(s, cache=FileCache(os.path.expanduser('~/.tst/cache')))

        url = "%s/%s/answers" % (self.url, key)
        data = data2json(answer).encode('utf-8')
        tokens = JsonFile(os.path.expanduser('~/.tst/tokens.json'))
        headers = {"Authorization": "Bearer %s" % tokens.get(self.name)}
        try:
            response = s.post(url, headers=headers, data=data, allow_redirects=True)
        except requests.ConnectionError:
            _assert(False, "Connection failed... check your internet connection (1)")

        return response
Example #9
def Request(
	url,
	method="GET",
	headers=DEFAULT_HEADERS,
	additional_headers=None,
	data=None,
	session=None,
	allow_redirects=True,
	timeout=10,
	load_cookies=True,
	mobile=False
):
	headers = dict(headers)  # copy so the shared DEFAULT_HEADERS dict is never mutated
	if additional_headers:
		headers.update(additional_headers)
	try:
		session = CacheControl(session)
	except Exception as e:
		pass
		# Error("Init web cache failed!!!", e)
	if mobile:
		headers["User-Agent"] = MOBILE_IOS_AGENTS  # header name fixed from "User-Agents"
	xbmc.log("Requests headers: {0}".format(json.dumps(headers)), 1)
	if session:
		session.headers.update(headers)
		domain = re.search(r"https?://(.+?)($|/)", url).group(1)
		if load_cookies:
			LoadCookies(session, cookies_name=domain)
		if data:
			response = session.post(url, data=data, allow_redirects=allow_redirects, timeout=timeout, verify=False)
		else:
			if method == "HEAD":
				response = session.head(url, allow_redirects=allow_redirects, timeout=timeout, verify=False)
			else:
				response = session.get(url, allow_redirects=allow_redirects, timeout=timeout, verify=False)
		response.encoding = "utf8"
		SaveCookies(session, cookies_name=domain)
		return response
	else:
		if method == "HEAD":
			return requests.head(url, headers=headers, allow_redirects=allow_redirects, timeout=timeout, verify=False)
		else:
			return requests.get(url, headers=headers, allow_redirects=allow_redirects, timeout=timeout, verify=False)
Example #10
def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=None, json=False):
    """
    Returns a byte-string retrieved from the url provider.
    """

    # request session
    cache_dir = sickbeard.CACHE_DIR or _getTempDir()
    session = CacheControl(sess=session, cache=caches.FileCache(os.path.join(cache_dir, 'sessions')))

    # request session headers
    session.headers.update({'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'})
    session.headers.update(headers or {})

    # request session ssl verify
    session.verify = False

    # request session parameters
    session.params = params

    try:
        # request session proxies
        if sickbeard.PROXY_SETTING:
            logger.log("Using proxy for url: " + url, logger.DEBUG)
            session.proxies = {
                "http": sickbeard.PROXY_SETTING,
                "https": sickbeard.PROXY_SETTING,
            }

        # decide if we get or post data to server
        if post_data:
            resp = session.post(url, data=post_data, timeout=timeout)
        else:
            resp = session.get(url, timeout=timeout)

        if not resp.ok:
            logger.log(u"Requested url " + url + " returned status code is " + str(
                resp.status_code) + ': ' + clients.http_error_code[resp.status_code], logger.DEBUG)
            return

    except requests.exceptions.HTTPError as e:
        logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING)
        return
Example #11
class TwistedEventConsumer(EventConsumer):
    def __init__(self, queue, sdk_key, config):
        self._queue = queue
        """ :type: queue.Queue """

        self._session = CacheControl(txrequests.Session())
        """ :type: txrequests.Session """

        self._sdk_key = sdk_key
        self._config = config
        """ :type: ldclient.twisted.TwistedConfig """

        self._looping_call = None
        """ :type: LoopingCall"""

    def start(self):
        self._looping_call = task.LoopingCall(self._consume)
        self._looping_call.start(5)

    def stop(self):
        self._looping_call.stop()

    def is_alive(self):
        return self._looping_call is not None and self._looping_call.running

    def flush(self):
        return self._consume()

    def _consume(self):
        items = []
        try:
            while True:
                items.append(self._queue.get_nowait())
        except Empty:
            pass

        if items:
            return self.send_batch(items)

    @defer.inlineCallbacks
    def send_batch(self, events):
        @defer.inlineCallbacks
        def do_send(should_retry):
            # noinspection PyBroadException
            try:
                if isinstance(events, dict):
                    body = [events]
                else:
                    body = events
                hdrs = _headers(self._sdk_key)
                r = yield self._session.post(
                    self._config.events_uri,
                    headers=hdrs,
                    timeout=(self._config.connect_timeout,
                             self._config.read_timeout),
                    data=json.dumps(body))
                r.raise_for_status()
            except ProtocolError as e:
                inner = e.args[1]
                if inner.errno == errno.ECONNRESET and should_retry:
                    log.warning(
                        'ProtocolError exception caught while sending events. Retrying.'
                    )
                    yield do_send(False)
                else:
                    log.exception(
                        'Unhandled exception in event consumer. Analytics events were not processed.'
                    )
            except Exception:
                log.exception(
                    'Unhandled exception in event consumer. Analytics events were not processed.'
                )

        try:
            yield do_send(True)
        finally:
            for _ in events:
                self._queue.task_done()
Example #12
class Statbotics:
    """
    Main Object for interfacing with the Statbotics API
    """
    def __init__(self):
        self.BASE_URL = "https://backend.statbotics.io"
        self.session = CacheControl(requests.Session())
        self.login(self.getToken())
        self.token = self.getToken()

    def getToken(self, retries=0):
        if retries > 2:
            raise UserWarning("Could not connect to Statbotics.io")
        self.session.get(self.BASE_URL + "/admin/")
        if "csrftoken" not in self.session.cookies:
            return self.getToken(retries + 1)
        return self.session.cookies["csrftoken"]

    def login(self, token):
        login_data = {
            "csrfmiddlewaretoken": token,
            "next": self.BASE_URL + "/admin/"
        }
        self.session.post(
            self.BASE_URL + "/admin/login/",
            data=login_data,
            headers=dict(Referer=self.BASE_URL),
        )

    def _filter(self, data, fields):
        if fields == ["all"]:
            return data

        for field in fields:
            if field not in data[0]:
                raise ValueError("Invalid field: " + str(field))

        out = []
        for entry in data:
            new_entry = {}
            for field in fields:
                new_entry[field] = entry[field]
            out.append(new_entry)
        return out

    def _get(self, url, fields, retry=0):
        resp = self.session.get(self.BASE_URL + url)
        if resp.status_code != 200:
            if retry < 2:
                return self._get(url, fields, retry=retry + 1)
            raise UserWarning("Invalid query: " + url)

        data = resp.json()
        if "results" in data:
            data = data["results"]

        if len(data) == 0:
            raise UserWarning("Invalid inputs, no data recieved for " + url)

        return self._filter(data, fields)

    def _negate(self, string):
        if len(string) == 0:
            return string
        if string[0] == "-":
            return string[1:]
        return "-" + string

    def getTeam(self, team, fields=["all"]):
        """
        Function to retrieve information on an individual team\n
        :param team: Team Number, integer\n
        :param fields: List of fields to return. The default is ["all"]\n
        :return: a dictionary with the team's number, location (country, state, district), and Elo statistics (Current Elo, Recent Elo, Mean Elo, Max Elo)\n
        """

        validate.checkType(team, "int", "team")
        validate.checkType(fields, "list", "fields")
        return self._get("/api/_teams?team=" + str(team), fields)[0]

    def getTeams(
        self,
        country=None,
        state=None,
        district=None,
        active=True,
        metric=None,
        limit=1000,
        offset=0,
        fields=["all"],
    ):
        """
        Function to retrieve information on multiple teams\n
        :param country: Restrict based on country (select countries included)\n
        :param state: US States and Canada provinces only. Can infer country.\n
        :param district: Use 2 or 3-letter key (ex: FIM, NE, etc)\n
        :param active: Restrict to active teams (played most recent season)\n
        :param metric: Order output. Default descending, add '-' for ascending. (Ex: "-elo", "team", etc)\n
        :param limit: Limits the output length to speed up queries. Max 10,000\n
        :param offset: Skips the first (offset) items when returning\n
        :param fields: List of fields to return. Default is ["all"]\n
        :return: A list of dictionaries, each dictionary including the team, location, and Elo statistics\n
        """

        url = "/api/_teams?"

        validate.checkType(metric, "str", "metric")
        validate.checkType(limit, "int", "limit")
        validate.checkType(offset, "int", "offset")
        validate.checkType(fields, "list", "fields")

        if limit > 10000:
            raise ValueError(
                "Please reduce 'limit', consider breaking into multiple smaller queries"
            )

        url += "limit=" + str(limit) + "&offset=" + str(offset)
        url += validate.getLocations(country, state, district)

        if active:
            url += "&active=1"

        if metric:
            if metric not in validate.getTeamMetrics():
                raise ValueError("Invalid metric")
            url += "&o=" + self._negate(metric)

        return self._get(url, fields)

    def getYear(self, year, fields=["all"]):
        """
        Function to retrieve information for a specific year\n
        :param year: Year, integer\n
        :param fields: List of fields to return. The default is ["all"]\n
        :return: a dictionary with the year, match prediction statistics, and RP prediction statistics\n
        """

        validate.checkType(year, "int", "year")
        validate.checkType(fields, "list", "fields")
        return self._get("/api/_years?year=" + str(year), fields)[0]

    def getYears(self, metric=None, limit=1000, offset=0, fields=["all"]):
        """
        Function to retrieve information on multiple years\n
        :param metric: Order output. Default descending, add '-' for ascending. (Ex: "elo_acc", "-opr_mse", etc)\n
        :param limit: Limits the output length to speed up queries. Max 10,000\n
        :param offset: Skips the first (offset) items when returning\n
        :param fields: List of fields to return. Default is ["all"]\n
        :return: A list of dictionaries, each dictionary including the year and match/RP prediction statistics\n
        """

        validate.checkType(metric, "str", "metric")
        validate.checkType(limit, "int", "limit")
        validate.checkType(offset, "int", "offset")
        validate.checkType(fields, "list", "fields")
        url = "/api/_years?limit=" + str(limit) + "&offset=" + str(offset)
        if metric:
            url += "&o=" + self._negate(metric)
        return self._get(url, fields)

    def getTeamYear(self, team, year, fields=["all"]):
        """
        Function to retrieve information for a specific team's performance in a specific year\n
        :param team: Team number, integer\n
        :param year: Year, integer\n
        :param fields: List of fields to return. The default is ["all"]\n
        :return: a dictionary with the team, year, and Elo/OPR statistics\n
        """

        validate.checkType(team, "int", "team")
        validate.checkType(year, "int", "year")
        validate.checkType(fields, "list", "fields")
        url = "/api/_team_years?team=" + str(team) + "&year=" + str(year)
        return self._get(url, fields)[0]

    def getTeamYears(
        self,
        team=None,
        year=None,
        country=None,
        state=None,
        district=None,
        metric=None,
        limit=1000,
        offset=0,
        fields=["all"],
    ):
        """
        Function to retrieve information on multiple (team, year) pairs\n
        :param team: Restrict based on a specific team number\n
        :param country: Restrict based on country (select countries included)\n
        :param state: US States and Canada provinces only. Can infer country.\n
        :param district: Use 2 or 3-letter key (ex: FIM, NE, etc)\n
        :param metric: Order output. Default descending, add '-' for ascending. (Ex: "elo_pre_champs", "-opr_auto", etc)\n
        :param limit: Limits the output length to speed up queries. Max 10,000\n
        :param offset: Skips the first (offset) items when returning\n
        :param fields: List of fields to return. Default is ["all"]\n
        :return: A list of dictionaries, each dictionary including the team, year, and OPR/Elo statistics\n
        """

        url = "/api/_team_years"

        validate.checkType(team, "int", "team")
        validate.checkType(year, "int", "year")
        validate.checkType(metric, "str", "metric")
        validate.checkType(limit, "int", "limit")
        validate.checkType(offset, "int", "offset")
        validate.checkType(fields, "list", "fields")

        if limit > 10000:
            raise ValueError(
                "Please reduce 'limit', consider breaking into multiple smaller queries"
            )

        url += "?limit=" + str(limit) + "&offset=" + str(offset)

        if team and year:
            raise UserWarning("Use getTeamYear() instead")
        if team and (country or state or district):
            raise UserWarning("Conflicting location input")

        if team:
            url += "&team=" + str(team)

        if year:
            url += "&year=" + str(year)

        url += validate.getLocations(country, state, district)

        if metric:
            if metric not in validate.getTeamYearMetrics():
                raise ValueError("Invalid metric")
            url += "&o=" + self._negate(metric)

        return self._get(url, fields)

    def getEvent(self, event, fields=["all"]):
        """
        Function to retrieve information for a specific event\n
        :param event: Event key, string (ex: "2019cur")\n
        :param fields: List of fields to return. The default is ["all"]\n
        :return: a dictionary with the event and Elo/OPR statistics\n
        """

        validate.checkType(event, "str", "event")
        validate.checkType(fields, "list", "fields")
        url = "/api/_events?key=" + event
        return self._get(url, fields)[0]

    def getEvents(
        self,
        year=None,
        country=None,
        state=None,
        district=None,
        type=None,
        week=None,
        metric=None,
        limit=1000,
        offset=0,
        fields=["all"],
    ):
        """
        Function to retrieve information on multiple events\n
        :param year: Restrict by specific year, integer\n
        :param country: Restrict based on country (select countries included)\n
        :param state: US States and Canada provinces only. Can infer country.\n
        :param district: Use 2 or 3-letter key (ex: FIM, NE, etc)\n
        :param type: 0=regional, 1=district, 2=district champ, 3=champs, 4=einstein\n
        :param week: Week of play, generally between 0 and 8\n
        :param metric: Order output. Default descending, add '-' for ascending. (Ex: "elo_pre_playoffs", "-opr_end", etc)\n
        :param limit: Limits the output length to speed up queries. Max 10,000\n
        :param offset: Skips the first (offset) items when returning\n
        :param fields: List of fields to return. Default is ["all"]\n
        :return: A list of dictionaries, each dictionary including the team, event and Elo/OPR statistics\n
        """

        url = "/api/_events"

        validate.checkType(year, "int", "year")
        validate.checkType(metric, "str", "metric")
        type = validate.getType(type)
        validate.checkType(week, "int", "week")
        validate.checkType(limit, "int", "limit")
        validate.checkType(offset, "int", "offset")
        validate.checkType(fields, "list", "fields")

        if limit > 10000:
            raise ValueError(
                "Please reduce 'limit', consider breaking into multiple smaller queries"
            )

        url += "?limit=" + str(limit) + "&offset=" + str(offset)

        if year:
            url += "&year=" + str(year)

        url += validate.getLocations(country, state, district)

        if type is not None:
            url += "&type=" + str(type)

        if week is not None:
            url += "&week=" + str(week)

        if metric:
            if metric not in validate.getEventMetrics():
                raise ValueError("Invalid metric")
            url += "&o=" + self._negate(metric)

        return self._get(url, fields)

    def getTeamEvent(self, team, event, fields=["all"]):
        """
        Function to retrieve information for a specific (team, event) pair\n
        :param team: Team number, integer\n
        :param event: Event key, string (ex: "2019cur")\n
        :param fields: List of fields to return. The default is ["all"]\n
        :return: a dictionary with the event and Elo/OPR statistics\n
        """

        validate.checkType(team, "int", "team")
        validate.checkType(event, "str", "event")
        validate.checkType(fields, "list", "fields")
        url = "/api/_team_events?team=" + str(team) + "&event=" + event
        return self._get(url, fields)[0]

    def getTeamEvents(
        self,
        team=None,
        year=None,
        event=None,
        country=None,
        state=None,
        district=None,
        type=None,
        week=None,
        metric=None,
        limit=1000,
        offset=0,
        fields=["all"],
    ):
        """
        Function to retrieve information on multiple (team, event) pairs\n
        :param team: Restrict by team number, integer\n
        :param year: Restrict by specific year, integer\n
        :param country: Restrict based on country (select countries included)\n
        :param state: US States and Canada provinces only. Can infer country.\n
        :param district: Use 2 or 3-letter key (ex: FIM, NE, etc)\n
        :param type: 0=regional, 1=district, 2=district champ, 3=champs, 4=einstein\n
        :param week: Week of play, generally between 0 and 8\n
        :param metric: Order output. Default descending, add '-' for ascending. (Ex: "elo_pre_playoffs", "-opr_end", etc)\n
        :param limit: Limits the output length to speed up queries. Max 10,000\n
        :param offset: Skips the first (offset) items when returning\n
        :param fields: List of fields to return. Default is ["all"]\n
        :return: A list of dictionaries, each dictionary including the team, event and Elo/OPR statistics\n
        """

        url = "/api/_team_events"

        validate.checkType(team, "int", "team")
        validate.checkType(event, "str", "event")
        type = validate.getType(type)
        validate.checkType(week, "int", "week")
        validate.checkType(metric, "str", "metric")
        validate.checkType(limit, "int", "limit")
        validate.checkType(offset, "int", "offset")
        validate.checkType(fields, "list", "fields")

        if limit > 10000:
            raise ValueError(
                "Please reduce 'limit', consider breaking into multiple smaller queries"
            )

        url += "?limit=" + str(limit) + "&offset=" + str(offset)

        if team and event:
            raise UserWarning("Use getTeamEvent() instead")
        if event and (year or type or week):
            raise UserWarning("Overconstrained query")
        if (team or event) and (country or state or district):
            raise UserWarning("Conflicting location input")

        if team:
            url += "&team=" + str(team)

        if year:
            url += "&year=" + str(year)

        if event:
            url += "&event=" + event

        url += validate.getLocations(country, state, district)

        if type is not None:
            url += "&type=" + str(type)

        if week is not None:
            url += "&week=" + str(week)

        if metric:
            if metric not in validate.getTeamEventMetrics():
                raise ValueError("Invalid metric")
            url += "&o=" + self._negate(metric)

        return self._get(url, fields)

    def getMatch(self, match, fields=["all"]):
        """
        Function to retrieve information for a specific match\n
        :param match: Match key, string (ex: "2019cur_qm1", "2019cmptx_f1m3")\n
        :param fields: List of fields to return. The default is ["all"]\n
        :return: a dictionary with the match, score breakdowns, and predictions\n
        """

        validate.checkType(match, "str", "match")
        validate.checkType(fields, "list", "fields")
        return self._get("/api/_matches?key=" + match, fields)[0]

    def getMatches(self,
                   year=None,
                   event=None,
                   elims=None,
                   limit=1000,
                   offset=0,
                   fields=["all"]):
        """
        Function to retrieve information on multiple matches\n
        :param year: Restrict by specific year, integer\n
        :param event: Restrict by specific event key, string\n
        :param elims: Restrict to only elimination matches, default False\n
        :param limit: Limits the output length to speed up queries. Max 10,000\n
        :param offset: Skips the first (offset) items when returning\n
        :param fields: List of fields to return. Default is ["all"]\n
        :return: A list of dictionaries, each dictionary including the match, score breakdowns, and predictions\n
        """

        url = "/api/_matches"

        validate.checkType(year, "int", "year")
        validate.checkType(event, "str", "event")
        validate.checkType(elims, "bool", "elims")
        validate.checkType(limit, "int", "limit")
        validate.checkType(offset, "int", "offset")
        validate.checkType(fields, "list", "fields")

        if limit > 10000:
            raise ValueError(
                "Please reduce 'limit', consider breaking into multiple smaller queries"
            )

        url += "?limit=" + str(limit) + "&offset=" + str(offset)

        if not event:
            raise UserWarning("Query too large, be more specific (event)")

        if year and event:
            raise UserWarning("Year input will be ignored")

        if year:
            url += "&year=" + str(year)

        if event:
            url += "&event=" + event

        if elims:
            url += "&playoff=1"

        url += "&o=time"
        return self._get(url, fields)

    def getTeamMatch(self, team, match, fields=["all"]):
        """
        Function to retrieve information for a specific (team, match) pair\n
        :param team: Team number, integer\n
        :param match: Match key, string (ex: "2019cur_qm1", "2019cmptx_f1m3")\n
        :param fields: List of fields to return. The default is ["all"]\n
        :return: a dictionary with the team, match, alliance, and then elo\n
        """

        validate.checkType(team, "int", "team")
        validate.checkType(match, "str", "match")
        validate.checkType(fields, "list", "fields")
        url = "/api/_team_matches?team=" + str(team) + "&match=" + str(match)
        return self._get(url, fields)[0]

    def getTeamMatches(
        self,
        team=None,
        year=None,
        event=None,
        match=None,
        elims=None,
        limit=1000,
        offset=0,
        fields=["all"],
    ):
        """
        Function to retrieve information on multiple (team, match) pairs\n
        :param team: Restrict by team number, integer\n
        :param year: Restrict by specific year, integer\n
        :param event: Restrict by specific event key, string\n
        :param elims: Restrict to only elimination matches, default False\n
        :param limit: Limits the output length to speed up queries. Max 10,000\n
        :param offset: Skips the first (offset) items when returning\n
        :param fields: List of fields to return. Default is ["all"]\n
        :return: A list of dictionaries, each dictionary including the team, match, alliance, and then elo\n
        """

        url = "/api/_team_matches"

        validate.checkType(team, "int", "team")
        validate.checkType(year, "int", "year")
        validate.checkType(event, "str", "event")
        validate.checkType(match, "str", "match")
        validate.checkType(elims, "bool", "elims")
        validate.checkType(limit, "int", "limit")
        validate.checkType(offset, "int", "offset")
        validate.checkType(fields, "list", "fields")

        if limit > 10000:
            raise ValueError(
                "Please reduce 'limit', consider breaking into multiple smaller queries"
            )

        url += "?limit=" + str(limit) + "&offset=" + str(offset)

        if not team and not event and not match:
            raise UserWarning(
                "Query too large, be more specific (team, event, or match)")

        if (year and event) or (year and match) or (event and match):
            raise UserWarning("Only specify one of (year, event, match)")

        if team:
            url += "&team=" + str(team)

        if year:
            url += "&year=" + str(year)

        if event:
            url += "&event=" + event

        if match:
            url += "&match=" + match

        if elims:
            url += "&playoff=1"

        url += "&o=time"
        return self._get(url, fields)

    def getEventSim(self, event, index=None, full=False, iterations=None):
        validate.checkType(event, "str", "event")
        validate.checkType(index, "int", "index")
        validate.checkType(full, "bool", "full")
        validate.checkType(iterations, "int", "iterations")

        url = "/api/event_sim/event/" + event

        if index:
            url += "/index/" + str(index)

        if full:
            url += "/full"
            if iterations:
                if iterations > 100:
                    raise ValueError("Iterations must be <= 100")
                url += "/iterations/" + str(iterations)
        else:
            url += "/simple"

        return self._get(url, fields=["all"])
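A short usage sketch for the client above (team 254, the "elo" metric, and the field names are illustrative assumptions about valid inputs):

sb = Statbotics()

# One team, every field
print(sb.getTeam(254))

# Top ten active teams by Elo, trimmed to two fields
for row in sb.getTeams(metric="elo", limit=10, fields=["team", "elo"]):
    print(row["team"], row["elo"])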
Example #13
# Cache a POST request
import datetime

import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache

sess = requests.session()
# CacheControl caches only GET by default; POST has to be opted in via
# cacheable_methods (and the response itself must still be cacheable).
cached_sess = CacheControl(sess,
                           cache=FileCache('.web_cache'),
                           cacheable_methods=('GET', 'POST'))

for n in range(10):
    start_time = datetime.datetime.now()
    response = cached_sess.post("http://httpbin.org/post")
    delta_time = datetime.datetime.now() - start_time
    print("Time delta: ", delta_time)
    assert response.status_code == 200
Example #14
    src_content = f.read()
submit_url = "https://atcoder.jp/contests/" + contest_id + "/submit?lang=en"
login_url = "https://atcoder.jp/login?continue=" + \
    urlparse.quote(submit_url, "")

session = CacheControl(requests.Session())

login_res = session.get(login_url)
login_html = BeautifulSoup(login_res.text, "html.parser")
csrf_token = login_html.find_all(attrs={"name": "csrf_token"})[0]["value"]
login_info = {
    "username": username,
    "password": password,
    "csrf_token": csrf_token
}
login_res = session.post(login_url, data=login_info)
if not login_res.ok:
    print("Fail login")
    print(login_res.headers)
    print(login_res.status_code)
    exit()
html_text = re.sub(r"\r\n|\r|\n", "\n", login_res.text)
submit_html = BeautifulSoup(html_text, "lxml")
csrf_token = submit_html.find_all(attrs={"name": "csrf_token"})[0]["value"]
select_task = submit_html.find("select", attrs={"id": "select-task"})

screen_task_name_candidates = list(select_task.find_all("option"))

screen_task_name = [
    opt["value"] for opt in screen_task_name_candidates
    if opt.get_text(strip=True).lower().startswith(probrem)
]
Example #15
class GithubAPI:
    def __init__(self):
        self.s = CacheControl(requests.session())

        self.app_id = GITHUB_APP_ID
        self.jwt_auth = JWTAuth(GITHUB_APP_ID)

        self.oauth_client_id = GITHUB_OAUTH_CLIENT_ID
        self.oauth_client_secret = GITHUB_OAUTH_CLIENT_SECRET
        self.oauth_redirect_uri = GITHUB_OAUTH_REDIRECT_URI

    def post(self,
             url,
             *args,
             api=True,
             jwt_bearer=False,
             oauth_server_auth=None,
             access_token=None,
             **kwargs):
        headers = kwargs.pop('headers', {})
        auth = kwargs.pop('auth', None)
        data = kwargs.pop('data', None)
        json = kwargs.pop('json', None)
        if api:
            headers.update(GITHUB_API_ACCEPT)
        if jwt_bearer:
            auth = self.jwt_auth
        if access_token:
            headers.update({'Authorization': f'token {access_token}'})
        if oauth_server_auth and (data or json):
            (data or json)['client_id'] = GITHUB_OAUTH_CLIENT_ID
            (data or json)['client_secret'] = GITHUB_OAUTH_CLIENT_SECRET

        return self.s.post(url,
                           *args,
                           data=data,
                           json=json,
                           headers=headers,
                           auth=auth,
                           **kwargs)

    def get(self,
            url,
            *args,
            api=True,
            jwt_bearer=False,
            oauth_server_auth=None,
            access_token=None,
            **kwargs):
        headers = kwargs.pop('headers', {})
        auth = kwargs.pop('auth', None)
        data = kwargs.pop('data', None)
        json = kwargs.pop('json', None)
        if api:
            headers.update(GITHUB_API_ACCEPT)
        if jwt_bearer:
            auth = self.jwt_auth
        if access_token:
            headers.update({'Authorization': f'token {access_token}'})
        if oauth_server_auth and (data or json):
            (data or json)['client_id'] = GITHUB_OAUTH_CLIENT_ID
            (data or json)['client_secret'] = GITHUB_OAUTH_CLIENT_SECRET

        return self.s.get(url,
                          *args,
                          data=data,
                          json=json,
                          headers=headers,
                          auth=auth,
                          **kwargs)

    def get_paginated(self, key, url, *args, **kwargs):
        r = self.get(url, *args, **kwargs)
        r.raise_for_status()
        data = r.json()[key]
        # Follow the rel="next" Link header; the original looped on 'link'
        # and re-fetched the same url, so it never advanced to the next page.
        while 'next' in r.links:
            r = self.get(r.links['next']['url'], *args, **kwargs)
            r.raise_for_status()
            data.extend(r.json()[key])
        return data

    def oauth_authorize_url(self, *args):
        payload = {
            'client_id':
            self.oauth_client_id,
            'redirect_uri':
            self.oauth_redirect_uri,
            'state':
            secure_encode_64((*args, secrets.token_bytes(10)), HMAC_SECRET)
        }
        # noinspection SpellCheckingInspection
        return f'https://github.com/login/oauth/authorize?{urlencode(payload)}'

    def get_oauth_access_token(self, code, state):
        payload = {
            'client_id': self.oauth_client_id,
            'client_secret': self.oauth_client_secret,
            'code': code,
            'redirect_uri': self.oauth_redirect_uri,
            'state': state
        }
        r = self.post('https://github.com/login/oauth/access_token',
                      data=payload,
                      api=False)

        r.raise_for_status()

        data = parse_qs(r.text)

        access_token = data['access_token'][0]

        return access_token

    def get_user(self, access_token):
        r = self.get('https://api.github.com/user', access_token=access_token)

        r.raise_for_status()

        return r.json()

    def get_installations_for_user(self, access_token):
        data = self.get_paginated('installations',
                                  'https://api.github.com/user/installations',
                                  access_token=access_token)
        return data

    def get_repositories_for_installation(self, installation_id, access_token):
        data = self.get_paginated(
            'repositories',
            f'https://api.github.com/user/installations/{installation_id}/repositories',
            access_token=access_token)
        return data

    def get_repository(self, repo_id, access_token):
        r = self.get(f'https://api.github.com/repositories/{repo_id}',
                     access_token=access_token)

        r.raise_for_status()

        return r.json()

    def markdown(self, markdown, context):
        r = self.post('https://api.github.com/markdown',
                      json={
                          'text': markdown,
                          'mode': 'gfm',
                          'context': context
                      },
                      oauth_server_auth=True)

        r.raise_for_status()

        return r.text

    def add_issue_comment(self, repo, number, body, access_token):
        r = self.post(
            f'https://api.github.com/repos/{repo}/issues/{number}/comments',
            json={'body': body},
            access_token=access_token)

        r.raise_for_status()

        return r.text

    def add_review_comment(self, repo, number, in_reply_to, body,
                           access_token):
        r = self.post(
            f'https://api.github.com/repos/{repo}/pulls/{number}/comments',
            json={
                'body': body,
                'in_reply_to': in_reply_to
            },
            access_token=access_token)

        r.raise_for_status()

        return r.text
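JWTAuth is referenced above but not defined in the snippet. A minimal sketch of a GitHub App JWT auth hook using PyJWT (the key path and expiry window are assumptions):

import time

import jwt  # PyJWT
from requests.auth import AuthBase


class JWTAuth(AuthBase):
    # Hypothetical sketch: sign a short-lived GitHub App JWT on each request.
    def __init__(self, app_id, private_key_path='github-app.private-key.pem'):
        self.app_id = app_id
        with open(private_key_path) as f:
            self.private_key = f.read()

    def __call__(self, request):
        now = int(time.time())
        token = jwt.encode(
            {'iat': now, 'exp': now + 9 * 60, 'iss': self.app_id},
            self.private_key,
            algorithm='RS256',
        )
        request.headers['Authorization'] = 'Bearer {}'.format(token)
        return request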
Example #16
class Handler(object):
    def __init__(self, settings):
        self.settings = settings
        self.handler_config = settings.config[settings.env]['handler']
        self.cached_session = CacheControl(requests.session())

        self._init_db()

    def _get_db_conn(self):
        return sqlite3.connect(self.handler_config['dbfile'])

    def _init_db(self):
        con = self._get_db_conn()
        cur = con.cursor()
        stmt = '''
        CREATE TABLE IF NOT EXISTS temporary_bounces
            (
                bounced_address TEXT,
                domain TEXT,
                counter INTEGER
            );
        '''
        cur.execute(stmt.strip())
        con.commit()

        stmt = '''
        CREATE TABLE IF NOT EXISTS permanent_bounces
            (
                ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                bounced_address TEXT,
                domain TEXT,
                status_code INTEGER
            );
        '''
        cur.execute(stmt.strip())
        con.commit()

        cur.close()
        con.close()

    def _increase_bounced_address_counter(self, bounced_address, domain):
        con = self._get_db_conn()
        cur = con.cursor()
        stmt = '''
        INSERT OR REPLACE INTO temporary_bounces
            VALUES (:bounced_address, :domain,
            COALESCE(
                (SELECT counter FROM temporary_bounces
                    WHERE bounced_address=:bounced_address AND domain=:domain),
                0) + 1);
        '''
        cur.execute(stmt.strip(), {
            'bounced_address': bounced_address,
            'domain': domain
        })
        con.commit()

        cur.close()
        con.close()

    def _get_bounced_address_counter(self, bounced_address, domain):
        con = self._get_db_conn()
        cur = con.cursor()
        stmt = '''
        SELECT counter FROM temporary_bounces
            WHERE bounced_address=:bounced_address AND domain=:domain;
        '''
        cur.execute(stmt.strip(), {
            'bounced_address': bounced_address,
            'domain': domain
        })
        row = cur.fetchone()
        result = 0
        if row:
            result = int(row[0])

        cur.close()
        con.close()
        return result

    def _find_address(self, address):
        con = self._get_db_conn()
        cur = con.cursor()
        stmt = '''
        SELECT * FROM permanent_bounces
            WHERE bounced_address LIKE :bounced_address;
        '''
        cur.execute(stmt.strip(), {'bounced_address': '%{0}%'.format(address)})
        permanent_bounces = cur.fetchall()

        stmt = '''
        SELECT * FROM temporary_bounces
            WHERE bounced_address LIKE :bounced_address;
        '''
        cur.execute(stmt.strip(), {'bounced_address': '%{0}%'.format(address)})
        temporary_bounces = cur.fetchall()

        cur.close()
        con.close()
        return permanent_bounces, temporary_bounces

    def _reset_bounced_address(self, bounced_address, domain):
        con = self._get_db_conn()
        cur = con.cursor()
        stmt = '''
        DELETE FROM temporary_bounces
            WHERE bounced_address=:bounced_address AND domain=:domain;
        '''
        cur.execute(stmt.strip(), {
            'bounced_address': bounced_address,
            'domain': domain
        })
        con.commit()

        cur.close()
        con.close()

    def _set_permanent_bounced_address(self, bounced_address, domain,
                                       status_code):
        con = self._get_db_conn()
        cur = con.cursor()
        stmt = '''
        INSERT INTO permanent_bounces (bounced_address, domain, status_code)
            VALUES (:bounced_address, :domain, :status_code);
        '''
        cur.execute(
            stmt.strip(), {
                'bounced_address': bounced_address,
                'domain': domain,
                'status_code': status_code
            })
        con.commit()

        cur.close()
        con.close()

    def _get_origin_to_domains(self, msg):
        '''
        return the domains to which the origin email was sent
        '''
        to_addresses = [
            address for _, address in
            [parseaddr(x.strip()) for x in msg['To'].split(",")]
        ]
        domains = []
        for a in to_addresses:
            parts = tldextract.extract(a.split("@")[1])
            domains.append("%s.%s" % (parts[-2], parts[-1]))
        return domains

    def _store_permanent_bounced_email(self, bounced_address, body):
        if not ('permanent_bounced_emails_path' in self.handler_config
                and body):
            return

        dir_path = os.path.join(
            self.handler_config['permanent_bounced_emails_path'],
            bounced_address[0:2].lower())

        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        path = os.path.join(dir_path, bounced_address + '.gz')
        content = bytes(body)
        with gzip.open(path, 'wb') as f:
            f.write(content)

    def _handle_out_of_office_message(self, msg):
        pass

    def _handle_temporary_bounced_address(self, bounced_address, domain, body):
        temporary_threshold = self.handler_config['temporary_threshold']
        current_counter = self._get_bounced_address_counter(
            bounced_address, domain)

        if current_counter > temporary_threshold:
            self._handle_permanent_bounced_address(bounced_address, domain,
                                                   body)
            self._reset_bounced_address(bounced_address, domain)
            return

        self._increase_bounced_address_counter(bounced_address, domain)

    def _default_url_resolver(self, bounced_address, config):
        tpl = URITemplate(config['base_url'])
        return tpl.expand(address=bounced_address)

    def _xikolo_url_resolver(self, bounced_address, config):
        response = self.cached_session.get(config['base_url'])
        uri = response.json()['email_suspensions_url']
        tpl = URITemplate(uri)
        return tpl.expand(address=bounced_address)

    def _handle_permanent_bounced_address(self, bounced_address, domain, body):
        config = self.handler_config['domains'][domain]

        if 'url_resolver' in config and config['url_resolver'] == 'xikolo':
            endpoint = self._xikolo_url_resolver(bounced_address, config)
        else:
            endpoint = self._default_url_resolver(bounced_address, config)

        logger.debug("Post request to: %s for address: %s", endpoint,
                     bounced_address)

        response = self.cached_session.post(endpoint, data={})
        logger.info("Response (%s): %s ", response.status_code, response.text)

        self._set_permanent_bounced_address(bounced_address, domain,
                                            response.status_code)
        self._store_permanent_bounced_email(bounced_address, body)

    def set_permanent_bounced_address(self, bounced_address, domain):
        '''
        handles manually bounced email addresses
        '''
        logger.debug("Permanent: %s", bounced_address)
        self._handle_permanent_bounced_address(bounced_address, domain, '')

    def find_address(self, address):
        '''
        Find an email address within permanent or temporary bounced emails
        '''
        logger.debug("Find: %s", address)
        permanent_bounces, temporary_bounces = self._find_address(address)

        logger.debug('> Permanent bounces for address: "{0}"'.format(address))
        for entry in permanent_bounces:
            logger.debug(entry)

        logger.debug('> Temporary bounces for address: "{0}"'.format(address))
        for entry in temporary_bounces:
            logger.debug(entry)

    def handle_message(self, body):
        '''
        handles soft and hard bounced emails
        '''
        msg = email.message_from_bytes(bytes(body))
        logger.info("------------- INCOMING MESSAGE -------------")
        for key, value in msg.items():
            if any(key.startswith(h) for h in ['From', 'To', 'Subject']):
                logger.info("%s:\t%s", key, value)

        for domain in self._get_origin_to_domains(msg):
            if domain in self.handler_config['domains'].keys():
                break
        else:
            raise BouncedEmailException("Domain '%s' not found" % domain)

        t, p = all_failures(msg)

        def validate_addresses(bounced_addresses):
            address_list = []
            for address in bounced_addresses:
                address = address.decode('utf-8')
                if validate_email(address):
                    address_list.append(address)
            return address_list

        temporary = validate_addresses(t)
        permanent = validate_addresses(p)

        if not (temporary or permanent):
            return self._handle_out_of_office_message(msg)

        logger.info("Domain: %s", domain)

        for bounced_address in temporary:
            # sometimes a temporary failure is a permanent failure as well (strange, but yes)
            if bounced_address in permanent:
                continue
            logger.info("Temporary: %s", bounced_address)
            self._handle_temporary_bounced_address(bounced_address, domain,
                                                   body)

        for bounced_address in permanent:
            logger.info("Permanent: %s", bounced_address)
            self._handle_permanent_bounced_address(bounced_address, domain,
                                                   body)
Example #17
0
class TwistedEventConsumer(EventConsumer):

    def __init__(self, queue, api_key, config):
        self._queue = queue
        """ @type: queue.Queue """

        self._session = CacheControl(txrequests.Session())
        """ :type: txrequests.Session """

        self._api_key = api_key
        self._config = config
        """ :type: ldclient.twisted.TwistedConfig """

        self._looping_call = None
        """ :type: LoopingCall"""

    def start(self):
        self._looping_call = task.LoopingCall(self._consume)
        self._looping_call.start(5)

    def stop(self):
        self._looping_call.stop()

    def is_alive(self):
        return self._looping_call is not None and self._looping_call.running

    def flush(self):
        return self._consume()

    def _consume(self):
        items = []
        try:
            while True:
                items.append(self._queue.get_nowait())
        except Empty:
            pass

        if items:
            return self.send_batch(items)

    @defer.inlineCallbacks
    def send_batch(self, events):
        @defer.inlineCallbacks
        def do_send(should_retry):
            # noinspection PyBroadException
            try:
                if isinstance(events, dict):
                    body = [events]
                else:
                    body = events
                hdrs = _headers(self._api_key)
                uri = self._config.base_uri + '/api/events/bulk'
                r = yield self._session.post(uri, headers=hdrs, timeout=(self._config.connect, self._config.read),
                                             data=json.dumps(body))
                r.raise_for_status()
            except ProtocolError as e:
                inner = e.args[1]
                if inner.errno == errno.ECONNRESET and should_retry:
                    log.warning(
                        'ProtocolError exception caught while sending events. Retrying.')
                    yield do_send(False)
                else:
                    log.exception(
                        'Unhandled exception in event consumer. Analytics events were not processed.')
            except Exception:
                log.exception(
                    'Unhandled exception in event consumer. Analytics events were not processed.')
        try:
            yield do_send(True)
        finally:
            for _ in events:
                self._queue.task_done()
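
# Hypothetical usage sketch (queue and config names are assumptions):
#   from queue import Queue
#   q = Queue()
#   consumer = TwistedEventConsumer(q, 'api-key', twisted_config)
#   consumer.start()           # LoopingCall drains the queue every 5 seconds
#   q.put({'kind': 'custom'})  # picked up on the next tick or by flush()
#   consumer.flush()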
Example #18
0
class Animeshon(BaseMetadata):
    def __init__(self):
        super().__init__()
        self.name = "Animeshon"
        self.logo_url = LOGO_LIST[5]
        self.website_url = 'https://animeshon.com/e/'
        self.metadata_flags = [Flags.MetadataFlag.MetadataLinker]
        self.requests_session = CacheControl(requests.Session(),
                                             cacheable_methods=("POST", "GET"),
                                             cache=FileCache('.Cache/Animeshon'),
                                             heuristic=MALHeuristic())
        # Note To Self: CacheControl uses the url as a key.
        #  Post requests use the same url. research alternatives like requests-cache
        #  https://github.com/ionrock/cachecontrol/issues/216

        self.requests_session = requests_cache.core.CachedSession(cache_name='.Cache/Animeshon/cache',
                                                                  backend='sqlite',
                                                                  expire_after=60*60*24*365,  # 1 Year Cache
                                                                  allowable_methods=('GET', 'POST'),
                                                                  include_get_headers=True)
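        # Added note (assumption based on the requests_cache docs): unlike the
        # CacheControl session above, requests_cache with
        # allowable_methods=('GET', 'POST') includes the request body in the
        # cache key, so distinct GraphQL POSTs to the same URL are cached as
        # separate entries.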
        self.uncached_requests_session = requests.Session()
        self.getratelimiter = AsyncRateLimiter(max_calls=20, period=1, callback=limited)
        self.queryratelimiter = AsyncRateLimiter(max_calls=10, period=1, callback=limited)

    def query(self, query: str, query_type: str = 'query'):
        url = "https://api.animeshon.com/graphql"
        if query_type == 'query':
            rate_limiter = self.queryratelimiter
        elif query_type == 'get':
            rate_limiter = self.getratelimiter
        else:
            # without this branch, rate_limiter would be unbound below
            raise ValueError("query_type must be 'query' or 'get'")
        with rate_limiter:
            result = self.requests_session.post(url, json={'query': query})
            try:
                if result.from_cache:
                    # drop the last recorded call if it was served from cache;
                    # only real api calls need to be slowed down
                    rate_limiter.calls.pop()
            except Exception:
                pass
        if result.ok:
            if getattr(result, 'from_cache', False):
                error_on_cache = json.loads(result.content).get('errors')
                if error_on_cache:
                    key = self.requests_session.cache.create_key(result.request)
                    self.requests_session.cache.delete(key)
                    return self.query(query, query_type)
            return json.loads(result.content), result
        else:
            print(result.content)
            raise RuntimeError('Failed to grab data')

    def LinkId(self, metaIDs: AnimeStruct.MetaIDs):
        namespace_dict = {"MAL": "myanimelist-net",
                          "ANIDB": "anidb-net",
                          "ANN": "animenewsnetwork-com"}
        plugin_namespace_dict = {"myanimelist-net": "MAL",
                                 "anidb-net": "ANIDB",
                                 "animenewsnetwork-com": "ANN"}
        mappedPlugins = metaIDs.mappedPlugins()
        if "MAL" in mappedPlugins and "ANIDB" in mappedPlugins and "ANN" in mappedPlugins and "Animeshon" in mappedPlugins:
            return metaIDs
        result = None
        for metaId in metaIDs.list:
            if metaId.PluginName in namespace_dict.keys():
                namespace = namespace_dict[metaId.PluginName]
                externalID = metaId.id
                query = queries.queryCrossReference.format(externalID=externalID, namespace=namespace)
                print(f"Animeshon: Linking ids from: {metaId.PluginName} : {metaId.id}")
                queryReply, raw = self.query(query, 'query')
                result = queryReply.get("data")
                if not result.get("queryCrossReference"):
                    # Todo: This may also be true if we are being rate limited!
                    # GraphQL errors live at the top level of the reply, not
                    # under 'data'
                    if queryReply.get('errors'):
                        raise RuntimeError(queryReply.get('errors')[0].get('message'))
                    else:
                        print('Not Found')
                    # Delete from cache so next time it may have been added
                    key = self.requests_session.cache.create_key(raw.request)
                    self.requests_session.cache.delete(key)
                    return metaIDs
        if result is None:
            # none of the known external id namespaces were present
            return metaIDs
        anime_data = result.get("queryCrossReference")[0].get("resource")
        Animeshon_Metaid = AnimeStruct.MetaID(self.name, anime_data.get("id"))
        metaIDs.list.append(Animeshon_Metaid)
        for meta in anime_data.get("crossrefs"):
            pluginName = plugin_namespace_dict[meta.get("namespace")]
            id = meta.get("externalID")
            if pluginName not in mappedPlugins:
                meta_Metaid = AnimeStruct.MetaID(pluginName, id)
                metaIDs.list.append(meta_Metaid)
        return metaIDs

    def LinkIds(self, database: Database, anime_hash: str):
        """Grabs the anime from the database and adds new
        metadata ids from diferent providers"""
        anime: AnimeStruct.Anime = AnimeStruct.Anime.from_db(anime_hash, database)
        MetaIds: AnimeStruct.MetaIDs = self.LinkId(anime.id)
        MetaIds.to_db(database)

    def PopulateAnime(self, database: Database, anime_hash: str):
        oldAnimeData = AnimeStruct.Anime.from_db(anime_hash, database)
        if oldAnimeData.id.getID("Animeshon"):
            AnimeshonID = oldAnimeData.id.getID("Animeshon")
            query = queries.getAnime.format(AnimeshonID=AnimeshonID)
            print(f"Animeshon: Obtaining Anime Metadata: {AnimeshonID}")
            queryReply, raw = self.query(query, 'get')
            anime_metadata = queryReply.get("data").get('getAnime')
            properAnime = AnimeshonFormatter.AnimeMetadata(anime_metadata, oldAnimeData)
            # remove edges to stop the anime from keeping some old info like type
            database.remove_successor_edges(oldAnimeData.hash)
            properAnime.to_db(database)
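
# Hypothetical usage sketch (the query string is illustrative GraphQL):
#   meta = Animeshon()
#   data, response = meta.query('{ __typename }', query_type='get')
#   print(data)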
Example #19
0
def do_forums(function, endpoint, method, *data):

    import requests
    import common.logger as _logger
    from common.graphite import sendmetric
    import common.credentials.forums as _forums
    import logging
    import json
    import redis
    from cachecontrol import CacheControl
    from cachecontrol.caches.redis_cache import RedisCache

    # reference:
    # https://invisionpower.com/developers/rest-api

    # shut the F**K up.
    logging.getLogger("requests").setLevel(logging.WARNING)

    # setup redis caching for the requests object
    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    session = requests.Session()
    # redis does not actually connect above, i have to specifically test

    try:
        r.client_list()
        session = CacheControl(session, RedisCache(r))
    except redis.exceptions.ConnectionError as err:
        sendmetric(function, 'forums', 'api_request', 'rediserror', 1)
        _logger.log('[' + function + '] Redis connection error: ' + str(err), _logger.LogLevel.ERROR)
    except ConnectionRefusedError as err:
        # redis.exceptions has no ConnectionRefusedError; use the builtin
        sendmetric(function, 'forums', 'api_request', 'rediserror', 1)
        _logger.log('[' + function + '] Redis connection refused: ' + str(err), _logger.LogLevel.ERROR)
    except Exception as err:
        sendmetric(function, 'forums', 'api_request', 'rediserror', 1)
        _logger.log('[' + function + '] Redis generic error: ' + str(err), _logger.LogLevel.ERROR)

    # do the request, but catch exceptions for connection issues

    url = _forums.endpoint  # note: the 'endpoint' argument is not applied here; the configured base endpoint is used as-is
    timeout = 5

    try:
        if method == 'post':
            data = data[0]
            headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
            request = session.post(url, headers=headers, timeout=timeout, data=data, auth=(_forums.api_key, '' ))
        elif method == 'get':
            headers = {'Accept': 'application/json'}
            request = session.get(url, headers=headers, timeout=timeout, auth=(_forums.api_key, '' ))
        else:
            return(500, { 'code': 500, 'error': 'unsupported method: ' + str(method)})

    except requests.exceptions.ConnectionError as err:
        sendmetric(function, 'forums', 'api_request', 'connection_error', 1)
        _logger.log('[' + function + '] forum api connection error:: ' + str(err), _logger.LogLevel.ERROR)
        return(500, { 'code': 500, 'error': 'API connection error: ' + str(err)})
    except requests.exceptions.ReadTimeout as err:
        sendmetric(function, 'forums', 'api_request', 'read_timeout', 1)
        _logger.log('[' + function + '] forum api connection read timeout: ' + str(err), _logger.LogLevel.ERROR)
        return(500, { 'code': 500, 'error': 'API connection read timeout: ' + str(err)})
    except requests.exceptions.Timeout as err:
        sendmetric(function, 'forums', 'api_request','timeout' , 1)
        _logger.log('[' + function + '] forum api connection timeout: ' + str(err), _logger.LogLevel.ERROR)
        return(500, { 'code': 500, 'error': 'forum API connection timeout: ' + str(err)})
    except Exception as err:
        sendmetric(function, 'forums', 'api_request', 'general_error', 1)
        _logger.log('[' + function + '] forum api generic error: ' + str(err), _logger.LogLevel.ERROR)
        return(500, { 'code': 500, 'error': 'General error: ' + str(err)})

    # need to also check that the api thinks this was success.

    if not request.status_code == 200:
        sendmetric(function, 'forums', 'api_request', 'failure', 1)
        # don't bother to log 404s
        if not request.status_code == 404:
            _logger.log('[' + function + '] forum API error ' + str(request.status_code) + ': ' + str(request.text), _logger.LogLevel.ERROR)
            _logger.log('[' + function + '] forum API error URL: ' + str(url), _logger.LogLevel.ERROR)
    else:
        sendmetric(function, 'forums', 'api_request', 'success', 1)

    # do metrics

    elapsed_time = request.elapsed.total_seconds()
    sendmetric(function, 'forums', 'api_request', 'elapsed', elapsed_time)
    sendmetric(function, 'forums', 'api_request', request.status_code, 1)

    return(request.status_code, request.text)
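
# Hypothetical usage sketch (the configured _forums.endpoint is hit as-is;
# see the note on the unused 'endpoint' argument above):
#   status, text = do_forums(__name__, '/core/members', 'get')
#   if status == 200:
#       members = json.loads(text)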
Example #20
0
class ApiClient(object):
    def __init__(self, base_uri, client_id, username, password, scope=None):
        self.base_uri = base_uri
        self.client_id = client_id
        self.username = username
        self.password = password
        self.scope = scope
        self.session = CacheControl(requests.session())

    @property
    def auth_header(self):
        """
        authorization header
        """
        token_response = self.get_token()
        return {
            'authorization':
            '%s %s' % (token_response.get(
                'token_type', ''), token_response.get('access_token', ''))
        }

    def get_token(self):
        """
        get the token endpoint from the well-known uri and
        then authenticate with grant_type password
        """
        uri = urljoin(self.base_uri, '.well-known/openid-configuration')
        openid_configuration = self.session.get(uri).json()
        token_endpoint = openid_configuration['token_endpoint']

        body = {
            'grant_type': 'password',
            'client_id': self.client_id,
            'username': self.username,
            'password': self.password
        }
        if self.scope:
            body['scope'] = self.scope
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        json_response = self.session.post(token_endpoint,
                                          headers=headers,
                                          data=body).json()
        if 'error' in json_response:
            logger.error(json_response)
            raise Exception('authorization error', json_response)
        return json_response

    def get(self, uri):
        """
        make authorized request
        """
        uri = urljoin(self.base_uri, uri)
        headers = self.auth_header
        response = self.session.get(uri, headers=headers)
        return response

    def put(self, uri, data=None):
        uri = urljoin(self.base_uri, uri)
        headers = self.auth_header
        response = self.session.put(uri, headers=headers, json=data)
        return response
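
# Hypothetical usage sketch (URLs and credentials are placeholders):
#   client = ApiClient('https://id.example.org/', 'my-client', 'alice', 's3cret')
#   profile = client.get('api/v1/me').json()
# Every request re-resolves a token via auth_header; the CacheControl session
# can at least serve the .well-known discovery document from cache when the
# server sends caching headers.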
Example #21
0
def do_esi(function,
           url,
           method,
           page,
           charid=None,
           data=None,
           version='latest',
           base='esi',
           extraheaders={}):

    import requests
    import common.logger as _logger
    import common.ldaphelpers as _ldaphelpers
    import logging
    import json
    import redis
    import re
    from cachecontrol import CacheControl
    from cachecontrol.caches.redis_cache import RedisCache
    from common.graphite import sendmetric
    from common.credentials.g_translate import translate_api_key
    from commands.maint.tokens import eve_tokenthings

    # headers

    useragent = 'triumvirate services - yell at saeka'
    headers = {
        'Accept': 'application/json',
        'User-Agent': useragent,
        'Accept-Encoding': 'gzip'
    }

    if method == 'post':
        # add a header for POST data
        headers['Content-Type'] = 'application/json'

    if extraheaders:
        # 'is not {}' compared identity and was always true; a truthiness
        # check is what was intended here
        # add any custom headers as necessary
        headers.update(extraheaders)

    # shut the F**K up.
    logging.getLogger("requests").setLevel(logging.WARNING)

    # if a charid is specified, this is going to be treated as an authenticated request
    # where an access token is added to the esi request url automatically

    # snag the user's tokens from ldap
    if charid is not None:

        _logger.log(
            '[' + __name__ + '] authenticated {0} request for {1}: {2}'.format(
                base, charid, url), _logger.LogLevel.DEBUG)

        dn = 'ou=People,dc=triumvirate,dc=rocks'
        filterstr = '(uid={})'.format(charid)
        attrlist = [
            'esiAccessToken',
            'esiAccessTokenExpires',
            'discordAccessToken',
        ]
        code, result = _ldaphelpers.ldap_search(__name__, dn, filterstr,
                                                attrlist)

        if code is False:
            _logger.log(
                '[' + __name__ + '] LDAP connection error: {}'.format(result),
                _logger.LogLevel.ERROR)
            js = {'error': 'internal ldap error'}
            return 500, js, None

        if result is None:
            js = {'error': 'no tokens for uid {0}'.format(charid)}
            return 500, js, None

        try:
            (dn, result), = result.items()
        except Exception as e:
            # unexpected LDAP result shape; the None checks below will reject it
            print(result)

        esi_atoken = result.get('esiAccessToken')
        esi_atoken_expires = result.get('esiAccessTokenExpires')
        discord_atoken = result.get('discordAccessToken')

        if esi_atoken is None and base == 'esi':
            js = {'error': 'no stored esi access token'}
            return 400, js, None

        if discord_atoken is None and base == 'discord':
            js = {'error': 'no stored discord access token'}
            return 400, js, None

        # make sure the ESI token is current if this is an ESI request

        if base == 'esi':
            # at this point it this is an authenticated request.
            # make sure that the token retrieved is current. if it is not, update it.

            pass

    else:
        _logger.log(
            '[' + __name__ +
            '] unauthenticated {0} request: {1}'.format(base, url),
            _logger.LogLevel.DEBUG)

    # construct the full request url including api version

    # request_esi hits more than just ESI-specific stuff, so some scoping of the base is necessary

    if base == 'esi':
        # ESI ofc
        base_url = 'https://esi.evetech.net/' + version

        # add common query parameters including pagination and datasource
        # if the url doesn't have a ? indicating it has parameters, add the parameter set with them

        pattern = re.compile('.*[?].*')
        if pattern.match(url):
            url += '&datasource=tranquility'
        else:
            url += '?datasource=tranquility'

        # paginating on more than 1 page to be kind to the google cdn
        if page > 1:
            url += '&page={0}'.format(page)

        if charid is not None:
            # add the authenticated header
            headers['Authorization'] = 'Bearer {0}'.format(esi_atoken)
    elif base == 'discord':
        # discord api
        base_url = 'https://discordapp.com/api/' + version

        if charid is not None:
            # add the authenticated header
            headers['Authorization'] = 'Bearer {0}'.format(discord_atoken)

    elif base == 'zkill':
        # zkillboard
        base_url = 'https://zkillboard.com/api'
    elif base == 'triapi':
        # tri api
        base_url = 'https://api.triumvirate.rocks'
    elif base == 'oauth':
        # eve oauth
        base_url = 'https://login.eveonline.com/oauth'
    elif base == 'g_translate':
        # google translate
        base_url = 'https://translation.googleapis.com/language/translate/v2'
        base_url += '?key={0}&target=en&source=text&model=nmt&'.format(
            translate_api_key)
    elif base == 'eve_market':
        # eve marketdata
        base_url = 'https://api.eve-marketdata.com/api/'

    # special google translate bullshit

    if base == 'g_translate':
        full_url = base_url + url
    else:
        full_url = base_url + '/' + url

    # setup redis caching for the requests object
    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    session = requests.Session()
    # redis does not actually connect above, i have to specifically test

    try:
        r.client_list()
        session = CacheControl(session, RedisCache(r))
    except redis.exceptions.ConnectionError as err:
        sendmetric(function, base, 'request', 'rediserror', 1)
        _logger.log('[' + function + '] Redis connection error: ' + str(err),
                    _logger.LogLevel.ERROR)
    except ConnectionRefusedError as err:
        # redis.exceptions has no ConnectionRefusedError; use the builtin
        sendmetric(function, base, 'request', 'rediserror', 1)
        _logger.log('[' + function + '] Redis connection refused: ' + str(err),
                    _logger.LogLevel.ERROR)
    except Exception as err:
        sendmetric(function, base, 'request', 'rediserror', 1)
        _logger.log('[' + function + '] Redis generic error: ' + str(err),
                    _logger.LogLevel.ERROR)

    # do the request, but catch exceptions for connection issues

    timeout = 10
    try:
        if method == 'post':
            request = session.post(full_url,
                                   headers=headers,
                                   timeout=timeout,
                                   data=data)
        elif method == 'get':
            request = session.get(full_url, headers=headers, timeout=timeout)
        else:
            return (500, {'error': 'unsupported method: ' + str(method)}, None)

    except requests.exceptions.ConnectionError as err:
        sendmetric(function, base, 'request', 'connection_error', 1)
        _logger.log('[' + function + '] ESI connection error:: ' + str(err),
                    _logger.LogLevel.WARNING)
        return (500, {'error': 'API connection error: ' + str(err)}, None)
    except requests.exceptions.ReadTimeout as err:
        sendmetric(function, base, 'request', 'read_timeout', 1)
        _logger.log(
            '[' + function + '] ESI connection read timeout: ' + str(err),
            _logger.LogLevel.WARNING)
        return (500, {
            'error': 'API connection read timeout: ' + str(err)
        }, None)
    except requests.exceptions.Timeout as err:
        sendmetric(function, base, 'request', 'timeout', 1)
        _logger.log('[' + function + '] ESI connection timeout: ' + str(err),
                    _logger.LogLevel.WARNING)
        return (500, {'error': 'API connection timeout: ' + str(err)}, None)
    except requests.exceptions.SSLError as err:
        sendmetric(function, base, 'request', 'ssl_error', 1)
        _logger.log('[' + function + '] ESI SSL error: ' + str(err),
                    _logger.LogLevel.WARNING)
        return (500, {'error': 'API SSL error: ' + str(err)}, None)
    except Exception as err:
        sendmetric(function, base, 'request', 'general_error', 1)
        _logger.log('[' + function + '] ESI generic error: ' + str(err),
                    _logger.LogLevel.WARNING)
        return (500, {'error': 'General error: ' + str(err)}, None)

    # need to also check that the api thinks this was success.

    if not request.status_code == 200:
        sendmetric(function, base, 'request', 'failure', 1)

        if request.status_code == 204:
            # empty return
            return (request.status_code, [], request.headers)
        elif request.status_code == 502:
            # load balancer error, don't bother.
            msg = "ESI LB error"
            _logger.log(
                '[' + function + '] ' + msg + ' ' + str(request.status_code) +
                ': ' + str(request.text), _logger.LogLevel.INFO)

            return (request.status_code, [], request.headers)

        # don't bother to log 404 and 403s
        elif not request.status_code == 404 and not request.status_code == 403:
            _logger.log(
                '[' + function + '] ESI API error ' +
                str(request.status_code) + ': ' + str(request.text),
                _logger.LogLevel.WARNING)
            _logger.log('[' + function + '] ESI API error URL: ' + str(url),
                        _logger.LogLevel.WARNING)
    else:
        sendmetric(function, base, 'request', 'success', 1)

    # check for warning headers. mostly for esi.

    warning = request.headers.get('warning')
    pages = request.headers.get('X-Pages')
    content_type = request.headers.get('content-type')

    if content_type:
        content_type = content_type.lower()

    if pages:
        msg = '{0} total pages'.format(pages)
        _logger.log('[' + function + '] {0}'.format(msg),
                    _logger.LogLevel.DEBUG)

    if warning:
        msg = '{0} deprecated endpoint: {1} version {2} - {3}'.format(
            base, url, version, warning)
        _logger.log('[' + function + '] {0}'.format(msg),
                    _logger.LogLevel.WARNING)

    # do metrics

    elapsed_time = request.elapsed.total_seconds()
    sendmetric(function, base, 'request', 'elapsed', elapsed_time)
    sendmetric(function, base, 'request', request.status_code, 1)

    # shouldn't have to typecast it but sometimes:
    # TypeError: the JSON object must be str, not 'LocalProxy'
    try:
        result = json.loads(str(request.text))
    except Exception as error:
        msg = 'could not convert {0} data to json: {1}'.format(base, full_url)
        _logger.log('[' + function + '] {0}'.format(msg),
                    _logger.LogLevel.WARNING)
        return (500, {'code': 500, 'error': msg}, request.headers)

    return (request.status_code, result, request.headers)
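
# Hypothetical usage sketch (public, unauthenticated ESI route):
#   code, body, headers = do_esi(__name__, 'alliances/', 'get', 1)
#   if code == 200:
#       print(len(body), 'alliances')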
Example #22
0
GDP = Namespace('http://gss-data.org.uk/def/gdp#')

s = CacheControl(Session(),
                 cache=FileCache('.cache'),
                 heuristic=LastModified())

orgs = {
    org['label']['value']: org['org']['value']
    for org in s.post('https://staging.gss-data.org.uk/sparql',
                      headers={
                          'Accept': 'application/sparql-results+json'
                      },
                      data={
                          'query':
                          '''
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?org ?label
WHERE {
  ?org a org:Organization ;
    rdfs:label ?label .
}'''
                      }).json().get('results', {}).get('bindings', [])
}
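
# Shape of each binding in the SPARQL JSON results (label value illustrative):
#   {'org': {'type': 'uri', 'value': 'http://...'},
#    'label': {'type': 'literal', 'value': 'Some Organisation'}}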


class Organisation(SQLObject):
    class sqlmeta:
        table = 'wh_organisation'

    uri = StringCol(alternateID=True, length=255)