Example #1
def _get_resource(resource_url: str) -> (str, bytes):
    """Download or reads a file (online or local).

    Parameters:
        resource_url (str): URL or path of resource to load
    Returns:
        str, bytes: Tuple containing the resource's MIME type and its data.
    Raises:
        NameError: If an HTTP request was made and ``requests`` is not available.
        ValueError: If ``resource_url``'s protocol is invalid.
    """
    url_parsed = urlparse(resource_url)
    if url_parsed.scheme in ['http', 'https']:
        # Requests might not be installed
        if requests_get is not None:
            request = requests_get(resource_url)
            data = request.content
            if 'Content-Type' in request.headers:
                mimetype = request.headers['Content-Type']
            else:
                mimetype, _ = mimetypes.guess_type(resource_url)
        else:
            raise NameError("HTTP URL found but requests not available")
    elif url_parsed.scheme == '':
        # '' is local file
        with open(resource_url, 'rb') as f:
            data = f.read()
        mimetype, _ = mimetypes.guess_type(resource_url)
    elif url_parsed.scheme == 'data':
        raise ValueError("Resource path is a data URI", url_parsed.scheme)
    else:
        raise ValueError("Not local path or HTTP/HTTPS URL", url_parsed.scheme)

    return mimetype, data
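Throughout these examples, requests_get (and the related requests_post / requests_codes names) are assumed to be aliased imports from the requests library; a minimal sketch of that aliasing, plus a hypothetical call to _get_resource above (URLs and paths are placeholders):

import mimetypes
from urllib.parse import urlparse

try:
    # the alias used throughout these examples; requests may be absent
    from requests import get as requests_get
except ImportError:
    requests_get = None

# Hypothetical usage of _get_resource:
# mimetype, data = _get_resource('https://example.com/logo.png')
# mimetype, data = _get_resource('images/logo.png')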
Example #2
def mef_person_update_index(sender, *args, **kwargs):
    """Index MEF person in ES."""
    record = kwargs['record']
    if 'documents' in record.get('$schema', ''):
        authors = record.get('authors', [])
        for author in authors:
            mef_url = author.get('$ref')
            if mef_url:
                mef_url = mef_url.replace(
                    'mef.rero.ch', current_app.config['RERO_ILS_MEF_HOST'])
                request = requests_get(url=mef_url,
                                       params=dict(resolve=1, sources=1))
                if request.status_code == requests_codes.ok:
                    data = request.json()
                    id = data['id']
                    data = data.get('metadata')
                    if data:
                        data['id'] = id
                        data['$schema'] = current_jsonschemas.path_to_url(
                            current_app.config['RERO_ILS_PERSONS_MEF_SCHEMA'])
                        indexer = RecordIndexer()
                        index, doc_type = indexer.record_to_index(data)
                        indexer.client.index(
                            id=id,
                            index=index,
                            doc_type=doc_type,
                            body=data,
                        )
                        current_search.flush_and_refresh(index)
                else:
                    current_app.logger.error(
                        'Mef resolver request error: {stat} {url}'.format(
                            stat=request.status_code, url=mef_url))
                    raise Exception('unable to resolve')
Example #3
def mef_person_delete(sender, *args, **kwargs):
    """Delete signal."""
    record = kwargs['record']
    if 'documents' in record.get('$schema', ''):
        authors = record.get('authors', [])
        for author in authors:
            mef_url = author.get('$ref')
            if mef_url:
                mef_url = mef_url.replace(
                    'mef.rero.ch', current_app.config['RERO_ILS_MEF_HOST'])
                request = requests_get(url=mef_url,
                                       params=dict(resolve=1, sources=1))
                if request.status_code == requests_codes.ok:
                    data = request.json()
                    id = data['id']
                    data = data.get('metadata')
                    if data:
                        search = DocumentsSearch()
                        count = search.filter(
                            'match', authors__pid=id).execute().hits.total
                        if count == 1:
                            indexer = RecordIndexer()
                            index, doc_type = indexer.record_to_index(data)
                            indexer.client.delete(id=id,
                                                  index=index,
                                                  doc_type=doc_type)
                            current_search.flush_and_refresh(index)
                else:
                    current_app.logger.error(
                        'Mef resolver request error: {result} {url}'.format(
                            result=request.status_code, url=mef_url))
                    raise Exception('unable to resolve')
Example #4
    def generate_response(self, gfy_urls):
        carebox = []
        textbody = ''
        for url in gfy_urls:
            fix_url = self.FIX_URL.format(url)
            result = requests_get(self.API_URL.format(url))
            if result.ok:
                gfy_item = result.json()['gfyItem']
                size = '{:.1f}'.format(gfy_item['gifSize'] /
                                       gfy_item['webmSize'])
                # use the gfycat title when one is set, otherwise fall back to the fixed URL
                title = gfy_item['title'] if gfy_item['title'] else fix_url
                reddit = gfy_item['redditId']
                carebox.append({
                    'fix_url': fix_url,
                    'size': size,
                    'title': title,
                    'reddit': reddit
                })

        for gfycat in carebox:
            textbody += self.responses.gfycat_binding.format(**gfycat)
            if gfycat['reddit']:
                origin = self.session.get_submission(
                    submission_id=gfycat['reddit'])
                caredict = {
                    'upvote': origin.upvote_ratio * 100,
                    'title': origin.title,
                    'url': 'https://np.reddit.com/{}/'.format(gfycat['reddit'])
                }
                textbody += self.responses.original_submission.format(
                    **caredict)

        textbody = self.responses.intro + textbody + self.responses.outro
        return textbody.replace('\\n', '\n')
Example #5
def parse_transcript_list(transcript_list_file):
    """ 
    Takes a file containing a line separated list of ensembl transcripts,
    then returns a data structure containing the
    gene name, chromosome, and locus start and end
    """
    if not isfile(transcript_list_file):
        print('Invalid File name')
        raise SystemExit
    transcript_list = []
    with open(transcript_list_file, 'r') as f:
        r = f.readlines()
        for row in r:
            transcript_list.append(row.rstrip())
    transcript_dict = {}
    for transcript in transcript_list:
        url = ('https://rest.ensembl.org/lookup/id/' + transcript +
               '?content-type=application/json')
        print(url)
        response = requests_get(url)
        data = response.json()
        if "error" in data:
            response = data["error"]
            raise ValueError(response)
        chromosome = data["seq_region_name"]
        locus_intervals = ("chr" + str(chromosome) + ":" + str(data["start"]) +
                           "-" + str(data["end"]))
        if chromosome not in transcript_dict:
            transcript_dict[chromosome] = []
        transcript_dict[chromosome].append((transcript, locus_intervals))
    return transcript_dict
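A usage sketch for parse_transcript_list, assuming a hypothetical transcripts.txt with one Ensembl transcript ID per line:

# transcripts.txt might contain, one ID per line:
#   ENST00000288602
#   ENST00000380152
loci_by_chromosome = parse_transcript_list('transcripts.txt')
for chromosome, entries in loci_by_chromosome.items():
    for transcript, interval in entries:
        print(chromosome, transcript, interval)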
Example #6
def login_github():
    at_response = requests_post(
        'https://github.com/login/oauth/access_token',
        data={
            'client_id': app.github_oauth_client_id,
            'client_secret': app.github_oauth_client_secret,
            'code': request.args['code'],
            'accept': 'application/json'
        }
    )
    assert at_response.status_code == 200
    response_data = url_decode(at_response.text)
    access_token = response_data['access_token']
    user_response = requests_get('https://api.github.com/user',
                                 params={'access_token': access_token})
    assert user_response.status_code == 200
    user_data = user_response.json()
    try:
        login = session.query(GithubLogin).filter_by(
            uid=user_data['login']
        ).one()
    except NoResultFound:
        login = GithubLogin(user=User(), uid=user_data['login'])
        login.user.display_name = login.identifier()
        session.add(login)
        session.commit()
    login_user(login.user)
    return redirect(url_for('pages.index'))
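GitHub's access-token endpoint returns a form-encoded body by default, which is why the example parses at_response.text with url_decode; an equivalent parse using only the standard library (the body shown is illustrative):

from urllib.parse import parse_qs

body = 'access_token=gho_example123&scope=&token_type=bearer'  # illustrative body
access_token = parse_qs(body)['access_token'][0]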
Example #7
    def getHtml(self, word):
        self.word = word

        try:
            r = requests_get('http://www.iciba.com/' + self.word)
            return r.text
        except Exception as e:
            print(e)
Example #8
 def search_films(x):
     global j
     j = ''  # accumulator for "Title(episode_id)" strings
     for i in x:
         ght = requests_get(i)
         ght_1 = ght.json()
         k = str(ght_1["title"] + '(' + str(ght_1["episode_id"]) + ')')
         j = str(j) + k + '; '
Example #9
def get_proxies():
	global args
	if args.proxies:
		proxies=open(args.proxies,'r').read().strip().split('\n')
	else:
		proxies=requests_get('https://www.proxy-list.download/api/v1/get?type=https&anon=elite').content.decode().strip().split('\r\n')
	log('[INFO] %d proxies successfully loaded!'%len(proxies))
	return proxies
Example #10
 def _get_current(self, _url_info, self_major, self_version):
     try:
         response = requests_get(_url_info.format(self_major), params={'v':self_version})
     except ConnectionError as e:
         # We ignore ENETUNREACH because it simply means that we could not connect to the server,
         # which is fine, e.g. no Internet connectivity is allowed in that system.
         if e.errno != ENETUNREACH:
             raise
Example #11
 def GetStoredFiles(self):
     url = self._base_url + '/cgi/enumerate/'
     log('Request: GET ' + url)
     r = requests_get(url)
     self.CheckHttpError(r)
     files = r.json()
     files = sorted(files, key=lambda item: item['display_filename'])
     return files
Example #12
def communityFilter(districtNameList, districtLinkList):
    """
    // Scrape community names and links, district by district
    """
    districtCommunityDicList = []
    """
    // Process each district in turn
    """

    for districtLink in districtLinkList:
        districtAllCommunity = {}
        """
        // The community list for each district is paginated
        // Fetch the first page of the district's list to read the paging info
        """

        res = requests_get(districtLink)
        soup = BeautifulSoup(res.text, 'lxml')

        page_data = soup.select(
            'div[class="page-box house-lst-page-box"]')[0]['page-data']
        page_data = eval(page_data)
        totalPage = page_data["totalPage"]
        """
        // Walk through the remaining pages
        """

        for pageIndex in range(1, totalPage + 1):

            if (pageIndex > 1):
                pageLink = districtLink + 'pg' + str(pageIndex) + '/'
                res = requests_get(pageLink)
                soup = BeautifulSoup(res.text, 'lxml')

            communityResourceList = soup.select(
                'li[class="clear xiaoquListItem"]')

            for communityResource in communityResourceList:
                communityLink = communityResource.a['href']
                communityName = communityResource.img['alt']
                districtAllCommunity[communityName] = communityLink

        districtCommunityDicList.append(districtAllCommunity)

    return districtCommunityDicList
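The example evaluates the scraped page-data attribute with eval(); since that attribute carries a small JSON object, json.loads is a safer drop-in, as in this sketch (the attribute value shown is illustrative):

import json

raw_page_data = '{"totalPage":28,"curPage":1}'   # scraped attribute, illustrative value
page_data = json.loads(raw_page_data)            # avoids eval() on untrusted markup
totalPage = page_data["totalPage"]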
Example #13
def __api_request(service_label, params, index_name=None):
    """Wraps the access to the Nordlys API. It returns a 3-uple (results, total no. of results, pretty status message).

    :param service_label: a constant for the required service_label.
    :param params: request params.
    :param index_name: optional; name of index.
    :return: a 3-tuple (results, total number of results, status message).
    """
    results = None  # default init, it remains None if request returns error
    total = 0
    msg = ""

    url = "/".join([PROTOCOL, SERVER_HOSTNAME_API, service_label])
    if service_label == SERVICE_E_RETRIEVAL:
        url += "?q={}&model={}&start={}&1st_num_docs={}&fields_return={}".format(
            quote(params.get("q", "")),
            params.get("model", "lm"),
            params.get("start", 0),
            params.get("1st_num_docs", 100),
            params.get("fields_return", "abstract"),
        )
        url += "&num_docs={}".format(params.get("num_docs", NUM_RESULTS))

    elif service_label == SERVICE_E_LINKING:
        url += "?q={}".format(quote(params.get("q", "")))

    elif service_label == SERVICE_TTI:
        url += "?q={}&method={}&num_docs={}&start={}&index={}&field={}".format(
            quote(params.get("q", "")), params.get("method", "tc"),
            params.get("num_docs", NUM_RESULTS), params.get("start", 0),
            params.get("index", TTI_INDEX_FALLBACK_2015_10),
            params.get("field", "_id"))
    try:
        print("Service request' URL: {}".format(url))
        r = requests_get(url, timeout=REQUEST_TIMEOUT)
        print(r)
        results = j_loads(r.text)
        total = results.get("total_hits", 0)

        # Obtain postprocessed results to render, if needed
        entity_collection = MONGO_ENTITY_COLLECTIONS[0] if len(
            MONGO_ENTITY_COLLECTIONS) > 0 else "dbpedia-2015-10"
        results = process_results(results,
                                  service_label,
                                  protocol=PROTOCOL,
                                  server_hostname_api=SERVER_HOSTNAME_API,
                                  entity_collection=entity_collection,
                                  request_timeout=REQUEST_TIMEOUT)

    except ConnectionError:
        msg = "We're so sorry. There was a connection error :("
    except Timeout:
        msg = "Timeout while trying to connect to the remote server, or while receiving data from it :("
    except JSONDecodeError:
        msg = "There are no results for your query :("

    return results, total, msg
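The wrapper above builds its query strings by hand with format() and quote(); a sketch of the same entity-retrieval parameters assembled with urllib.parse.urlencode (parameter names taken from the example, query value and endpoint illustrative), which keeps all quoting in one place:

from urllib.parse import urlencode

params = {
    "q": "neil armstrong",        # illustrative query
    "model": "lm",
    "start": 0,
    "1st_num_docs": 100,
    "fields_return": "abstract",
    "num_docs": 10,
}
url = "http://api.example.org/er?" + urlencode(params)   # hypothetical endpoint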
Example #14
 def _api_call(self, call, **kwargs):
     url = self.__url + str(call) + '.json?' + '&'.join([str(key) + '=' + str(value) for key, value in kwargs.items() if value != ''])\
         .replace('\'', '').replace('[', '').replace(']', '')
     response = requests_get(url, headers=self.__header)
     if response.status_code == 200:
         self.__connection = True
         return json_loads(response.text)
     else:
         return {response.status_code: response.text}
Example #15
    def _get_extract(self, page_id, page_title):
        """Get an extract from a wikipedia page of the given location."""

        PARAMS_EXTRACT["titles"] = f"{page_title}"

        extract = requests_get(f"{ENDPOINT_WIKIMEDIA}",
                               params=PARAMS_EXTRACT).json()

        self._extract = extract["query"]["pages"][f"{page_id}"]["extract"]
Example #16
    def getEN_mp3(self):

        url = 'https://dict.youdao.com/dictvoice?audio={word}&type=2'.format(
            word=self.word)
        mp3 = requests_get(url)
        with open('en.mp3', 'wb') as f:
            f.write(mp3.content)
            f.close()
        mp3.close()
Example #17
    def generate_response(self, massdrop_links):
        """Takes multiple links at once, iterates, generates a response appropriately.
           Idea is to take into account: Title, Price, Running Drop, Time left"""
        drop_field = []
        textbody = ""
        fixed_urls = 0
        time_ends_in = None
        will_update = False
        for url in massdrop_links:
            fix_url = "https://massdrop.com/buy/{}?mode=guest_open".format(url)
            api_url = self.API_URL.format(url)
            try:
                response = requests_get(api_url).json()
                if "error" in response:
                    continue  # API doesn't have response codes, responds with 200 OK always
                product_name = response["name"]
                # statusCode: 1 - Drop is active and running
                if response["statusCode"] == 1:
                    current_price = "${:.2f}".format(response["currentPrice"])
                    prices = [x["price"] for x in response["steps"]]
                    prices = self.massdrop_pricer(current_price, prices)
                    time_ends_at = datetime.strptime(response["endAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
                    time_ends_in = time_ends_at - datetime.utcnow()
                    time_left = " / " + self.time_formatter(time_ends_in)
                    will_update = True
                else:
                    time_left = "drop has ended"
                    current_price, prices = "", ""
                drop_field.append(
                    {
                        "title": product_name,
                        "current_price": current_price,
                        "prices": prices,
                        "time_left": time_left,
                        "fix_url": fix_url,
                    }
                )
                fixed_urls += 1
            except Exception as e:
                self.logger.error("Oh noes, an unexpected error happened: {} < {}".format(e.__class__.__name__, e))

        if len(drop_field) == 0:
            return None, None

        # item is a dictionary that fits on the right binding - saves time, is short
        for item in drop_field:
            textbody += self.responses.product_binding.format(**item)

        update_string = ["", " ^| ^This ^comment ^updates ^every ^12 ^hours."][will_update]

        textbody = (
            self.responses.intro_drop.format(products=("product", "products")[fixed_urls > 1])
            + textbody
            + self.responses.outro_drop.format(update=update_string)
        )

        return textbody.replace("\\n", "\n"), time_ends_in
Example #18
 def check_by_redirect(self, url: str, results: list):
     r = requests_get(url,
                      headers=self.headers,
                      verify=True,
                      allow_redirects=False)
     if r.status_code == 302:
         return
     print(r.url)
     results.append(r.url)
Example #19
def list_form_submissions(headers, form_id):
    '''List all submissions for a form.'''
    query_url = '{}/forms/{}/submissions'.format(api_url, form_id)
    response = requests_get(query_url, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception('API error (HTTP code {})'.format(
            response.status_code))
Example #20
def download_img(icon_url: str):
    try:
        r = requests_get(icon_url, stream=True)
        if r.status_code == 200:
            with open(f"{DATA_PATH}icon.png", 'wb') as file:
                r.raw.decode_content = True
                copyfileobj(r.raw, file)
    except (ConnectionError):
        return
Example #21
def run_begin_end_time(exp:str, runnum:int) :
    # returns a list of dicts per run with 'begin_time', 'end_time', 'run_num', 'run_type'
    if runnum>0 :
        resp = requests_get('https://pswww.slac.stanford.edu/prevlgbk/lgbk/%s/ws/runs' % exp).json()
        for d in resp :
            if d['run_num'] == runnum :
                return str(d['begin_time']), str(d['end_time'])
    logger.debug('begin and end time info not found in mysql for run=%d; using default times.' % runnum)
    return '1000000000', '5000000000'
Example #22
def get_html(url: str) -> str:
    """Return the html string for the given url."""
    with requests_get(url, stream=True, headers=USER_AGENT_HEADER,
                      timeout=15) as r:
        check_response_headers(r)
        content = next(r.iter_content(MAX_RESPONSE_LENGTH))
    charset_match = CHARSET(content)
    return content.decode(
        charset_match[1].decode() if charset_match else r.encoding)
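get_html depends on module-level names the example does not show (USER_AGENT_HEADER, MAX_RESPONSE_LENGTH, CHARSET and check_response_headers); a hedged sketch of plausible definitions so the snippet can run on its own:

import re
from requests import get as requests_get

# Assumed definitions, not part of the original example:
USER_AGENT_HEADER = {'User-Agent': 'Mozilla/5.0 (example-bot)'}
MAX_RESPONSE_LENGTH = 2 * 1024 * 1024        # read at most ~2 MB of the body
CHARSET = re.compile(rb'''charset=["']?([A-Za-z0-9_\-]+)''').search

def check_response_headers(response):
    # placeholder for the validation helper the example calls
    response.raise_for_status()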
Example #23
def main():
    def search_planet(x):
        global a
        thy = requests_get(x)
        thy_1 = thy.json()
        a = thy_1["name"]

    def massiv(x):
        global m
        m = []
        for i in x:
            m.append(i)

    def search_films(x):
        global j
        j = ''  # accumulator for "Title(episode_id)" strings
        for i in x:
            ght = requests_get(i)
            ght_1 = ght.json()
            k = str(ght_1["title"] + '(' + str(ght_1["episode_id"]) + ')')
            j = str(j) + k + '; '

    name = requests_get('https://www.swapi.co/api/people')
    planet = requests_get('https://www.swapi.co/api/planets')

    data = name.json()
    data_1 = planet.json()

    z = 0
    ch = []
    for i in data['results']:
        pl = data['results'][z]['homeworld']
        films = data['results'][z]['films']
        massiv(films)
        search_films(m)
        search_planet(pl)
        b = 'Name: ' + (data['results'][z]['name']
                        ) + '; Planet: ' + a + '; Film: ' + str(j)
        ch.append(b)
        z += 1

    for i in ch:
        print(i, '\n')
Example #24
    def check_updates(self):
        r = requests_get(
            'https://api.github.com/repos/tampe125/dump-scraper/releases/latest'
        )
        json_data = json.loads(r.content)

        if StrictVersion(json_data['tag_name']) > StrictVersion(self.version):
            logging.getLogger('dumpscraper').warn(
                "A new version is available, please download it from https://github.com/tampe125/dump-scraper/releases"
            )
Example #25
 def _get_http_get_request_from_url(self, url):
     try:
         r = requests_get(url, verify=self._ssl_verify)
     except RequestsConnectionError:
         raise HPNAConnectionError(
             "Failed to connect to HP Network Automation URL.")
     if r.status_code != 200:
         raise HPNAConnectionError(
             "Failed to connect to HP Network Automation URL.")
     return r.text
Example #26
 def __getToken(self):
     # Fetch the OAuth access token
     url_token = 'https://openapi.baidu.com/oauth/2.0/token'
     api_key = 'R26ZZakxixaQbIDGrPkUwOTc'
     secret_key = '10d76d90116385e126d95e1c277c538c'
     get_token_url = url_token+'?'+'grant_type=client_credentials&client_id=' + \
         api_key+'&client_secret='+secret_key
     token = requests_get(get_token_url)
     r = jsonLoads(token.text)
     return r['access_token']
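The token URL above is assembled by string concatenation; the same request can pass the credentials through the params argument and let requests handle the encoding (a sketch with placeholder credentials):

from json import loads as jsonLoads
from requests import get as requests_get

token_response = requests_get(
    'https://openapi.baidu.com/oauth/2.0/token',
    params={
        'grant_type': 'client_credentials',
        'client_id': 'YOUR_API_KEY',         # placeholder
        'client_secret': 'YOUR_SECRET_KEY',  # placeholder
    },
)
access_token = jsonLoads(token_response.text)['access_token']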
Example #27
    def get_records(self, from_date, max=0, file=None):
        """Get cantook records.

        from_date: only fetch records changed after this date
        max: maximum number of records to fetch (0 means no limit)
        file: file in which to save the fetched records
        """
        self._count = 0
        self._max = max
        url = self.get_request_url(start_date=from_date, page=1)
        request = requests_get(url)
        total_pages = int(request.headers.get('X-Total-Pages', 0))
        total_items = int(request.headers.get('X-Total-Items', 0))
        # per_pages = int(request.headers.get('X-Per-Page', 0))
        current_page = int(request.headers.get('X-Current-Page', 0))
        while (request.status_code == requests_codes.ok and
               current_page <= total_pages and
               (self._count < self._max or self._max == 0)):
            self.verbose_print(
                'API page: {page} url: {url}'.format(
                    page=current_page,
                    url=url
                )
            )
            self.process_records(request.json().get('resources', []))
            # get next page and update current_page
            url = self.get_request_url(
                start_date=from_date,
                page=current_page+1
            )
            request = requests_get(url)
            current_page = int(request.headers.get('X-Current-Page', 0))
        if file:
            file.write(']')
        if (
            (max != 0 and self._count != max) or
            (max == 0 and total_items != self._count)
           ):
            # we had an ERROR
            raise Exception('ERROR not all records harvested')

        return total_items, self._count
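get_records drives its loop from the X-Total-Pages / X-Current-Page response headers; a stripped-down sketch of that header-driven pagination pattern against a hypothetical endpoint:

from requests import get as requests_get, codes as requests_codes

def fetch_all_pages(base_url):
    """Follow header-driven pagination; base_url is a hypothetical endpoint
    that exposes an X-Total-Pages header like the example above."""
    items = []
    page = 1
    total_pages = 1
    while page <= total_pages:
        response = requests_get(base_url, params={'page': page})
        if response.status_code != requests_codes.ok:
            break
        total_pages = int(response.headers.get('X-Total-Pages', 0))
        items.extend(response.json().get('resources', []))
        page += 1
    return items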
Example #28
def get_last_sent_id():
    row = requests_get('{}/vindb/vin_records/last/'.format(
        settings.vindb_host),
                       params={
                           'data_source': settings.vindb_data_source,
                       },
                       headers={
                           'Content-Type': 'application/json'
                       }).json()

    return row.get('uuid', 0)
Example #29
    def getEN_mp3(self):

        # sound = bs.find_all(class_='new-speak-step')
        # sound_url = sound[1].get('ms-on-mouseover').split("'")
        # mp3 = requests.get(sound_url[1])
        url = 'https://dict.youdao.com/dictvoice?audio={word}&type=2'.format(
            word=self.word)
        mp3 = requests_get(url)
        with open('en.mp3', 'wb') as f:
            f.write(mp3.content)
            f.close()
Example #30
    def listSubnavi(self, path, hasitems, items_to_add=None):
        if hasitems == 'false':
            url = urllib_urljoin(self.base_url, path)
            html = requests_get(url).text
            soup = BeautifulSoup(html, 'html.parser')

            for item in soup('a', 'sdc-site-directory__content'):
                if items_to_add and item.get('href') not in items_to_add:
                    continue

                label = item.span.string
                url = self.plugin.build_url({
                    'action':
                    'showVideos',
                    'path':
                    '{0}-videos'.format(item.get('href')),
                    'show_videos':
                    'false'
                })
                self.addDir(label, url)
        else:
            items = None
            for nav_item in self.nav_json:
                if nav_item.get('path') == path:
                    items = nav_item.get('children')

            if items:
                for item in items:
                    action = item.get('action') if item.get(
                        'action', None) else 'showVideos'
                    if action == 'listSubnavi':
                        url = self.plugin.build_url({
                            'action':
                            action,
                            'path':
                            item.get('path'),
                            'hasitems':
                            'true' if item.get('children', None) else 'false',
                            'items_to_add':
                            item.get('includes')
                        })
                    else:
                        url = self.plugin.build_url({
                            'action':
                            action,
                            'path':
                            item.get('path'),
                            'show_videos':
                            'true' if item.get('show_videos', None) is None
                            or item.get('show_videos') == 'true' else 'false'
                        })
                    self.addDir(item.get('label'), url)

        xbmcplugin.endOfDirectory(self.plugin.addon_handle, cacheToDisc=True)
Example #31
 def embed_line(self, i: int, line: List[str]) -> Optional[int]:
     try:
         r = requests_get(line[1], stream=True, timeout=self.timeout)
         image = Image.open(BytesIO(r.content))
         image = self.transforms(image).unsqueeze(0)  # Fake batch-size of 1
         image = image.to(self.device)
         self.model(image)
         del image
         return i
     except Exception:
         return None
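embed_line feeds the downloaded bytes straight into Pillow through BytesIO, so no temporary file is needed; that load step in isolation (the URL is a placeholder):

from io import BytesIO
from PIL import Image
from requests import get as requests_get

r = requests_get('https://example.com/photo.jpg', stream=True, timeout=10)
image = Image.open(BytesIO(r.content))   # decode the image entirely in memory
print(image.size)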
Example #32
 def __get_avg_rating_for_product_id(self, model_number):
     try:
         res = requests_get(url=self.AVG_RATING_FORMAT.format(reviews_addr=self.reviews_addr,
                                                              model_number=model_number))
         if res.status_code != 200:
             current_app.logger.warning("Could not fetch rating for product id: %s", model_number)
             return -1
         data = json.loads(res.content)
         return data["rating"]
     except Exception as e:
         current_app.logger.exception("Error fetching rating for product id: %s", model_number)
         return -1
Example #33
 def __fetch_listings_for_product(self, product_id):
     try:
         res = requests_get(url=self.FETCH_LISTINGS_FORMAT.format(inventory_addr=self.inventory_addr,
                                                                  product_id=product_id))
         if res.status_code != 200:
             current_app.logger.warning("Could not fetch rating for product id: %d", product_id)
             return -1
         data = json.loads(res.content)
         return data
     except Exception as e:
         current_app.logger.exception("Error fetching listings for product id: %d", product_id)
         return []
Example #34
def fetch_remote_hash(branch):
    if not branch:
        logging.error('No branch specified. Exiting.')
        return

    resp = requests_get(
        'https://api.github.com/repos/screenly/screenly-ose/git/refs/heads/{}'.format(branch)
    )

    if not resp.ok:
        logging.error('Invalid response from github: {}'.format(resp.content))
        return False

    logging.debug('Got response from Github: {}'.format(resp.status_code))
    latest_sha = resp.json()['object']['sha']
    return latest_sha
Example #35
def _request(uri, path):
    try:
        res = requests_get(uri, headers=__header, timeout=60)
        out.debug("res.headers", obj=dict(res.headers))
        res.raise_for_status()
    except Exception as e:
        out.warn(f"HTTP error: {e}\n{color.red(format_exc())}")
    else:
        out.debug(f"HTTP {res.status_code} \"{res.reason}\" -- \"{uri}\"")

        if res.status_code == 204 or res.headers.get("Content-Length") == "0":
            out.error(f"HTTP {res.status_code} \"{res.reason}\""
                      f" -- \"{uri}\": no content")

        rw.writef(path, res.content)
        out.info(f"updated \"{path}\""
                 f", last modified on \"{res.headers.get('Last-Modified')}\"")
Example #36
    def update(self):
        """update(self) - Fill Queue with new Pastebin IDs"""
        new_pastes = []
        raw = None

        while not raw:
            try:
                raw = requests_get('http://pastebin.com/archive').text
                if "Pastebin.com has blocked your IP" in raw:
                    getLogger('dumpscraper').critical("Pastebin blocked your IP. Wait a couple of hours and try again")
                    raise RunningError()
            except ConnectionError:
                getLogger('dumpscraper').warn('Connection error, trying again in 5 seconds')
                raw = None
                sleep(5)

        results = BeautifulSoup(raw, "lxml").findAll(
            lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])

        for entry in results:
            paste = PastebinPaste(entry.a['href'][1:])
            # Check to see if we found our last checked URL
            if paste.id == self.ref_id:
                break
            new_pastes.append(paste)

        # Don't cry if we don't have any results
        try:
            # Let's save the starting id, so I can skip already processed pastes
            self.ref_id = results[0].a['href'][1:]
        except IndexError:
            dump_logger = getLogger('dumpscraper')
            dump_logger.info("\tArchive links not found")
            dump_logger.debug('\t\tFetched page:')

            for row in results:
                dump_logger.debug('\t\t\t' + str(row))

        for entry in new_pastes[::-1]:
            self.put(entry)
Example #37
def check_in(passcode):
    if passcode == "":
        redirect('/')
    user_ip = request.remote_addr #finds their ip
    print(user_ip)
    url = ("http://ip-api.com/json/{}?fields=country,city".format(user_ip))
    #get info about country and city from their ip, in JSON format
    #more info is available from that site if needed
    r = requests_get(url)
    try: #try block in case... idk. something?
        location_data = json.loads(r.text)
        visitor_city = location_data['city']
        visitor_country = location_data['country']
        print("Someone is visiting from " + visitor_city,visitor_country)
        print("They're using the passcode " + passcode)
        
        conn = sqlite3.connect('people.db') #connect to the database
        c = conn.cursor()
        #finds the location for whoever uses this passcode
        stored_city = c.execute("SELECT location_city FROM people WHERE passcode = ?;", (passcode,)).fetchone()
        print(stored_city)
        stored_country = c.execute("SELECT location_country FROM people WHERE passcode = ?;", (passcode,)).fetchone()
        print(stored_country)

        #if the stored location is different to that of the current location
        #then we update it!
        #we check city first, because they might be in the same country, but have just
        #changed cities
        if stored_city is None or visitor_city != stored_city[0]:
            c.execute("UPDATE people SET location_city=? WHERE passcode = ?;", (visitor_city, passcode))
            if stored_country is None or visitor_country != stored_country[0]:
                c.execute("UPDATE people SET location_country=? WHERE passcode = ?;", (visitor_country, passcode))
        conn.commit()
        c.close()
        #redirect("http://www.xkcd.com")
    except:
        pass
    return template('checkin')
Example #38
    def generate_response(self, gfy_urls):
        carebox = []
        textbody = ''
        for url in gfy_urls:
            fix_url = self.FIX_URL.format(url)
            result = requests_get(self.API_URL.format(url))
            if result.ok:
                gfy_item = result.json()['gfyItem']
                size = '{:.1f}'.format(gfy_item['gifSize'] / gfy_item['webmSize'])
                # use the gfycat title when one is set, otherwise fall back to the fixed URL
                title = gfy_item['title'] if gfy_item['title'] else fix_url
                reddit = gfy_item['redditId']
                carebox.append({'fix_url': fix_url, 'size': size, 'title': title, 'reddit': reddit})

        for gfycat in carebox:
            textbody += self.responses.gfycat_binding.format(**gfycat)
            if gfycat['reddit']:
                origin = self.session.get_submission(submission_id=gfycat['reddit'])
                caredict = {'upvote': origin.upvote_ratio * 100, 'title': origin.title,
                            'url': 'https://np.reddit.com/{}/'.format(gfycat['reddit'])}
                textbody += self.responses.original_submission.format(**caredict)

        textbody = self.responses.intro + textbody + self.responses.outro
        return textbody.replace('\\n', '\n')
Example #39
def remote_branch_available(branch):
    if not branch:
        logging.error('No branch specified. Exiting.')
        return

    try:
        resp = requests_get(
            'https://api.github.com/repos/screenly/screenly-ose/branches',
            headers={
                'Accept': 'application/vnd.github.loki-preview+json',
            },
        )
    except exceptions.ConnectionError:
        logging.error('No internet connection.')
        return

    if not resp.ok:
        logging.error('Invalid response from Github: {}'.format(resp.content))
        return

    for github_branch in resp.json():
        if github_branch['name'] == branch:
            return True
    return False
Example #40
    def download_job_sid(self, sid, mode='json', filename=None):
        """
        Wrapper for streaming results to a file instead of through sockets with the API.
            :param sid: sid of job
            :param mode: json, csv, or xml
            :return: local filename, False if failure
        """

        # Only tested on 6.3, may need to mod this
        job_sid_url = 'https://{0}/en-US/api/search/jobs/{1}/results?isDownload=true&' \
                      'timeFormat=%25FT%25T.%25Q%25%3Az&maxLines=0&count=0&filename=&outputMode={2}' \
                      '&spl_ctrl-limit=unlimited&spl_ctrl-count=50000'.format(self.host, sid, mode)

        if not filename:
            filename = '{0}.{1}'.format(sid, mode)
        cookie_builder = {}
        for l in self._auth_headers:
            for x in l[1].split('; '):
                q = x.split('=')
                cookie_builder[q[0]] = q[1]
        r = requests_get(job_sid_url, stream=True, cookies=cookie_builder, verify=False)
        cnt = 0
        with open(filename, 'wb') as f:
            # I have the bandwidth to do this size, you may not.
            for chunk in r.iter_content(chunk_size=1024*1024*1024):
                if chunk:
                    f.write(chunk)
                cnt += 1
                if cnt % 1000 == 0:
                    # Call control occasionally to keep the export stream alive
                    requests_post(r'https://{0}/en-US/splunkd/__raw/services/search/jobs/{1}/control'
                                  .format(self.host, sid), data={
                                        'output_mode': mode,
                                        'action': 'touch'
                                    }, cookies=cookie_builder, verify=False)
        return filename
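The heart of download_job_sid is streaming the response to disk in fixed-size chunks so large exports never have to fit in memory; a minimal standalone sketch of that pattern (URL and chunk size illustrative):

from requests import get as requests_get

def stream_to_file(url, filename, chunk_size=1024 * 1024):
    """Write a (possibly large) HTTP response to disk chunk by chunk."""
    r = requests_get(url, stream=True)
    r.raise_for_status()
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=chunk_size):
            if chunk:
                f.write(chunk)
    return filename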
Example #41
 def DownloadFile(self, url_path):
     url = self._base_url + url_path
     log('Request: GET ' + url)
     r = requests_get(url)
     self.CheckHttpError(r)
     return r.content
Example #42
    def check_updates(self):
        r = requests_get('https://api.github.com/repos/tampe125/dump-scraper/releases/latest')
        json_data = json.loads(r.content)

        if StrictVersion(json_data['tag_name']) > StrictVersion(self.version):
            logging.getLogger('dumpscraper').warn("A new version is available, please download it from https://github.com/tampe125/dump-scraper/releases")
Example #43
 def _get_current(self, _url_info, self_major, self_version):
     response = requests_get(_url_info.format(self_major), params={'v':self_version})
     if response.status_code == OK:
         return bunchify(loads(response.text))