Example #1
def send_requests(reqs):
    """
    Send all the requests in :reqs: and reads the response data to extract the
    deputies data.  It will check if a deputy has more than one page of
    advisors and send new requests if True
    """
    buffer = list()

    print("Sending!")
    kwargs = dict(size=8, exception_handler=http_exception_handler)
    for response in grequests.imap(reqs, **kwargs):
        page_data = extract_data_from_page(response)

        yield page_data
        print('.', end="", flush=True)

        if page_data["has_next_page"]:
            current = page_data["current_page"]
            total = page_data["number_of_pages"]
            for page in range(current + 1, total + 1):
                buffer.append(get_page(page_data['data'], page))

    pending = len(buffer)
    print("\nFound {} more pages to fetch. Starting now…".format(pending))
    for req in grequests.imap(buffer, **kwargs):
        page_data = extract_data_from_page(req)
        yield page_data
        print('.', end="", flush=True)
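A minimal consumption sketch for the generator above; build_initial_requests() is a hypothetical helper standing in for whatever produces the unsent grequests objects in the original project.

# Hypothetical driver; build_initial_requests() is assumed to return an
# iterable of unsent grequests request objects.
all_pages = []
for page_data in send_requests(build_initial_requests()):
    all_pages.append(page_data["data"])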
Example #3
def send_requests(reqs):
    """
    Send all the requests in :reqs: and reads the response data to extract the deputies data. 
    It will check if a deputy has more than one page of advisors and send new requests if True
    """
    request_buffer = list()

    print("Sending!")
    for response in grequests.imap(reqs,
                                   size=8,
                                   exception_handler=http_exception_handler):
        page_data = extract_data_from_page(response)

        yield page_data
        print('.', end="", flush=True)

        if page_data["has_next_page"]:
            for rq in [
                    get_request_to_page_of_advisors_from_deputy(
                        page_data['data'], page_number)
                    for page_number in range(page_data["current_page"],
                                             page_data["number_of_pages"])
            ]:
                request_buffer.append(rq)

    print("\nFound {} more pages to fetch. Starting now...".format(
        len(request_buffer)))
    for req in grequests.imap(request_buffer,
                              size=8,
                              exception_handler=http_exception_handler):
        page_data = extract_data_from_page(req)

        yield page_data
        print(':', end="", flush=True)
Example #4
def get_route_info_from_infotraffic(known_lines_csv: str, known_stations_csv: str)-> Dict[int, Tuple[Route, Route]]:
	root = 'http://86.122.170.105:61978/html/timpi/'
	urls = [grequests.get(root + 'tram.php', stream=False),
	        grequests.get(root + 'trol.php', stream=False),
	        grequests.get(root + 'auto.php', stream=False)]

	known_lines = { line.line_id: line for line in importer.parse_lines_from_csv(known_lines_csv) }
	known_lines = known_lines  # type: Dict[int, Line]
	known_stations = { station.raw_name: station for station in importer.parse_stations_from_csv(known_stations_csv) }
	known_stations = known_stations  # type: Dict[str, Station]
	line_id_re = re.compile("param1=(\d+)")
	line_id_to_routes = {}  # type: Dict[int, Tuple[Route, Route]]
	for page in grequests.imap(urls, size=len(urls), exception_handler=exception_handler):
		page.raise_for_status()
		if page.status_code == requests.codes.ok:
			soup = bs4.BeautifulSoup(page.text, "html.parser")
			unknown_lines = { }  # type: Dict[int, str]
			line_requests = []
			for a in soup.select("div p a"):
				line_id = int(line_id_re.search(a['href']).group(1))
				line = known_lines.get(line_id, None)
				if not line:
					line_name = a['title'] if a.has_attr('title') else None
					if line_name is None:
						img = a.select("img")[0]
						line_name = img['alt'] if img and img.has_attr('alt') else 'unknown'
					unknown_lines[line_id] = line_name
					print("WARNING: unknown line '{line_name}' (line ID: {line_id}) encountered at {url}"
					      .format(line_name=line_name, line_id=line_id, url=page.url))
				line_requests.append(grequests.get(root + a['href'], stream=False))

			for line_response in grequests.imap(line_requests, size=6, exception_handler=exception_handler):
				line_id = int(line_id_re.search(line_response.url).group(1))
				routes = parse_arrivals_from_infotrafic(line_id, known_stations, line_response, include_unknown_stations=True)
				line = known_lines.get(line_id, None)
				line_name = line.line_name if line is not None else unknown_lines.get(line_id, "unknown")
				route1 = route2 = None
				for route_id, route in enumerate(routes):
					valid_stations = []
					for station, arrival in route:
						if not isinstance(station, Station):
							print("WARNING: unknown station '{raw_station_name}' encountered in route {route_id} of line {line_name} (line ID: {line_id})"
							      .format(line_name=line_name, line_id=line_id, route_id=route_id, raw_station_name=station))
						else:
							if not station.lng or not station.lat:
								print("WARNING: station '{station_name}' (station ID: {station_id}) has no GPS coordinates defined"
								      .format(station_name=station.friendly_name, station_id=station.station_id))
							valid_stations.append(station)

					if valid_stations and line is not None:
						if route_id == 0:
							route1 = Route(route_id, line.route_name_1, line.line_id, valid_stations)
						elif route_id == 1:
							route2 = Route(route_id, line.route_name_2, line.line_id, valid_stations)

				if route1 is not None and route2 is not None:
					line_id_to_routes[line.line_id] = (route1, route2)

	return line_id_to_routes
Example #5
def getDatainfo(start, stop):
    lireques_list = []  # requests for each detail page
    allData = []  # final scraped records
    rs = (grequests.get('https://www.ciu1.com/shipin/list-偷拍自拍-' + format(u) +
                        '.html',
                        timeout=3) for u in range(start, stop))
    res_list = grequests.imap(rs, size=200)
    for iteminfo in res_list:
        iteminfo.encoding = iteminfo.apparent_encoding
        soup = BeautifulSoup(iteminfo.text, 'lxml')
        data = soup.select('#tpl-img-content > li')
        for item in data:
            print('fetching...')
            childrenText = grequests.get('https://www.ciu1.com' +
                                         item.a.get('href'))
            lireques_list.append(childrenText)
            print(childrenText)
    lireques_arr = grequests.imap(lireques_list)
    for item_li in lireques_arr:
        print(1)
        item_li.encoding = item_li.apparent_encoding
        item_info = BeautifulSoup(item_li.text, 'lxml')
        item_info_chil = item_info.select('#shipin-detail-content-pull')
        item_info = item_info.select('#lin1k0')
        result = {
            "title": item_info_chil[0].img.get('alt'),
            "url": item_info[0].get('data-clipboard-text'),
            "pic": item_info_chil[0].img.get('data-original')
        }
        allData.append(result)
    db = pymysql.connect(host='216.24.255.15',
                         port=3306,
                         user='******',
                         password='******',
                         database='m_vue_ac_cn',
                         charset='utf8')
    cursor = db.cursor()

    def insertInfo(a, b, c):
        # use a parameterized query instead of string concatenation
        sql = "INSERT INTO video_data(title, url, pic) VALUES (%s, %s, %s)"
        #print(sql)
        cursor.execute(sql, (a, b, c))

    try:
        for item in allData:
            insertInfo(item.get('title'), item.get('url'), item.get('pic'))
        # commit to the database
        print(allData)
        print('done!')
        db.commit()
    except Exception:
        # roll back on error
        db.rollback()
    finally:
        # close the database connection
        db.close()
Example #6
def picture_spider():
    srequest = Srequests()
    if srequest.check_cookies():
        pass
    else:
        print('update cookies !')
        loginurl = 'https://anime-pictures.net/login/submit'
        logindata = {'login': '******', 'password': '******', 'time_zone': 'Asia/Shanghai'}
        srequest.update_cookies(loginurl, logindata)

    # search for images
    taglist = ['girl', 'long hair', 'breasts', 'blush', 'light erotic']
    search_tag = '||'.join(taglist)

    # update_date  0: any time  1: last week  2: last month  3: last day
    if get_pictures_count() < 200:
        update_date = 0
    else:
        update_date = 2

    # search_url = "https://anime-pictures.net/pictures/view_posts/0?search_tag=%s&aspect=16:9&order_by=date&ldate=%d" \
    #              "&ext_jpg=jpg&ext_png=png&lang=en" % (search_tag, update_date)

    search_url = "https://anime-pictures.net/pictures/view_posts/0?search_tag=%s&res_x=1024&res_y=768&res_x_n=1&res_y_n=1&aspect=16:9&order_by=date&ldate=%d&small_prev=1&ext_jpg=jpg&ext_png=png&lang=en" % (
        search_tag, update_date)

    resp = srequest.session.get(search_url, headers=Srequests.headers).text
    # print(Srequests.headers)
    details_urls = []
    details_urls.extend(get_details_urls(resp))

    page_count = get_page_count(resp)

    search_urls = [
        "https://anime-pictures.net/pictures/view_posts/%d?search_tag=%s&res_x=1024&res_y=768&res_x_n=1&res_y_n=1&aspect=16:9&order_by=date&ldate=%d&small_prev=1&ext_jpg=jpg&ext_png=png&lang=en" % (
            x, search_tag, update_date) for x in range(1, int(page_count) + 1)]

    reqs = (grequests.get(url, headers=Srequests.headers, session=srequest.session) for url in search_urls)
    for r_data in grequests.imap(reqs, size=Wallpaper.REQUEST_THREAD_NUMBER):
        if r_data.status_code == 200:
            print('search page ok: ' + r_data.url)
            details_urls.extend(get_details_urls(r_data.text))
        else:
            print('search page failed: ' + r_data.url)

    # image detail pages
    reqs = (grequests.get(url, headers=Srequests.headers, session=srequest.session) for url in details_urls)
    for r_data in grequests.imap(reqs, size=Wallpaper.REQUEST_THREAD_NUMBER):
        if r_data.status_code == 200:
            print('detail page ok: ' + r_data.url)
            save_picture_info(Picture(*get_picture_info(r_data.text)))
        else:
            print('detail page failed: ' + r_data.url)

    srequest.close()
Example #7
def _get_person_id(query: str, max_people: int) -> List[Iterator[str]]:
    """ Returns list of people IDs """
    request_objs = []

    for i in range(int(max_people / 10) + 1):
        payload = {
            "search": "Search",
            "filter": query,
            "_kgoui_region": "kgoui_Rcontent_I0_Rcontent_I0_Ritems",
            "_object_include_html": 1,
            "_object_js_config": 1,
            "_kgoui_page_state": "439a1a9b6fb81b480ade61813e20e049",
            "_region_index_offset": i * 10,
            "feed": "directory",
            "start": i * 10
        }
        request_objs.append(grequests.get(PEOPLE_URL, params=payload))

    responses = grequests.imap(request_objs)

    url_list = []
    for response_obj in responses:
        response = response_obj.json()['response']['contents']
        local_url_list = (x['fields']['url']['formatted'] for x in response)
        local_url_list = (dict(param.split("=") for param in x.split("&"))
                          for x in local_url_list)
        local_url_list = (x['id'] for x in local_url_list if 'id' in x)
        url_list.append(local_url_list)
    return url_list
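Since the function returns a list of generators, a caller would typically flatten it. A hedged sketch; the query string is illustrative and PEOPLE_URL is assumed to be defined in the same module.

# Flatten the per-page ID generators into a single list (illustrative query).
person_ids = [person_id
              for id_gen in _get_person_id("smith", max_people=30)
              for person_id in id_gen]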
Example #8
    def check_proxies(proxy_list, threads=8):
        IFCONFIG_CANDIDATES = [
            "https://ifconfig.co/ip", "https://api.ipify.org/?format=text",
            "https://myexternalip.com/raw", "https://wtfismyip.com/text"
        ]
        # de-dupe
        proxy_list = list(set(proxy_list))

        # create a set of unsent requests
        rs = []
        for proxy in proxy_list:
            rs.append(
                grequests.get(random.choice(IFCONFIG_CANDIDATES),
                              proxies={
                                  "http": proxy,
                                  "https": proxy
                              },
                              timeout=1))

        print("[II] [proxy_checker] Checking health of proxies")

        working_proxies = []
        # send a few at a time in sets of size "threads"
        for response in grequests.imap(rs, size=threads):
            # raw_text = str( response.content, 'utf-8')
            if response.status_code == 200:
                this_proxy = next(iter(response.connection.proxy_manager))

                parsed = urlsplit(this_proxy).netloc
                working_proxies.append(parsed)
                yield parsed
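A hedged consumption sketch, assuming check_proxies is reachable at module level (in the source it appears to be defined inside a class); the proxy addresses are placeholders.

# Iterate lazily so working proxies can be used as soon as they are confirmed.
candidates = ["203.0.113.10:8080", "198.51.100.7:3128"]
for proxy in check_proxies(candidates, threads=4):
    print("working proxy:", proxy)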
Example #9
def dl(url_template, url_arguments, desc=''):
    " Download in parallel {url_template} for each {url_arguments}, with a progress bar describing {desc}"
    all_requests = (grequests.get(url_template % arg, headers=HEADERS)
                    for arg in url_arguments)
    yield from tqdm(grequests.imap(all_requests),
                    desc=desc,
                    total=len(url_arguments))
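A usage sketch, assuming HEADERS is defined in the same module; the URL template and arguments are placeholders.

# Download ten numbered pages in parallel with a progress bar.
for response in dl('https://example.com/page/%d', range(10), desc='pages'):
    print(response.status_code, len(response.content))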
Example #10
def send_requests(url, data_file, expected_results):
    results = defaultdict(int)
    for res in grequests.imap(get_requests(url, data_file), size=20):
        results[res.status_code] += 1
        if res.status_code != 204:
            error(res)
    return [results == expected_results, results]
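An illustrative call, assuming get_requests() and error() are the module's own helpers; the URL, file name, and expected counts are placeholders.

# Compare the observed status-code distribution against the expected one.
matched, counts = send_requests('http://localhost:8080/ingest',
                                'payloads.jsonl',
                                {204: 1000})
print(matched, dict(counts))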
Example #11
def download_iamges(image_data, n_images, output_dir):
    """
    Download a specified number of images to output_dir.

    :param _elementtree._element_iterator image_data: information to
                                                      download images
    :param int n_images: number of images to download
    :param str output_dir: directory to store the images
    """

    urls = (make_thumb_url(image) for image in image_data)
    reqs = (grequests.get(url) for url in urls)
    responses = grequests.imap(reqs)

    responses = frogress.bar(responses, steps=n_images)
    print('\nDownloading {} images'.format(n_images))

    os.makedirs(output_dir, exist_ok=True)

    for r in responses:
        try:
            url = urllib.parse.urlparse(r.url)
            filename, _ = os.path.splitext(os.path.basename(url.path))
            output_file_path = os.path.join(output_dir, filename + '.jpg')
            with open(output_file_path, 'wb') as output_file:
                output_file.write(r.content)
        finally:
            r.close()
Example #12
 def _get_description(self, rs):
     """
     Creates asynchronous requests using the grequests library.
     If a request succeeds, the vacancy description is read from the vacancy
     page; otherwise the URL is appended to a list of URLs to be retried.
     :param rs: list of unsent grequests requests
     :return: list of urls with error in response
     """
     error_rs = []
     for r in grq.imap(rs, size=self.WORKERS_NUM,
                       exception_handler=self.exception_handler):
         if r.status_code == 200:
             try:
                 index = self._get_job_id(r.url)
                 self.vacancy_dict[index]["description"] = \
                     self._get_vacancy_description(pq(r.text))
                 if self.vacancy_dict[index]["description"] == "":
                     error_rs.append(r.url)
                     logging.info('Empty description in {}'.format(index))
             except Exception as e:
                 logging.info(
                     'Error in response {}, exception:{}'.
                         format(r.url, str(e)))
         else:
             error_rs.append(r.url)
     return error_rs
Example #13
    def test_evt_link_for_trx_id4(self):
        symbol = base.Symbol(
            sym_name=sym_name, sym_id=sym_id, precision=sym_prec)
        asset = base.new_asset(symbol)
        pay_link = evt_link.EvtLink()
        pay_link.set_max_pay(999999999)
        pay_link.set_header(evt_link.HeaderType.version1.value |
                            evt_link.HeaderType.everiPay.value)
        pay_link.set_symbol_id(sym_id)
        pay_link.set_link_id_rand()
        pay_link.sign(user.priv_key)

        req = {
            'link_id': 'd1680fea21a3c3d8ef555afd8fd8c903'
        }

        url = 'http://127.0.0.1:8888/v1/evt_link/get_trx_id_for_link_id'

        tasks = []
        for i in range(10240):
            pay_link.set_link_id_rand()
            req['link_id'] = pay_link.get_link_id().hex()
            tasks.append(grequests.post(url, data=json.dumps(req)))

        i = 0
        for resp in grequests.imap(tasks, size=900):
            self.assertEqual(resp.status_code, 500, msg=resp.content)
            i += 1
            if i % 100 == 0:
                print('Received {} responses'.format(i))
Example #14
 def _get_description(self, rs):
     """
     Creates asynchronous requests using the grequests library,
     if request was successful - gets vacancy description from vacancy page,
     if not - appends url in list of urls, which will be used again
     :param rs: list of urls
     :return: list of urls with error in response
     """
     error_rs = []
     for r in grq.imap(rs,
                       size=self.WORKERS_NUM,
                       exception_handler=self.exception_handler):
         if r.status_code == 200:
             try:
                 index = self._get_job_id(r.url)
                 self.vacancy_dict[index]["description"] = \
                     self._get_vacancy_description(pq(r.text))
                 if self.vacancy_dict[index]["description"] == "":
                     error_rs.append(r.url)
                     logging.info('Empty description in {}'.format(index))
             except Exception as e:
                 logging.info('Error in response {}, exception:{}'.format(
                     r.url, str(e)))
         else:
             error_rs.append(r.url)
     return error_rs
Example #15
    def __get_matches_from_live_ids(self, matches_ids):
        urls = (self.GAME_URL_TEMPLATE.format(match_id=I) for I in matches_ids)

        rs = (grequests.get(u, timeout=3, verify=True) for u in urls)

        matches_data = []
        for match_request in grequests.imap(rs, size=7):
            if not match_request:
                continue

            try:
                match_data = match_request.json()
            except ValueError:
                continue

            # Probably match_id changed or match has ended, so remove it from live_ids, # noqa
            if not match_data.get("Success", False):
                _, url_params = match_request.url.split("?")
                match_id = int(url_params.split("&")[0].split("=", 1)[1])
                self.live_matches_ids.discard(match_id)

                continue

            res_data = match_data["Value"]
            match_name = res_data["O1"] + " - " + res_data["O2"]

            if self.extra_info.get(match_name) is None:
                date = datetime.datetime.today()  # + datetime.timedelta(hours=0, minutes=35) # noqa
                date_str = date.strftime("%d-%m-%Y")
                self.extra_info[match_name] = date_str

            matches_data.append(res_data)

        return matches_data
Example #16
def getSucursales(stemUrl):
    sucursales = []
    mainUrl = stemUrl + 'sucursales'
    timeoutSecs = 20  # seconds to launch timeout exception
    concurrents = 20  # max concurrent requests

    print 'Collecting store information...'
    data = getJsonData(mainUrl)

    cantSucursales = data['total']
    maxLimit = data['maxLimitPermitido']
    cantPages = int(math.ceil(cantSucursales / maxLimit))

    urls = []
    print('Downloading stores...')
    for x in xrange(1, cantPages + 1):
        urls.append(mainUrl + '?offset=' + str((x - 1) * maxLimit) +
                    '&limit=' + str(maxLimit))

    rs = (grequests.get(u,
                        stream=False,
                        timeout=timeoutSecs,
                        headers={'User-Agent': 'Mozilla/5.0'}) for u in urls)
    responses = grequests.imap(rs, size=concurrents)
    for response in responses:
        data = ujson.loads(response.content)
        sucursales = sucursales + data['sucursales']
        response.close()

    return sucursales
Example #17
def fetch_all_projects():
    """
    Fetch the names of all projects using the v1 project API.
    Grossly inefficient - we need an API that can return a
    list of all project names quickly.
    """

    url_format = (
        'http://readthedocs.org/api/v1/project/?format=json'
        '&limit=100&offset={0}')
    project_names = []

    # Make initial request to see how many total requests we need to set up.
    resp = requests.get(url_format.format(0))
    project_results = resp.json()

    project_names.extend(parse_project_objects(project_results['objects']))

    total_count = project_results['meta']['total_count']
    # Determine the largest offset needed to fetch all projects
    max_offset = (total_count/100) * 100
    print max_offset

    urls = [url_format.format(offset)
            for offset in xrange(100, max_offset + 1, 100)]

    rs = (grequests.get(u) for u in urls)
    for resp in grequests.imap(rs, size=5):
        project_results = resp.json()
        project_names.extend(parse_project_objects(project_results['objects']))

    with open('project_list.json', 'w') as f:
        f.write(json.dumps({
            'project_names': project_names,
        }))
Example #18
def process_requests(ctx, rs, count, process_fun, ordered=False):
    errors = 0
    index = -1

    if ordered:
        request_iterator = grequests.map(rs, size=ctx['connections'])
    else:
        request_iterator = grequests.imap(rs, size=ctx['connections'])

    with click.progressbar(request_iterator, length=count) as bar:
        for r in bar:
            index = index + 1
            if r.status_code == requests.codes.ok:
                process_fun(index, r)
            elif r.status_code == 404:
                # indicates database was deleted before we queried it
                continue
            elif r.status_code == 500:
                errors = errors + 1
                click.echo('500 error processing {0}. Continuing...'.format(r.url),
                           err=True)
            else:
                click.echo(r.status_code)
                r.raise_for_status()

    if errors > 0:
        click.echo(
            'Failed to get data for {0} requests due to server errors'.format(
                errors))
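A hedged invocation sketch; the context dict, URLs, and handler below are assumptions rather than part of the original project.

# Process two documents with four concurrent connections.
urls = ['http://localhost:5984/db/doc1', 'http://localhost:5984/db/doc2']
rs = (grequests.get(u) for u in urls)
process_requests({'connections': 4}, rs, len(urls),
                 lambda index, r: print(index, r.status_code))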
Example #19
 def __init__(self, manifest_file, target_dir=os.getcwd()):
     self.target_dir = target_dir
     self.manifest_file = manifest_file
     self.cdn_url = "http://cdn.urbanterror.info/urt/{0}/{1}/q3ut4/{2}"
     self.mver = ""
     self.relnum = ""
     self.files = grequests.imap(self._parse_manifest())
Example #20
def _get_person_url(query: str, max_people: int) -> List[Iterator[str]]:
    """ """
    request_objs = []

    for i in range(int(max_people / 10) + 1):
        payload = {
            "search": "Search",
            "filter": query,
            "_region": "kgoui_Rcontent_I0_Rcontent_I0_Ritems",
            "_object_include_html": 1,
            "_object_js_config": 1,
            "_kgoui_page_state": "8c6ef035807a2a969576d6d78d211c78",
            "_region_index_offset": i * 10,
            "feed": "directory",
            "start": i * 10
        }
        request_objs.append(grequests.get(PEOPLE_URL, params=payload))

    responses = grequests.imap(request_objs)

    url_list = []
    for response_obj in responses:
        response = response_obj.json()['response']['contents']
        local_url_list = (x['fields']['url']['formatted'] for x in response)
        local_url_list = (x.replace('\\', '').split('&')[-1].replace('id=', '')
                          for x in local_url_list if '&start' not in x)
        local_url_list = (urllib.parse.unquote(x) for x in local_url_list)
        url_list.append(local_url_list)

    return url_list
Example #21
def sync_db():
    header = {
        "Authorization":
        "Basic: a2RsYW5ub3lAZ21haWwuY29tOmZlM2Y2ZDI5OGJlMWI2ODljNmUwZjlkNjFiYjNjY2YzYTNkYWIwMDdmYjYzZWU0MDcxMTFhMTgzMjNjYWQwNzAyNjM5OTY1OTZhOTAwZTM4MzgwNDhhMThjODdkZDUyOWZiZWM3YTA2YTEwZjA0ZDM0NjJjYmRmNjkwNGJlMjEz"
    }

    urls = [
        'http://tw06v033.ugent.be/Chronic/rest/DrugService/drugs',
        'http://tw06v033.ugent.be/Chronic/rest/SymptomService/symptoms',
        'http://tw06v033.ugent.be/Chronic/rest/TriggerService/triggers',
        'http://tw06v033.ugent.be/Chronic/rest/HeadacheService/headaches?patientID=6',
        'http://tw06v033.ugent.be/Chronic/rest/MedicineService/medicines?patientID=6',
    ]

    session = requests.Session()
    # session.mount('http://', HTTPAdapter(pool_connections=250, pool_maxsize=50))
    rs = (grequests.get(u, headers=header, session=session) for u in urls)

    # responses = requests.async.imap(rs, size=250)

    times = []
    for response in grequests.imap(rs, size=1):
        if response.status_code == 200:
            times.append(response.elapsed.total_seconds())
        else:
            times.append(1000)

        response.close()

    q.put(sum(times))
Example #22
def request(method, iterable, key=None, ignore_errors=True, **kwargs):
    """Convinient http request iterator.
    Returns a generator of :class:`requests.Response <requests.Response>`.
    See ``requests.request`` and ``grequests``.

    :param iterable: Iterable of URL or context object with ``key`` argument.
                     The item can access by ``response.context``.
    :param key: (optional) URL getter function like ``key`` argument of
                ``list.sort``.
    :param ignore_errors: (optional) If ``True``, ignore non 20x code and
                          transport errors.
    """
    # https://github.com/kennethreitz/requests
    # https://github.com/kennethreitz/grequests
    assert 'return_response' not in kwargs, 'not supported'
    kwargs.setdefault('prefetch', True)
    size = kwargs.pop('size', 2)
    hooks = kwargs.pop('hooks', {})

    def gen_hook_response(item):
        def result(response):
            response.context = item
            if 'response' in hooks:
                return hooks['response'](response)
        return result

    reqs = (grequests.request(
        method,
        key(item) if key else item,
        hooks=dict((i for i in hooks.items() if i[0] in requests.hooks.HOOKS),
                   response=gen_hook_response(item)),
        **kwargs) for item in iterable)

    for response in grequests.imap(reqs, kwargs['prefetch'], size):
        # can't get socket.timeout, requests.packages.urllib3.exceptions.TimeoutError here

        # response.status_code == None if not connectable for some reasons
        if ignore_errors \
           and (not response.status_code \
                or math.floor(response.status_code / 100) != 2):
            logger.error('%s %s', response.url, response.status_code)
            response = requests.hooks.dispatch_hook('error', hooks, response)
            continue

        # read and decode response body
        if kwargs['prefetch']:
            try:
                response.content
            except http.client.HTTPException as e: # e.g. IncompleteRead
                logger.exception('%s', response.url)
                response.error = e
                if ignore_errors:
                    response = requests.hooks.dispatch_hook(
                        'error', hooks, response)
                    continue
            except Exception as e:
                logger.exception('%s', response.url)
                continue

        yield response
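An illustrative use of the helper above, showing the documented key accessor and response.context; the item list and URLs are assumptions, and the call targets the older grequests API this helper was written against.

# Each context object is attached to its response as response.context.
items = [{'url': 'http://httpbin.org/get', 'name': 'first'},
         {'url': 'http://httpbin.org/ip', 'name': 'second'}]
for response in request('GET', items, key=lambda item: item['url'], size=2):
    print(response.context['name'], response.status_code)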
Example #23
def sync_db():
    header = {
        "Authorization": "Basic: a2RsYW5ub3lAZ21haWwuY29tOmZlM2Y2ZDI5OGJlMWI2ODljNmUwZjlkNjFiYjNjY2YzYTNkYWIwMDdmYjYzZWU0MDcxMTFhMTgzMjNjYWQwNzAyNjM5OTY1OTZhOTAwZTM4MzgwNDhhMThjODdkZDUyOWZiZWM3YTA2YTEwZjA0ZDM0NjJjYmRmNjkwNGJlMjEz"}

    urls = [
        'http://tw06v033.ugent.be/Chronic/rest/DrugService/drugs',
        'http://tw06v033.ugent.be/Chronic/rest/SymptomService/symptoms',
        'http://tw06v033.ugent.be/Chronic/rest/TriggerService/triggers',
        'http://tw06v033.ugent.be/Chronic/rest/HeadacheService/headaches?patientID=6',
        'http://tw06v033.ugent.be/Chronic/rest/MedicineService/medicines?patientID=6',
    ]

    session = requests.Session()
    # session.mount('http://', HTTPAdapter(pool_connections=250, pool_maxsize=50))
    rs = (grequests.get(u, headers=header, session=session) for u in urls)

    # responses = requests.async.imap(rs, size=250)

    times = []
    for response in grequests.imap(rs, size=1):
        if response.status_code == 200:
            times.append(response.elapsed.total_seconds())
        else:
            times.append(1000)

        response.close()

    q.put(sum(times))
Example #24
def send_heartbeat(node_state: NodeState,
                   election_timeout,
                   time_unit=TimeUnit.SECOND):
    client = Client()
    timeout = int(election_timeout)
    if time_unit == TimeUnit.SECOND:
        timeout = timeout * 1000
    state = {
        "id": node_state.id,
        "term": node_state.current_term,
        "state": type(node_state).__name__.lower(),
        "timeout": timeout
    }
    try:
        with client as session:
            logging.info(f'send heartbeat to monitor: {state}')
            posts = [
                grequests.post(MONITOR_URL_HEARTBEAT,
                               json=state,
                               session=session)
            ]
            for response in grequests.imap(posts):
                result = response.json()
                logging.info(f'get response from monitor: {result}')
    except Exception:
        logging.info(f'cannot connect to monitor: {MONITOR_URL_HEARTBEAT}')
Example #25
def start_get_ss():
    urls_dict = {
        # 'https://doub.io/sszhfx/':  get_ss_doubi,
        # 'https://xsjs.yhyhd.org/free-ss/': get_ss_yhyhd,
        # 'https://www.vbox.co/': get_ss_vbox,
        # 'http://ishadow.info/': get_ss_ishadow,
        # 'http://ss.vpsml.site/': get_ss_vpsml
        # 'http://get.shadowsocks8.cc/': get_ss_shadowsocks8,
        # 'http://www.shadowsocks.asia/mianfei/10.html': get_ss_sspw,
        # 'http://ss.ishadow.world/': get_ss_sishadow,
        r'https://github.com/Alvin9999/new-pac/wiki/ss%E5%85%8D%E8%B4%B9%E8%B4%A6%E5%8F%B7':
        get_ss_Alvin9999
    }
    headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
    }
    pool = ThreadPoolExecutor(len(urls_dict.keys()) + 1)
    rs = (grequests.get(u, timeout=80, proxies=proxies, headers=headers)
          for u in urls_dict.keys())
    for r in grequests.imap(rs, size=3):
        try:
            print("{:-^72}".format(r.url))
            func = urls_dict.get(r.url)
            if func is not None:
                pool.submit(func, r)
            else:
                print(u"no matching handler!!!")

        except Exception as e:
            print(u"error: " + str(e))
            continue
Example #26
    def scrape_ratings(self, episodes):

        # alternative: http://www.omdbapi.com/?i=tt3697842&Season=1

        def make_req(episode):
            return grequests.get(episode['url'], callback=set_meta({"episode": episode}))

        rows = []

        reqs = (make_req(episode) for episode in episodes)
        for response in grequests.imap(reqs, exception_handler=handler):
            doc = lxml.html.fromstring(response.content)
            rating, rating_count = "N/A", "N/A"
            if not doc.cssselect('div.notEnoughRatings'):
                rating = float(doc.cssselect("span[itemprop='ratingValue']")[0].text)
                rating_count = int(doc.cssselect("span[itemprop='ratingCount']")[0].text)
            row = {
                'season': int(response.meta['episode']['season']),
                'episode_number': int(response.meta['episode']['episode_number']),
                'rating': rating,
                'rating_count': rating_count
            }
            msg = " ".join([str(row['season']), str(row['episode_number']), str(row['rating']), str(row['rating_count'])])
            logging.debug(msg)
            rows.append(row)

        rows.sort(key=lambda x: (x['season'], x['episode_number']))

        return rows
Example #27
def trainingCost():
    start_time = time.time()

    priceOrderArray = []
    priceArray = []

    reqs = (grequests.get(link) for link in trainingLinks)
    resp = grequests.imap(reqs, size=20)

    for r in resp:
        soup = BeautifulSoup(r.text, 'lxml')
        ovr = textCleaner(
            soup.find("div", class_="list-info-player__ovr").span.text)
        ratingPrice = round(
            priceCleaner(
                soup.find("div", class_="player-listing__price-value").text))
        trainingCostValue = ratingPrice / qsCheck(float(ovr))
        trainingCostValue = round(trainingCostValue, 2)
        priceOrderArray.append(trainingCostValue)
        priceArray.append(
            ("[Rated: " + str(ovr) + "]" + "[Buying at: " + str(ratingPrice) +
             "]" + "[C/T: " + str(trainingCostValue) + "]"))

    CheapestPriceIndex = priceOrderArray.index(min(priceOrderArray))

    print("....Here you are: ")
    print("\n".join(priceArray))
    print("The cheapest option is this: \n")
    print(priceArray[CheapestPriceIndex])
    totalTime = time.time() - start_time
    print("--- %s seconds ---" % (round(totalTime, 2)))
Example #28
    def run(self):

        total = 0
        totals = {}
        bad_results = 0

        with open(self.hjson_path) as f:
            sites = json.load(f)

        rs = (grequests.head(s.get('url'),
                             hooks={'response': [self.hook_factory(s)]})
              for s in sites.get('base_urls'))
        for r in grequests.imap(rs, size=20):
            total += 1
            if totals.get(r.status_code):
                totals[r.status_code] += 1
            else:
                totals[r.status_code] = 1
            if r.status_code >= 400:
                bad_results += 1

        print('========================================================')
        print('Summary')
        print('========================================================')
        print('Total requests: %d' % total)
        print('Bad responses: %d' % bad_results)
        for sc in totals:
            print('Status Code %d: %d' % (sc, totals[sc]))

        self.dispatcher.command_complete.emit(0)
Example #29
    def __iter__(self):
        """ Yield all matching books for the supplied books & branch. """
        search = "Searching library catalog for books"
        if self.branch:
            search += f" at {self.branch}"
        logger.info(search)

        full_record_requests = []

        # First, yield all books with full metadata from the RSS channel
        for book in self.catalog_results():
            if book.call_number:
                yield book
            elif not book.full_record_link:
                logger.warning("No link given for %s, can't get call #",
                               book.title)
            else:  # Some metadata found, but we need to more for the call #
                logger.debug("No call # found for %s, fetching record.",
                             book.title)
                full_record_requests.append(self.async_record(book))

        # Then yield books that need additional lookups to fetch call numbers
        for response in grequests.imap(full_record_requests):
            book = response._book
            book.call_number = self.get_call_number(response)
            yield book
Example #30
def update_files(metadata, hash_fs):
    urls_to_get = []
    for ext in metadata:
        for ext_file in ext["current_version"]["files"]:
            if not ext_file["is_webextension"]:
                continue
            ext_file_hash_type, ext_file_hash = ext_file["hash"].split(":")
            assert ext_file_hash_type == "sha256"
            if hash_fs.get(ext_file_hash) is None:
                if ext_file["url"] in urls_to_get:
                    logger.warning("Duplicate URL in metadata: %s" %
                                   ext_file["url"])
                urls_to_get.append(ext_file["url"])
            else:
                logger.debug("`%s` is already cached locally" % ext_file_hash)

    logger.info("Fetching %d uncached web extensions from AMO" %
                len(urls_to_get))

    session = create_request_session()

    while True:
        fatal_errors = 0
        unsent_requests = [
            grequests.get(url, verify=True, session=session)
            for url in urls_to_get
        ]
        for response in grequests.imap(unsent_requests,
                                       size=MAX_CONCURRENT_REQUESTS):
            if response.status_code == 200:
                logger.debug("Downloaded %d bytes from `%s`" %
                             (len(response.content), response.url))
                try:
                    hash_fs.put(BytesIO(response.content), ".zip")
                except ValueError as err:
                    # probably the mysterious ValueError: embedded null byte
                    logger.error("Unable to store `%s` in local cache: %s" %
                                 (response.url, str(err)))
                    continue
                try:
                    original_url = response.history[0].url
                except IndexError:
                    # There was no redirect
                    original_url = response.url
                urls_to_get.remove(original_url)
            else:
                logger.error("Unable to download `%s`, status code %d" %
                             (response.url, response.status_code))
                if 400 <= response.status_code < 500:
                    fatal_errors += 1
            if len(urls_to_get) % 100 == 0:
                logger.info("%d extensions to go" % len(urls_to_get))

        if len(urls_to_get) == fatal_errors:
            break

    if len(urls_to_get) > 0:
        logger.warning(
            "Unable to fetch %d extensions, likely deleted add-ons" %
            len(urls_to_get))
Example #31
def quote_extract():
    prog_count = 0
    ticker = 0
    # initialise the grequests requests to use
    reqs = (grequests.get(link) for link in links)
    resp = grequests.imap(reqs, size=1)

    def SQL_commit():
        nonlocal quotes
        cur.execute(
            '''INSERT or REPLACE INTO Quote_link (quote_link)
            VALUES ( ? )''', (quotes, ))
        cur.execute('SELECT id FROM Quote_link WHERE quote_link = ?',
                    (quotes, ))
        quote_link_id = cur.fetchone()[0]

        conn.commit()

    for i in resp:
        soups = BeautifulSoup(i.text, 'lxml')
        for j in soups.find_all('a',
                                class_='actionLink',
                                attrs={'href': re.compile("^/work/quotes")}):
            quotes = (j.get('href'))
            prog_count += 1
            progress = (str(round((prog_count / book_n) * 100, 1)))
            ticker += 1
            if ticker == 3:
                print("Currently at %", progress, "completion.")
                ticker = 0

            SQL_commit()
Example #32
def quote_extract(links):
    prog_count = 0
    ticker = 0
    # initialise the grequests requests to use
    reqs = (grequests.get(link) for link in links)
    resp = grequests.imap(reqs, size=1)
    # iterate over the responses and pull the quote hyperlink from each page
    for i in resp:
        soups = BeautifulSoup(i.text, 'lxml')
        for j in soups.find_all('a',
                                class_='actionLink',
                                attrs={'href': re.compile("^/work/quotes")}):
            quotes = (j.get('href'))
            prog_count += 1
            progress = (str(round((prog_count / book_n) * 100, 1)))
            ticker += 1
            if ticker == 1:
                print("Currently at %", progress, "completion.")
                ticker = 0

    def commit():
        cur.execute(
            '''INSERT or REPLACE INTO Quote_link (quote_link)
            VALUES ( ? )''', (quotes, ))
        cur.execute('SELECT id FROM Quote_link WHERE quote_link = ?',
                    (quotes, ))
        quote_link_id = cur.fetchone()[0]

        conn.commit()

    commit()
Example #33
 def run(self):
     if self._concurrent:
         rs = (grequests.get(u) for u in self._urls)
         self._results = list(grequests.imap(rs))
     else:
         for u in tqdm(self._urls):
             self._results.append(self._get(u))
Example #34
 def test_imap_timeout(self):
     reqs = [
         grequests.get(httpbin('delay/1'), timeout=0.001),
         grequests.get(httpbin('/'))
     ]
     responses = list(grequests.imap(reqs))
     self.assertEqual(len(responses), 1)
Example #36
def grequests_insert_test():
    rs = (grequests_insert(x) for x in range(1, N))
    rs2 = grequests.imap(rs, stream=True, size=1000)
    r = 0
    for _ in rs2:
        r += 1
    print(r)
Example #37
def getCantArticulos(stemUrl, comercios):
    timeoutSecs = 30
    mainUrl = stemUrl + 'productos' + '?id_sucursal='
    concurrents = 5
    urls = []
    reqCounter = 0
    result = []

    print "Obteniendo cantidad de artículos por comercio..."

    for comercio in comercios:
        urls.append(mainUrl + comercio['id'])

    rs = (grequests.get(u,
                        stream=False,
                        timeout=timeoutSecs,
                        headers={'User-Agent': 'Mozilla/5.0'}) for u in urls)

    responses = grequests.imap(rs, size=concurrents)

    for response in responses:
        data = ujson.loads(response.text)
        idComercio = response.url[response.url.rfind('=', 0, len(response.url)
                                                     ) + 1:]

        result.append({
            "id": idComercio,
            "total": data['total'],
            "maxLimitPermitido": data['maxLimitPermitido'],
        })

        response.close()  # Close open connections

    return result
Example #38
def fetch_names():
    """
    fetches names from the url_List
    """

    names_links = [grequests.get(link) for link in url_list()]
    resp = grequests.imap(names_links, exception_handler=exception_handler)

    names_lists = []

    for idx, r in enumerate(resp):
        soup = BeautifulSoup(r.text, "html.parser")
        post = soup.find("section", {"class": "entry"})

        try:
            names = [name.text for name in post.find_next("ol").find_all("li")]
        except AttributeError:
            print(
                f"there are no names that begin with {ascii_lowercase[idx].upper()}"
            )
            names = []

        names_lists.append(names)

    return names_lists
Example #39
    def grequests随机无序集(self):  # grequests.imap(任务列

        页数网址 = 'http://news.paidai.com/?page={}'  #{}
        网址总列表 = []

        任务列表 = []
        内容总列表 = []
        for 倒页数 in range(18, 0, -1):
            各帖子链接 = 页数网址.format(
                str(倒页数)
            )
            网址总列表.append(各帖子链接)

        网址分列表 = []
        倒数 = len(网址总列表)
        for 各帖子链接 in 网址总列表:
            网址分列表.append(各帖子链接)
            倒数 = 倒数 - 1
            if len(网址分列表) == 20 or 倒数 == 0:
                print('等待响应网页倒数', 倒数)
                for 各帖子链接 in 网址分列表:
                    print('各帖子链接', 各帖子链接)
                    任务 = grequests.get(各帖子链接,
                                       headers=头部信息)  # timeout=len(任务列表)//2,
                    任务列表.append(任务)
                条件循环 = 1
                次数循环 = 0
                网址分列表 = []
                while 条件循环 == 1:
                    此时数 = int(time.time())
                    if 此时数 > 换IP时间计数 + 60:
                        self.模具一一换ip连接()
                        self.模具一一换头部信息()
                    try:  # exception handling around the error-prone request step
                        返回网页内容集 = grequests.imap(
                            任务列表, size=5)  # size = number of concurrent requests; gtimeout = overall timeout
                    except (grequests.exceptions.ConnectTimeout,
                            grequests.exceptions.ReadTimeout,
                            grequests.exceptions.ConnectionError,
                            grequests.exceptions.ConnectTimeout,
                            grequests.exceptions.ChunkedEncodingError,
                            grequests.exceptions.InvalidSchema) as 异常:
                        print('网络异常等待', 异常)
                        print('倒数9秒再连接', 次数循环, '次')
                        # time.sleep(3)
                    else:
                        print('=========================')
                        返回网页内容列表 = []
                        for 返回网页内容 in 返回网页内容集:
                            返回网页内容文本 = str(返回网页内容)
                            if '200' in 返回网页内容文本 and 'None' not in 返回网页内容文本 and '40' not in 返回网页内容文本:
                                print('返回网页内容', 返回网页内容)
                                返回网页内容.encoding = "UTF-8"
                                返回网页内容列表.append(返回网页内容)
                            if len(任务列表) == len(返回网页内容列表):
                                内容总列表 = 内容总列表 + 返回网页内容列表
                                条件循环 = 520

        print('完成')
Example #40
def download_reqs_to_files(reqs):
    for response in grequests.imap(reqs, exception_handler=handler):
        if response.status_code != 200:
            print("error downloading %s with code %s" % (response.url, response.status_code))
            continue
        filename = response.url.split("/")[-1]
        with open("data/" + filename, "wb") as f:
            f.write(response.content)
        print("downloaded %s" % filename)
Example #41
def download_reqs_to_files(reqs):
    for response in grequests.imap(reqs, exception_handler=handler):
        if response.status_code != 200:
            print("error downloading %s with code %s" % (response.url, response.status_code))
            continue
        filepath = RAW_FILEPATH.format(kind=response.meta['kind'], episode=response.meta['i'])
        with open(filepath, "wb") as f:
            f.write(response.content)
        print("downloaded %s" % filepath)
Example #42
def make_requests(reqs):
    successful_responses = 0
    failed_responses = 0
    for resp in grequests.imap(reqs, stream=False, size=100):
        if resp.status_code == 200:
            successful_responses += 1
        else:
            failed_responses += 1
        resp.close()
    return successful_responses, failed_responses
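A small driver sketch; the target URL and request count are placeholders.

# Fire 100 identical GETs and tally successes against failures.
reqs = (grequests.get('http://localhost:8000/ping') for _ in range(100))
ok, failed = make_requests(reqs)
print('ok:', ok, 'failed:', failed)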
Example #43
    def test_imap_timeout_exception(self):
        class ExceptionHandler:
            def __init__(self):
                self.counter = 0

            def callback(self, request, exception):
                self.counter += 1
        eh = ExceptionHandler()
        reqs = [grequests.get(httpbin('delay/1'), timeout=0.001)]
        list(grequests.imap(reqs, exception_handler=eh.callback))
        self.assertEqual(eh.counter, 1)
Example #44
 def test_imap_timeout_exception_handler_returns_value(self):
     """
     ensure behaviour for a handler that returns a value
     """
     def exception_handler(request, exception):
         return request
     reqs = [grequests.get(httpbin('delay/1'), timeout=0.001)]
     out = []
     for r in grequests.imap(reqs, exception_handler=exception_handler):
         out.append(r)
     self.assertEquals(out, [])
Example #45
def grequests_retry():
    s = requests.Session()
    retries = Retry(total=5, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504], raise_on_redirect=True,
                    raise_on_status=True)
    s.mount('http://', HTTPAdapter(max_retries=retries))
    s.mount('https://', HTTPAdapter(max_retries=retries))

    tasks = (grequests.get(url, session=s) for url in urls)
    resp = grequests.imap(tasks, size=10)
    for r in resp:
        print(r.status_code)
        print(r.text)
Example #46
 def test_imap_timeout_no_exception_handler(self):
     """
     compliance with existing 0.2.0 behaviour
     """
     reqs = [grequests.get(httpbin('delay/1'), timeout=0.001)]
     out = []
     try:
         for r in grequests.imap(reqs):
             out.append(r)
     except Timeout:
         pass
     self.assertEquals(out, [])
Example #47
def getIMDBPages(ListIDs):
    ''' Returns image counts by querying the relevant IMDB pages. '''

    requests = (grequests.get(imagePageTemplate.format(ID)) for ID in ListIDs)

    # send out requests 30 at a time,  get them back in generator expression.
    responses = grequests.imap(requests, size = 30)

    # get image counts and other information
    data = [getImageCounts(response) for response in responses]

    # serialise result as JSON, and return
    return json.dumps(data)
Example #48
def query_gerrit(template, change_ids, repo_name):
    """query gerrit."""
    queries = []
    template = "https://review.openstack.org" + template
    for change_id in change_ids:
        # ChangeIDs can be used in multiple branches/repos
        patch_id = urllib.quote_plus("%s~master~" % repo_name) + change_id
        queries.append(template % patch_id)
    unsent = (grequests.get(query) for query in queries)
    for r in grequests.imap(unsent, size=10):
        try:
            yield json.loads(r.text[4:])
        except AttributeError:
            # request must have failed, ignore it and move on
            logger.debug("failed to parse gerrit response")
            pass
Example #49
def startRetrieve(params, session=Session(), callback=None):
    page_1 = session.get(SEARCH_URL, params=params)
    first_json = page_1.json()
    total_items = first_json["totalItems"]
    pages = int(math.ceil(float(total_items)/ITEMS_PER_PAGE))
    if(pages > MAX_PAGES): pages = MAX_PAGES
    reqs = []
    resps = []
    resps.extend(first_json["items"])
    for page in range(2,pages+1):
        #params["page"] = str(page_num)
        reqs.append(grequests.request('GET', SEARCH_URL+"page={}".format(page), timeout=TIMEOUT, params=params, session=session))
    for resp in grequests.imap(reqs, False, REQ_THREADS, exception_handler=exception_handler):
        current_app.logger.debug("Requesting data from %s", resp.request.path_url)
        resps.extend(resp.json()["items"])
    return resps
Example #50
 def scrape_archives(self):
     pages = self.get_archive_urls()
     rs = (grequests.get(u, session=self.session) for u in pages)
     all_show_notes = {}
     for result in grequests.imap(rs):
         print("On URL ->", result.url)
         doc = html.fromstring(result.content)
         all_show_notes[result.url.split("/")[-1]] = {
             "title": doc.cssselect("header h2 a")[0].text_content(),
             "notes": doc.cssselect(".post-contents.cf p")[1].text_content().strip(),
             "date": "{} {}".format(
                 doc.cssselect(".date .day")[0].text_content(),
                 doc.cssselect(".date .month")[0].text_content()
             )
         }
     with open("show_notes.json", "w") as show_notes_file:
         show_notes_file.write(json.dumps(all_show_notes))
     print("Done and saved!")
Example #51
def get_all(task, view, filter_dict, page_size=2500, request_limit=20):
    result = []
    c = count(task,view,filter_dict)
    total_pages = math.ceil(c/page_size)
    url = 'https://proteomics2.ucsd.edu/ProteoSAFe/QueryResult'
    params = [
        ('task', task),
        ('file', find_file_name(task,view)),
        ('query', encode_all_filters(filter_dict))
    ]
    rs = (
        grequests.get(url, params = OrderedDict(params + [('pageSize', page_size),('offset', page_offset * page_size)]))
        for page_offset in range(0,total_pages)
    )
    all_responses = []
    for l in grequests.imap(rs,size=request_limit):
        all_responses += l.json()['row_data']
    return all_responses
Example #52
def sweap(mp, urldict, urllist):
    rs=(grequests.get(u) for u in urllist)
    for r in grequests.imap(rs):
        fd=get_info(r)
        if not fd:continue
        finfo=urldict.get(r.url,None)
        if not finfo:continue
        print finfo.msg
        logging.info(finfo.msg)
        fd['country']=finfo.country
        fd['mediatype']=finfo.mediatype
        fd['fieldtype']=finfo.fieldtype
        fd['fieldinfo']=finfo.fieldinfo
        fd['is_meta_stored']=False
        sv=SaveFeed(mp,fd)
    msg='end sweap dictsize=%d listsize=%d' % (len(urldict),len(urllist))
    print msg
    logging.info(msg)
Example #53
def do_requests(data, results, limit=None):
    if limit:
        total = limit
    else:
        total = len(data)
    i = 0
    for resp in grequests.imap(RequestsGenerator(data), stream=True):
        i += 1
        sys.stdout.write("\rCompleted %4d/%-4d [%-20s] %0d%%  " % (
            i, total,
            "=" * (int((i / total) * 20)), 
            i * 100 / total))
        sys.stdout.flush()
        add_result(results, data, resp)
        if limit and limit == i:
            return
    sys.stdout.write("\n")
    sys.stdout.flush()
Example #54
def get_threds():
	#get the threds of a particular board
	print('------4chan Word Frequency Experiment------\nNOTE: These posts are from an online forum, and as such\nare NOT censored. Use at your own risk!\n---What This Is---\nThis script counts the number of occurances of any particular\nword in a board on 4chan, and returns a descending list\nof those word frequencies. It currently ignores some\n(but not all!) common words.')
	which_thred = input("Please input the thread symbol (e.g., sci, g, or vg): ")
	thred_nums = json.loads(requests.get('https://a.4cdn.org/'+which_thred+'/threads.json').text)
	num_th = 0
	all_threads = []
	for q in thred_nums:
		num_th +=1
		for r in q['threads']:
			all_threads.append(r['no'])
	thred_base = 'https://a.4cdn.org/'+which_thred+'/thread/'
	print(str(all_threads))
	# this has something to do with a concept called 'deferred' ('promises' in JS).
	# Put simply, it has to wait for ALL the approx. 150 or so responses to
	# return before it can continue. We basically create an array of http reqs
	# with the line below, and then say "wait till they all get back" with 
	# grequests.map(reqs)
	reqs = (grequests.get(thred_base+str(url)+'.json',timeout=10) for url in all_threads)
	rez = grequests.imap(reqs,exception_handler=exception_handler)
	txt = ''
	thred_count = 0
	print('Beginning thread concatenization')
	for r in rez:
		thred_count += 1
		try:
			coms = json.loads(r.text)['posts']
			for n in coms:
				try:
					txt+=n['com']
				except:
					txt+=''
		except: 
			txt+=''
		print('Done thread #'+str(thred_count))
	# got all txt. Now clean it!
	clean_txt = clean(txt) #clean the text to remove unprintable chars
	no_html_txt  = strip_tags(clean_txt) #remove HTML tags, since those are not part of the posted data
	no_link_txt = reg.sub(r'^https?:\/\/.*[\r\n]*', '', no_html_txt)#remove links (mostly)
	no_quote_txt = reg.sub('&gt;&gt;\d{4,}|&gt;+|>>\d{4,}',' ',no_link_txt) #remove 4chan 'quotes', such as >>blahblah
	unwanted_symbs = [">","&gt;","[^a-zA-Z0-9']"]
	for q in range(0,len(unwanted_symbs)):
		no_quote_txt = reg.sub(unwanted_symbs[q],' ',no_quote_txt) 
	count_words(no_quote_txt.lower())
Example #55
def btc_e(assets):
    r = requests.get('https://btc-e.com/api/3/info').json()
    urls=[]
    pairs = []
    for k, v in r['pairs'].items():
        k1, k2 = k.upper().split("_")
        if k1 in assets and k2 in assets:
            pairs.append(k)
            urls.append('https://btc-e.com/api/3/ticker/' + k)
    def item(r):
        k,v = r.popitem()
        k1, k2 = k.upper().split("_")
        return {'from': k1,
                'to': k2,
                'bid': v['buy'],
                'ask': v['sell'],
                'last': v['last']}
    return [item(x.json()) for x in \
            grequests.imap([grequests.get(u) for u in urls])]
Example #56
def anx(assets):
    retval = []
    urls = []
    pairs = []
    resp = requests.get('https://anxpro.com/api/3/currencyStatic').json()
    for k, v in resp['currencyStatic']['currencyPairs'].items():
        k1 = v['tradedCcy']
        k2 = v['settlementCcy']
        if k1 in assets and k2 in assets:
            pairs.append([k1, k2])
            urls.append('https://anxpro.com/api/2/%s/money/ticker' % k)
    def item(r):
        return {'from': r['vol']['currency'],
                'to': r['last']['currency'],
                'bid': float(r['buy']['value']),
                'ask': float(r['sell']['value']),
                'last': float(r['last']['value'])}
    return [item(i.json()['data']) \
            for i in grequests.imap([grequests.get(u) for u in urls])]
Example #57
def download_images(wnidfile, folder, n_images):
  def make_name(wnid, url):
    filename = url.encode("ascii", "ignore").replace("/","_")
    return os.path.join(folder, wnid, filename)

  URL = "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}"
  wnids = [l.strip().split()[0] for l in open(wnidfile)]
  random.shuffle(wnids)
  session = requests.Session()
  for wnid in wnids:
    try:
      os.makedirs(os.path.join(folder, wnid))
    except os.error: pass
    res = requests.get(URL.format(wnid))
    urls = [_.strip() for _ in res.text.split("\n")]
    urls = [u for u in urls if u]
    jobs = [grequests.get(url, session=session, timeout=5)
        for url in urls
        if not os.path.exists(make_name(wnid, url))
    ]
    n_already_have = (len(urls) - len(jobs))
    N = max(min(n_images, len(urls)) - n_already_have, 0)
    print("getting %s, (have %d, need %d) (%d/%d)" % (wnid, n_already_have, N, wnids.index(wnid)+1, len(wnids)))
    if N == 0: continue
    curr = 0
    pbar = tqdm(total=len(jobs))
    for res in grequests.imap(jobs, size=50):
      if curr >= N:
        print("got %d" % curr)
        break
      pbar.update()
      if "unavailable" in res.url:
        continue
      try:
        im = Image.open(StringIO(res.content))
        if im.width < 128 or im.height < 128: continue
        im.save(make_name(wnid, res.url))
        curr += 1
      except IOError: continue
      except Exception as e:
        # print("caught exception: %s" % e)
        continue
Example #58
def async_check_urls(url_list, request_size=128):
    d = {'err': []}
    greq = grequests.imap(
        (grequests.get(
            'http://' + url, timeout=(10, 10)) for url in url_list),
        size=request_size)
    while True:
        try:
            res = next(greq)
        except StopIteration:
            break
        except:
            d['err'].append(res.url)
        else:
            try:
                d[res.status_code].append(res.url)
            except KeyError:
                d[res.status_code] = [res.url]
            
    return d
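An illustrative call; the host names are placeholders. The returned dict maps status codes (plus the 'err' key) to lists of URLs.

# Probe a few hosts and group the results by HTTP status code.
status_map = async_check_urls(['example.com', 'example.org', 'example.net'],
                              request_size=16)
for status, urls in status_map.items():
    print(status, len(urls))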
Example #59
def run_command(url='', org='', account='', key='', command='', agent_list='', **kwargs):
    plugin_content = '#!/usr/bin/env bash \n' + command
    requests = (grequests.post(
        utils.build_api_url(url,
                            org,
                            account,
                            endpoint='rpc' + '/run'),
        data={
            'name': 'temp.sh',
            'agent': agent_id,
            'content': base64.b64encode(plugin_content),
            'encoding': 'base64',
            'params': '',
            'type': 'SCRIPT'
        },
        callback=set_meta(agent_id),
        headers={'Authorization': "Bearer " + key}) for agent_id in agent_list)
    data = []
    for resp in grequests.imap(requests, size=10):
        data.append([resp.meta, resp.json()])
    return data
Example #60
def main(*args):
	urls = [
		'http://localhost:8000/test?timeout=10&name=req1',
		'http://localhost:8001/test?timeout=9&name=req2',
		'http://localhost:8002/test?timeout=8&name=req3',
		'http://localhost:8003/test?timeout=7&name=req4',
		'http://localhost:8004/test?timeout=6&name=req5',
		'http://localhost:8004/test?timeout=5&name=req6',
		'http://localhost:8003/test?timeout=4&name=req7',
		'http://localhost:8002/test?timeout=3&name=req8',
		'http://localhost:8001/test?timeout=2&name=req9',
		'http://localhost:8000/test?timeout=1&name=req0',
	]
	
	print datetime.datetime.now()
	
	rs = (grequests.get(u) for u in urls)
	for res in grequests.imap(rs, size=10):
		print datetime.datetime.now()
		print res.text
		
	print datetime.datetime.now()