class QianzhanClient(object):
    """Client for qiye.qianzhan.com.

    Login flow: load the login page (cookies), fetch the captcha image
    ("varifyimage"), decode it via the external ``read_body_to_string``
    helper, then POST credentials plus the decoded code, retrying with a
    fresh captcha on failure.  NOTE: Python 2 syntax (``except E, e``).
    """

    def __init__(self, userId, password):
        # Credentials consumed by _do_login().
        self._userId = userId
        self._password = password
        self._http_client = HTTPClient()
        pass

    """+++++++++++++++++++login++++++++++++"""

    def _per_login(self):
        # Prime the session by loading the login page, then fetch and
        # decode the first captcha; returns the decoded code string.
        login_page_url = "http://qiye.qianzhan.com/usercenter/login?ReturnUrl=http%3A%2F%2Fqiye.qianzhan.com%2F"
        response = self._http_client.get(login_page_url)
        return self._get_varifyimage(True)

    def _get_varifyimage(self, is_first=False):
        """Fetch the captcha image and return its decoded text (spaces stripped)."""
        if is_first:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        else:
            # Random query-string suffix defeats caching on retries.
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage?" + str(random.random())
        response = self._http_client.get(varifyimage_url)
        # logging.debug("verifyimage: %s" % response.content)
        # read_body_to_string is an external decoder (presumably OCR) -- see its module.
        varifycode = read_body_to_string(response.content)
        logging.debug("varifycode: %s" % varifycode.replace(' ', ''))
        return varifycode.replace(' ', '')

    def _do_login(self, varifycode, max_times=10):
        """POST the login form; retry recursively with a new captcha.

        Returns True on success, False once max_times attempts are exhausted.
        """
        form_data = {
            "userId": self._userId,
            "password": self._password,
            "VerifyCode": varifycode,
            "sevenDays": "false"
        }
        login_url = "http://qiye.qianzhan.com/usercenter/dologin"
        response = self._http_client.post(login_url, form_data)
        logging.debug("text: %s" % response.text)
        try:
            json_obj = json.loads(response.text)
        except Exception, e:
            # Non-JSON (HTML) answer -- treat it as a failed attempt.
            json_obj = {"isSuccess": False, "sMsg": "is html return"}
            pass
        logging.debug("json_obj: %s" % json_obj)
        if not json_obj.get("isSuccess"):
            # print json_obj.get("sMsg")
            max_times -= 1
            if max_times > 0:
                varifycode = self._get_varifyimage()
                return self._do_login(varifycode, max_times)
            else:
                return False
        # print json_obj.get("sMsg")
        logging.info("cookie: %s" % response.cookies.get_dict())
        return True
def __init__(self, proxies):
    """Set up the site client.

    :param proxies: proxy mapping forwarded to ``HTTPClient``.
    """
    # Networking: proxied HTTP client with a randomly picked User-Agent.
    self._http_client = HTTPClient(proxies=proxies)
    self._user_agent = random.choice(USER_AGENTS)
    # Ticket/timestamp state, populated later by other methods.
    self.credit_ticket = None
    self.currentTimeMillis = None
    # Site endpoint URLs, resolved later.
    self._detail_url = None
    self._index_1_url = None
    self._index_2_url = None
    self._search_list_url = None
class Spider(Sailor):
    """Worker that executes the crawler actions listed in a task.

    NOTE: Python 2 code (``dict.iteritems``, ``except E, e``).
    """
    pass

    def ready(self):
        # Per-worker setup: proxied HTTP client, logger and action table.
        self.http = HTTPClient()
        self.http.set_proxy(settings.PROXY)
        self.logger = logging.getLogger("Spider")
        self._load_actions()

    def _load_actions(self, ):
        # Instantiate every configured crawler action, then register the
        # two built-in HTTP actions on top.
        self.actions = settings.ACTIONS
        for k, v in self.actions.iteritems():
            self.actions[k] = self._init_actions(*v)
        self.actions['GET'] = self.http_get
        self.actions['POST'] = self.http_post

    def _init_actions(self, cls, param={}):
        # NOTE(review): mutable default ``param={}`` -- safe only while no
        # constructor mutates its kwargs.
        cls = import_class(cls)
        if cls is None:
            # NOTE(review): ``cls`` was already rebound to None above, so
            # this message prints None rather than the offending class path.
            raise RuntimeError, "Could not loading crawler '%s'." % cls
        return cls(**param)

    def start(self, t):
        """Run every action of task *t*; failures are logged, not fatal."""
        site = WebSite(t.header('Site'), "", "worker")
        next_task = new_task('worker')
        next_task.header('Site', t.header('Site'))
        for l in t.list_actions():
            action, url, save_as, args = self._parse_action(l, site)
            try:
                handler = self.actions.get(action, None)
                self.logger.debug("[%s] %s --> %s, args:%s" % (action, url, save_as, str(args)))
                if handler is not None:
                    handler(site, self.http, next_task, url, save_as, *args)
                else:
                    self.logger.error("not found sipder action:'%s'", action)
            except Exception, e:
                # Keep processing the remaining actions after a failure.
                self.logger.exception(trackable(u"Exception on task '%s'" % e))
        self.http.close()
        # Hand the follow-up task back to the queue.
        next_task.status = 'waiting'
class NDLA:
    """Scraper for ndla.no pages: fetches the HTML and extracts citation
    metadata (title, authors, publication date) into a ``Website``."""

    def __init__(self, url):
        """Remember the page URL; the soup is filled in by parse()."""
        self.url = url
        self.soup = None
        self.http_client = HTTPClient()

    def request(self):
        """Fetch the page and return the raw response."""
        return self.http_client.get(self.url, {})

    def parse_title(self):
        """Title comes from the <meta property="og:title"> tag."""
        tag = self.soup.find(attrs={'property': 'og:title'})
        return tag.get('content', None)

    def parse_publication_date(self):
        """Second token of the #edit-dates text is 'dd.mm.yyyy,'."""
        label, date, rest = self.soup.find(id='edit-dates').text.split(' ', 2)
        return datetime.strptime(date, '%d.%m.%Y,')

    def parse_authors(self):
        """One Author per anchor inside the .owner element."""
        anchors = self.soup.find(attrs={'class': 'owner'}).findAll('a')
        return [Author(family=anchor.text, given=None) for anchor in anchors]

    def parse(self, html):
        """Build a Website record from the page *html*."""
        self.soup = BeautifulSoup(html, 'html.parser')
        website = Website()
        website.url = self.url
        website.id = 'web:%s' % self.url
        website.title = self.parse_title()
        website.authors = self.parse_authors()
        website.name = 'Nasjonal digital læringsarena'
        website.publication_date = self.parse_publication_date()
        return website
class SNL:
    """Client for the Store Norske Leksikon (snl.no) JSON API."""

    def __init__(self, url):
        """Remember the article URL and create the HTTP client."""
        self.url = url
        self.http_client = HTTPClient()

    def get_api_url(self):
        '''Generates the API URL.'''
        # Any snl.no article URL serves JSON when suffixed with ".json".
        if self.url.endswith('.json'):
            return self.url
        return self.url + '.json'

    def request(self):
        '''Requests and returns response.'''
        return self.http_client.get(self.get_api_url(), {})

    def parse(self, json):
        '''Parses JSON from SNL API.'''
        fields = loads(json)
        website = Website()
        website.id = 'web:%s' % self.url
        website.url = self.url
        website.title = fields['title']
        # created_at looks like "YYYY-MM-DDThh:mm:ss"; keep the date part.
        date_part, time_part = fields['created_at'].split('T')
        website.publication_date = datetime.strptime(date_part, '%Y-%m-%d')
        website.authors = [Author(family=author['full_name'], given=None)
                           for author in fields['authors']]
        website.name = 'Store Norske Leksikon'
        return website
def __init__(self, query: str, max_item_count: int):
    """Prepare a paged API search.

    :param query: the search query string.
    :param max_item_count: upper bound on items to collect.
    """
    self.query = query
    self.max_item_count = max_item_count
    self.next_token = ''  # presumably the pagination cursor; '' = first page
    self.bearer_token = BEARER_TOKEN
    self.http_client = HTTPClient()
    self.__lock = asyncio.Lock()
class SiteClient(object):
    """HTTPClient wrapper that maps non-200 statuses to typed errors and
    retries transient failures.  NOTE: Python 2 syntax (``except E, err``)."""

    def __init__(self, proxies):
        # self._username = username
        # self._password = password
        self._http_client = HTTPClient(proxies=proxies)
        pass

    def _verify_post(self, url, data=None, json=None, times=0):
        """POST *url* and return the response on HTTP 200.

        Raises Error302/403/404/502/503 (or ErrorStatusCode) for other
        statuses.  *times* counts attempts; HttpClientError is retried
        recursively up to 3 attempts, then re-raised.
        """
        try:
            response = self._http_client.post(url, data, json)
            if response.status_code == 200:
                pass
            elif response.status_code == 302:
                location = response.headers['Location']
                logging.debug("location: %s" % location)
                raise Error302()
            elif response.status_code == 403:
                raise Error403()
            elif response.status_code == 404:
                raise Error404()
            elif response.status_code == 502:
                raise Error502()
            elif response.status_code == 503:
                raise Error503()
            else:
                raise ErrorStatusCode(response.status_code)
            return response
        except HttpClientError, err:
            times += 1
            if times < 3:
                return self._verify_post(url, data=data, json=json, times=times)
            else:
                raise err
def get_captcha(path, group='emop'):
    """Send the captcha image at *path* to the remote solver service.

    :param path: filesystem path of the captcha image.
    :param group: solver routing group.
    :return: decoded JSON dict from the service, or {} on empty response.
    """
    # `with` guarantees the file handle is closed even if read/encode fails
    # (the original leaked the handle on exceptions).
    with open(path, 'rb') as fd:
        data = base64.b64encode(fd.read())
    client = HTTPClient()
    param = {'g': group, 'content': data}
    response = client.post_data("http://192.168.3.220:8000/route", param, {})
    #response = client.post_data("http://127.0.0.1:8925/route", param, {})
    print("the response : %s" % response)
    if response:
        ret = json.loads(response)
    else:
        ret = {}
    return ret
def get_captcha(path, group="emop"):
    """Send the captcha image at *path* to the remote solver service.

    :param path: filesystem path of the captcha image.
    :param group: solver routing group.
    :return: decoded JSON dict from the service, or {} on empty response.
    """
    # Open in binary mode: base64.b64encode requires bytes, and text mode
    # would corrupt image data (the sibling implementation uses 'rb' too).
    # `with` also closes the handle on exceptions.
    with open(path, "rb") as fd:
        data = base64.b64encode(fd.read())
    client = HTTPClient()
    param = {"g": group, "content": data}
    response = client.post_data("http://42.120.43.111:8925/route", param, {})
    # response = client.post_data("http://127.0.0.1:8925/route", param, {})
    if response:
        ret = json.loads(response)
    else:
        ret = {}
    return ret
def get_captcha(path, group='emop'):
    """Send the captcha image at *path* to the remote solver service.

    :param path: filesystem path of the captcha image.
    :param group: solver routing group.
    :return: decoded JSON dict from the service, or {} on empty response.
    """
    # Open in binary mode: base64.b64encode requires bytes, and text mode
    # would corrupt image data.  `with` closes the handle on exceptions.
    with open(path, 'rb') as fd:
        data = base64.b64encode(fd.read())
    client = HTTPClient()
    param = {'g': group, 'content': data}
    response = client.post_data("http://42.120.43.111:8925/route", param, {})
    #response = client.post_data("http://127.0.0.1:8925/route", param, {})
    if response:
        ret = json.loads(response)
    else:
        ret = {}
    return ret
class SiteClient(object):
    """HTTPClient wrapper mapping statuses to typed errors with one retry.

    NOTE: Python 2 syntax (``except E, err``).
    """

    def __init__(self):
        self._http_client = HTTPClient()
        pass

    def _verify_post(self, url, data=None, json=None, times=0, headers=default_headers, timeout=download_timeout):
        """POST *url* and return the response on HTTP 200.

        Raises Error302/403/404/502/503 or ErrorStatusCode otherwise.
        Error403 is never retried; HttpClientError is retried once
        (*times* counts attempts).
        NOTE(review): ``headers=default_headers`` shares one module-level
        dict across calls -- fine while nothing mutates it here.
        """
        # headers.update({
        #     'User-Agent': self._user_agent,
        #     # "Proxy-Authorization": self.get_authHeader()
        # })
        try:
            response = self._http_client.post(url=url, data=data, json=json, headers=headers, timeout=timeout)
            if response.status_code == 200:
                logging.debug(response.headers)
                pass
            elif response.status_code == 302:
                location = response.headers['Location']
                logging.debug("location: %s" % location)
                raise Error302()
            elif response.status_code == 403:
                raise Error403()
            elif response.status_code == 404:
                raise Error404()
            elif response.status_code == 502:
                raise Error502()
            elif response.status_code == 503:
                raise Error503()
            else:
                raise ErrorStatusCode(response.status_code)
            return response
        except Error403, err:
            # 403 is terminal -- do not retry.
            raise err
        except HttpClientError, err:
            times += 1
            if times < 2:
                return self._verify_post(url, data=data, json=json, times=times, headers=headers, timeout=timeout)
            else:
                raise err
def demo():
    """Run one example of each client type: TCP socket, HTTP, data stream."""
    # TCP socket clients, one per sample message
    # (see samplelist in /data_streams/samples.py).
    for sample in samplelist:
        SocketClient(message=sample).start()
    # Example HTTP client.
    HTTPClient().start()
    # Example random-number data stream.
    DataStream(random_nr_config, random_nr).start()
class General:
    """Fallback scraper: fetches a page and maps Newspaper's extraction
    into a ``Website`` record."""

    def __init__(self, url):
        """Remember the page URL and create the HTTP client."""
        self.url = url
        self.http_client = HTTPClient()

    def request(self):
        '''Requests and returns response.'''
        return self.http_client.get(self.url, {})

    def extract_with_newspaper(self, html):
        '''Parses HTML using Newspaper.'''
        article = Article(self.url)
        article.set_html(html)
        # Fix: install the DeprecationWarning filter *inside* catch_warnings()
        # so the global warning state is restored afterwards.  The original
        # called filterwarnings() before entering the context, permanently
        # leaking the 'ignore' filter into the process.
        with catch_warnings():
            filterwarnings('ignore', category=DeprecationWarning)
            article.parse()
        return article.__dict__

    def parse(self, html):
        '''Converts Newspaper fields into Website.'''
        fields = self.extract_with_newspaper(html)
        website = Website()
        website.id = 'web:%s' % fields['url']
        website.publication_date = fields['publish_date']
        website.url = fields['url']
        website.name = self.extract_site_name(fields['meta_data'])
        website.title = fields['title'] \
            if fields['title'] else None
        website.authors = [Author(family=name, given=None) \
                           for name in fields['authors']]
        return website

    @staticmethod
    def extract_site_name(meta_data):
        """Return og:site_name from *meta_data*, or None when absent/odd-shaped."""
        try:
            return meta_data['og']['site_name']
        except (TypeError, KeyError):
            return None
class SiteClient(object):
    """HTTPClient wrapper that treats most error statuses as "rotate the
    proxy" (NeedrefreshProxyError).  NOTE: Python 2 syntax (``except E, err``).
    NOTE(review): mutable default ``proxies={}`` is shared across instances.
    """

    def __init__(self, proxies={}):
        # self._username = username
        # self._password = password
        self._http_client = HTTPClient(proxies=proxies)
        self._user_agent = random.choice(USER_AGENTS)
        # Ticket/timestamp state, populated later.
        self.credit_ticket = None
        self.currentTimeMillis = None
        # Site endpoint URLs, resolved later.
        self._detail_url = None
        self._index_1_url = None
        self._index_2_url = None
        self._search_list_url = None
        self._qynb_detail_url = None
        pass

    # -----------------------------------------------get post------------------------------------------------------- #
    def _verify_post(self, url, data=None, json=None, times=0, headers=default_headers, timeout=download_timeout):
        """POST *url*; return the response on 200, else raise.

        302/404/502/503 -> NeedrefreshProxyError (rotate proxy);
        403 -> Error403 (never retried); other codes -> ErrorStatusCode.
        HttpClientError is retried once (*times* counts attempts).
        NOTE(review): headers.update() mutates the shared default_headers
        dict when the default argument is used.
        """
        headers.update({
            'User-Agent': self._user_agent,
            # "Proxy-Authorization": self.get_authHeader()
        })
        try:
            response = self._http_client.post(url=url, data=data, json=json, headers=headers, timeout=timeout)
            if response.status_code == 200:
                logging.debug(response.headers)
                pass
            elif response.status_code == 302:
                location = response.headers['Location']
                logging.debug("location: %s" % location)
                raise NeedrefreshProxyError()
            elif response.status_code == 403:
                raise Error403()
            elif response.status_code == 404:
                raise NeedrefreshProxyError()
            elif response.status_code == 502:
                raise NeedrefreshProxyError()
            elif response.status_code == 503:
                raise NeedrefreshProxyError()
            else:
                raise ErrorStatusCode(response.status_code)
            # Anti-crawl block page detection by body marker text.
            # NOTE(review): ``> 0`` would miss a marker at offset 0 (find
            # returns 0 there); unlikely for HTML but worth confirming.
            if response.content.find('繁访问错误页面') > 0:
                logging.info(
                    "---------------||||||||||||||||||||||||-------------")
                raise NeedrefreshProxyError()
            return response
        except Error403, err:
            raise err
        except HttpClientError, err:
            times += 1
            if times < 2:
                return self._verify_post(url, data=data, json=json, times=times, headers=headers, timeout=timeout)
            else:
                raise err
#!/bin/python
# -*- coding: utf-8 -*-
import logging, os, sys


def init_path():
    """Prepend the bundled 'libs' directory to sys.path."""
    here = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, os.path.join(here, 'libs'))


if __name__ == "__main__":
    init_path()
    FORMAT = "%(asctime)s %(name)s T[%(thread)d]P[%(process)d] %(levelname)8s %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout)
    # Imported after init_path() so the bundled copy is found.
    from http_client import HTTPClient

    http = HTTPClient()
    proxy = os.environ.get("http_proxy", "")
    if proxy:
        http.set_proxy({'http': proxy})
    # Smoke test: resolve a t.cn short link to its real URL.
    http.get_real_url("http://t.cn/aCAHu3")
def __init__(self, reader, writer, token):
    """Initialise the Galaxy plugin and its itch.io HTTP client."""
    super().__init__(Platform.ItchIo, __version__, reader, writer, token)
    # Session cookie is established later during authentication.
    self.session_cookie = None
    # The client persists credentials via the plugin's store_credentials hook.
    self.http_client = HTTPClient(self.store_credentials)
class SimpleCrawler(object):
    """Crawler for hzfc365.com building/room listings.

    Flow: project page -> building list -> per-building room page ->
    room detail page, scraping each step with regexes.  When ``debug``
    is True every response body is dumped to a local temp_cache file.
    """

    def __init__(self):
        self.http = HTTPClient()
        self.debug = True

    def start(self, lpid):
        """Crawl the project *lpid*: list its buildings, then each one's rooms."""
        url = "http://www.hzfc365.com/house_search/search_prj.jsp?lpid=%s" % lpid
        reps_text = self.http.download(url)
        if self.debug:
            self._save_temp(reps_text, lpid)
        build_list = self.parse_build_info(reps_text)
        for build in build_list:
            # NOTE(review): placeholder log values (1, 2) -- probably meant
            # build attributes.
            logging.info("start fetch:%s, Kai Pan Shi jian:%s" % (1, 2))
            build.lpid = lpid
            for zh_nm in build.zh_nm_list:
                self.fetch_zh_nm_pid_data(build, *zh_nm)

    def fetch_zh_nm_pid_data(self, build, zh_nm, pid, name):
        """Fetch the room page for one building, faking the Referer header."""
        url = "http://www.hzfc365.com/house_view/lpxx-xs-2.jsp"\
              "?zh_nm=%s&pid=%s" % (zh_nm, pid)
        referer_url = "http://www.hzfc365.com/house_search/search_prj.jsp?lpid=%s" % build.lpid
        reps_text = self.http.download(url, {"Referer": referer_url})
        if self.debug:
            self._save_temp(reps_text, zh_nm)
        self._parse_room_info(reps_text, referer_url=url)

    def _parse_room_info(self, reps_text=None, cache_name = None, referer_url=None):
        """Extract the hidden zh_nm/sessionid fields, then fetch room details.

        If *cache_name* is given, the page is re-read from a temp cache file
        instead of using *reps_text*.
        """
        if cache_name:
            reps_text = self._read_temp(cache_name)
        #http://www.hzfc365.com/house_view/lpxx-xs-2.jsp?zh_nm=120620&pid=87401
        """http://www.hzfc365.com/house_view/lpxx-xs-2-yt.jsp?zh_nm=120618&q_area=&keytime=1288790113772&sessionid=2ECA879E33D7BECC3941443553DAD4FC"""
        """http://www.hzfc365.com/house_view/lpxx-xs-2-yt.jsp? 
zh_nm=120618& q_area=& keytime=1288790113772& //12887910169 sessionid=2ECA879E33D7BECC3941443553DAD4FC"""
        r_zh_nm = re.search('<input id="info_zh_nm" type="hidden" value="(\d+)">', reps_text).group(1)
        sessionid = re.search('<input id="sessionid" type="hidden" value="(\w+)">', reps_text).group(1)
        import time
        cur_time = time.time()
        logging.info("r_zh_nm=%s, sessionid=%s, time=%s" % (r_zh_nm, sessionid, cur_time))
        # NOTE(review): the sample keytime (1288790113772) is in milliseconds,
        # which would be time.time() * 1000; ``* 100`` looks off by 10x --
        # confirm against the site before changing.
        url = "http://www.hzfc365.com/house_view/lpxx-xs-2-yt.jsp?zh_nm=%s&q_area=&keytime=%s&sessionid=%s" % (r_zh_nm, cur_time * 100, sessionid)
        reps_text = self.http.download(url, {"Referer": referer_url})
        if self.debug:
            self._save_temp(reps_text, "d%s" % r_zh_nm)
        return self._parse_room_detail_info(reps_text)

    def _parse_room_detail_info(self, reps_text=None, cache_name=None):
        """Scrape each room's title attribute into RoomInfo records."""
        if cache_name:
            reps_text = self._read_temp(cache_name)
        regex = "title='([^']+)'"
        factor = re.compile(regex, re.I)
        data = []
        for item in factor.finditer(reps_text):
            logging.info("details:%s" % str(item.groups()))
            data.append(RoomInfo(*item.groups()))
        return data

    #<input id="info_zh_nm" type="hidden" value="120620">
    #<input id="sessionid" type="hidden" value="27D3C558B4A632ABFE27FC1B48ADAB44">
    def parse_build_info(self, reps_text=None, cache_name = None):
        """Scrape the 8-column building rows into BuidingInfo records."""
        if cache_name:
            reps_text = self._read_temp(cache_name)
        td = r"\s+<td[^>]+>(.*?)</td>"
        regex = r"<TR onmouseover=[^>]+><A [^>]+>%s</A>" % (td * 7)
        regex += "\s+<td[^>]+>(.*?)</td>"
        regex += "\s+</tr>"
        factor = re.compile(regex, re.I)
        #items = ( e.group(1) for e in factor.finditer(expr) )
        data = []
        for item in factor.finditer(reps_text):
            logging.info("data:%s" % str(item.groups()))
            data.append(BuidingInfo(*item.groups()))
        return data

    def _save_temp(self, data, name):
        # Debug helper: dump a response body to temp_cache_<name>.txt.
        fd = open("temp_cache_%s.txt" % name, "w")
        fd.write(data)
        fd.close()

    def _read_temp(self, name):
        # Debug helper: read back a cached response body.
        data = ""
        fd = open("temp_cache_%s.txt" % name, "r")
        data = fd.read()
        fd.close()
        return data
def __init__(self, url):
    """Remember *url* and create the HTTP client used to fetch it."""
    self.http_client = HTTPClient()
    self.url = url
def __init__(self, userId, password):
    """Store the account credentials and build the shared HTTP client."""
    self._http_client = HTTPClient()
    # Credentials used later by the login flow.
    self._userId = userId
    self._password = password
def ready(self):
    """One-time worker setup: logger, proxied HTTP client and action table."""
    self.logger = logging.getLogger("Spider")
    http = HTTPClient()
    http.set_proxy(settings.PROXY)
    self.http = http
    self._load_actions()
def make_request(client: HTTPClient, path, headers):
    """Issue a GET for *path* with *headers* through *client*.

    Returns the client's response so callers can inspect it; the original
    discarded it, which made the helper useful only for side effects.
    """
    return client.get(path, headers)
def __init__(self, username, password):
    """Keep the account credentials and create the shared HTTP client."""
    # Credentials consumed by the login flow.
    self._username = username
    self._password = password
    self._http_client = HTTPClient()
class SiteClient(object):
    """Client for center.qianlima.com: logs in with username/password and
    fetches company/search pages, mapping HTTP statuses to typed errors."""

    def __init__(self, username, password):
        # Credentials used by _do_login().
        self._username = username
        self._password = password
        self._http_client = HTTPClient()

    """+++++++++++++++++++login++++++++++++"""

    def _per_login(self):
        """Load the login page first (primes session cookies)."""
        login_page_url = "http://center.qianlima.com/login.jsp"
        response = self._http_client.get(login_page_url)
        return response

    def _do_login(self):
        """POST the login form; always reports success (no response check)."""
        form_data = {
            "username": self._username,
            "password": self._password,
            "rem_login": "******"
        }
        login_url = "http://center.qianlima.com/login_post.jsp?re_url=null"
        response = self._http_client.post(login_url, form_data)
        logging.info("cookie: %s" % response.cookies.get_dict())
        return True

    def login(self):
        """Run the two-step login; returns _do_login()'s result."""
        self._per_login()
        is_success = self._do_login()
        return is_success

    def _check_status(self, response):
        """Shared status handling for _verify_post/_verify_get.

        Returns *response* on 200; raises Error302/403/404 or
        ErrorStatusCode otherwise.  (Extracted to remove the duplicated
        if/elif ladder the two verify methods previously carried.)
        """
        if response.status_code == 200:
            pass
        elif response.status_code == 302:
            location = response.headers['Location']
            logging.debug("location: %s" % location)
            raise Error302()
        elif response.status_code == 403:
            raise Error403()
        elif response.status_code == 404:
            raise Error404()
        else:
            raise ErrorStatusCode()
        return response

    def _verify_post(self, url, data=None, json=None, **kwargs):
        """POST without following redirects; validate the status."""
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.post(url, data, json, **kwargs)
        return self._check_status(response)

    def _verify_get(self, url, **kwargs):
        """GET without following redirects; validate the status."""
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.get(url, **kwargs)
        return self._check_status(response)

    def get_company(self, url):
        """Fetch one company page."""
        response = self._verify_get(url)
        return response

    def get_search(self, url):
        """Fetch one search-result page."""
        response = self._verify_get(url)
        return response
def __init__(self, proxies):
    """Create the client; *proxies* is handed straight to HTTPClient."""
    self._http_client = HTTPClient(proxies=proxies)
def __init__(self):
    """Create the underlying HTTP client."""
    self._http_client = HTTPClient()
def __init__(self):
    """Create the underlying HTTP client (no credentials required)."""
    self._http_client = HTTPClient()
class BaseSiteClient(object):
    """Base class: config-driven HTTPClient wrapper with typed status errors
    and one retry.  NOTE: Python 2 syntax (``except E, err``).
    NOTE(review): mutable default ``proxies={}`` is shared across instances.
    """

    def __init__(self, config, proxies={}):
        self._config = config
        self._http_client = HTTPClient(proxies=proxies)
        self._user_agent = random.choice(self._config.user_agents)
        pass

    def _check_response(self, response):
        """Return *response* on 200; raise a typed error for other statuses."""
        if response.status_code == 200:
            logging.debug(response.headers)
            pass
        elif response.status_code == 302:
            location = response.headers['Location']
            logging.debug("location: %s" % location)
            raise Error302()
        elif response.status_code == 403:
            raise Error403()
        elif response.status_code == 404:
            raise Error404()
        elif response.status_code == 502:
            raise Error502()
        elif response.status_code == 503:
            raise Error503()
        else:
            raise ErrorStatusCode(response.status_code)
        return response

    def _verify_post(self, url, data=None, json=None, times=0, headers=None, timeout=None):
        """POST *url* with config defaults for headers/timeout.

        Error403 is never retried; HttpClientError is retried once
        (*times* counts attempts).
        NOTE(review): headers.update() mutates the config's default_headers
        dict when the default is used -- confirm that is intended.
        """
        if not headers:
            headers = self._config.default_headers
        if not timeout:
            timeout = self._config.default_timeout
        headers.update({
            'User-Agent': self._user_agent,
            # "Proxy-Authorization": self.get_authHeader()
        })
        try:
            response = self._http_client.post(url=url, data=data, json=json, headers=headers, timeout=timeout)
            return self._check_response(response)
        except Error403, err:
            raise err
        except HttpClientError, err:
            times += 1
            if times < 2:
                return self._verify_post(url, data=data, json=json, times=times, headers=headers, timeout=timeout)
            else:
                raise err
def __init__(self, config, proxies=None):
    """Initialise the client.

    :param config: configuration object providing ``user_agents``.
    :param proxies: optional proxy mapping; defaults to an empty dict.
        (A ``None`` sentinel replaces the original mutable default
        ``proxies={}``, which was shared across all calls.)
    """
    self._config = config
    self._http_client = HTTPClient(proxies=proxies if proxies is not None else {})
    # Pick one User-Agent per client instance.
    self._user_agent = random.choice(self._config.user_agents)
def __init__(self):
    """Create the crawler's HTTP client; debug mode is on by default."""
    self.debug = True  # debug flag consumed by the crawl methods
    self.http = HTTPClient()
class ItchIntegration(Plugin):
    """GOG Galaxy plugin for itch.io: cookie-based auth via an embedded
    browser step, then the api.itch.io profile/owned-keys endpoints."""

    def __init__(self, reader, writer, token):
        super().__init__(Platform.ItchIo, __version__, reader, writer, token)
        # Persists credentials back to Galaxy via store_credentials.
        self.http_client = HTTPClient(self.store_credentials)
        self.session_cookie = None

    async def shutdown(self):
        await self.http_client.close()

    # implement methods
    async def authenticate(self, stored_credentials=None):
        """Reuse stored cookies, or open the OAuth web-session step."""
        logging.debug("authenticate")
        confirmation_uri = 'https://itch.io/user/oauth?client_id=3821cecdd58ae1a920be15f6aa479f7e&scope=profile&response_type=token&redirect_uri=http%3A%2F%2F127.0.0.1%3A7157%2Fgogg2itchintegration'
        if not stored_credentials:
            return NextStep("web_session", {
                "window_title": "Log in to Itch.io",
                "window_width": 536,
                "window_height": 675,
                "start_uri": confirmation_uri,
                "end_uri_regex": r"^(http://127\.0\.0\.1:7157/gogg2itchintegration#access_token=.+)",
            }, js={
                # If itch.io lands on the feed (already logged in), jump
                # straight to the OAuth confirmation page.
                r'^https://itch\.io/my-feed.*': [f'window.location = "{confirmation_uri}"']
            })
        else:
            self.http_client.update_cookies(stored_credentials)
            try:
                user = await self.get_user_data()
                return Authentication(str(user.get("id")), str(user.get("username")))
            except AccessDenied:
                raise InvalidCredentials()

    async def pass_login_credentials(self, step, credentials, cookies):
        """Adopt the browser session cookies and confirm the profile."""
        session_cookies = {cookie['name']: cookie['value'] for cookie in cookies if cookie['name']}
        self.http_client.update_cookies(session_cookies)
        user = await self.get_user_data()
        # NOTE(review): logs the type of the *builtin* ``id`` -- probably
        # meant the user's id.
        logging.debug(type(id))
        logging.debug(user.get("id"))
        logging.debug(user.get("username"))
        return Authentication(str(user.get("id")), str(user.get("username")))

    async def get_owned_games(self):
        """Page through owned-keys until an empty page, collecting games."""
        page = 1
        games = []
        while True:
            try:
                resp = await self.http_client.get(
                    f"https://api.itch.io/profile/owned-keys?classification=game&page={page}"
                )
            except AuthenticationRequired:
                self.lost_authentication()
                raise
            if len(resp.get("owned_keys")) == 0:
                return games
            await self.parse_json_into_games(resp.get("owned_keys"), games)
            page += 1
        # NOTE(review): unreachable -- the loop only exits via the return above.
        return games

    async def get_user_data(self):
        """Fetch the itch.io profile; returns its 'user' object."""
        resp = await self.http_client.get(f"https://api.itch.io/profile?")
        self.authenticated = True
        return resp.get("user")

    async def parse_json_into_games(self, resp, games):
        """Convert owned-keys entries into Game records (appended to *games*).

        Optionally cross-checks each title against the GOG/IGDB lookup,
        sleeping to respect GOG_API_RATE_LIMIT_SECONDS between calls.
        """
        for key in resp:
            game = key.get("game")
            if not game.get("classification") == "game":
                continue
            game_name = game.get("title")
            game_num = str(game.get("id"))
            logging.debug('Parsed %s, %s', game_name, game_num)
            # Cache the raw game dict for get_os_compatibility().
            self.persistent_cache[game_num] = game
            this_game = Game(game_id=game_num, game_title=game_name,
                             license_info=LicenseInfo(LicenseType.SinglePurchase), dlcs=[])
            games.append(this_game)
            if CHECK_GAMES_AGAINST_IGDB:
                itch_id = game.get("id")
                game_gog_url = GOG_GAME_URL.format(itch_id)
                start_time = time.time()
                try:
                    json_response = await self.http_client.get(game_gog_url)
                    if "error" in json_response:
                        log_unknown_game(
                            "No IGDB data found for {} (itch.io game ID is {})"
                            .format(game_name, itch_id))
                except ClientResponseError as e:
                    log_unknown_game("No IGDB data found for {}: {}".format(
                        game_name, e))
                stop_time = time.time()
                elapsed_seconds = stop_time - start_time
                if elapsed_seconds <= GOG_API_RATE_LIMIT_SECONDS:
                    diff_seconds = GOG_API_RATE_LIMIT_SECONDS - elapsed_seconds
                    await asyncio.sleep(diff_seconds)

    async def get_os_compatibility(self, game_id, context):
        """Derive OS support flags from the cached game's 'traits' list.

        Returns None implicitly when the game is not cached or has no
        matching platform traits.
        """
        try:
            compat = self.persistent_cache[str(game_id)].get("traits")
            # `os` here shadows the stdlib module name -- local only.
            os = (OSCompatibility.Windows if "p_windows" in compat else OSCompatibility(0)) | (
                OSCompatibility.MacOS if "p_osx" in compat else OSCompatibility(0)) | (
                OSCompatibility.Linux if "p_linux" in compat else OSCompatibility(0))
            logging.debug("Compat value: %s", os)
            if not os == 0:
                return os
        except KeyError:
            logging.error("Key not found in cache: %s", game_id)
def __init__(self, proxies):
    """Client bound to *proxies* with a randomly chosen User-Agent."""
    self._http_client = HTTPClient(proxies=proxies)
    self._user_agent = random.choice(USER_AGENTS)
class ItchIntegration(Plugin):
    """GOG Galaxy plugin for itch.io using an OAuth access token captured
    from the embedded web-session redirect."""

    def __init__(self, reader, writer, token):
        super().__init__(Platform.ItchIo, __version__, reader, writer, token)
        # Persists credentials back to Galaxy via store_credentials.
        self.http_client = HTTPClient(self.store_credentials)
        self.session_cookie = None

    async def shutdown(self):
        await self.http_client.close()

    # implement methods
    async def authenticate(self, stored_credentials=None):
        """Reuse a stored access token, or open the OAuth web-session step."""
        logging.debug("authenticate")
        if not (stored_credentials.get("access_token") if stored_credentials else None):
            return NextStep(
                "web_session", {
                    "window_title": "Log in to Itch.io",
                    "window_width": 536,
                    "window_height": 675,
                    "start_uri": r"https://itch.io/user/oauth?client_id=3821cecdd58ae1a920be15f6aa479f7e&scope=profile&response_type=token&redirect_uri=http%3A%2F%2F127.0.0.1%3A7157%2Fgogg2itchintegration",
                    "end_uri_regex": r"^http://127\.0\.0\.1:7157/gogg2itchintegration#access_token=.+",
                })
        else:
            try:
                user = await self.get_user_data(
                    stored_credentials["access_token"])
                return Authentication(user["id"], user["username"])
            except AccessDenied:
                raise InvalidCredentials()

    async def pass_login_credentials(self, step: str, credentials: Dict[str, str], cookies: List[Dict[str, str]]) -> \
            Union[NextStep, Authentication]:
        """Extract the access token from the redirect URI and store it."""
        session_cookies = {
            cookie['name']: cookie['value']
            for cookie in cookies if cookie['name']
        }
        self.http_client.update_cookies(session_cookies)
        api_key = re.search(
            r"^http://127\.0\.0\.1:7157/gogg2itchintegration#access_token=(.+)",
            credentials["end_uri"])
        key = api_key.group(1)
        self.store_credentials({"access_token": key})
        user = await self.get_user_data(key)
        return Authentication(user["id"], user["username"])

    async def get_owned_games(self):
        """Page through owned-keys until an empty page, collecting games."""
        page = 1
        games = []
        while True:
            try:
                resp = await self.http_client.get(
                    f"https://api.itch.io/profile/owned-keys?page={page}")
            except AuthenticationRequired:
                self.lost_authentication()
                raise
            if len(resp.get("owned_keys")) == 0:
                return games
            self.parse_json_into_games(resp.get("owned_keys"), games)
            page += 1
        # NOTE(review): unreachable -- the loop only exits via the return above.
        return games

    async def get_user_data(self, api_key):
        """Fetch the itch.io profile; returns its 'user' object.

        NOTE(review): *api_key* is never used -- the f-string has no
        placeholder, so the request relies on session cookies only.
        Presumably the token should be appended; confirm against the API.
        """
        resp = await self.http_client.get(f"https://api.itch.io/profile?")
        self.authenticated = True
        return resp.get("user")

    @staticmethod
    def parse_json_into_games(resp, games):
        """Convert owned-keys entries into Game records (appended to *games*)."""
        for key in resp:
            game = key.get("game")
            if not game.get("classification") == "game":
                continue
            game_name = game.get("title")
            game_num = game.get("id")
            logging.debug('Parsed %s, %s', game_name, game_num)
            this_game = Game(game_id=game_num,
                             game_title=game_name,
                             license_info=LicenseInfo(
                                 LicenseType.SinglePurchase),
                             dlcs=[])
            games.append(this_game)
class Site(object):
    """Minimal site descriptor: workspace-rooted paths plus a hostname."""

    def __init__(self, ):
        # Root defaults to the current directory when WORKSPACE is unset.
        self.root = os.environ.get("WORKSPACE", ".")
        self.hostname = "fmei.sinaapp.com"

    def real_path(self, path):
        """Resolve *path* relative to the workspace root."""
        return os.path.join(self.root, path)


if __name__ == "__main__":
    # CLI entry point (Python 2 -- uses print statements):
    # generate the category tree for the product id given on argv.
    init_path()
    FORMAT = "%(asctime)s %(name)s T[%(thread)d]P[%(process)d] %(levelname)8s %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout)
    # Imported after init_path() so the bundled copy is found.
    from http_client import HTTPClient
    if len(sys.argv) == 2:
        pid = sys.argv[1]
        http = HTTPClient()
        task = GetTaokDetail(http)
        if os.environ.get("http_proxy", ""):
            http.set_proxy({'http': os.environ.get("http_proxy", "")})
        task.open_file(u"%s_cate_output.txt" % pid)
        data = task.generate_cate_tree(Site(), http, pid)
        task.close()
    else:
        print "python pgenerate_cate_tree <pid>"
    print "done"
class QianzhanClient(object):
    """Client for qiye.qianzhan.com with the login flow disabled (commented
    out); only captcha fetching/decoding remains active.
    NOTE: Python 2 syntax (``except Exception, e``)."""

    def __init__(self, userId, password):
        # Credentials are currently unused -- the login flow is commented out.
        # self._userId = userId
        # self._password = password
        self._http_client = HTTPClient()
        pass

    """+++++++++++++++++++login++++++++++++"""

    # def _per_login(self):
    #     login_page_url = "http://qiye.qianzhan.com/usercenter/login?ReturnUrl=http%3A%2F%2Fqiye.qianzhan.com%2F"
    #     response = self._http_client.get(login_page_url)
    #     return self._get_varifyimage(True)

    def _get_varifyimage(self, is_first=False):
        """Fetch the captcha image and return its decoded text (spaces stripped)."""
        if is_first:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        else:
            # Random query-string suffix defeats caching on retries.
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage?" + str(
                random.random())
        response = self._http_client.get(varifyimage_url)
        # logging.debug("verifyimage: %s" % response.content)
        # read_body_to_string is an external decoder (presumably OCR).
        varifycode = read_body_to_string(response.content)
        logging.debug("varifycode: %s" % varifycode.replace(' ', ''))
        return varifycode.replace(' ', '')

    # # def _do_login(self, varifycode, max_times=10):
    #     form_data = {
    #         "userId": self._userId,
    #         "password": self._password,
    #         "VerifyCode": varifycode,
    #         "sevenDays": "false"
    #     }
    #     login_url = "http://qiye.qianzhan.com/usercenter/dologin"
    #     response = self._http_client.post(login_url, form_data)
    #     logging.debug("text: %s" % response.text)
    #
    #     try:
    #         json_obj = json.loads(response.text)
    #     except Exception, e:
    #         json_obj = {"isSuccess": False, "sMsg": "is html return"}
    #         pass
    #
    #     logging.debug("json_obj: %s" % json_obj)
    #
    #     if not json_obj.get("isSuccess"):
    #         # print json_obj.get("sMsg")
    #         max_times -= 1
    #         if max_times > 0:
    #             varifycode = self._get_varifyimage()
    #             return self._do_login(varifycode, max_times)
    #         else:
    #             return False
    #     # print json_obj.get("sMsg")
    #     logging.info("cookie: %s" % response.cookies.get_dict())
    #     return True

    # def login(self):
    #     # print "++++++++++++++login+++++++++++++++++"
    #     varifycode = self._per_login()
    #     is_success = self._do_login(varifycode)
    #     return is_success

    '''++++++++++++++++++userverify+++++++++++++++++++'''

    def _pre_varify(self, url):
        """Touch *url* (errors ignored), then fetch a fresh captcha code.

        NOTE(review): the fetched ``response`` is discarded; the GET seems
        to exist only for its session side effects -- confirm.
        """
        try:
            response = self._http_client.get(url)
        except Exception, e:
            pass
        return self._get_varifyimage()