class QianzhanClient(object):
    """Client that logs in to qiye.qianzhan.com through its captcha-protected form.

    All requests go through a single ``HTTPClient`` instance created in the
    constructor.
    """

    def __init__(self, userId, password):
        """Store the credentials and create the HTTP client.

        :param userId: value sent as the ``userId`` form field on login.
        :param password: value sent as the ``password`` form field on login.
        """
        self._userId = userId
        self._password = password
        self._http_client = HTTPClient()

    # ------------------------------ login ------------------------------

    def _per_login(self):
        """Request the login page, then fetch and return the first captcha code."""
        login_page_url = "http://qiye.qianzhan.com/usercenter/login?ReturnUrl=http%3A%2F%2Fqiye.qianzhan.com%2F"
        # The response itself is not used; presumably this request only
        # establishes the session before the captcha is fetched -- confirm.
        self._http_client.get(login_page_url)
        return self._get_varifyimage(True)

    def _get_varifyimage(self, is_first=False):
        """Download the captcha and return its code with all spaces removed.

        :param is_first: when true the bare captcha URL is requested;
            otherwise a random number is appended as the query string.
        :returns: the result of ``read_body_to_string`` on the response body
            (helper defined elsewhere), with spaces stripped.
        """
        if is_first:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        else:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage?" + str(random.random())

        response = self._http_client.get(varifyimage_url)
        varifycode = read_body_to_string(response.content).replace(' ', '')
        logging.debug("varifycode: %s", varifycode)
        return varifycode

    def _do_login(self, varifycode, max_times=10):
        """Submit the login form, retrying with a fresh captcha on failure.

        :param varifycode: captcha code to send with the first attempt.
        :param max_times: maximum number of attempts; at least one attempt
            is always made, even when this is zero or negative.
        :returns: ``True`` as soon as the server reports ``isSuccess``,
            ``False`` once the attempts are used up.
        """
        login_url = "http://qiye.qianzhan.com/usercenter/dologin"
        attempts_left = max_times

        # Iterative retry instead of recursion: same attempt count, but no
        # growing call stack when max_times is large.
        while True:
            form_data = {
                "userId": self._userId,
                "password": self._password,
                "VerifyCode": varifycode,
                "sevenDays": "false",
            }
            response = self._http_client.post(login_url, form_data)
            logging.debug("text: %s", response.text)

            try:
                json_obj = json.loads(response.text)
            except (TypeError, ValueError):
                # The body was not JSON (e.g. an HTML page): treat as failure.
                json_obj = {"isSuccess": False, "sMsg": "is html return"}
            logging.debug("json_obj: %s", json_obj)

            # A JSON body that is not an object cannot carry "isSuccess".
            if isinstance(json_obj, dict) and json_obj.get("isSuccess"):
                logging.info("cookie: %s", response.cookies.get_dict())
                return True

            attempts_left -= 1
            if attempts_left <= 0:
                return False
            varifycode = self._get_varifyimage()
class SNL:
    '''Fetches and parses an article from the Store Norske Leksikon JSON API.'''

    def __init__(self, url):
        '''Remembers the article URL and creates the HTTP client.'''
        self.url = url
        self.http_client = HTTPClient()

    def get_api_url(self):
        '''Generates the API URL: the article URL with a ``.json`` suffix.'''
        if self.url.endswith('.json'):
            return self.url
        return self.url + '.json'

    def request(self):
        '''Requests the API URL and returns the HTTP client's response.'''
        return self.http_client.get(self.get_api_url(), {})

    @staticmethod
    def _parse_created_date(created_at):
        '''Returns the date part of a ``created_at`` value as a datetime.

        Only the text before the first ``T`` is parsed, so a date-only
        value (no time part) is accepted as well.
        '''
        date_part = created_at.split('T', 1)[0]
        return datetime.strptime(date_part, '%Y-%m-%d')

    def parse(self, json):
        '''Parses JSON from SNL API.

        ``json`` is the raw JSON text; the ``title``, ``created_at`` and
        ``authors`` keys are read from it. Returns a populated ``Website``.
        '''
        fields = loads(json)

        website = Website()
        website.id = 'web:%s' % self.url
        website.url = self.url
        website.title = fields['title']
        website.publication_date = self._parse_created_date(
            fields['created_at'])
        website.authors = [Author(family=author['full_name'], given=None)
                           for author in fields['authors']]
        website.name = 'Store Norske Leksikon'
        return website
class NDLA:
    '''Scrapes title, authors and publication date from an NDLA page.'''

    def __init__(self, url):
        '''Remembers the page URL; ``soup`` stays None until parse() runs.'''
        self.url = url
        self.soup = None
        self.http_client = HTTPClient()

    def request(self):
        '''Requests the page URL and returns the HTTP client's response.'''
        return self.http_client.get(self.url, {})

    def parse_title(self):
        '''Returns the ``content`` of the ``og:title`` element (or None).'''
        og_title = self.soup.find(attrs={'property': 'og:title'})
        return og_title.get('content')

    def parse_publication_date(self):
        '''Returns the date in the second space-separated token of the
        ``edit-dates`` element, parsed as ``dd.mm.YYYY,``.'''
        dates_text = self.soup.find(id='edit-dates').text
        # Exactly three pieces are required, as before; only the middle
        # one (the date followed by a comma) is used.
        _label, date_token, _rest = dates_text.split(' ', 2)
        return datetime.strptime(date_token, '%d.%m.%Y,')

    def parse_authors(self):
        '''Returns one Author per anchor inside the ``owner`` element.'''
        owner = self.soup.find(attrs={'class': 'owner'})
        authors = []
        for anchor in owner.findAll('a'):
            authors.append(Author(family=anchor.text, given=None))
        return authors

    def parse(self, html):
        '''Parses the page HTML and returns a populated Website.'''
        self.soup = BeautifulSoup(html, 'html.parser')

        website = Website()
        website.url = self.url
        website.id = 'web:%s' % self.url
        website.title = self.parse_title()
        website.authors = self.parse_authors()
        website.name = 'Nasjonal digital læringsarena'
        website.publication_date = self.parse_publication_date()
        return website
class General:
    '''Builds a Website from an arbitrary page using Newspaper.'''

    def __init__(self, url):
        '''Remembers the page URL and creates the HTTP client.'''
        self.url = url
        self.http_client = HTTPClient()

    def request(self):
        '''Requests and returns response.'''
        return self.http_client.get(self.url, {})

    def extract_with_newspaper(self, html):
        '''Parses HTML using Newspaper and returns the article's ``__dict__``.'''
        article = Article(self.url)
        article.set_html(html)

        # The filter must be installed inside catch_warnings(); otherwise it
        # is added to the global filter list and never removed again.
        with catch_warnings():
            filterwarnings('ignore', category=DeprecationWarning)
            article.parse()

        return article.__dict__

    def parse(self, html):
        '''Converts Newspaper fields into Website.'''
        fields = self.extract_with_newspaper(html)

        website = Website()
        website.id = 'web:%s' % fields['url']
        website.publication_date = fields['publish_date']
        website.url = fields['url']
        website.name = self.extract_site_name(fields['meta_data'])
        # An empty or otherwise falsy title is normalised to None.
        website.title = fields['title'] or None
        website.authors = [Author(family=name, given=None)
                           for name in fields['authors']]
        return website

    @staticmethod
    def extract_site_name(meta_data):
        '''Returns ``meta_data['og']['site_name']``, or None when the lookup
        fails with a TypeError or KeyError.'''
        try:
            return meta_data['og']['site_name']
        except (TypeError, KeyError):
            return None
def make_request(client: "HTTPClient", path, headers):
    """Issue a GET for ``path`` through ``client`` and return its result.

    :param client: object exposing ``get(path, headers)``.
    :param path: passed through to ``client.get`` unchanged.
    :param headers: passed through to ``client.get`` unchanged.
    :returns: whatever ``client.get`` returns. Previously the result was
        dropped and ``None`` was returned, so callers could not inspect it.
    """
    return client.get(path, headers)
class SiteClient(object):
    """Client for center.qianlima.com: form login plus status-checked requests."""

    def __init__(self, username, password):
        """Store the credentials and create the HTTP client.

        :param username: value sent as the ``username`` form field on login.
        :param password: value sent as the ``password`` form field on login.
        """
        self._username = username
        self._password = password
        self._http_client = HTTPClient()

    # ------------------------------ login ------------------------------

    def _per_login(self):
        """Request the login page and return the response."""
        login_page_url = "http://center.qianlima.com/login.jsp"
        return self._http_client.get(login_page_url)

    def _do_login(self):
        """Post the login form and log the resulting cookies.

        NOTE: the server's reply is not inspected, so this returns ``True``
        whenever the POST itself does not raise.
        """
        form_data = {
            "username": self._username,
            "password": self._password,
            "rem_login": "******",
        }
        login_url = "http://center.qianlima.com/login_post.jsp?re_url=null"
        response = self._http_client.post(login_url, form_data)
        logging.info("cookie: %s", response.cookies.get_dict())
        return True

    def login(self):
        """Load the login page, submit the form, and return ``_do_login``'s result."""
        self._per_login()
        return self._do_login()

    @staticmethod
    def _check_status(response):
        """Return ``response`` on HTTP 200, otherwise raise the matching error.

        :raises Error302: on a 302; the ``Location`` header (if any) is logged.
        :raises Error403: on a 403.
        :raises Error404: on a 404.
        :raises ErrorStatusCode: on any other status code.
        """
        status = response.status_code
        if status == 200:
            return response
        if status == 302:
            # .get() so that a missing header cannot replace Error302 with a
            # KeyError.
            logging.debug("location: %s", response.headers.get('Location'))
            raise Error302()
        if status == 403:
            raise Error403()
        if status == 404:
            raise Error404()
        raise ErrorStatusCode()

    def _verify_post(self, url, data=None, json=None, **kwargs):
        """POST to ``url`` and return the response if its status is 200.

        Redirects are not followed unless ``allow_redirects`` is passed
        explicitly. Raises as described in ``_check_status``.
        """
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.post(url, data, json, **kwargs)
        return self._check_status(response)

    def _verify_get(self, url, **kwargs):
        """GET ``url`` and return the response if its status is 200.

        Redirects are not followed unless ``allow_redirects`` is passed
        explicitly. Raises as described in ``_check_status``.
        """
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.get(url, **kwargs)
        return self._check_status(response)

    def get_company(self, url):
        """Fetch ``url`` with a status-checked GET and return the response."""
        return self._verify_get(url)

    def get_search(self, url):
        """Fetch ``url`` with a status-checked GET and return the response."""
        return self._verify_get(url)
class QianzhanClient(object):
    """Client that fetches captcha codes from qiye.qianzhan.com.

    NOTE: the credential-based login flow (``_per_login`` / ``_do_login`` /
    ``login``) is disabled in this version of the class. ``userId`` and
    ``password`` are still accepted by the constructor but are not stored.
    """

    def __init__(self, userId, password):
        """Create the HTTP client; the credentials are currently unused."""
        self._http_client = HTTPClient()

    # ------------------------------ login ------------------------------

    def _get_varifyimage(self, is_first=False):
        """Download the captcha and return its code with all spaces removed.

        :param is_first: when true the bare captcha URL is requested;
            otherwise a random number is appended as the query string.
        :returns: the result of ``read_body_to_string`` on the response body
            (helper defined elsewhere), with spaces stripped.
        """
        if is_first:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        else:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage?" + str(
                random.random())

        response = self._http_client.get(varifyimage_url)
        varifycode = read_body_to_string(response.content).replace(' ', '')
        logging.debug("varifycode: %s", varifycode)
        return varifycode

    # ---------------------------- userverify ----------------------------

    def _pre_varify(self, url):
        """Request ``url`` on a best-effort basis, then return a fresh captcha code.

        A failure of the first request does not abort the flow; it is logged
        and the captcha is fetched regardless.
        """
        try:
            self._http_client.get(url)
        except Exception:
            # Deliberately best-effort, but leave a trace instead of hiding it.
            logging.debug("pre-verify request to %s failed", url, exc_info=True)
        return self._get_varifyimage()