class QianzhanClient(object):
    """Client that logs in to qiye.qianzhan.com, retrying on captcha failure.

    The flow implemented here is: open the login page, download a captcha
    image, pass its bytes to ``read_body_to_string`` (expected to return the
    recognised text), then post the credentials together with that text.
    """

    def __init__(self, userId, password):
        """Store the credentials and create the HTTP client used for requests."""
        self._userId = userId
        self._password = password
        self._http_client = HTTPClient()

    # +++++++++++++++++++ login ++++++++++++

    def _per_login(self):
        """Open the login page, then fetch and decode the first captcha.

        Returns:
            The decoded captcha text with all spaces removed.
        """
        login_page_url = "http://qiye.qianzhan.com/usercenter/login?ReturnUrl=http%3A%2F%2Fqiye.qianzhan.com%2F"
        # The page body is not used. NOTE(review): presumably this request
        # only exists to establish the session before the captcha is fetched.
        self._http_client.get(login_page_url)
        return self._get_varifyimage(True)

    def _get_varifyimage(self, is_first=False):
        """Download a captcha image and return its decoded text.

        Args:
            is_first: When true the bare captcha URL is requested; otherwise
                a random number is appended as the query string.

        Returns:
            The text produced by ``read_body_to_string`` with spaces removed.
        """
        varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        if not is_first:
            varifyimage_url += "?" + str(random.random())

        response = self._http_client.get(varifyimage_url)
        varifycode = read_body_to_string(response.content).replace(' ', '')
        logging.debug("varifycode: %s", varifycode)
        return varifycode

    def _do_login(self, varifycode, max_times=10):
        """Post the login form, fetching a new captcha after each failure.

        Args:
            varifycode: Decoded captcha text to send with the first attempt.
            max_times: Maximum number of attempts; at least one attempt is
                always made.

        Returns:
            True as soon as the server answers with a JSON object whose
            ``isSuccess`` is truthy, False once the attempts are used up.
        """
        login_url = "http://qiye.qianzhan.com/usercenter/dologin"

        while True:
            form_data = {
                "userId": self._userId,
                "password": self._password,
                "VerifyCode": varifycode,
                "sevenDays": "false"
            }
            response = self._http_client.post(login_url, form_data)
            logging.debug("text: %s", response.text)

            try:
                json_obj = json.loads(response.text)
            except (TypeError, ValueError):
                # Body was not JSON (e.g. an HTML page): count as a failure.
                json_obj = {"isSuccess": False, "sMsg": "is html return"}

            logging.debug("json_obj: %s", json_obj)

            # Valid JSON that is not an object (list, string, ...) cannot
            # carry the success flag, so it is treated as a failed attempt.
            if isinstance(json_obj, dict) and json_obj.get("isSuccess"):
                logging.info("cookie: %s", response.cookies.get_dict())
                return True

            max_times -= 1
            if max_times <= 0:
                return False
            varifycode = self._get_varifyimage()
Beispiel #2
0
class SNL:
    '''Builds a Website record for an article from the SNL JSON API.'''

    def __init__(self, url):
        '''Remembers the article URL and creates the HTTP client.'''
        self.url = url
        self.http_client = HTTPClient()

    def get_api_url(self):
        '''Returns the article URL, with '.json' appended unless already present.'''
        suffix = '.json'
        if self.url.endswith(suffix):
            return self.url
        return self.url + suffix

    def request(self):
        '''Performs a GET against the API URL and returns the response.'''
        return self.http_client.get(self.get_api_url(), {})

    def parse(self, json):
        '''Turns the JSON text returned by the SNL API into a Website.'''
        payload = loads(json)

        site = Website()
        site.id = 'web:%s' % self.url
        site.url = self.url
        site.title = payload['title']

        # 'created_at' is split on 'T'; only the part before it is parsed.
        day, _clock = payload['created_at'].split('T')
        site.publication_date = datetime.strptime(day, '%Y-%m-%d')

        contributors = []
        for entry in payload['authors']:
            contributors.append(Author(family=entry['full_name'], given=None))
        site.authors = contributors

        site.name = 'Store Norske Leksikon'
        return site
Beispiel #3
0
class NDLA:
    '''Builds a Website record from the HTML of an NDLA page.'''

    def __init__(self, url):
        '''Remembers the page URL; the parsed soup is filled in by parse().'''
        self.url = url
        self.soup = None
        self.http_client = HTTPClient()

    def request(self):
        '''Performs a GET against the page URL and returns the response.'''
        response = self.http_client.get(self.url, {})
        return response

    def parse_title(self):
        '''Returns the 'content' of the og:title element (None if unset).'''
        og_title = self.soup.find(attrs={'property': 'og:title'})
        return og_title.get('content', None)

    def parse_publication_date(self):
        '''Returns the date found as the second word of the 'edit-dates' text.'''
        pieces = self.soup.find(id='edit-dates').text.split(' ', 2)
        _label, raw_date, _tail = pieces
        # The format includes the trailing comma that follows the date.
        return datetime.strptime(raw_date, '%d.%m.%Y,')

    def parse_authors(self):
        '''Returns one Author per link inside the element with class 'owner'.'''
        owner = self.soup.find(attrs={'class': 'owner'})
        found = []
        for link in owner.findAll('a'):
            found.append(Author(family=link.text, given=None))
        return found

    def parse(self, html):
        '''Parses the HTML and returns the populated Website.'''
        self.soup = BeautifulSoup(html, 'html.parser')

        site = Website()
        site.url = self.url
        site.id = 'web:%s' % self.url
        site.title = self.parse_title()
        site.authors = self.parse_authors()
        site.name = 'Nasjonal digital læringsarena'
        site.publication_date = self.parse_publication_date()
        return site
Beispiel #4
0
class General:
    '''Extracts Website metadata from an arbitrary page using Newspaper.'''

    def __init__(self, url):
        '''Remembers the page URL and creates the HTTP client.'''
        self.url = url
        self.http_client = HTTPClient()

    def request(self):
        '''Requests and returns response.'''
        return self.http_client.get(self.url, {})

    def extract_with_newspaper(self, html):
        '''Parses HTML using Newspaper and returns the article's attributes.

        DeprecationWarnings raised while parsing are suppressed. The filter
        is installed inside catch_warnings() so that the process-wide
        warning configuration is restored once parsing is done.
        '''
        article = Article(self.url)
        article.set_html(html)
        with catch_warnings():
            filterwarnings('ignore', category=DeprecationWarning)
            article.parse()
        return vars(article)

    def parse(self, html):
        '''Converts Newspaper fields into Website.'''
        fields = self.extract_with_newspaper(html)
        website = Website()
        website.id = 'web:%s' % fields['url']
        website.publication_date = fields['publish_date']
        website.url = fields['url']
        website.name = self.extract_site_name(fields['meta_data'])
        # Falsy titles (e.g. an empty string) are stored as None.
        website.title = fields['title'] or None
        website.authors = [
            Author(family=name, given=None) for name in fields['authors']
        ]
        return website

    @staticmethod
    def extract_site_name(meta_data):
        '''Returns meta_data['og']['site_name'], or None when it is absent.

        None is also returned when meta_data, or its 'og' entry, is not
        subscriptable by key.
        '''
        try:
            return meta_data['og']['site_name']
        except (TypeError, KeyError):
            return None
Beispiel #5
0
def make_request(client: HTTPClient, path, headers):
    """Issue a GET for ``path`` through ``client`` and return its result.

    Args:
        client: Object whose ``get(path, headers)`` performs the request.
        path: Passed through unchanged as the first argument of ``get``.
        headers: Passed through unchanged as the second argument of ``get``.

    Returns:
        Whatever ``client.get`` returns. Callers that ignore the return
        value are unaffected.
    """
    return client.get(path, headers)
class SiteClient(object):
    """Client for center.qianlima.com: form login plus status-checked requests."""

    def __init__(self, username, password):
        """Store the credentials and create the HTTP client used for requests."""
        self._username = username
        self._password = password
        self._http_client = HTTPClient()

    # +++++++++++++++++++ login ++++++++++++

    def _per_login(self):
        """Request the login page and return the response."""
        login_page_url = "http://center.qianlima.com/login.jsp"
        return self._http_client.get(login_page_url)

    def _do_login(self):
        """Post the login form and log the cookies that come back.

        Returns:
            Always True. NOTE(review): the response is not inspected, so a
            rejected login is not detected here.
        """
        form_data = {
            "username": self._username,
            "password": self._password,
            "rem_login": "******"
        }
        login_url = "http://center.qianlima.com/login_post.jsp?re_url=null"
        response = self._http_client.post(login_url, form_data)
        logging.info("cookie: %s", response.cookies.get_dict())
        return True

    def login(self):
        """Load the login page, then submit the credentials.

        Returns:
            The result of ``_do_login``.
        """
        self._per_login()
        return self._do_login()

    @staticmethod
    def _check_status(response):
        """Return ``response`` if its status is 200, otherwise raise.

        Raises:
            Error302: status 302 (the Location header, if any, is logged).
            Error403: status 403.
            Error404: status 404.
            ErrorStatusCode: any other status.
        """
        status = response.status_code
        if status == 200:
            return response
        if status == 302:
            # .get() so that a redirect without a Location header still
            # surfaces as Error302 rather than as a KeyError.
            logging.debug("location: %s", response.headers.get('Location'))
            raise Error302()
        if status == 403:
            raise Error403()
        if status == 404:
            raise Error404()
        raise ErrorStatusCode()

    def _verify_post(self, url, data=None, json=None, **kwargs):
        """POST to ``url`` and return the response only if its status is 200.

        Redirects are not followed unless the caller passes
        ``allow_redirects`` explicitly. Raises the same errors as
        ``_check_status``.
        """
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.post(url, data, json, **kwargs)
        return self._check_status(response)

    def _verify_get(self, url, **kwargs):
        """GET ``url`` and return the response only if its status is 200.

        Redirects are not followed unless the caller passes
        ``allow_redirects`` explicitly. Raises the same errors as
        ``_check_status``.
        """
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.get(url, **kwargs)
        return self._check_status(response)

    def get_company(self, url):
        """Fetch ``url`` via ``_verify_get`` and return the response."""
        return self._verify_get(url)

    def get_search(self, url):
        """Fetch ``url`` via ``_verify_get`` and return the response."""
        return self._verify_get(url)
class QianzhanClient(object):
    """Captcha helper for qiye.qianzhan.com.

    This variant performs no login: the credentials passed to the
    constructor are accepted but not stored or used.
    """

    def __init__(self, userId, password):
        """Create the HTTP client; ``userId`` and ``password`` are ignored."""
        self._http_client = HTTPClient()

    def _get_varifyimage(self, is_first=False):
        """Download a captcha image and return its decoded text.

        Args:
            is_first: When true the bare captcha URL is requested; otherwise
                a random number is appended as the query string.

        Returns:
            The text produced by ``read_body_to_string`` with spaces removed.
        """
        varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        if not is_first:
            varifyimage_url += "?" + str(random.random())

        response = self._http_client.get(varifyimage_url)
        varifycode = read_body_to_string(response.content).replace(' ', '')
        logging.debug("varifycode: %s", varifycode)
        return varifycode

    # ++++++++++++++++++ userverify +++++++++++++++++++

    def _pre_varify(self, url):
        """Request ``url`` on a best-effort basis, then fetch a new captcha.

        A failure of the first request is logged and otherwise ignored; the
        captcha is fetched regardless.

        Returns:
            The decoded captcha text from ``_get_varifyimage``.
        """
        try:
            self._http_client.get(url)
        except Exception as e:
            # Deliberately best-effort: record the failure and carry on.
            logging.debug("pre-varify request to %s failed: %s", url, e)
        return self._get_varifyimage()