class QianzhanClient(object):
    def __init__(self, userId, password):
        self._userId = userId
        self._password = password
        self._http_client = HTTPClient()
        pass

    """+++++++++++++++++++login++++++++++++"""

    def _per_login(self):
        login_page_url = "http://qiye.qianzhan.com/usercenter/login?ReturnUrl=http%3A%2F%2Fqiye.qianzhan.com%2F"
        response = self._http_client.get(login_page_url)
        return self._get_varifyimage(True)

    def _get_varifyimage(self, is_first=False):
        if is_first:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        else:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage?" + str(random.random())

        response = self._http_client.get(varifyimage_url)
        # logging.debug("verifyimage: %s" % response.content)
        varifycode = read_body_to_string(response.content)
        logging.debug("varifycode: %s" % varifycode.replace(' ', ''))
        return varifycode.replace(' ', '')

    def _do_login(self, varifycode, max_times=10):
        form_data = {
            "userId": self._userId,
            "password": self._password,
            "VerifyCode": varifycode,
            "sevenDays": "false"
        }
        login_url = "http://qiye.qianzhan.com/usercenter/dologin"
        response = self._http_client.post(login_url, form_data)
        logging.debug("text: %s" % response.text)

        try:
            json_obj = json.loads(response.text)
        except Exception, e:
            json_obj = {"isSuccess": False, "sMsg": "is html return"}
            pass

        logging.debug("json_obj: %s" % json_obj)

        if not json_obj.get("isSuccess"):
            # print json_obj.get("sMsg")
            max_times -= 1
            if max_times > 0:
                varifycode = self._get_varifyimage()
                return self._do_login(varifycode, max_times)
            else:
                return False
        # print json_obj.get("sMsg")
        logging.info("cookie: %s" % response.cookies.get_dict())
        return True
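
A minimal driving sketch for the login flow above, assuming the class gains a thin login() wrapper like the commented-out one in the later QianzhanClient example; the credentials here are placeholders:

# hypothetical usage -- userId/password values are placeholders
client = QianzhanClient("someUserId", "somePassword")
varifycode = client._per_login()        # fetch the login page, then read the captcha
ok = client._do_login(varifycode)       # retries with a fresh captcha up to max_times
logging.info("login succeeded: %s", ok)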
 def __init__(self, proxies):
     # self._username = username
     # self._password = password
     self._http_client = HTTPClient(proxies=proxies)
     self._user_agent = random.choice(USER_AGENTS)
     self.credit_ticket = None
     self.currentTimeMillis = None
     self._detail_url = None
     self._index_1_url = None
     self._index_2_url = None
     self._search_list_url = None
     pass
Example #3
class Spider(Sailor):
    pass

    def ready(self):
        self.http = HTTPClient()
        self.http.set_proxy(settings.PROXY)
        self.logger = logging.getLogger("Spider")

        self._load_actions()

    def _load_actions(self, ):
        self.actions = settings.ACTIONS
        for k, v in self.actions.iteritems():
            self.actions[k] = self._init_actions(*v)

        self.actions['GET'] = self.http_get
        self.actions['POST'] = self.http_post

    def _init_actions(self, cls, param={}):
        cls = import_class(cls)

        if cls is None:
            raise RuntimeError, "Could not load crawler '%s'." % cls

        return cls(**param)

    def start(self, t):

        site = WebSite(t.header('Site'), "", "worker")

        next_task = new_task('worker')
        next_task.header('Site', t.header('Site'))

        for l in t.list_actions():
            action, url, save_as, args = self._parse_action(l, site)
            try:
                handler = self.actions.get(action, None)
                self.logger.debug("[%s] %s --> %s, args:%s" %
                                  (action, url, save_as, str(args)))
                if handler is not None:
                    handler(site, self.http, next_task, url, save_as, *args)
                else:
                    self.logger.error("not found sipder action:'%s'", action)

            except Exception, e:
                self.logger.exception(trackable(u"Exception on task '%s'" % e))
        self.http.close()

        next_task.status = 'waiting'
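
Judging from _load_actions and _init_actions above, settings.ACTIONS maps an action name to a (dotted class path, constructor-kwargs) pair, while GET and POST are bound directly to the HTTP helpers. A hedged sketch of such a settings module; the module paths and action names below are invented for illustration:

# settings.py -- illustrative only; class paths and action names are hypothetical
PROXY = {"http": "http://127.0.0.1:8080"}

ACTIONS = {
    # action name: (class path resolved by import_class, kwargs passed to its constructor)
    "PARSE_LIST": ("spider.actions.ListParser", {"encoding": "utf-8"}),
    "SAVE_PAGE":  ("spider.actions.PageSaver", {}),
}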
Example #5
class NDLA:

    def __init__(self, url):
        self.url = url
        self.soup = None
        self.http_client = HTTPClient()

    def request(self):
        return self.http_client.get(self.url, {})

    def parse_title(self):
        title = self.soup.find(attrs={'property': 'og:title'}).get('content', None)
        return title

    def parse_publication_date(self):
        label, date, rest = self.soup.find(id='edit-dates').text.split(' ', 2)
        publication_date = datetime.strptime(date, '%d.%m.%Y,')
        return publication_date

    def parse_authors(self):
        author_anchors = self.soup.find(attrs={'class': 'owner'}).findAll('a')
        authors = [Author(family=anchor.text, given=None) for anchor in author_anchors]
        return authors

    def parse(self, html):
        self.soup = BeautifulSoup(html, 'html.parser')

        website = Website()
        website.url = self.url
        website.id = 'web:%s' % self.url
        website.title = self.parse_title()
        website.authors = self.parse_authors()
        website.name = 'Nasjonal digital læringsarena'
        website.publication_date = self.parse_publication_date()
        return website
Example #6
class SNL:

    def __init__(self, url):
        self.url = url
        self.http_client = HTTPClient()

    def get_api_url(self):
        '''Generates the API URL.'''
        return self.url \
            if self.url.endswith('.json') else self.url + '.json'

    def request(self):
        '''Requests and returns response.'''
        api_url = self.get_api_url()
        return self.http_client.get(api_url, {})

    def parse(self, json):
        '''Parses JSON from SNL API.'''
        fields = loads(json)
        website = Website()
        website.id = 'web:%s' % self.url
        website.url = self.url
        website.title = fields['title']
        date, time = fields['created_at'].split('T')
        website.publication_date = datetime.strptime(date, '%Y-%m-%d')
        website.authors = [Author(family=author['full_name'], given=None) for author in fields['authors']]
        website.name = 'Store Norske Leksikon'
        return website
Example #7
 def __init__(self, query: str, max_item_count: int):
     self.http_client = HTTPClient()
     self.bearer_token = BEARER_TOKEN
     self.next_token = ''
     self.query = query
     self.max_item_count = max_item_count
     self.__lock = asyncio.Lock()
class SiteClient(object):
    def __init__(self, proxies):
        # self._username = username
        # self._password = password
        self._http_client = HTTPClient(proxies=proxies)

        pass

    def _verify_post(self, url, data=None, json=None, times=0):
        try:
            response = self._http_client.post(url, data, json)
            if response.status_code == 200:
                pass
            elif response.status_code == 302:
                location = response.headers['Location']
                logging.debug("location: %s" % location)
                raise Error302()
            elif response.status_code == 403:
                raise Error403()
            elif response.status_code == 404:
                raise Error404()
            elif response.status_code == 502:
                raise Error502()
            elif response.status_code == 503:
                raise Error503()
            else:
                raise ErrorStatusCode(response.status_code)
            return response

        except HttpClientError, err:
            times += 1
            if times < 3:
                return self._verify_post(url, data=data, json=json, times=times)
            else:
                raise err
Example #9
def get_captcha(path, group='emop'):
    fd = open(path, 'rb')
    
    data = base64.b64encode(fd.read())
    fd.close()
    
    client = HTTPClient()
    
    param = {'g': group, 'content': data}
    
    response = client.post_data("http://192.168.3.220:8000/route", param, {})
    #response = client.post_data("http://127.0.0.1:8925/route", param, {})
    print("the response : %s" % response)
    if response:    
        ret = json.loads(response)    
    else:
        ret = {}
        
    return ret
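
A hedged usage sketch for get_captcha: it posts the base64-encoded image to the solving service and returns the parsed JSON as a dict, so the key used below is an assumption about the service's response schema:

ret = get_captcha("varify.jpg")          # path to a previously saved captcha image (placeholder)
code = ret.get("result", "")             # "result" is a guess; use whatever key the service returns
logging.info("captcha answer: %s", code)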
Example #10
def get_captcha(path, group="emop"):
    fd = open(path, "r")

    data = base64.b64encode(fd.read())
    fd.close()

    client = HTTPClient()

    param = {"g": group, "content": data}

    response = client.post_data("http://42.120.43.111:8925/route", param, {})
    # response = client.post_data("http://127.0.0.1:8925/route", param, {})

    if response:
        ret = json.loads(response)
    else:
        ret = {}

    return ret
class SiteClient(object):
    def __init__(self):
        self._http_client = HTTPClient()
        pass

    def _verify_post(self,
                     url,
                     data=None,
                     json=None,
                     times=0,
                     headers=default_headers,
                     timeout=download_timeout):

        # headers.update({
        #     'User-Agent': self._user_agent,
        #     # "Proxy-Authorization": self.get_authHeader()
        # })

        try:
            response = self._http_client.post(url=url,
                                              data=data,
                                              json=json,
                                              headers=headers,
                                              timeout=timeout)
            if response.status_code == 200:
                logging.debug(response.headers)
                pass
            elif response.status_code == 302:
                location = response.headers['Location']
                logging.debug("location: %s" % location)
                raise Error302()
            elif response.status_code == 403:
                raise Error403()
            elif response.status_code == 404:
                raise Error404()
            elif response.status_code == 502:
                raise Error502()
            elif response.status_code == 503:
                raise Error503()
            else:
                raise ErrorStatusCode(response.status_code)
            return response
        except Error403, err:
            raise err
        except HttpClientError, err:
            times += 1
            if times < 2:
                return self._verify_post(url,
                                         data=data,
                                         json=json,
                                         times=times,
                                         headers=headers,
                                         timeout=timeout)
            else:
                raise err
Example #13
def demo():

    # Start Example TCP socket client
    for message in samplelist:  # see samplelist in /data_streams/samples.py
        SocketClient(message=message).start()

    # Start HTTP example client
    HTTPClient().start()

    # Start Example Random Number Stream
    DataStream(random_nr_config, random_nr).start()
Example #14
class General:

    def __init__(self, url):
        self.url = url
        self.http_client = HTTPClient()

    def request(self):
        '''Requests and returns response.'''
        return self.http_client.get(self.url, {})

    def extract_with_newspaper(self, html):
        '''Parses HTML using Newspaper.'''
        article = Article(self.url)
        article.set_html(html)
        filterwarnings('ignore', category=DeprecationWarning)
        with catch_warnings():
            article.parse()
        return article.__dict__

    def parse(self, html):
        '''Converts Newspaper fields into Website.'''
        fields = self.extract_with_newspaper(html)
        website = Website()
        website.id = 'web:%s' % fields['url']
        website.publication_date = fields['publish_date']
        website.url = fields['url']
        website.name = self.extract_site_name(fields['meta_data'])
        website.title = fields['title'] \
            if fields['title'] else None
        website.authors = [Author(family=name, given=None) \
            for name in fields['authors']]
        return website

    @staticmethod
    def extract_site_name(meta_data):
        try:
            return meta_data['og']['site_name']
        except (TypeError, KeyError):
            return None
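
A hedged usage sketch for the General scraper above; how the HTML body is pulled out of HTTPClient.get's return value is an assumption (a requests-style response with .text):

scraper = General("https://example.com/article")   # placeholder URL
response = scraper.request()
website = scraper.parse(response.text)             # assumes a requests-like response object
print("%s (%s)" % (website.title, website.publication_date))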
class SiteClient(object):
    def __init__(self, proxies={}):
        # self._username = username
        # self._password = password
        self._http_client = HTTPClient(proxies=proxies)
        self._user_agent = random.choice(USER_AGENTS)

        self.credit_ticket = None
        self.currentTimeMillis = None
        self._detail_url = None
        self._index_1_url = None
        self._index_2_url = None
        self._search_list_url = None

        self._qynb_detail_url = None
        pass

    # -----------------------------------------------get post------------------------------------------------------- #
    def _verify_post(self,
                     url,
                     data=None,
                     json=None,
                     times=0,
                     headers=default_headers,
                     timeout=download_timeout):

        headers.update({
            'User-Agent': self._user_agent,
            # "Proxy-Authorization": self.get_authHeader()
        })

        try:
            response = self._http_client.post(url=url,
                                              data=data,
                                              json=json,
                                              headers=headers,
                                              timeout=timeout)
            if response.status_code == 200:
                logging.debug(response.headers)
                pass
            elif response.status_code == 302:
                location = response.headers['Location']
                logging.debug("location: %s" % location)
                raise NeedrefreshProxyError()
            elif response.status_code == 403:
                raise Error403()
            elif response.status_code == 404:
                raise NeedrefreshProxyError()
            elif response.status_code == 502:
                raise NeedrefreshProxyError()
            elif response.status_code == 503:
                raise NeedrefreshProxyError()
            else:
                raise ErrorStatusCode(response.status_code)
            if response.content.find('繁访问错误页面') > 0:
                logging.info(
                    "---------------||||||||||||||||||||||||-------------")
                raise NeedrefreshProxyError()
            return response
        except Error403, err:
            raise err
        except HttpClientError, err:
            times += 1
            if times < 2:
                return self._verify_post(url,
                                         data=data,
                                         json=json,
                                         times=times,
                                         headers=headers,
                                         timeout=timeout)
            else:
                raise err
Example #16
#!/bin/python
# -*- coding: utf-8 -*-

import logging, os, sys

def init_path():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    lib_path = os.path.join(cur_dir, 'libs')
    sys.path.insert(0, lib_path)

if __name__ == "__main__":
    init_path()
    FORMAT = "%(asctime)s %(name)s T[%(thread)d]P[%(process)d] %(levelname)8s %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout)
    from http_client import HTTPClient
    
        
    http = HTTPClient()
    if os.environ.get("http_proxy", ""):
        http.set_proxy({'http': os.environ.get("http_proxy", "")})
    
    http.get_real_url("http://t.cn/aCAHu3")
    
Example #17
 def __init__(self, reader, writer, token):
     super().__init__(Platform.ItchIo, __version__, reader, writer, token)
     self.http_client = HTTPClient(self.store_credentials)
     self.session_cookie = None
Example #18
class SimpleCrawler(object):
    def __init__(self):
        self.http = HTTPClient()
        self.debug = True
        
    def start(self, lpid):
        url = "http://www.hzfc365.com/house_search/search_prj.jsp?lpid=%s" % lpid
        reps_text = self.http.download(url)
        if self.debug:
            self._save_temp(reps_text, lpid)
        build_list = self.parse_build_info(reps_text)
        for build in build_list:
            logging.info("start fetch:%s, Kai Pan Shi jian:%s" % (1, 2))
            build.lpid = lpid
            for zh_nm in build.zh_nm_list:
                self.fetch_zh_nm_pid_data(build, *zh_nm)
    
    def fetch_zh_nm_pid_data(self, build, zh_nm, pid, name):
        url = "http://www.hzfc365.com/house_view/lpxx-xs-2.jsp"\
              "?zh_nm=%s&pid=%s" % (zh_nm, pid)
        referer_url = "http://www.hzfc365.com/house_search/search_prj.jsp?lpid=%s" % build.lpid
        reps_text = self.http.download(url, {"Referer": referer_url})
        if self.debug:
            self._save_temp(reps_text, zh_nm)
        self._parse_room_info(reps_text, referer_url=url)
        
    def _parse_room_info(self, reps_text=None, cache_name = None, referer_url=None):
        if cache_name: reps_text = self._read_temp(cache_name)
        #http://www.hzfc365.com/house_view/lpxx-xs-2.jsp?zh_nm=120620&pid=87401
        """http://www.hzfc365.com/house_view/lpxx-xs-2-yt.jsp?zh_nm=120618&q_area=&keytime=1288790113772&sessionid=2ECA879E33D7BECC3941443553DAD4FC"""
        """http://www.hzfc365.com/house_view/lpxx-xs-2-yt.jsp?
        zh_nm=120618&
        q_area=&
        keytime=1288790113772& 
                //12887910169
        sessionid=2ECA879E33D7BECC3941443553DAD4FC"""
        r_zh_nm = re.search('<input id="info_zh_nm" type="hidden" value="(\d+)">', reps_text).group(1)
        sessionid = re.search('<input id="sessionid" type="hidden" value="(\w+)">', reps_text).group(1)
        import time
        cur_time = time.time()
        
        logging.info("r_zh_nm=%s, sessionid=%s, time=%s" % (r_zh_nm, sessionid, cur_time))
        url = "http://www.hzfc365.com/house_view/lpxx-xs-2-yt.jsp?zh_nm=%s&q_area=&keytime=%s&sessionid=%s" % (r_zh_nm, cur_time * 100, sessionid)
        
        reps_text = self.http.download(url, {"Referer": referer_url})
        if self.debug:
            self._save_temp(reps_text, "d%s" % r_zh_nm)
        return self._parse_room_detail_info(reps_text)     
                    
    def _parse_room_detail_info(self, reps_text=None, cache_name=None):
        if cache_name: reps_text = self._read_temp(cache_name)
        regex = "title='([^']+)'"
        factor = re.compile(regex, re.I)
        data = []
        for item in factor.finditer(reps_text):
            logging.info("details:%s" % str(item.groups()))
            data.append(RoomInfo(*item.groups()))
        return data
        #<input id="info_zh_nm" type="hidden" value="120620">
        #<input id="sessionid" type="hidden" value="27D3C558B4A632ABFE27FC1B48ADAB44">

                
        
    
    def parse_build_info(self, reps_text=None, cache_name = None):
        if cache_name: reps_text = self._read_temp(cache_name)
        td = r"\s+<td[^>]+>(.*?)</td>" 
        regex = r"<TR onmouseover=[^>]+><A [^>]+>%s</A>" % (td * 7)
        regex += "\s+<td[^>]+>(.*?)</td>"
        regex += "\s+</tr>"
        factor = re.compile(regex, re.I)
        #items = ( e.group(1) for e in factor.finditer(expr) )
        data = []
        for item in factor.finditer(reps_text):
            logging.info("data:%s" % str(item.groups()))
            data.append(BuidingInfo(*item.groups()))
        return data
            
    
    def _save_temp(self, data, name):
        fd = open("temp_cache_%s.txt" % name, "w")
        fd.write(data)
        fd.close()
        
    def _read_temp(self, name):
        data = ""
        fd = open("temp_cache_%s.txt" % name, "r")
        data = fd.read()
        fd.close()
        return data
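
A hedged run sketch for SimpleCrawler; the lpid value below is a placeholder for a real project id from the hzfc365 search page:

crawler = SimpleCrawler()
crawler.start("376")    # lpid is made up; with debug=True the raw pages are saved as temp_cache_*.txt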
Example #19
 def __init__(self, url):
     self.url = url
     self.http_client = HTTPClient()
 def __init__(self, userId, password):
     self._userId = userId
     self._password = password
     self._http_client = HTTPClient()
     pass
Example #21
 def ready(self):
     self.http = HTTPClient()
     self.http.set_proxy(settings.PROXY)
     self.logger = logging.getLogger("Spider")
     
     self._load_actions()
Example #22
def make_request(client: HTTPClient, path, headers):
    client.get(path, headers)
 def __init__(self, username, password):
     self._username = username
     self._password = password
     self._http_client = HTTPClient()
     pass
class SiteClient(object):
    def __init__(self, username, password):
        self._username = username
        self._password = password
        self._http_client = HTTPClient()
        pass

    """+++++++++++++++++++login++++++++++++"""

    def _per_login(self):
        login_page_url = "http://center.qianlima.com/login.jsp"
        response = self._http_client.get(login_page_url)
        return response

    def _do_login(self):
        form_data = {
            "username": self._username,
            "password": self._password,
            "rem_login": "******"
        }
        login_url = "http://center.qianlima.com/login_post.jsp?re_url=null"
        response = self._http_client.post(login_url, form_data)
        # logging.debug("text: %s" % response.text)

        # try:
        #     json_obj = json.loads(response.text)
        # except Exception, e:
        #     json_obj = {"isSuccess": False, "sMsg": "is html return"}
        #     pass
        #
        # logging.debug("json_obj: %s" % json_obj)
        #
        # if not json_obj.get("isSuccess"):
        #     return False
        logging.info("cookie: %s" % response.cookies.get_dict())
        return True

    def login(self):
        # print "++++++++++++++login+++++++++++++++++"
        self._per_login()
        is_success = self._do_login()
        return is_success

    def _verify_post(self, url, data=None, json=None, **kwargs):
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.post(url, data, json, **kwargs)
        if response.status_code == 200:
            pass
        elif response.status_code == 302:
            location = response.headers['Location']
            logging.debug("location: %s" % location)
            raise Error302()
        elif response.status_code == 403:
            raise Error403()
        elif response.status_code == 404:
            raise Error404()
        else:
            raise ErrorStatusCode()
        return response

    def _verify_get(self, url, **kwargs):
        kwargs.setdefault("allow_redirects", False)
        response = self._http_client.get(url, **kwargs)
        if response.status_code == 200:
            pass
        elif response.status_code == 302:
            location = response.headers['Location']
            logging.debug("location: %s" % location)
            raise Error302()
        elif response.status_code == 403:
            raise Error403()
        elif response.status_code == 404:
            raise Error404()
        else:
            raise ErrorStatusCode()
        return response

    def get_company(self, url):
        response = self._verify_get(url)
        return response

    def get_search(self, url):
        response = self._verify_get(url)
        return response
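
A hedged usage sketch for this SiteClient: the credentials are placeholders, and company_url stands in for a qianlima detail-page URL that is not shown here:

client = SiteClient("username", "password")     # placeholder credentials
if client.login():
    response = client.get_company(company_url)  # company_url: a company detail-page URL (not shown)
    logging.debug(response.text)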
    def __init__(self, proxies):
        # self._username = username
        # self._password = password
        self._http_client = HTTPClient(proxies=proxies)

        pass
 def __init__(self):
     self._http_client = HTTPClient()
     pass
Example #27
    def ready(self):
        self.http = HTTPClient()
        self.http.set_proxy(settings.PROXY)
        self.logger = logging.getLogger("Spider")

        self._load_actions()
 def __init__(self):
     # self._username = username
     # self._password = password
     self._http_client = HTTPClient()
     pass
Example #29
class BaseSiteClient(object):
    def __init__(self, config, proxies={}):
        self._config = config
        self._http_client = HTTPClient(proxies=proxies)
        self._user_agent = random.choice(self._config.user_agents)

        pass

    def _check_response(self, response):
        if response.status_code == 200:
            logging.debug(response.headers)
            pass
        elif response.status_code == 302:
            location = response.headers['Location']
            logging.debug("location: %s" % location)
            raise Error302()
        elif response.status_code == 403:
            raise Error403()
        elif response.status_code == 404:
            raise Error404()
        elif response.status_code == 502:
            raise Error502()
        elif response.status_code == 503:
            raise Error503()
        else:
            raise ErrorStatusCode(response.status_code)
        return response

    def _verify_post(self,
                     url,
                     data=None,
                     json=None,
                     times=0,
                     headers=None,
                     timeout=None):
        if not headers:
            headers = self._config.default_headers
        if not timeout:
            timeout = self._config.default_timeout

        headers.update({
            'User-Agent': self._user_agent,
            # "Proxy-Authorization": self.get_authHeader()
        })

        try:
            response = self._http_client.post(url=url,
                                              data=data,
                                              json=json,
                                              headers=headers,
                                              timeout=timeout)
            return self._check_response(response)
        except Error403, err:
            raise err
        except HttpClientError, err:
            times += 1
            if times < 2:
                return self._verify_post(url,
                                         data=data,
                                         json=json,
                                         times=times,
                                         headers=headers,
                                         timeout=timeout)
            else:
                raise err
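
A hedged caller-side sketch of the retry contract in _verify_post above: HttpClientError gets one silent retry inside the method, while Error403 is re-raised immediately so the caller can react (refresh_proxies below is a hypothetical helper, not part of the example):

try:
    response = client._verify_post(url, data=form_data)
except Error403:
    refresh_proxies()    # hypothetical: rotate to a fresh proxy pool before trying again
except HttpClientError:
    pass                 # already retried once inside _verify_post; skip this URL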
Example #30
    def __init__(self, config, proxies={}):
        self._config = config
        self._http_client = HTTPClient(proxies=proxies)
        self._user_agent = random.choice(self._config.user_agents)

        pass
Example #31
 def __init__(self):
     self.http = HTTPClient()
     self.debug = True
Example #32
class ItchIntegration(Plugin):
    def __init__(self, reader, writer, token):
        super().__init__(Platform.ItchIo, __version__, reader, writer, token)
        self.http_client = HTTPClient(self.store_credentials)
        self.session_cookie = None

    async def shutdown(self):
        await self.http_client.close()

    # implement methods
    async def authenticate(self, stored_credentials=None):
        logging.debug("authenticate")
        confirmation_uri = 'https://itch.io/user/oauth?client_id=3821cecdd58ae1a920be15f6aa479f7e&scope=profile&response_type=token&redirect_uri=http%3A%2F%2F127.0.0.1%3A7157%2Fgogg2itchintegration'
        if not stored_credentials:
            return NextStep("web_session", {
                "window_title":
                "Log in to Itch.io",
                "window_width":
                536,
                "window_height":
                675,
                "start_uri":
                confirmation_uri,
                "end_uri_regex":
                r"^(http://127\.0\.0\.1:7157/gogg2itchintegration#access_token=.+)",
            },
                            js={
                                r'^https://itch\.io/my-feed.*':
                                [f'window.location = "{confirmation_uri}"']
                            })
        else:
            self.http_client.update_cookies(stored_credentials)
            try:
                user = await self.get_user_data()
                return Authentication(str(user.get("id")),
                                      str(user.get("username")))
            except AccessDenied:
                raise InvalidCredentials()

    async def pass_login_credentials(self, step, credentials, cookies):
        session_cookies = {
            cookie['name']: cookie['value']
            for cookie in cookies if cookie['name']
        }
        self.http_client.update_cookies(session_cookies)

        user = await self.get_user_data()
        logging.debug(type(id))
        logging.debug(user.get("id"))
        logging.debug(user.get("username"))
        return Authentication(str(user.get("id")), str(user.get("username")))

    async def get_owned_games(self):
        page = 1
        games = []
        while True:
            try:
                resp = await self.http_client.get(
                    f"https://api.itch.io/profile/owned-keys?classification=game&page={page}"
                )
            except AuthenticationRequired:
                self.lost_authentication()
                raise
            if len(resp.get("owned_keys")) == 0:
                return games
            await self.parse_json_into_games(resp.get("owned_keys"), games)
            page += 1
        return games

    async def get_user_data(self):
        resp = await self.http_client.get(f"https://api.itch.io/profile?")
        self.authenticated = True
        return resp.get("user")

    async def parse_json_into_games(self, resp, games):
        for key in resp:
            game = key.get("game")
            if not game.get("classification") == "game":
                continue
            game_name = game.get("title")
            game_num = str(game.get("id"))
            logging.debug('Parsed %s, %s', game_name, game_num)
            self.persistent_cache[game_num] = game
            this_game = Game(game_id=game_num,
                             game_title=game_name,
                             license_info=LicenseInfo(
                                 LicenseType.SinglePurchase),
                             dlcs=[])
            games.append(this_game)

            if CHECK_GAMES_AGAINST_IGDB:
                itch_id = game.get("id")
                game_gog_url = GOG_GAME_URL.format(itch_id)
                start_time = time.time()

                try:
                    json_response = await self.http_client.get(game_gog_url)
                    if "error" in json_response:
                        log_unknown_game(
                            "No IGDB data found for {} (itch.io game ID is {})"
                            .format(game_name, itch_id))

                except ClientResponseError as e:
                    log_unknown_game("No IGDB data found for {}: {}".format(
                        game_name, e))

                stop_time = time.time()
                elapsed_seconds = stop_time - start_time
                if elapsed_seconds <= GOG_API_RATE_LIMIT_SECONDS:
                    diff_seconds = GOG_API_RATE_LIMIT_SECONDS - elapsed_seconds
                    await asyncio.sleep(diff_seconds)

    async def get_os_compatibility(self, game_id, context):
        try:
            compat = self.persistent_cache[str(game_id)].get("traits")
            os = (OSCompatibility.Windows
                  if "p_windows" in compat else OSCompatibility(0)) | (
                      OSCompatibility.MacOS if "p_osx" in compat else
                      OSCompatibility(0)) | (OSCompatibility.Linux if "p_linux"
                                             in compat else OSCompatibility(0))
            logging.debug("Compat value: %s", os)
            if not os == 0:
                return os
        except KeyError:
            logging.error("Key not found in cache: %s", game_id)
Example #33
 def __init__(self, proxies):
     self._http_client = HTTPClient(proxies=proxies)
     self._user_agent = random.choice(USER_AGENTS)
     pass
Example #34
class ItchIntegration(Plugin):
    def __init__(self, reader, writer, token):
        super().__init__(Platform.ItchIo, __version__, reader, writer, token)
        self.http_client = HTTPClient(self.store_credentials)
        self.session_cookie = None

    async def shutdown(self):
        await self.http_client.close()

    # implement methods
    async def authenticate(self, stored_credentials=None):
        logging.debug("authenticate")
        if not (stored_credentials.get("access_token")
                if stored_credentials else None):
            return NextStep(
                "web_session", {
                    "window_title":
                    "Log in to Itch.io",
                    "window_width":
                    536,
                    "window_height":
                    675,
                    "start_uri":
                    r"https://itch.io/user/oauth?client_id=3821cecdd58ae1a920be15f6aa479f7e&scope=profile&response_type=token&redirect_uri=http%3A%2F%2F127.0.0.1%3A7157%2Fgogg2itchintegration",
                    "end_uri_regex":
                    r"^http://127\.0\.0\.1:7157/gogg2itchintegration#access_token=.+",
                })
        else:
            try:
                user = await self.get_user_data(
                    stored_credentials["access_token"])

                return Authentication(user["id"], user["username"])
            except AccessDenied:
                raise InvalidCredentials()


    async def pass_login_credentials(self, step: str, credentials: Dict[str, str], cookies: List[Dict[str, str]]) -> \
            Union[NextStep, Authentication]:
        session_cookies = {
            cookie['name']: cookie['value']
            for cookie in cookies if cookie['name']
        }
        self.http_client.update_cookies(session_cookies)
        api_key = re.search(
            r"^http://127\.0\.0\.1:7157/gogg2itchintegration#access_token=(.+)",
            credentials["end_uri"])
        key = api_key.group(1)
        self.store_credentials({"access_token": key})

        user = await self.get_user_data(key)
        return Authentication(user["id"], user["username"])

    async def get_owned_games(self):
        page = 1
        games = []
        while True:
            try:
                resp = await self.http_client.get(
                    f"https://api.itch.io/profile/owned-keys?page={page}")
            except AuthenticationRequired:
                self.lost_authentication()
                raise
            if len(resp.get("owned_keys")) == 0:
                return games
            self.parse_json_into_games(resp.get("owned_keys"), games)
            page += 1
        return games

    async def get_user_data(self, api_key):
        resp = await self.http_client.get(f"https://api.itch.io/profile?")
        self.authenticated = True
        return resp.get("user")

    @staticmethod
    def parse_json_into_games(resp, games):
        for key in resp:
            game = key.get("game")
            if not game.get("classification") == "game":
                continue
            game_name = game.get("title")
            game_num = game.get("id")
            logging.debug('Parsed %s, %s', game_name, game_num)
            this_game = Game(game_id=game_num,
                             game_title=game_name,
                             license_info=LicenseInfo(
                                 LicenseType.SinglePurchase),
                             dlcs=[])
            games.append(this_game)
Example #35
class Site(object):
    def __init__(self, ):
        self.root = os.environ.get("WORKSPACE", ".")
        self.hostname = "fmei.sinaapp.com"
        
    def real_path(self, path):
        return os.path.join(self.root, path)
    
if __name__ == "__main__":
    init_path()
    FORMAT = "%(asctime)s %(name)s T[%(thread)d]P[%(process)d] %(levelname)8s %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout)
    from http_client import HTTPClient
    
    if len(sys.argv) == 2:
        pid = sys.argv[1]
        
        http = HTTPClient()
        task = GetTaokDetail(http)
        if os.environ.get("http_proxy", ""):
            http.set_proxy({'http': os.environ.get("http_proxy", "")})
        task.open_file(u"%s_cate_output.txt" % pid)
        data = task.generate_cate_tree(Site(), http, pid)
        task.close()
    else:
        print "python pgenerate_cate_tree <pid>"

    print "done"
    
    
class QianzhanClient(object):
    def __init__(self, userId, password):
        # self._userId = userId
        # self._password = password
        self._http_client = HTTPClient()
        pass

    """+++++++++++++++++++login++++++++++++"""

    # def _per_login(self):
    #     login_page_url = "http://qiye.qianzhan.com/usercenter/login?ReturnUrl=http%3A%2F%2Fqiye.qianzhan.com%2F"
    #     response = self._http_client.get(login_page_url)
    #     return self._get_varifyimage(True)

    def _get_varifyimage(self, is_first=False):
        if is_first:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage"
        else:
            varifyimage_url = "http://qiye.qianzhan.com/usercenter/varifyimage?" + str(
                random.random())

        response = self._http_client.get(varifyimage_url)
        # logging.debug("verifyimage: %s" % response.content)
        varifycode = read_body_to_string(response.content)
        logging.debug("varifycode: %s" % varifycode.replace(' ', ''))
        return varifycode.replace(' ', '')

    #
    # def _do_login(self, varifycode, max_times=10):
    #     form_data = {
    #         "userId": self._userId,
    #         "password": self._password,
    #         "VerifyCode": varifycode,
    #         "sevenDays": "false"
    #     }
    #     login_url = "http://qiye.qianzhan.com/usercenter/dologin"
    #     response = self._http_client.post(login_url, form_data)
    #     logging.debug("text: %s" % response.text)
    #
    #     try:
    #         json_obj = json.loads(response.text)
    #     except Exception, e:
    #         json_obj = {"isSuccess": False, "sMsg": "is html return"}
    #         pass
    #
    #     logging.debug("json_obj: %s" % json_obj)
    #
    #     if not json_obj.get("isSuccess"):
    #         # print json_obj.get("sMsg")
    #         max_times -= 1
    #         if max_times > 0:
    #             varifycode = self._get_varifyimage()
    #             return self._do_login(varifycode, max_times)
    #         else:
    #             return False
    #     # print json_obj.get("sMsg")
    #     logging.info("cookie: %s" % response.cookies.get_dict())
    #     return True

    # def login(self):
    #     # print "++++++++++++++login+++++++++++++++++"
    #     varifycode = self._per_login()
    #     is_success = self._do_login(varifycode)
    #     return is_success
    '''++++++++++++++++++userverify+++++++++++++++++++'''

    def _pre_varify(self, url):
        try:
            response = self._http_client.get(url)
        except Exception, e:
            pass
        return self._get_varifyimage()