コード例 #1
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def rest(api_path, **kargs):
    """Call a Twitter REST endpoint and return the decoded JSON reply.

    The request is retried in a loop.  On HTTP 400 with an exhausted rate
    limit the call sleeps until the reset time announced in the
    ``X-RateLimit-Reset`` header; on HTTP 503 it sleeps for one minute.

    Args:
        api_path: endpoint path handed to ``buildpath``.
        **kargs: request parameters handed to ``buildpath``.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: for any failure that is neither a rate-limited 400 nor
            a 503.
    """
    host = 'api.twitter.com'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code == 400:
                remaining = int(
                    twe.resp.getheader('X-RateLimit-Remaining', 0))
                reset_at = twe.resp.getheader('X-RateLimit-Reset')
                if remaining >= 1 or reset_at is None:
                    # A 400 that is not a rate-limit rejection will fail
                    # again on every retry; surface it instead of spinning
                    # in a tight loop against the server.
                    raise
                # The reset moment may already be in the past; a negative
                # argument would make sleep() raise.
                sleeptime = max(int(reset_at) - int(time.time()), 0)
                logging.warning(
                    'Rate limits exceeded, retry after {0} sec'.format(
                        sleeptime))
                sleep(sleeptime)
            elif twe.code == 503:
                logging.warning('Service Unavailable. Retry after 1 min')
                sleep(60)
            else:
                raise
コード例 #2
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def search(**kargs):
    """Query the Twitter Search API and gather results across pages.

    ``result_type`` is always forced to ``'recent'``.  Pages are followed
    through the ``next_page`` field of each reply until none is present.

    Error handling:
      * HTTP 420 whose body mentions ``limited``: sleep for the number of
        seconds in the ``Retry-After`` header plus one, then retry.
      * HTTP 403 whose body mentions ``since_id``: rewrite the path with
        ``SINCEID_PATTERN`` (presumably dropping the ``since_id``
        parameter -- confirm against its definition) and retry.
      * Any other ``APIError``: stop and return what was collected so far.

    Args:
        **kargs: request parameters handed to ``buildpath``.

    Returns:
        A list with the ``results`` entries of every page fetched.
    """
    host = 'search.twitter.com'
    api_path = '/search.json'
    kargs['result_type'] = 'recent'
    path = buildpath(api_path, kargs)
    statuses = list()
    while True:
        try:
            resp = api_call(host, path, True)
            jresp = json.loads(resp.read())
            statuses.extend(jresp['results'])
            if 'next_page' in jresp:
                path = api_path + jresp['next_page']
                continue
        except APIError as twe:
            # Membership test instead of ``find(...) > 0`` so that a match
            # at the very start of the body is not ignored.
            if twe.code == 420 and 'limited' in twe.resp.read():
                sleeptime = int(twe.resp.getheader('Retry-After')) + 1
                logging.warning(
                    'Rate limits exceeded, retry after {0} sec'.format(
                        sleeptime))
                sleep(sleeptime)
                continue
            if twe.code == 403 and 'since_id' in twe.resp.read():
                stripped = SINCEID_PATTERN.sub(r'\1', path)
                # Retry only when the rewrite changed the request;
                # otherwise the same 403 would repeat forever.
                if stripped != path:
                    path = stripped
                    continue
            # Best effort: keep the partial result, but leave a trace so
            # the truncation is not silent.
            logging.warning(
                'Search stopped on API error {0}, returning {1} '
                'results'.format(twe.code, len(statuses)))
        break
    return statuses
コード例 #3
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def search(**kargs):
    """Query the Twitter Search API and gather results across pages.

    ``result_type`` is always forced to ``'recent'``.  Pages are followed
    through the ``next_page`` field of each reply until none is present.

    Error handling:
      * HTTP 420 whose body mentions ``limited``: sleep for the number of
        seconds in the ``Retry-After`` header plus one, then retry.
      * HTTP 403 whose body mentions ``since_id``: rewrite the path with
        ``SINCEID_PATTERN`` (presumably dropping the ``since_id``
        parameter -- confirm against its definition) and retry.
      * Any other ``APIError``: stop and return what was collected so far.

    Args:
        **kargs: request parameters handed to ``buildpath``.

    Returns:
        A list with the ``results`` entries of every page fetched.
    """
    host = 'search.twitter.com'
    api_path = '/search.json'
    kargs['result_type'] = 'recent'
    path = buildpath(api_path, kargs)
    statuses = list()
    while True:
        try:
            resp = api_call(host, path, True)
            jresp = json.loads(resp.read())
            statuses.extend(jresp['results'])
            if 'next_page' in jresp:
                path = api_path + jresp['next_page']
                continue
        except APIError as twe:
            # Membership test instead of ``find(...) > 0`` so that a match
            # at the very start of the body is not ignored.
            if twe.code == 420 and 'limited' in twe.resp.read():
                sleeptime = int(twe.resp.getheader('Retry-After')) + 1
                logging.warning(
                    'Rate limits exceeded, retry after {0} sec'.format(
                        sleeptime))
                sleep(sleeptime)
                continue
            if twe.code == 403 and 'since_id' in twe.resp.read():
                stripped = SINCEID_PATTERN.sub(r'\1', path)
                # Retry only when the rewrite changed the request;
                # otherwise the same 403 would repeat forever.
                if stripped != path:
                    path = stripped
                    continue
            # Best effort: keep the partial result, but leave a trace so
            # the truncation is not silent.
            logging.warning(
                'Search stopped on API error {0}, returning {1} '
                'results'.format(twe.code, len(statuses)))
        break
    return statuses
コード例 #4
0
ファイル: google_api.py プロジェクト: spacelis/tcrawl
def websearch(**kargs):
    """Google web search API
        http://code.google.com/apis/websearch/

    Fetches result pages one after another, sleeping a random
    (exponentially distributed) interval before every request, and
    collects the ``responseData.results`` entries.

    Paging stops when the reply's cursor is empty, when the last page
    listed in the cursor starts before the next offset, or once the
    offset exceeds 50.  On any error the function sleeps for an hour and
    retries the same page.

    Args:
        **kargs: request parameters handed to ``buildpath``.  ``rsz``
            defaults to 8 and ``start`` to 0 when not supplied.

    Returns:
        A list of all result entries gathered.
    """
    host = 'ajax.googleapis.com'
    api_path = '/ajax/services/search/web'
    sresults = list()
    if 'rsz' not in kargs:
        kargs['rsz'] = 8
    if 'start' not in kargs:
        kargs['start'] = 0
    while True:
        time.sleep(random.expovariate(0.05))
        path = buildpath(api_path, kargs)
        try:
            resp = api_call(host, path, True)
            # Read the body once: it feeds both the parser and the debug
            # preview.  Slicing the response object itself would raise and
            # send every successful fetch into the one-hour error sleep.
            body = resp.read()
            jresp = json.loads(body)
            print(body[:50])
            data = jresp['responseData']
            results = data['results']
            # Advance the offset only after the page proved usable, so a
            # failed page is retried rather than skipped.
            sresults.extend(results)
            kargs['start'] += kargs['rsz']
            cursor = data['cursor']
            if len(cursor) == 0:
                break
            # int() keeps the comparison numeric even if the API delivers
            # the page offset as a string -- TODO confirm payload type.
            elif int(cursor['pages'][-1]['start']) < kargs['start']:
                break
            if kargs['start'] > 50:
                break
        except APIError:
            print('Deception Failed!')
            time.sleep(3600)
        except StandardError:
            traceback.print_exc(file=sys.stdout)
            time.sleep(3600)
    return sresults
コード例 #5
0
ファイル: foursq_api.py プロジェクト: spacelis/tcrawl
def search(**kargs):
    """Query the Foursquare venue search endpoint and return the decoded JSON.

    On HTTP 403 the call logs a rate-limit warning, sleeps ten minutes and
    retries.  Any other ``APIError`` is propagated to the caller.

    Args:
        **kargs: request parameters handed to ``buildpath``.  Any
            ``oauth_token`` supplied by the caller is overwritten.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: when the API fails with a status other than 403.
    """
    host = 'api.foursquare.com'
    api_path = '/v2/venues/search'
    # NOTE(review): credential is hard-coded in source; consider loading it
    # from configuration instead.
    kargs['oauth_token'] = '31JU4VJBLV4SOFWMP2W13XIZEZDYIK5E3LH3PJ3TXTQY1HMF'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code != 403:
                # Previously this fell through to ``return ret`` with
                # ``ret`` never assigned, hiding the real failure behind
                # an UnboundLocalError.
                raise
            sleeptime = 600
            logging.warning(
                'Rate limits exceeded, retry after {0} sec'.format(
                    sleeptime))
            time.sleep(sleeptime)
コード例 #6
0
def search(**kargs):
    """Query the Foursquare venue search endpoint and return the decoded JSON.

    On HTTP 403 the call logs a rate-limit warning, sleeps ten minutes and
    retries.  Any other ``APIError`` is propagated to the caller.

    Args:
        **kargs: request parameters handed to ``buildpath``.  Any
            ``oauth_token`` supplied by the caller is overwritten.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: when the API fails with a status other than 403.
    """
    host = 'api.foursquare.com'
    api_path = '/v2/venues/search'
    # NOTE(review): credential is hard-coded in source; consider loading it
    # from configuration instead.
    kargs['oauth_token'] = '31JU4VJBLV4SOFWMP2W13XIZEZDYIK5E3LH3PJ3TXTQY1HMF'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code != 403:
                # Previously this fell through to ``return ret`` with
                # ``ret`` never assigned, hiding the real failure behind
                # an UnboundLocalError.
                raise
            sleeptime = 600
            logging.warning(
                'Rate limits exceeded, retry after {0} sec'.format(
                    sleeptime))
            time.sleep(sleeptime)
コード例 #7
0
def searchrequest(**kargs):
    """Run a Bing web search and return the list of web results.

    ``Web.Count`` is forced to 50 and ``Web.Offset`` to 0 before the
    request path is built.  The request is repeated, with a one-second
    pause before each attempt, until one attempt succeeds; every
    ``APIError`` is reported on stdout (traceback, status code, body).

    Args:
        **kargs: request parameters handed to ``buildpath``.

    Returns:
        A list holding the ``SearchResponse.Web.Results`` entries of the
        first successful reply.
    """
    kargs['Web.Count'] = 50
    kargs['Web.Offset'] = 0
    # The original also carried a disabled alternative host,
    # 'api.search.live.net'.
    request_path = buildpath('json.aspx', kargs)
    hits = []
    done = False
    while not done:
        try:
            time.sleep(1)
            reply = api_call2('api.bing.net', request_path, False)
            payload = json.loads(reply.read())
            web_section = payload['SearchResponse']['Web']
            hits.extend(web_section['Results'])
            done = True
        except APIError as failure:
            traceback.print_exc(file=sys.stdout)
            print(failure.code)
            print(failure.resp.read())
    return hits
コード例 #8
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def rest(api_path, **kargs):
    """Call a Twitter REST endpoint and return the decoded JSON reply.

    The request is retried in a loop.  On HTTP 400 with an exhausted rate
    limit the call sleeps until the reset time announced in the
    ``X-RateLimit-Reset`` header; on HTTP 503 it sleeps for one minute.

    Args:
        api_path: endpoint path handed to ``buildpath``.
        **kargs: request parameters handed to ``buildpath``.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: for any failure that is neither a rate-limited 400 nor
            a 503.
    """
    host = 'api.twitter.com'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code == 400:
                remaining = int(
                    twe.resp.getheader('X-RateLimit-Remaining', 0))
                reset_at = twe.resp.getheader('X-RateLimit-Reset')
                if remaining >= 1 or reset_at is None:
                    # A 400 that is not a rate-limit rejection will fail
                    # again on every retry; surface it instead of spinning
                    # in a tight loop against the server.
                    raise
                # The reset moment may already be in the past; a negative
                # argument would make sleep() raise.
                sleeptime = max(int(reset_at) - int(time.time()), 0)
                logging.warning(
                    'Rate limits exceeded, retry after {0} sec'.format(
                        sleeptime))
                sleep(sleeptime)
            elif twe.code == 503:
                logging.warning('Service Unavailable. Retry after 1 min')
                sleep(60)
            else:
                raise