コード例 #1
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_twit_pic(**kargs):
    """Download the photo shown on a TwitPic page.

    The page at ``kargs["url"]`` is fetched, the ``src`` value of the
    ``photo-display`` image tag is cut out of the markup, and that URL is
    fetched in turn.

    Args:
        **kargs: must contain ``url``, the address of the TwitPic page.

    Returns:
        Whatever ``read()`` yields for the image request (presumably the
        raw image data -- confirm against ``api_call``).

    Raises:
        KeyError: if ``url`` is not supplied.
        ValueError: if the image tag or its closing quote is not found.
    """
    marker = '<img class="photo" id="photo-display" src="'
    html = api_call(*urlsplit(kargs["url"])).read()
    # The image URL sits between the end of the marker and the next quote.
    url_begin = html.index(marker) + len(marker)
    url_end = html.index('"', url_begin)
    image_response = api_call(*urlsplit(html[url_begin:url_end]))
    return image_response.read()
コード例 #2
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_twit_pic(**kargs):
    """Download the photo shown on a TwitPic page.

    The page at ``kargs['url']`` is fetched, the ``src`` value of the
    ``photo-display`` image tag is cut out of the markup, and that URL is
    fetched in turn.

    Args:
        **kargs: must contain ``url``, the address of the TwitPic page.

    Returns:
        Whatever ``read()`` yields for the image request (presumably the
        raw image data -- confirm against ``api_call``).

    Raises:
        KeyError: if ``url`` is not supplied.
        ValueError: if the image tag or its closing quote is not found.
    """
    marker = '<img class="photo" id="photo-display" src="'
    html = api_call(*urlsplit(kargs['url'])).read()
    # The image URL sits between the end of the marker and the next quote.
    url_begin = html.index(marker) + len(marker)
    url_end = html.index('"', url_begin)
    image_response = api_call(*urlsplit(html[url_begin:url_end]))
    return image_response.read()
コード例 #3
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_tweetphoto_pic(**kargs):
    """Retrieve the picture from TweetPhoto or Plixi.com.

    The page at ``kargs["url"]`` is fetched and the image URL is taken to
    be the quoted value that ends exactly where the ``alt="" id="photo"``
    marker begins; that URL is then fetched.

    Args:
        **kargs: must contain ``url``, the address of the photo page.

    Returns:
        Whatever ``read()`` yields for the image request (presumably the
        raw image data -- confirm against ``api_call``).

    Raises:
        KeyError: if ``url`` is not supplied.
        ValueError: if the photo marker, or the quote opening the URL
            before it, is not present in the page.
    """
    pic_page = api_call(*urlsplit(kargs["url"])).read()
    anchor = '" alt="" id="photo"'
    # index()/rindex() instead of find()/rfind(): the latter return -1 on a
    # miss, which would silently slice a garbage "URL" out of the page and
    # request it.  Failing loudly matches get_twit_pic's behaviour.
    end = pic_page.index(anchor)
    start = pic_page.rindex('"', 0, end) + 1
    imgurl = pic_page[start:end]
    return api_call(*urlsplit(imgurl)).read()
コード例 #4
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_tweetphoto_pic(**kargs):
    """Retrieve the picture from TweetPhoto or Plixi.com.

    The page at ``kargs['url']`` is fetched and the image URL is taken to
    be the quoted value that ends exactly where the ``alt="" id="photo"``
    marker begins; that URL is then fetched.

    Args:
        **kargs: must contain ``url``, the address of the photo page.

    Returns:
        Whatever ``read()`` yields for the image request (presumably the
        raw image data -- confirm against ``api_call``).

    Raises:
        KeyError: if ``url`` is not supplied.
        ValueError: if the photo marker, or the quote opening the URL
            before it, is not present in the page.
    """
    pic_page = api_call(*urlsplit(kargs['url'])).read()
    anchor = '" alt="" id="photo"'
    # index()/rindex() instead of find()/rfind(): the latter return -1 on a
    # miss, which would silently slice a garbage "URL" out of the page and
    # request it.  Failing loudly matches get_twit_pic's behaviour.
    end = pic_page.index(anchor)
    start = pic_page.rindex('"', 0, end) + 1
    imgurl = pic_page[start:end]
    return api_call(*urlsplit(imgurl)).read()
コード例 #5
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def rest(api_path, **kargs):
    """Call a Twitter REST endpoint and return the decoded JSON body.

    The request is repeated until it succeeds.  Two failures are treated as
    transient: HTTP 400 with no rate-limit quota remaining (sleep until the
    advertised reset time) and HTTP 503 (sleep one minute).

    Args:
        api_path: endpoint path, handed to ``buildpath``.
        **kargs: query parameters, handed to ``buildpath``.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: for HTTP 400 when quota is still available or no reset
            time is advertised, and for any status other than 400 / 503.
    """
    host = 'api.twitter.com'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code == 400:
                # A missing header is treated as "no quota left".
                remaining = int(
                    twe.resp.getheader('X-RateLimit-Remaining', 0))
                if remaining >= 1:
                    # Quota is available, so this is a genuine bad request.
                    # Re-sending the identical request would spin forever.
                    raise
                reset_at = twe.resp.getheader('X-RateLimit-Reset')
                if reset_at is None:
                    # Nothing tells us how long to wait; surface the error
                    # rather than crash on int(None).
                    raise
                # Clamp at zero: the reset time may already have passed.
                sleeptime = max(0, int(reset_at) - int(time.time()))
                logging.warning(
                    'Rate limits exceeded, retry after {0} sec'.format(
                        sleeptime))
                sleep(sleeptime)
            elif twe.code == 503:
                logging.warning('Service Unavailable. Retry after 1 min')
                sleep(60)
            else:
                # Bare raise keeps the original traceback.
                raise
コード例 #6
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def search(**kargs):
    """Query the Twitter Search API and collect every page of results.

    ``result_type`` is forced to ``'recent'``.  Pages are followed through
    the ``next_page`` field of each response until none is returned.

    Error handling (best effort -- partial results are returned):
      * HTTP 420 whose body mentions ``limited``: sleep for ``Retry-After``
        plus one second, then retry the same page.
      * HTTP 403 whose body mentions ``since_id``: rewrite the path with
        ``SINCEID_PATTERN`` and retry.
      * Any other API error: log it and return what was collected so far.

    Args:
        **kargs: query parameters, handed to ``buildpath``.

    Returns:
        A list of the items found under ``results`` in each response.
    """
    host = 'search.twitter.com'
    api_path = '/search.json'
    kargs['result_type'] = 'recent'
    path = buildpath(api_path, kargs)
    statuses = list()
    while True:
        try:
            resp = api_call(host, path, True)
            jresp = json.loads(resp.read())
            statuses.extend(jresp['results'])
            if 'next_page' in jresp:
                path = api_path + jresp['next_page']
                continue
        except APIError as twe:
            if twe.code == 420:
                # Membership test, not find() > 0, so a match at offset 0
                # is not missed.
                if 'limited' in twe.resp.read():
                    sleeptime = int(twe.resp.getheader('Retry-After')) + 1
                    logging.warning(
                        'Rate limits exceeded, retry after {0} sec'.format(
                            sleeptime))
                    sleep(sleeptime)
                    continue
            elif twe.code == 403:
                if 'since_id' in twe.resp.read():
                    retry_path = SINCEID_PATTERN.sub(r'\1', path)
                    # Only retry if the rewrite changed the request;
                    # otherwise the same 403 would repeat forever.
                    if retry_path != path:
                        path = retry_path
                        continue
            # Not recoverable here: record why the crawl stopped early
            # instead of silently returning a short result list.
            logging.warning(
                'Search stopped on API error {0}; returning {1} '
                'statuses collected so far'.format(twe.code, len(statuses)))
        break
    return statuses
コード例 #7
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def search(**kargs):
    """Query the Twitter Search API and collect every page of results.

    ``result_type`` is forced to ``'recent'``.  Pages are followed through
    the ``next_page`` field of each response until none is returned.

    Error handling (best effort -- partial results are returned):
      * HTTP 420 whose body mentions ``limited``: sleep for ``Retry-After``
        plus one second, then retry the same page.
      * HTTP 403 whose body mentions ``since_id``: rewrite the path with
        ``SINCEID_PATTERN`` and retry.
      * Any other API error: log it and return what was collected so far.

    Args:
        **kargs: query parameters, handed to ``buildpath``.

    Returns:
        A list of the items found under ``results`` in each response.
    """
    host = 'search.twitter.com'
    api_path = '/search.json'
    kargs['result_type'] = 'recent'
    path = buildpath(api_path, kargs)
    statuses = list()
    while True:
        try:
            resp = api_call(host, path, True)
            jresp = json.loads(resp.read())
            statuses.extend(jresp['results'])
            if 'next_page' in jresp:
                path = api_path + jresp['next_page']
                continue
        except APIError as twe:
            if twe.code == 420:
                # Membership test, not find() > 0, so a match at offset 0
                # is not missed.
                if 'limited' in twe.resp.read():
                    sleeptime = int(twe.resp.getheader('Retry-After')) + 1
                    logging.warning(
                        'Rate limits exceeded, retry after {0} sec'.format(
                            sleeptime))
                    sleep(sleeptime)
                    continue
            elif twe.code == 403:
                if 'since_id' in twe.resp.read():
                    retry_path = SINCEID_PATTERN.sub(r'\1', path)
                    # Only retry if the rewrite changed the request;
                    # otherwise the same 403 would repeat forever.
                    if retry_path != path:
                        path = retry_path
                        continue
            # Not recoverable here: record why the crawl stopped early
            # instead of silently returning a short result list.
            logging.warning(
                'Search stopped on API error {0}; returning {1} '
                'statuses collected so far'.format(twe.code, len(statuses)))
        break
    return statuses
コード例 #8
0
ファイル: google_api.py プロジェクト: spacelis/tcrawl
def websearch(**kargs):
    """Google web search API
        http://code.google.com/apis/websearch/

    Pages through the results, sleeping a random (exponentially
    distributed) interval before every request.  Paging stops when the
    response carries an empty cursor, when the next offset lies beyond the
    last page listed in the cursor, or once the offset exceeds 50.

    Any failure while fetching or decoding a page is reported on stdout and
    the same page is retried after one hour.

    Args:
        **kargs: query parameters, handed to ``buildpath``.  ``rsz``
            defaults to 8 and ``start`` to 0 when not supplied.

    Returns:
        A list of the items found under ``responseData.results`` in each
        response.
    """
    host = 'ajax.googleapis.com'
    api_path = '/ajax/services/search/web'
    sresults = list()
    kargs.setdefault('rsz', 8)
    kargs.setdefault('start', 0)
    while True:
        time.sleep(random.expovariate(0.05))
        path = buildpath(api_path, kargs)
        try:
            # Keep the body: the response object itself cannot be sliced,
            # and slicing it made every iteration fail and sleep an hour.
            body = api_call(host, path, True).read()
            jresp = json.loads(body)
            # Single-argument parenthesised form prints identically under
            # the Python 2 print statement.
            print(body[:50])
            data = jresp['responseData']
            sresults.extend(data['results'])
            # Advance only after the page has been stored, so a page that
            # failed to decode is retried rather than skipped.
            kargs['start'] += kargs['rsz']
            cursor = data['cursor']
            if len(cursor) == 0:
                break
            elif cursor['pages'][-1]['start'] < kargs['start']:
                break
            if kargs['start'] > 50:
                break
        except APIError:
            print('Deception Failed!')
            time.sleep(3600)
        except StandardError:
            traceback.print_exc(file=sys.stdout)
            time.sleep(3600)
    return sresults
コード例 #9
0
ファイル: foursq_api.py プロジェクト: spacelis/tcrawl
def search(**kargs):
    """Query the Foursquare venue search endpoint.

    HTTP 403 is treated as a rate-limit response: the call sleeps ten
    minutes and retries.  Every other API error is propagated.

    Args:
        **kargs: query parameters, handed to ``buildpath``.  Any
            ``oauth_token`` supplied by the caller is overwritten.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: for any failure whose status code is not 403.
    """
    host = 'api.foursquare.com'
    api_path = '/v2/venues/search'
    # NOTE(review): credential hard-coded in source -- should be moved to
    # configuration and rotated.
    kargs['oauth_token'] = '31JU4VJBLV4SOFWMP2W13XIZEZDYIK5E3LH3PJ3TXTQY1HMF'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code != 403:
                # Previously this fell through to ``return ret`` with
                # ``ret`` unbound, hiding the real error behind an
                # UnboundLocalError.
                raise
            sleeptime = 600
            logging.warning(
                'Rate limits exceeded, retry after {0} sec'.format(
                    sleeptime))
            time.sleep(sleeptime)
コード例 #10
0
def search(**kargs):
    """Query the Foursquare venue search endpoint.

    HTTP 403 is treated as a rate-limit response: the call sleeps ten
    minutes and retries.  Every other API error is propagated.

    Args:
        **kargs: query parameters, handed to ``buildpath``.  Any
            ``oauth_token`` supplied by the caller is overwritten.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: for any failure whose status code is not 403.
    """
    host = 'api.foursquare.com'
    api_path = '/v2/venues/search'
    # NOTE(review): credential hard-coded in source -- should be moved to
    # configuration and rotated.
    kargs['oauth_token'] = '31JU4VJBLV4SOFWMP2W13XIZEZDYIK5E3LH3PJ3TXTQY1HMF'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code != 403:
                # Previously this fell through to ``return ret`` with
                # ``ret`` unbound, hiding the real error behind an
                # UnboundLocalError.
                raise
            sleeptime = 600
            logging.warning(
                'Rate limits exceeded, retry after {0} sec'.format(
                    sleeptime))
            time.sleep(sleeptime)
コード例 #11
0
ファイル: twitter_api.py プロジェクト: spacelis/tcrawl
def rest(api_path, **kargs):
    """Call a Twitter REST endpoint and return the decoded JSON body.

    The request is repeated until it succeeds.  Two failures are treated as
    transient: HTTP 400 with no rate-limit quota remaining (sleep until the
    advertised reset time) and HTTP 503 (sleep one minute).

    Args:
        api_path: endpoint path, handed to ``buildpath``.
        **kargs: query parameters, handed to ``buildpath``.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        APIError: for HTTP 400 when quota is still available or no reset
            time is advertised, and for any status other than 400 / 503.
    """
    host = 'api.twitter.com'
    path = buildpath(api_path, kargs)
    while True:
        try:
            resp = api_call(host, path, True)
            return json.loads(resp.read())
        except APIError as twe:
            if twe.code == 400:
                # A missing header is treated as "no quota left".
                remaining = int(
                    twe.resp.getheader('X-RateLimit-Remaining', 0))
                if remaining >= 1:
                    # Quota is available, so this is a genuine bad request.
                    # Re-sending the identical request would spin forever.
                    raise
                reset_at = twe.resp.getheader('X-RateLimit-Reset')
                if reset_at is None:
                    # Nothing tells us how long to wait; surface the error
                    # rather than crash on int(None).
                    raise
                # Clamp at zero: the reset time may already have passed.
                sleeptime = max(0, int(reset_at) - int(time.time()))
                logging.warning(
                    'Rate limits exceeded, retry after {0} sec'.format(
                        sleeptime))
                sleep(sleeptime)
            elif twe.code == 503:
                logging.warning('Service Unavailable. Retry after 1 min')
                sleep(60)
            else:
                # Bare raise keeps the original traceback.
                raise
コード例 #12
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_twitgoo_pic(**kargs):
    """Download a TwitGoo picture.

    The address in ``kargs["url"]`` is split into three parts, ``/img`` is
    appended to the path part, and the resulting location is requested.

    Args:
        **kargs: must contain ``url``, the address of the TwitGoo page.

    Returns:
        Whatever ``read()`` yields for that request (presumably the raw
        image data -- confirm against ``api_call``).
    """
    page_url = kargs["url"]
    host, path, secure = urlsplit(page_url)
    image_path = path + "/img"
    return api_call(host, image_path, secure).read()
コード例 #13
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_yfrog_pic(**kargs):
    """Download a YFrog picture.

    The address in ``kargs["url"]`` is split into three parts, ``:iphone``
    is appended to the path part, and the resulting location is requested.

    Args:
        **kargs: must contain ``url``, the address of the YFrog page.

    Returns:
        Whatever ``read()`` yields for that request (presumably the raw
        image data -- confirm against ``api_call``).
    """
    page_url = kargs["url"]
    host, path, secure = urlsplit(page_url)
    image_path = path + ":iphone"
    return api_call(host, image_path, secure).read()
コード例 #14
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_twitgoo_pic(**kargs):
    """Download a TwitGoo picture.

    The address in ``kargs['url']`` is split into three parts, ``/img`` is
    appended to the path part, and the resulting location is requested.

    Args:
        **kargs: must contain ``url``, the address of the TwitGoo page.

    Returns:
        Whatever ``read()`` yields for that request (presumably the raw
        image data -- confirm against ``api_call``).
    """
    page_url = kargs['url']
    host, path, secure = urlsplit(page_url)
    image_path = path + '/img'
    return api_call(host, image_path, secure).read()
コード例 #15
0
ファイル: pic_service_api.py プロジェクト: spacelis/tcrawl
def get_yfrog_pic(**kargs):
    """Download a YFrog picture.

    The address in ``kargs['url']`` is split into three parts, ``:iphone``
    is appended to the path part, and the resulting location is requested.

    Args:
        **kargs: must contain ``url``, the address of the YFrog page.

    Returns:
        Whatever ``read()`` yields for that request (presumably the raw
        image data -- confirm against ``api_call``).
    """
    page_url = kargs['url']
    host, path, secure = urlsplit(page_url)
    image_path = path + ':iphone'
    return api_call(host, image_path, secure).read()