Example #1
def run():
    logger.i('rmq_channel.start_consuming')
    _pika = None
    rmq_channel = None
    fail_times = 0
    while not should_stop and fail_times < max_fail_times:
        try:
            _pika = pika.BlockingConnection(
                pika.ConnectionParameters(
                    **settings.RABBITMQ['interception_download_urls']))
            pikas.append(_pika)
            rmq_channel = _pika.channel()
            rmq_channel.queue_declare(queue='downloadurls', durable=True)
            rmq_channel.basic_qos(prefetch_count=1)
            rmq_channel.basic_consume(callback, queue='downloadurls')
            rmq_channels.append(rmq_channel)
            rmq_channel.start_consuming()
        except Exception as e:
            fail_times = fail_times + 1
            logger.e('error consuming: %s' % e)
            if rmq_channel in rmq_channels:
                try:
                    rmq_channel.stop_consuming()
                    rmq_channel.close()
                except:
                    pass
                rmq_channels.remove(rmq_channel)
            if _pika in pikas:
                try:
                    _pika.close()
                except:
                    pass
                pikas.remove(_pika)
            time.sleep(5)
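This example registers a module-level callback with basic_consume but does not show it. A minimal sketch of what such a consumer callback might look like, assuming the legacy pika 0.x API used above and the process/logger helpers from the other examples (hypothetical; the real callback may differ):

def callback(ch, method, properties, body):
    # body is the raw message: here, a download URL pushed by the producer
    try:
        process(body.strip())
    except Exception as e:
        logger.e('callback failed for %s: %s' % (body, e))
    finally:
        # ack so the broker hands out the next message (prefetch_count=1)
        ch.basic_ack(delivery_tag=method.delivery_tag)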
Example #2
def run():
    logger.i('rmq_channel.start_consuming')
    _pika = None
    rmq_channel = None
    fail_times = 0
    while not should_stop and fail_times < max_fail_times:
        try:
            _pika = pika.BlockingConnection(
                pika.ConnectionParameters(**settings.RABBITMQ['interception_download_urls']))
            pikas.append(_pika)
            rmq_channel = _pika.channel()
            rmq_channel.queue_declare(queue='downloadurls', durable=True)
            rmq_channel.basic_qos(prefetch_count=1)
            rmq_channel.basic_consume(callback, queue='downloadurls')
            rmq_channels.append(rmq_channel)
            rmq_channel.start_consuming()
        except Exception as e:
            fail_times = fail_times + 1
            logger.e('error consuming: %s' % e)
            if rmq_channel in rmq_channels:
                try:
                    rmq_channel.stop_consuming()
                    rmq_channel.close()
                except:
                    pass
                rmq_channels.remove(rmq_channel)
            if _pika in pikas:
                try:
                    _pika.close()
                except:
                    pass
                pikas.remove(_pika)
            time.sleep(5)
Example #3
def get_real_url(url):
    oldurl = url
    if url.find('http://www.appchina.com') == 0:
        url = no_redirect_opener.open(url).headers.dict['location']
    if url.find('http://www.d.appchina.com') == 0:
        url = no_redirect_opener.open(url).headers.dict['location']
    logger.i('get_real_url for url(%s) return url: %s' % (oldurl, url))
    return url
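no_redirect_opener is not shown in the example. One common Python 2 recipe that returns the 3xx response itself, so its Location header can be read as above, is sketched below; this is an assumption, not necessarily the project's implementation:

import urllib
import urllib2


class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        # return the redirect response itself instead of following it
        resp = urllib.addinfourl(fp, headers, req.get_full_url())
        resp.code = code
        return resp
    http_error_301 = http_error_303 = http_error_307 = http_error_302


no_redirect_opener = urllib2.build_opener(NoRedirectHandler())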
Example #4
def update_apps_inpage(id_from, id_to, count, hash):
    id_from, id_to, count, hash = int(id_from), int(id_to), int(count), int(hash)
    apps = get_final_app(id_from, id_to, count, hash)
    while apps:
        id_from = apps[-1][0]
        apps = [(app[1], app[2]) for app in apps]
        suc_count = update_apps_labels(apps)
        logger.i('handled %s apps. success: %s fail: %s' % (len(apps), suc_count, (len(apps) - suc_count)))
        apps = get_final_app(id_from, id_to, count, hash)
Example #5
def process(url):
    logger.i('process url: %s' % url)
    if hosts_regex.match(url):
        logger.i('ignore url: %s' % url)
        return
    filepath = '/tmp/interception_%s_%s.apk' % (
        os.getpid(), threading.current_thread().name)
    Net.download(url, filepath)
    package_name, version_code, version_name, sdk_version = parse_apk(filepath)
    update_avail_download_links(package_name, version_code, url)
Example #6
def process(url):
    logger.i('process url: %s' % url)
    if hosts_regex.match(url):
        logger.i('ignore url: %s' % url)
        return
    filepath = '/tmp/interception_%s_%s.apk' % (os.getpid(),
                                                threading.current_thread().name)
    Net.download(url, filepath)
    package_name, version_code, version_name, sdk_version = parse_apk(filepath)
    update_avail_download_links(package_name, version_code, url)
Example #7
def url404(url, url1=None):
    try:
        urllib2.urlopen(HeadRequest(url))
        return False
    except urllib2.HTTPError as e:
        return e.code == 404
    except Exception as e:
        logger.i('request url(%s) failed: %s' % (url, e))
        if url1:
            return url404(url1)
        else:
            return False
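HeadRequest is not defined in the example. A common Python 2 helper that makes urllib2 issue a HEAD request (a sketch under that assumption):

import urllib2


class HeadRequest(urllib2.Request):
    def get_method(self):
        # send HEAD so only the status line and headers are fetched
        return 'HEAD'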
Example #8
def url404(url, url1=None):
    try:
        urllib2.urlopen(HeadRequest(url))
        return False
    except urllib2.HTTPError as e:
        return e.code == 404
    except Exception as e:
        logger.i('request url(%s) failed: %s' % (url, e))
        if url1:
            return url404(url1)
        else:
            return False
Example #9
def reform_interception_url(url):
    can_remove_param_hosts = ['118.123.97.191', 'download.taobaocdn.com',
                              'download.alipay.com', 'bs.baidu.com', 'cache.3g.cn',
                              'gamecache.3g.cn', 'a4.img.3366.com', 'apps.wandoujia.com',
                              '[0-9]+.[0-9]+.[0-9]+.[0-9]+/down.myapp.com']
    r = re.compile(r'^http://(%s)/.*\.apk\?.*$' % '|'.join(can_remove_param_hosts))
    if r.match(url):
        ret = url[0: url.find('.apk?') + 4]
        logger.i('reform url: %s to url: %s' % (url, ret))
        return ret
    else:
        return url
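For illustration (the URLs are hypothetical): a whitelisted host has its query string stripped up to and including '.apk', while other hosts pass through unchanged.

reform_interception_url('http://bs.baidu.com/pkg/foo.apk?auth=1&ts=2')
# -> 'http://bs.baidu.com/pkg/foo.apk'
reform_interception_url('http://mirror.example.net/foo.apk?auth=1')
# -> unchanged: the host is not in can_remove_param_hosts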
Example #10
def update_apps_labels(apps):
    suc_count = 0
    for pn, vc in apps:
        try:
            labels = get_wandoujia_app_labels(pn, vc)
            if labels:
                update_app(pn, vc, labels)
                suc_count += 1
                logger.i('update app(pn=%s, vc=%s) success.' % (pn, vc))
            else:
                logger.i('ignore app(pn=%s, vc=%s).' % (pn, vc))
        except Exception as e:
            logger.e('update app(pn=%s, vc=%s) failed: %s' % (pn, vc, e))
    return suc_count
Example #11
def reform_interception_url(url):
    can_remove_param_hosts = [
        '118.123.97.191', 'download.taobaocdn.com', 'download.alipay.com',
        'bs.baidu.com', 'cache.3g.cn', 'gamecache.3g.cn', 'a4.img.3366.com',
        'apps.wandoujia.com', '[0-9]+.[0-9]+.[0-9]+.[0-9]+/down.myapp.com'
    ]
    r = re.compile(r'^http://(%s)/.*\.apk\?.*$' %
                   '|'.join(can_remove_param_hosts))
    if r.match(url):
        ret = url[0:url.find('.apk?') + 4]
        logger.i('reform url: %s to url: %s' % (url, ret))
        return ret
    else:
        return url
Example #12
def graceful_exit(*args, **kargs):
    logger.i('request rabbit_consumer stop(%s, %s). stopping...' % (args, kargs))
    global should_stop
    should_stop = True
    for c in rmq_channels:
        try:
            c.stop_consuming()
            c.close()
        except Exception as e:
            logger.e('stop_consuming failed: %s' % e)
    for p in pikas:
        try:
            p.close()
        except Exception as e:
            logger.e('close rabbit connection failed: %s' % e)
Example #13
def graceful_exit(*args, **kargs):
    logger.i('request rabbit_consumer stop(%s, %s). stopping...' %
             (args, kargs))
    global should_stop
    should_stop = True
    for c in rmq_channels:
        try:
            c.stop_consuming()
            c.close()
        except Exception as e:
            logger.e('stop_consuming failed: %s' % e)
    for p in pikas:
        try:
            p.close()
        except Exception as e:
            logger.e('close rabbit connection failed: %s' % e)
Example #14
def update_apps_avail_download_links(apps):
    suc_count = 0
    for pn, vc in apps:
        try:
            avail_download_links = get_final_app_cols(pn, vc, ['avail_download_links'])
            if avail_download_links:
                avail_download_links = avail_download_links[1]
                avail_download_links = update_appchina_real_downloadlink(avail_download_links)
            if avail_download_links:
                update_app(pn, vc, avail_download_links=avail_download_links)
                suc_count += 1
                logger.i('update app(pn=%s, vc=%s) success.' % (pn, vc))
            else:
                logger.i('ignore app(pn=%s, vc=%s).' % (pn, vc))
        except Exception as e:
            logger.e('update app(pn=%s, vc=%s) failed: %s' % (pn, vc, e))
    return suc_count
Example #15
def handle_download_links(urls):
    accepted_domains = [
        "apkc.mumayi.com",
        "dl.coolapk.com",
        "qd.cache.baidupcs.com",
        "cdn6.down.apk.gfan.com",
        "file.liqucn.com",
        "smsftp.3g.cn",
        "dldir1.qq.com",
        "a4.img.3366.com",
        "static.nduoa.com",
        "download.taobaocdn.com",
        "wap.apk.anzhi.com",
        "cache.3g.cn",
        "bcscdn.baidu.com",
        "file.m.163.com",
        "down5.game.uc.cn",
        "dl.m.duoku.com",
        "cdn.market.hiapk.com",
        "gdown.baidu.com",
        "bs.baidu.com",
    ]
    # not_accepted_domains = ['hiapk.com', ]
    us = urls.split(' ')

    def get_domain(url):
        r = re.compile('http[s]*://([^/]+)/.*')
        m = r.match(url)
        if m:
            return m.groups()[0]
        return None

    us.reverse()
    for u in us:
        if get_domain(u) not in accepted_domains:
            logger.i('url too long, will remove %s' % u)
            us.remove(u)
            us.reverse()
            return ' '.join(us)
    # nothing removable: restore the original order and return the urls
    # unchanged so the caller's equality check can stop its loop
    us.reverse()
    return ' '.join(us)
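A hypothetical call to show the trimming behaviour: the most recently appended URL whose host is not whitelisted is dropped and the remaining URLs are returned in their original order.

urls = 'http://bs.baidu.com/a.apk http://mirror.example.net/b.apk'
handle_download_links(urls)
# -> 'http://bs.baidu.com/a.apk'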
Example #16
def update_app(package_name, version_code, labels=None, avail_download_links=None):
    c = None
    try:
        updatecols = {}
        if labels:
            updatecols['labels'] = labels
        if avail_download_links:
            updatecols['avail_download_links'] = avail_download_links
        if not updatecols:
            logger.i('nothing to update')
            return
        c = _db.cursor()
        updatecols = ["%s='%s'" % (k, v.replace("'", ''))
                      for k, v in updatecols.items()]
        sql = '''
        update final_app set %s, status=status&0xfffffffffffffff8, updated_at=now()
        where version_code=%s and package_name='%s'
        ''' % (','.join(updatecols), version_code, package_name.replace("'", ''))
        logger.d(sql)
        ret = c.execute(sql)
        logger.i('updated %s rows.' % ret)
        _db.conn.commit()
    finally:
        # the cursor may never have been created if an earlier statement failed
        if c is not None:
            c.close()
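For a hypothetical call, the statement built by update_app looks roughly as follows (single quotes are stripped from values before they are embedded; a parameterized query would be the safer alternative):

update_app('com.example.app', 42, labels='game,casual')
# update final_app set labels='game,casual',
#        status=status&0xfffffffffffffff8, updated_at=now()
# where version_code=42 and package_name='com.example.app'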
Example #17
def main(apps=None, target='labels', id_from=max_id, id_to=stop_id, hash=hash, count=count):
    if not apps:
        logger.i('update_apps_inpage start...')
        update_apps_inpage(id_from, id_to, count, hash)
        logger.i('update_apps_inpage end...')
    else:
        logger.i('update_apps start...')
        apps = [a.split('-') for a in apps.split(',')]
        target = 'update_apps_%s' % target
        func = getattr(sys.modules[__name__], target)
        if callable(func):
            func(apps)
        else:
            raise Exception('attribute: %s is not callable' % func)
        logger.i('update_apps end...')
Example #18
def handle_download_links(urls):
    accepted_domains = ["apkc.mumayi.com", "dl.coolapk.com", "qd.cache.baidupcs.com", "cdn6.down.apk.gfan.com",
                        "file.liqucn.com", "smsftp.3g.cn", "dldir1.qq.com", "a4.img.3366.com", "static.nduoa.com",
                        "download.taobaocdn.com", "wap.apk.anzhi.com", "cache.3g.cn", "bcscdn.baidu.com",
                        "file.m.163.com", "down5.game.uc.cn", "dl.m.duoku.com", "cdn.market.hiapk.com",
                        "gdown.baidu.com", "bs.baidu.com", ]
    # not_accepted_domains = ['hiapk.com', ]
    us = urls.split(' ')

    def get_domain(url):
        r = re.compile('http[s]*://([^/]+)/.*')
        m = r.match(url)
        if m:
            return m.groups()[0]
        return None

    us.reverse()
    for u in us:
        if get_domain(u) not in accepted_domains:
            logger.i('url too long, will remove %s' % u)
            us.remove(u)
            us.reverse()
            return ' '.join(us)
    # nothing removable: restore the original order and return the urls
    # unchanged so the caller's equality check can stop its loop
    us.reverse()
    return ' '.join(us)
Example #19
def update_avail_download_links(packagename, version_code, url):
    logger.i('update_avail_download_links packagename: %s, version_code: %s, url: %s' %
             (packagename, version_code, url))
    id, urls = get_url(packagename, version_code)
    if urls and urls.find(url) != -1:
        logger.i('final app already has url: %s' % url)
        return
    elif not urls:
        logger.i('final app not found')
        return
    else:
        new_urls = '%s %s' % (urls, url)
        new_urls = reform_urls(new_urls)
        while len(new_urls) > 2048:
            new_urls1 = handle_download_links(new_urls)
            if new_urls1 == new_urls:
                break
            new_urls = new_urls1
        if len(new_urls) > 2048:
            logger.e('new url too long: %s, will not process.' % new_urls)
            return
        update_finalapp(id, new_urls)
Example #20
def update_avail_download_links(packagename, version_code, url):
    logger.i(
        'update_avail_download_links packagename: %s, version_code: %s, url: %s'
        % (packagename, version_code, url))
    id, urls = get_url(packagename, version_code)
    if urls and urls.find(url) != -1:
        logger.i('final app already has url: %s' % url)
        return
    elif not urls:
        logger.i('final app not found')
        return
    else:
        new_urls = '%s %s' % (urls, url)
        new_urls = reform_urls(new_urls)
        while len(new_urls) > 2048:
            new_urls1 = handle_download_links(new_urls)
            if new_urls1 == new_urls:
                break
            new_urls = new_urls1
        if len(new_urls) > 2048:
            logger.e('new url too long: %s, will not process.' % new_urls)
            return
        update_finalapp(id, new_urls)
Example #21
def process(id):
    logger.i('process id: %s' % id)
    file_path, vol_id = get_url(id)
    if file_path:
        data = '{"authkey":"9ORmsDOAJ3zcD21w", "url":"http://estoredwnld7.189store.com/downloads/vol%s/%s"}' % \
            (vol_id, file_path)
        url = 'http://%s/vol%s/%s' % ('estoredwnld7.189store.com', vol_id, file_path)
        url1 = 'http://%s/vol%s/%s' % (download_host, vol_id, file_path)
        if url404(url, url1):
            logger.i('found app(id=%s) url(%s) 404' % (id, url))
            update_finalapp_safe(id, 4, '')
            return True
        tencent_url = 'https://api.scan.qq.com/browser/scan'
        ret = ''
        try:
            ret = post(tencent_url, data)
            ret = ret.replace('\r', '')
            ret = ret.replace('\n', '')
            ret = simplejson.loads(ret)
        except Exception as e:
            logger.e(u'ret format error: %s' % ret)
            raise e
        logger.d('tencent returns: %s' % simplejson.dumps(ret))
        if 'safetype' not in ret:
            raise Exception('error format: %s' % simplejson.dumps(ret))
        if ret['safetype'] == 'unknown':
            update_finalapp_safe(id, None, '')
            return False
        if ret['safetype'] == 'virus':
            logger.i('found app(id=%s) has virus.' % id)
            update_finalapp_safe(id, 0, simplejson.dumps(ret))
            return True
        if ret['safetype'] == 'lowrisk':
            logger.i('found app(id=%s) lowrisk.' % id)
            update_finalapp_safe(id, 2, simplejson.dumps(ret))
            return True
        if ret['safetype'] == 'midrisk':
            logger.i('found app(id=%s) midrisk.' % id)
            update_finalapp_safe(id, 3, simplejson.dumps(ret))
            return True
        if ret['safetype'] == 'safe':
            logger.i('found app(id=%s) safe.' % id)
            update_finalapp_safe(id, 1, '')
            return True
    else:
        logger.i('can not find id in db, ignore: %s' % id)
        return True
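The post helper used against the Tencent scan API is not shown. A minimal sketch of what it might look like with Python 2's urllib2 (hypothetical; the real helper may add headers, timeouts or retries):

def post(url, data):
    req = urllib2.Request(url, data, {'Content-Type': 'application/json'})
    # urlopen issues a POST whenever a request body is supplied
    return urllib2.urlopen(req).read()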
Example #22
def process(id):
    logger.i('process id: %s' % id)
    file_path, vol_id = get_url(id)
    if file_path:
        data = '{"authkey":"9ORmsDOAJ3zcD21w", "url":"http://estoredwnld7.189store.com/downloads/vol%s/%s"}' % \
            (vol_id, file_path)
        url = 'http://%s/vol%s/%s' % ('estoredwnld7.189store.com', vol_id,
                                      file_path)
        url1 = 'http://%s/vol%s/%s' % (download_host, vol_id, file_path)
        if url404(url, url1):
            logger.i('found app(id=%s) url(%s) 404' % (id, url))
            update_finalapp_safe(id, 4, '')
            return True
        tencent_url = 'https://api.scan.qq.com/browser/scan'
        ret = ''
        try:
            ret = post(tencent_url, data)
            ret = ret.replace('\r', '')
            ret = ret.replace('\n', '')
            ret = simplejson.loads(ret)
        except Exception as e:
            logger.e(u'ret format error: %s' % ret)
            raise e
        logger.d('tencent returns: %s' % simplejson.dumps(ret))
        if 'safetype' not in ret:
            raise Exception('error format: %s' % simplejson.dumps(ret))
        if ret['safetype'] == 'unknown':
            update_finalapp_safe(id, None, '')
            return False
        if ret['safetype'] == 'virus':
            logger.i('found app(id=%s) has virus.' % id)
            update_finalapp_safe(id, 0, simplejson.dumps(ret))
            return True
        if ret['safetype'] == 'lowrisk':
            logger.i('found app(id=%s) lowrisk.' % id)
            update_finalapp_safe(id, 2, simplejson.dumps(ret))
            return True
        if ret['safetype'] == 'midrisk':
            logger.i('found app(id=%s) midrisk.' % id)
            update_finalapp_safe(id, 3, simplejson.dumps(ret))
            return True
        if ret['safetype'] == 'safe':
            logger.i('found app(id=%s) safe.' % id)
            update_finalapp_safe(id, 1, '')
            return True
    else:
        logger.i('can not find id in db, ignore: %s' % id)
        return True