def run():
    """Consume download URLs from the 'downloadurls' RabbitMQ queue.

    Keeps reconnecting until the module-level `should_stop` flag is set or
    `max_fail_times` failures accumulate.  The connection and channel are
    appended to the module-level `pikas` / `rmq_channels` lists so that
    `graceful_exit` can shut them down from a signal handler.
    """
    logger.i('rmq_channel.start_consuming')
    _pika = None
    rmq_channel = None
    fail_times = 0
    # The original `while True and ...` was redundant; the two conditions
    # alone drive the loop.
    while not should_stop and fail_times < max_fail_times:
        try:
            _pika = pika.BlockingConnection(
                pika.ConnectionParameters(
                    **settings.RABBITMQ['interception_download_urls']))
            pikas.append(_pika)
            rmq_channel = _pika.channel()
            rmq_channel.queue_declare(queue='downloadurls', durable=True)
            # Deliver one message at a time so a busy worker doesn't hoard
            # unacked messages.
            rmq_channel.basic_qos(prefetch_count=1)
            rmq_channel.basic_consume(callback, queue='downloadurls')
            rmq_channels.append(rmq_channel)
            rmq_channel.start_consuming()  # blocks until stopped or dropped
        except Exception as e:
            fail_times += 1
            logger.e('error consuming: %s' % e)
            # Best-effort teardown.  The bare `except:` clauses are narrowed
            # to Exception so KeyboardInterrupt/SystemExit still propagate.
            if rmq_channel in rmq_channels:
                try:
                    rmq_channel.stop_consuming()
                    rmq_channel.close()
                except Exception:
                    pass
                rmq_channels.remove(rmq_channel)
            if _pika in pikas:
                try:
                    _pika.close()
                except Exception:
                    pass
                pikas.remove(_pika)
            time.sleep(5)  # back off before reconnecting
# NOTE(review): exact duplicate of the `run` defined earlier in this file;
# whichever definition executes last wins at import time.  One copy should
# be deleted.
def run():
    """Consume the 'downloadurls' RabbitMQ queue, reconnecting on failure."""
    logger.i('rmq_channel.start_consuming')
    _pika = None
    rmq_channel = None
    fail_times = 0
    # Loop until asked to stop or too many failures have accumulated.
    while True and not should_stop and fail_times < max_fail_times:
        try:
            _pika = pika.BlockingConnection(
                pika.ConnectionParameters(**settings.RABBITMQ['interception_download_urls']))
            pikas.append(_pika)
            rmq_channel = _pika.channel()
            rmq_channel.queue_declare(queue='downloadurls', durable=True)
            # One unacked message per consumer at a time.
            rmq_channel.basic_qos(prefetch_count=1)
            rmq_channel.basic_consume(callback, queue='downloadurls')
            rmq_channels.append(rmq_channel)
            rmq_channel.start_consuming()  # blocks until stopped
        except Exception as e:
            fail_times = fail_times + 1
            logger.e('error consuming: %s' % e)
            # Best-effort cleanup of the channel and connection before retrying.
            if rmq_channel in rmq_channels:
                try:
                    rmq_channel.stop_consuming()
                    rmq_channel.close()
                except:
                    pass
                rmq_channels.remove(rmq_channel)
            if _pika in pikas:
                try:
                    _pika.close()
                except:
                    pass
                pikas.remove(_pika)
            time.sleep(5)  # back off before reconnecting
def get_real_url(url):
    """Resolve appchina redirect hops and return the final download URL.

    Each matching prefix is resolved once via the non-redirecting opener's
    Location header; the second prefix is tested against the URL produced
    by the first hop, mirroring the original two sequential checks.
    """
    original = url
    for prefix in ('http://www.appchina.com', 'http://www.d.appchina.com'):
        if url.startswith(prefix):
            # `no_redirect_opener` does not follow 3xx responses, so the
            # Location header carries the redirect target (Python 2
            # `headers.dict` API).
            url = no_redirect_opener.open(url).headers.dict['location']
    logger.i('get_real_url for url(%s) return url: %s' % (original, url))
    return url
def update_apps_inpage(id_from, id_to, count, hash):
    """Sweep final_app rows in pages and refresh their labels.

    Fetches up to `count` rows at a time; the last row's id becomes the
    paging cursor for the next fetch, so the loop ends when
    `get_final_app` returns an empty page.
    """
    id_from, id_to, count, hash = (
        int(id_from), int(id_to), int(count), int(hash))
    page = get_final_app(id_from, id_to, count, hash)
    while page:
        # Advance the cursor to the last row's primary key.
        id_from = page[-1][0]
        batch = [(row[1], row[2]) for row in page]
        ok = update_apps_labels(batch)
        logger.i('handled %s apps. success: %s fail: %s'
                 % (len(batch), ok, (len(batch) - ok)))
        page = get_final_app(id_from, id_to, count, hash)
def process(url):
    """Download `url`, parse the APK, and record it as an available link.

    URLs matching the module-level `hosts_regex` blacklist are skipped.
    """
    logger.i('process url: %s' % url)
    if hosts_regex.match(url):
        logger.i('ignore url: %s' % url)
        return
    # Temp path is keyed by pid + thread name so concurrent workers do not
    # clobber each other's downloads.
    filepath = '/tmp/interception_%s_%s.apk' % (
        os.getpid(), threading.current_thread().name)
    Net.download(url, filepath)
    apk_info = parse_apk(filepath)
    package_name, version_code, version_name, sdk_version = apk_info
    update_avail_download_links(package_name, version_code, url)
# NOTE(review): exact duplicate of the `process(url)` defined earlier in this
# file; the later definition wins at import time.  One copy should be deleted.
def process(url):
    """Download `url`, parse the APK, and record it as an available link."""
    logger.i('process url: %s' % url)
    # Skip blacklisted hosts.
    if hosts_regex.match(url):
        logger.i('ignore url: %s' % url)
        return
    # Per-pid/per-thread temp path avoids collisions between workers.
    filepath = '/tmp/interception_%s_%s.apk' % (os.getpid(), threading.current_thread().name)
    Net.download(url, filepath)
    package_name, version_code, version_name, sdk_version = parse_apk(filepath)
    update_avail_download_links(package_name, version_code, url)
def url404(url, url1=None):
    """Return True when a HEAD request to `url` answers HTTP 404.

    A successful response or a non-404 HTTP error returns False.  On any
    other failure (timeout, DNS, reset) the check is retried once against
    the fallback `url1` when given, otherwise False is returned.
    """
    try:
        # Close the response explicitly; the original discarded the handle
        # and leaked the underlying connection.
        resp = urllib2.urlopen(HeadRequest(url))
        resp.close()
        return False
    except urllib2.HTTPError as e:
        return e.code == 404
    except Exception as e:
        logger.i('request url(%s) failed: %s' % (url, e))
        if url1:
            return url404(url1)
        return False
def reform_interception_url(url):
    """Strip the query string from .apk URLs on hosts known not to need it.

    When the URL matches the host whitelist, everything after '.apk' is
    dropped; otherwise the URL is returned untouched.
    """
    # NOTE(review): dots in these host patterns are unescaped regex
    # metacharacters, so each also matches any character — confirm that is
    # acceptable.
    can_remove_param_hosts = [
        '118.123.97.191',
        'download.taobaocdn.com',
        'download.alipay.com',
        'bs.baidu.com',
        'cache.3g.cn',
        'gamecache.3g.cn',
        'a4.img.3366.com',
        'apps.wandoujia.com',
        '[0-9]+.[0-9]+.[0-9]+.[0-9]+/down.myapp.com',
    ]
    pattern = re.compile(r'^http://(%s)/.*.apk\?.*$'
                         % '|'.join(can_remove_param_hosts))
    if not pattern.match(url):
        return url
    # Keep everything up to and including '.apk'; drop '?...'.
    cut = url.find('.apk?') + 4
    reformed = url[:cut]
    logger.i('reform url: %s to url: %s' % (url, reformed))
    return reformed
def update_apps_labels(apps):
    """Refresh Wandoujia labels for each (package_name, version_code) pair.

    Returns the number of rows successfully updated.  Per-app failures are
    logged and do not abort the batch.
    """
    suc_count = 0
    for package_name, version_code in apps:
        try:
            labels = get_wandoujia_app_labels(package_name, version_code)
            if not labels:
                # Nothing to write for this app.
                logger.i('ignore app(pn=%s, vc=%s).'
                         % (package_name, version_code))
                continue
            update_app(package_name, version_code, labels)
            suc_count += 1
            logger.i('update app(pn=%s, vc=%s) success.'
                     % (package_name, version_code))
        except Exception as e:
            logger.e('update app(pn=%s, vc=%s) failed: %s'
                     % (package_name, version_code, e))
    return suc_count
# NOTE(review): exact duplicate of the `reform_interception_url` defined
# earlier in this file; the later definition wins at import time.  One copy
# should be deleted.
def reform_interception_url(url):
    """Strip the query string from .apk URLs on whitelisted hosts."""
    can_remove_param_hosts = [
        '118.123.97.191',
        'download.taobaocdn.com',
        'download.alipay.com',
        'bs.baidu.com',
        'cache.3g.cn',
        'gamecache.3g.cn',
        'a4.img.3366.com',
        'apps.wandoujia.com',
        '[0-9]+.[0-9]+.[0-9]+.[0-9]+/down.myapp.com'
    ]
    # NOTE(review): dots in the host patterns are unescaped regex wildcards.
    r = re.compile('^http://(%s)/.*.apk\?.*$' % '|'.join(can_remove_param_hosts))
    if r.match(url):
        # Keep up to and including '.apk'; drop the query string.
        ret = url[0:url.find('.apk?') + 4]
        logger.i('reform url: %s to url: %s' % (url, ret))
        return ret
    else:
        return url
def graceful_exit(*args, **kargs):
    """Signal handler: raise `should_stop` and tear down all AMQP resources.

    Channels are stopped before their connections are closed; every failure
    is logged and swallowed so shutdown always runs to completion.
    """
    logger.i('request rabbit_consumer stop(%s, %s). stopping...'
             % (args, kargs))
    global should_stop
    should_stop = True
    for channel in rmq_channels:
        try:
            channel.stop_consuming()
            channel.close()
        except Exception as e:
            logger.e('stop_consuming failed: %s' % e)
    for connection in pikas:
        try:
            connection.close()
        except Exception as e:
            logger.e('close rabbit connection failed: %s' % e)
def update_apps_avail_download_links(apps):
    """Refresh avail_download_links for each (package_name, version_code).

    Re-resolves appchina links before saving and returns the number of rows
    actually updated.  Per-app failures are logged and skipped.
    """
    suc_count = 0
    for package_name, version_code in apps:
        try:
            row = get_final_app_cols(package_name, version_code,
                                     ['avail_download_links'])
            if row:
                # Requested column value sits at index 1 of the row.
                links = update_appchina_real_downloadlink(row[1])
                if links:
                    update_app(package_name, version_code,
                               avail_download_links=links)
                    suc_count += 1
                    logger.i('update app(pn=%s, vc=%s) success.'
                             % (package_name, version_code))
                else:
                    logger.i('ignore app(pn=%s, vc=%s).'
                             % (package_name, version_code))
        except Exception as e:
            logger.e('update app(pn=%s, vc=%s) failed: %s'
                     % (package_name, version_code, e))
    return suc_count
def handle_download_links(urls):
    """Drop URLs whose host is not on the accepted-domain whitelist.

    `urls` is a space-separated string; the surviving URLs are returned in
    their original order, space-joined.

    Bug fixed: the original removed items from the list it was iterating
    (`for u in us: ... us.remove(u)`), which skips the element following
    every removal, so some unaccepted URLs survived.  Once every unaccepted
    URL is removed, the surrounding reverse/reverse dance was a no-op, so
    it is dropped as well.
    """
    accepted_domains = frozenset([
        "apkc.mumayi.com",
        "dl.coolapk.com",
        "qd.cache.baidupcs.com",
        "cdn6.down.apk.gfan.com",
        "file.liqucn.com",
        "smsftp.3g.cn",
        "dldir1.qq.com",
        "a4.img.3366.com",
        "static.nduoa.com",
        "download.taobaocdn.com",
        "wap.apk.anzhi.com",
        "cache.3g.cn",
        "bcscdn.baidu.com",
        "file.m.163.com",
        "down5.game.uc.cn",
        "dl.m.duoku.com",
        "cdn.market.hiapk.com",
        "gdown.baidu.com",
        "bs.baidu.com",
    ])
    # not_accepted_domains = ['hiapk.com', ]
    domain_re = re.compile('http[s]*://([^/]+)/.*')

    def get_domain(url):
        # Host part of the URL, or None when it doesn't look like a URL
        # (None is never in the whitelist, so such entries are removed).
        m = domain_re.match(url)
        if m:
            return m.groups()[0]
        return None

    kept = []
    for u in urls.split(' '):
        if get_domain(u) in accepted_domains:
            kept.append(u)
        else:
            logger.i('url too long, will remove %s' % u)
    return ' '.join(kept)
def update_app(package_name, version_code, labels=None,
               avail_download_links=None):
    """Write labels and/or avail_download_links back to final_app.

    Also clears the low three status bits and bumps updated_at.  A no-op
    (with a log line) when neither column value is supplied.

    Bug fixed: the original opened its try block before `c = _db.cursor()`
    ran, so the early `return` (nothing to update) — or a failure inside
    `_db.cursor()` itself — reached `finally: c.close()` with `c` unbound
    and raised NameError.  The cursor is now created before the try.
    """
    updatecols = {}
    if labels:
        updatecols['labels'] = labels
    if avail_download_links:
        updatecols['avail_download_links'] = avail_download_links
    if not updatecols:
        logger.i('nothing need to update')
        return
    c = _db.cursor()
    try:
        # NOTE(review): values are inlined into the SQL with single quotes
        # stripped rather than bound as parameters — switch to
        # c.execute(sql, params) if the driver supports it.
        assignments = ["%s='%s'" % (k, v.replace("'", ''))
                       for k, v in updatecols.items()]
        sql = '''
        update final_app
        set %s, status=status&0xfffffffffffffff8, updated_at=now()
        where version_code=%s and package_name='%s'
        ''' % (','.join(assignments), version_code,
               package_name.replace("'", ''))
        logger.d(sql)
        ret = c.execute(sql)
        logger.i('updated %s rows.' % ret)
        _db.conn.commit()
    finally:
        c.close()
def main(apps=None, target='labels', id_from=max_id, id_to=stop_id,
         hash=hash, count=count):
    """Entry point: refresh app data page-by-page or for explicit apps.

    `apps` is a comma-separated list of 'pn-vc' pairs; when omitted, the id
    range is swept with `update_apps_inpage`.  `target` selects which
    `update_apps_<target>` function handles the explicit list.

    Note: the defaults capture the module-level max_id/stop_id/hash/count
    values at import time.
    """
    if not apps:
        logger.i('update_apps_inpage start...')
        update_apps_inpage(id_from, id_to, count, hash)
        logger.i('update_apps_inpage end...')
        return
    logger.i('update_apps start...')
    pairs = [a.split('-') for a in apps.split(',')]
    func = getattr(sys.modules[__name__], 'update_apps_%s' % target)
    if not callable(func):
        # Fixed typo in the original message ('attibute').
        raise Exception('attribute: %s is not callable' % func)
    func(pairs)
    logger.i('update_apps end...')
def handle_download_links(urls): accepted_domains = ["apkc.mumayi.com", "dl.coolapk.com", "qd.cache.baidupcs.com", "cdn6.down.apk.gfan.com", "file.liqucn.com", "smsftp.3g.cn", "dldir1.qq.com", "a4.img.3366.com", "static.nduoa.com", "download.taobaocdn.com", "wap.apk.anzhi.com", "cache.3g.cn", "bcscdn.baidu.com", "file.m.163.com", "down5.game.uc.cn", "dl.m.duoku.com", "cdn.market.hiapk.com", "gdown.baidu.com", "bs.baidu.com", ] # not_accepted_domains = ['hiapk.com', ] us = urls.split(' ') def get_domain(url): r = re.compile('http[s]*://([^/]+)/.*') m = r.match(url) if m: return m.groups()[0] return None us.reverse() for u in us: if get_domain(u) not in accepted_domains: logger.i('url too long, will remove %s' % u) us.remove(u) us.reverse() return ' '.join(us)
def update_avail_download_links(packagename, version_code, url):
    """Append `url` to the app's avail_download_links within a 2048-char cap.

    Skips the write when the link is already recorded or the app row is
    missing.  If the combined string exceeds 2048 characters it is pruned
    repeatedly via `handle_download_links`; when pruning stops making
    progress and the string still doesn't fit, the update is abandoned.
    """
    logger.i('update_avail_download_links packagename: %s, version_code: %s, url: %s'
             % (packagename, version_code, url))
    row_id, existing = get_url(packagename, version_code)
    if existing and existing.find(url) != -1:
        logger.i('final app already has url: %s' % url)
        return
    if not existing:
        logger.i('final app not found')
        return
    combined = reform_urls('%s %s' % (existing, url))
    while len(combined) > 2048:
        pruned = handle_download_links(combined)
        if pruned == combined:
            break  # no progress; handled by the length check below
        combined = pruned
    if len(combined) > 2048:
        logger.e('new url too long: %s, will not process.' % combined)
        return
    update_finalapp(row_id, combined)
# NOTE(review): exact duplicate of the `update_avail_download_links` defined
# earlier in this file; the later definition wins at import time.  One copy
# should be deleted.
def update_avail_download_links(packagename, version_code, url):
    """Append `url` to the app's avail_download_links within a 2048-char cap."""
    logger.i(
        'update_avail_download_links packagename: %s, version_code: %s, url: %s'
        % (packagename, version_code, url))
    id, urls = get_url(packagename, version_code)
    if urls and urls.find(url) != -1:
        # Link already recorded; nothing to do.
        logger.i('final app already has url: %s' % url)
        return
    elif not urls:
        logger.i('final app not found')
        return
    else:
        new_urls = '%s %s' % (urls, url)
        new_urls = reform_urls(new_urls)
        # Prune non-whitelisted links until the string fits or pruning
        # stops making progress.
        while len(new_urls) > 2048:
            new_urls1 = handle_download_links(new_urls)
            if new_urls1 == new_urls:
                break
            new_urls = new_urls1
        if len(new_urls) > 2048:
            logger.e('new url too long: %s, will not process.' % new_urls)
            return
        update_finalapp(id, new_urls)
def process(id):
    """Scan one final_app row's APK with Tencent's virus-scan API.

    Returns True when the row reached a terminal state (missing row, 404'd
    URL, or a definite verdict) and False when the verdict is 'unknown' and
    the row should be retried later.  Falls off the end (None) for an
    unrecognized safetype.

    Bug fixed: the original did `raise 'error format: %s' % ...` — raising a
    plain string, which is itself a TypeError on Python 2.6+/3 rather than
    the intended error; it now raises a real Exception.
    """
    logger.i('process id: %s' % id)
    file_path, vol_id = get_url(id)
    if not file_path:
        logger.i('can not find id in db, ignore: %s' % id)
        return True
    data = ('{"authkey":"9ORmsDOAJ3zcD21w", '
            '"url":"http://estoredwnld7.189store.com/downloads/vol%s/%s"}'
            % (vol_id, file_path))
    url = 'http://%s/vol%s/%s' % ('estoredwnld7.189store.com', vol_id, file_path)
    url1 = 'http://%s/vol%s/%s' % (download_host, vol_id, file_path)
    if url404(url, url1):
        # 4 = download link gone.
        logger.i('found app(id=%s) url(%s) 404' % (id, url))
        update_finalapp_safe(id, 4, '')
        return True
    tencent_url = 'https://api.scan.qq.com/browser/scan'
    ret = ''
    try:
        ret = post(tencent_url, data)
        # Strip CR/LF so the payload parses as a single JSON document.
        ret = ret.replace('\r', '')
        ret = ret.replace('\n', '')
        ret = simplejson.loads(ret)
    except Exception as e:
        logger.e(u'ret format error: %s' % ret)
        raise e
    logger.d('tencent returns: %s' % simplejson.dumps(ret))
    if 'safetype' not in ret:
        # Was `raise '<string>'` in the original — a plain string is not a
        # valid exception.
        raise Exception('error format: %s' % simplejson.dumps(ret))
    safetype = ret['safetype']
    if safetype == 'unknown':
        update_finalapp_safe(id, None, '')
        return False
    if safetype == 'virus':
        logger.i('found app(id=%s) has virus.' % id)
        update_finalapp_safe(id, 0, simplejson.dumps(ret))
        return True
    if safetype == 'lowrisk':
        logger.i('found app(id=%s) lowrisk.' % id)
        update_finalapp_safe(id, 2, simplejson.dumps(ret))
        return True
    if safetype == 'midrisk':
        logger.i('found app(id=%s) midrisk.' % id)
        update_finalapp_safe(id, 3, simplejson.dumps(ret))
        return True
    if safetype == 'safe':
        logger.i('found app(id=%s) safe.' % id)
        update_finalapp_safe(id, 1, '')
        return True