def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('username', type=str, help='')
    parser.add_argument('password', type=str, help='')
    parser.add_argument('token', type=str, help='')
    args = parser.parse_args()
    username = args.get('username')
    password = args.get('password')
    token = args.get('token')
    try:
        g = Github(username, token)
        # collection.save() is deprecated in PyMongo; replace_one(..., upsert=True) is the modern equivalent
        github_col.save({
            '_id': md5(username),
            'username': username,
            'password': password,
            'token': token,
            # mask the middle of the password for display
            'mask_password': password.replace(''.join(password[2:-2]), '****'),
            'addat': timestamp(),
            'rate_limit': int(g.get_rate_limit().search.limit),
            'rate_remaining': int(g.get_rate_limit().search.remaining)
        })
        result = list(github_col.find({}, {'_id': 0}))
        data = {'status': 201, 'msg': 'Account added successfully', 'result': result}
    except BadCredentialsException:
        data = {'status': 401, 'msg': 'Authentication failed, please check whether the account is valid', 'result': []}
    return jsonify(data)
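The handler relies on two small helpers that are not shown in this section: md5(username), used as a stable document _id, and timestamp(), stored in addat. A minimal sketch of what they might look like, assuming md5 returns a hex digest of its input and timestamp returns the current Unix time as an integer; the bodies are assumptions, only the names and call sites come from the snippet.

import hashlib
import time


def md5(text):
    # Assumed helper: hex digest of the UTF-8 encoded input, used above as a deterministic Mongo _id
    return hashlib.md5(text.encode('utf-8')).hexdigest()


def timestamp():
    # Assumed helper: current Unix time as an integer, stored in the 'addat' field
    return int(time.time())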
def get_chartdata(**kwargs):
    asset = kwargs.get('asset', 'WHAT')
    con = lite.connect('../db/riobravo.db')
    startdate = date.timestamp("1990-01-01")
    enddate = date.timestamp(time.strftime("%Y-%m-%d"))
    result = sql.query(con, "select * from %s where date >= %s and date <= %s" % (asset, startdate, enddate))
    quotes = []
    for x in result:
        quotes.append([
            int(str(x[0]) + '000'),  # date (epoch seconds -> milliseconds)
            float(x[1]) / 100,       # open
            float(x[2]) / 100,       # high
            float(x[3]) / 100,       # low
            float(x[4]) / 100,       # close
            x[5]                     # volume
        ])
    dates = [date.ymd(item[0]) for item in result]
    closes = pandas.Series([item[4] / 100 for item in result], index=dates)
    # pandas.rolling_mean() no longer exists in modern pandas; use Series.rolling().mean()
    sma17 = closes.rolling(17).mean()
    sma34 = closes.rolling(34).mean()
    o = [quote[1] for quote in quotes]
    h = [quote[2] for quote in quotes]
    l = [quote[3] for quote in quotes]
    c = [quote[4] for quote in quotes]
    doji = talib.CDLDOJI(open=numpy.asarray(o), high=numpy.asarray(h), low=numpy.asarray(l), close=numpy.asarray(c))
    # the series is indexed by date strings, so use positional access
    for i in range(len(sma17)):
        quotes[i].append(0 if numpy.isnan(sma17.iloc[i]) else round(sma17.iloc[i], 2))
    for i in range(len(sma34)):
        quotes[i].append(0 if numpy.isnan(sma34.iloc[i]) else round(sma34.iloc[i], 2))
    for i in range(len(doji)):
        quotes[i].append(doji[i])
    return quotes
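Each row that get_chartdata returns ends up as [timestamp_ms, open, high, low, close, volume, sma17, sma34, doji]. A small usage sketch, where 'PETR4' is only an illustrative asset/table name, not something the snippet guarantees to exist:

# Hedged usage sketch; 'PETR4' stands in for any asset table created by the loader.
quotes = get_chartdata(asset='PETR4')
# each row is [timestamp_ms, open, high, low, close, volume, sma17, sma34, doji]
for row in quotes[-3:]:
    print(row)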
def search(query, page, g, github_username):
    mail_notice_list = []
    webhook_notice_list = []
    logger.info('Start crawling: tag is {} keyword is {}, page is {}'.format(
        query.get('tag'), query.get('keyword'), page + 1))
    try:
        repos = g.search_code(query=query.get('keyword'), sort="indexed", order="desc")
        github_col.update_one({'username': github_username}, {
            '$set': {
                'rate_remaining': int(g.get_rate_limit().search.remaining)
            }
        })
    except Exception as error:
        logger.critical(error)
        logger.critical("Search rate limit hit")
        return
    try:
        for repo in repos.get_page(page):
            # record that the task is still alive
            setting_col.update_one({'key': 'task'}, {
                '$set': {
                    'key': 'task',
                    'pid': os.getpid(),
                    'last': timestamp()
                }
            }, upsert=True)
            if not result_col.count({'_id': repo.sha}):
                try:
                    code = str(repo.content).replace('\n', '')
                except Exception:
                    code = ''
                leakage = {
                    'link': repo.html_url,
                    'project': repo.repository.full_name,
                    'project_url': repo.repository.html_url,
                    '_id': repo.sha,
                    'language': repo.repository.language,
                    'username': repo.repository.owner.login,
                    'avatar_url': repo.repository.owner.avatar_url,
                    'filepath': repo.path,
                    'filename': repo.name,
                    'security': 0,
                    'ignore': 0,
                    'tag': query.get('tag'),
                    'code': code,
                }
                try:
                    leakage['affect'] = get_affect_assets(repo.decoded_content)
                except Exception as error:
                    logger.critical('{} {}'.format(error, leakage.get('link')))
                    leakage['affect'] = []
                if int(repo.raw_headers.get('x-ratelimit-remaining')) == 0:
                    logger.critical('Remaining API calls: {}'.format(
                        repo.raw_headers.get('x-ratelimit-remaining')))
                    return
                last_modified = datetime.datetime.strptime(
                    repo.last_modified, '%a, %d %b %Y %H:%M:%S %Z')
                leakage['datetime'] = last_modified
                leakage['timestamp'] = last_modified.timestamp()
                in_blacklist = False
                for blacklist in blacklist_col.find({}):
                    if blacklist.get('text').lower() in leakage.get('link').lower():
                        logger.warning('{} matches blacklist entry {}'.format(
                            leakage.get('link'), blacklist.get('text')))
                        in_blacklist = True
                if in_blacklist:
                    continue
                if result_col.count({
                        "project": leakage.get('project'),
                        "ignore": 1
                }):
                    continue
                if not result_col.count({
                        "project": leakage.get('project'),
                        "filepath": leakage.get("filepath"),
                        "security": 0
                }):
                    mail_notice_list.append(
                        'Uploaded at: {} Link: <a href={}>{}/{}</a>'.format(
                            leakage.get('datetime'), leakage.get('link'),
                            leakage.get('project'), leakage.get('filename')))
                    webhook_notice_list.append('[{}/{}]({}) uploaded at {}'.format(
                        leakage.get('project').split('.')[-1],
                        leakage.get('filename'), leakage.get('link'),
                        leakage.get('datetime')))
                try:
                    result_col.insert_one(leakage)
                    logger.info(leakage.get('project'))
                except errors.DuplicateKeyError:
                    logger.info('Record already exists')
                logger.info('Crawled keyword: {} {}'.format(query.get('tag'),
                                                            leakage.get('link')))
    except Exception as error:
        if 'Not Found' not in error.data:
            g, github_username = new_github()
            search.schedule(args=(query, page, g, github_username),
                            delay=huey.pending_count() + huey.scheduled_count())
        logger.critical(error)
        logger.error('Crawl failed: tag is {} keyword is {}, page is {}'.format(
            query.get('tag'), query.get('keyword'), page + 1))
        return
    logger.info('Crawl succeeded: tag is {} keyword is {}, page is {}'.format(
        query.get('tag'), query.get('keyword'), page + 1))
    query_col.update_one({'tag': query.get('tag')}, {
        '$set': {
            'last': int(time.time()),
            'status': 1,
            'reason': 'Page {} crawled successfully'.format(page),
            'api_total': repos.totalCount,
            'found_total': result_col.count({'tag': query.get('tag')})
        }
    })
    if setting_col.count({
            'key': 'mail',
            'enabled': True
    }) and len(mail_notice_list):
        main_content = '<h2>Rule name: {}</h2><br>{}'.format(
            query.get('tag'), '<br>'.join(mail_notice_list))
        send_mail(main_content)
    logger.info(len(webhook_notice_list))
    webhook_notice(query.get('tag'), webhook_notice_list)
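search() records which assets a leak touches via get_affect_assets(repo.decoded_content), but that helper is not shown in this section. Below is a minimal, hedged sketch of one plausible implementation, assuming it uses a tldextract instance (like the module-level extract defined in the tasks module) to reduce host-like strings in the file to registered domains; the regex and return shape are assumptions, not the project's actual code.

import re

import tldextract

extract = tldextract.TLDExtract()  # in the real module this would be the existing module-level instance


def get_affect_assets(content):
    # Hedged sketch: scan the decoded file content for host-like strings and
    # reduce them to registered domains. The real helper may behave differently.
    if isinstance(content, bytes):
        content = content.decode('utf-8', errors='ignore')
    hosts = re.findall(r'[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', content)
    domains = set()
    for host in hosts:
        parts = extract(host)
        if parts.domain and parts.suffix:
            domains.add('{}.{}'.format(parts.domain, parts.suffix))
    return sorted(domains)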
if setting_col.count({
        'key': 'task',
        'minute': {'$exists': True},
        'page': {'$exists': True}
}):
    minute = int(setting_col.find_one({'key': 'task'}).get('minute'))
    setting_col.update_one(
        {'key': 'task'},
        {'$set': {
            'key': 'task',
            'pid': os.getpid(),
            'last': timestamp()
        }},
        upsert=True)
else:
    minute = 10
    setting_col.update_one({'key': 'task'}, {
        '$set': {
            'key': 'task',
            'pid': os.getpid(),
            'minute': 10,
            'page': 3,
            'last': timestamp()
        }
    }, upsert=True)
rownum = 0
curasset = ""
sql.query(con, "create table asset (name text, UNIQUE(name))")
for row in reader:
    # skip the header row and assets filtered out by name or ticker suffix
    if rownum == 0 or " " in row[0] or row[0] == "IVBX2" or row[0] == "ICO2" \
            or row[0].endswith("M") or row[0].endswith("B") or row[0].endswith("11") \
            or row[0].endswith("33") or row[0].endswith("34"):
        rownum += 1
        continue
    asset = row[0]
    if asset != curasset:
        print('processing asset %s.' % asset)
        curasset = asset
        sql.query(con, "INSERT OR IGNORE INTO asset (name) VALUES ('%s')" % curasset)
        sql.query(con, "create table if not exists %s(date int, open int, high int, low int, close int, volume int, delta int, UNIQUE(date))" % asset)
    # insert the quote into the existing asset table
    curdate = date.timestamp(row[1])
    sql.query(con, "INSERT OR IGNORE INTO %s (date, open, high, low, close, volume, delta) VALUES (%s, %s, %s, %s, %s, %s, 0)" % (
        asset, curdate,
        row[3].replace('.', '')[:-3],
        row[4].replace('.', '')[:-3],
        row[5].replace('.', '')[:-3],
        row[6].replace('.', '')[:-3],
        row[7]))
    rownum += 1
csvFile.close()
file.delete('ultima_base.txt')
file.rename('base_consolidada_atualizacao.txt', 'ultima_base.txt')
file.delete('out.csv')
con.close()
file.delete('base_consolidada.zip')
infile = open('ultima_base.txt', 'r')
f = ''.join(infile.readlines())
infile.close()
last_date = parser.parse(f.split('\r')[0], dayfirst=True)
last_date += datetime.timedelta(days=1)
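Both the loader above and get_chartdata funnel every statement through sql.query(con, statement), which is not shown here. A minimal sketch of what that wrapper might look like, assuming it executes a single statement, commits, and returns any fetched rows; only the name and call shape come from the snippets, the body is an assumption.

def query(con, statement):
    # Hedged sketch of the sql.query helper: run one statement, return the
    # fetched rows (empty for non-SELECT statements), and commit the change.
    cur = con.cursor()
    cur.execute(statement)
    rows = cur.fetchall()
    con.commit()
    return rows

Note that the snippets interpolate values straight into SQL strings; sqlite3's ? placeholders would avoid quoting problems for the values, although the table name itself still has to be interpolated.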
import datetime
import os
import time

import tldextract
from github import Github
from huey import RedisHuey, crontab
from pymongo import errors, DESCENDING, ASCENDING

from config.database import result_col, query_col, blacklist_col, notice_col, github_col, setting_col, REDIS_HOST, \
    REDIS_PORT
from utils.date import timestamp
from utils.log import logger
from utils.notice import mail_notice

huey = RedisHuey('hawkeye', host=REDIS_HOST, port=int(REDIS_PORT))
base_path = os.path.split(os.path.realpath(__file__))[0]
extract = tldextract.TLDExtract(cache_file='{}/.tld_set'.format(base_path))

if setting_col.count({'key': 'task', 'minute': {'$exists': True}, 'page': {'$exists': True}}):
    minute = int(setting_col.find_one({'key': 'task'}).get('minute'))
    setting_col.update_one({'key': 'task'},
                           {'$set': {'key': 'task', 'pid': os.getpid(), 'last': timestamp()}},
                           upsert=True)
else:
    minute = 10
    setting_col.update_one({'key': 'task'},
                           {'$set': {'key': 'task', 'pid': os.getpid(), 'minute': 10, 'page': 3, 'last': timestamp()}},
                           upsert=True)


@huey.task()
def search(query, page, g, github_username):
    mail_notice_list = []
    webhook_notice_list = []
    logger.info('Start crawling: tag is {} keyword is {}, page is {}'.format(
        query.get('tag'), query.get('keyword'), page + 1))
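crontab is imported and the minute/page settings are read above, but the periodic task that fans out search jobs is not part of this section. A hedged sketch of how such a scheduler could be wired up with huey, assuming a task named schedule_search and a new_github() helper that returns an authenticated client plus its username (the task name and loop structure are assumptions):

@huey.periodic_task(crontab(minute='*/{}'.format(minute)))
def schedule_search():
    # Hedged sketch: enqueue one search task per query rule and per result page.
    page_count = int(setting_col.find_one({'key': 'task'}).get('page', 3))
    for query in query_col.find({}):
        for page in range(page_count):
            g, github_username = new_github()  # assumed helper returning (Github client, username)
            search(query, page, g, github_username)  # calling a huey task enqueues it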