def setupOgre(self, pluginCfgPath="./Plugins.cfg", ogreCfgPath="./ogre.cfg", logPath="./ogre.log"):
        """Create and configure the Ogre root, scene manager and main render window widget.

        NOTE(review): the pluginCfgPath argument is unconditionally overwritten by the
        platform check below, so callers cannot supply a custom plugin config — confirm
        whether that is intended.

        :param pluginCfgPath: Ogre plugin config path (currently always replaced).
        :param ogreCfgPath: Ogre renderer config path.
        :param logPath: Ogre log file path.
        """
        # Pick the platform-specific plugin configuration.
        if platform.system() == "Windows":
            pluginCfgPath="./Plugins-windows.cfg"
        else:
            pluginCfgPath="./Plugins-linux.cfg"

        root = og.Root(pluginCfgPath, ogreCfgPath, logPath)
        self.ogreRoot = root

        # Try the saved renderer config first; fall back to the config dialog.
        if  not self.ogreRoot.restoreConfig() and not self.ogreRoot.showConfigDialog():
            sys.exit('Quit from Config Dialog')

        # False: do not auto-create a render window; the widget creates its own.
        root.initialise(False)

        self.pivotRenderQueueListener = PivotRenderQueueListener()
        self.OgreMainWinSceneMgr = self.ogreRoot.createSceneManager(og.ST_GENERIC, "OgreMainWinSceneMgr")
        self.OgreMainWinSceneMgr.ambientLight = og.ColourValue(4, 4, 4)
        self.OgreMainWinSceneMgr.addRenderQueueListener(self.pivotRenderQueueListener)
        
        self.moduleName = ""
        self.myTerrainManager = MyTerrainManager(self.OgreMainWinSceneMgr)
        self.moduleManager = ModuleManager(self.ogreRoot,  self.OgreMainWinSceneMgr)
        self.moduleManager.myTerrainManager = self.myTerrainManager
        self.gocManager = self.moduleManager.gocManager
        
        # Embed the Ogre render widget into the Qt layout hierarchy.
        self.ogreMainWindow = OgreMainWindow.OgreMainWindow(self.moduleManager,  root,  self.OgreMainWinSceneMgr,  self)
        self.gridlayout.addWidget(self.ogreMainWindow,0,0,1,1)
        self.hboxlayout.addLayout(self.gridlayout)
        self.setCentralWidget(self.centralwidget)
        
        self.myTerrainManager.ogreMainWindow = self.ogreMainWindow
        
        # Forward Ogre log output to the in-app console window.
        oglog = og.LogManager.getSingleton().getDefaultLog()
        oglog.addListener(self.consoleWindow.lockenLog)
Beispiel #2
0
    def __init__( self ):
        """Load server definitions, open the sqlite config store and connect the IRC bot."""
        self.last_msg = -1
        self.msg_flood_limit = 0.25

        # Server definitions live next to this module in ircbot.conf (JSON).
        conf_path = os.path.join(os.path.dirname(__file__), 'ircbot.conf')
        with open(conf_path) as conf_file:
            self.servers = json.load(conf_file)['servers']

        self.select_server(0)

        # check_same_thread=False: the connection is shared across bot threads.
        db_path = os.path.join(os.path.dirname(__file__), 'ircbot.sqlite3')
        self.db = sqlite3.connect(db_path, check_same_thread=False)
        cursor = self.db.cursor()
        try:
            cursor.execute('select * from config limit 1')
        except sqlite3.OperationalError:
            # Table does not exist yet — create it on first run.
            cursor.execute('create table config ( `group` varchar(100), `key` varchar(100), `value` varchar(100) NULL )')
        cursor.close()
        self.modules = ModuleManager(self)

        self.channel_ops = {}

        server = self.current_server['host']
        port = self.current_server.get('port', 6667)
        password = self.current_server.get('password', '')
        nickname = self.current_server['nickname']

        if password:
            SingleServerIRCBot.__init__(self, [(server, port, password)], nickname, nickname, ipv6=True)
        else:
            SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname, ipv6=True)

        for module_name in self.modules.get_available_modules():
            self.modules.enable_module(module_name)
Beispiel #3
0
 def initConfig(self):
     """Initialize module configuration (keyword- and url-based modes).

     Iterates self.config_dict: for each mode, switches the module manager to
     that mode and invokes every registered init function, passing the manager
     when the function accepts a second positional argument.

     Raises:
         Exception: if self.config_dict is empty.
     """
     if not self.config_dict:
         raise Exception("配置列表为空,请检查!")
     # Module manager keeps per-mode module registrations.
     self.module_manager = ModuleManager()
     self.holder.logging.info("加载模块配置信息")
     for mode in self.config_dict:
         self.module_manager.switchToMode(mode)
         for init_function in self.config_dict[mode]:
             # inspect.getargspec was deprecated and removed in Python 3.11;
             # getfullargspec is the drop-in replacement for .args here.
             varnames = inspect.getfullargspec(init_function).args
             if len(varnames) == 2:
                 # (self, manager)-style initializer.
                 init_function(self.module_manager)
             else:
                 init_function()
Beispiel #4
0
class Main(object):
    """Application entry object: wires up the managers and drives the main loop."""

    def __init__(self):
        # Managers are constructed eagerly; modules start later via start().
        self.module_manager = ModuleManager(self)
        self.thread_manager = ThreadManager()
        self.active = True

    def start(self):
        """Start all modules, then poll scheduled threads until deactivated."""
        self.module_manager.start_modules()
        while True:
            if not self.active:
                break
            self.thread_manager.check_scheduled()
            if not self.active:
                self.__exit()

    def __exit(self):
        """Stop worker threads and terminate the process."""
        self.thread_manager.stop_all()
        # Hacky way to get a clean exit when exiting through the display exit button:
        os._exit(0)
Beispiel #5
0
    def __init__( self ):
        """Load server config, set up the sqlite config store, the module manager
        and the IRC connection (optionally over SSL/IPv6)."""
        logging.info('Bot __init__')
        self.last_msg = -1
        self.msg_flood_limit = 0.25

        with open(os.path.join(os.path.dirname(__file__), 'ircbot.conf')) as f:
            data = json.load(f)
            self.servers = data['servers']

        self.select_server(0)

        # check_same_thread=False: the connection is shared across bot threads.
        self.db = sqlite3.connect( os.path.join( os.path.dirname( __file__ ), 'ircbot.sqlite3' ), check_same_thread = False )
        cursor = self.db.cursor()
        try:
            cursor.execute( 'select * from config limit 1' )
        except sqlite3.OperationalError: # table does not exist yet, create it
            cursor.execute( 'create table config ( `group` varchar(100), `key` varchar(100), `value` varchar(100) NULL )' )
        cursor.close()
        modules_blacklist = data.get('blacklist', None)
        self.modules = ModuleManager(self, modules_blacklist)

        self.channel_ops = {}

        server = self.current_server['host']
        port = self.current_server['port'] if 'port' in self.current_server else 6667
        ssl_enabled = self.current_server['ssl'] if 'ssl' in self.current_server else False
        ipv6_enabled = self.current_server['ipv6'] if 'ipv6' in self.current_server else False
        password = self.current_server['password'] if 'password' in self.current_server else ''
        nickname = self.current_server['nickname']

        # ssl.wrap_socket() was deprecated in Python 3.7 and removed in 3.12.
        # Build an SSLContext-based wrapper instead; verification is disabled
        # to match the old wrap_socket() default behaviour.
        if ssl_enabled:
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            wrapper = ssl_context.wrap_socket
        else:
            wrapper = lambda x: x
        factory = irc.connection.Factory(wrapper=wrapper, ipv6=ipv6_enabled)

        super(Bot, self).__init__([irc.bot.ServerSpec(server, port, password)], nickname, nickname, connect_factory=factory)

        self.connection.set_rate_limit(30)

        for module_name in self.modules.get_available_modules():
            self.modules.enable_module( module_name )
Beispiel #6
0
class Bot(irc.bot.SingleServerIRCBot):
    """The main brain of the IRC bot."""
    def __init__( self ):
        """Load server config, set up the sqlite config store, the module manager
        and the IRC connection (optionally over SSL/IPv6)."""
        logging.info('Bot __init__')
        self.last_msg = -1
        self.msg_flood_limit = 0.25

        with open(os.path.join(os.path.dirname(__file__), 'ircbot.conf')) as f:
            data = json.load(f)
            self.servers = data['servers']

        self.select_server(0)

        # check_same_thread=False: the connection is shared across bot threads.
        self.db = sqlite3.connect( os.path.join( os.path.dirname( __file__ ), 'ircbot.sqlite3' ), check_same_thread = False )
        cursor = self.db.cursor()
        try:
            cursor.execute( 'select * from config limit 1' )
        except sqlite3.OperationalError: # table does not exist yet, create it
            cursor.execute( 'create table config ( `group` varchar(100), `key` varchar(100), `value` varchar(100) NULL )' )
        cursor.close()
        modules_blacklist = data.get('blacklist', None)
        self.modules = ModuleManager(self, modules_blacklist)

        self.channel_ops = {}

        server = self.current_server['host']
        port = self.current_server['port'] if 'port' in self.current_server else 6667
        ssl_enabled = self.current_server['ssl'] if 'ssl' in self.current_server else False
        ipv6_enabled = self.current_server['ipv6'] if 'ipv6' in self.current_server else False
        password = self.current_server['password'] if 'password' in self.current_server else ''
        nickname = self.current_server['nickname']

        # ssl.wrap_socket() was deprecated in Python 3.7 and removed in 3.12.
        # Build an SSLContext-based wrapper instead; verification is disabled
        # to match the old wrap_socket() default behaviour.
        if ssl_enabled:
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            wrapper = ssl_context.wrap_socket
        else:
            wrapper = lambda x: x
        factory = irc.connection.Factory(wrapper=wrapper, ipv6=ipv6_enabled)

        super(Bot, self).__init__([irc.bot.ServerSpec(server, port, password)], nickname, nickname, connect_factory=factory)

        self.connection.set_rate_limit(30)

        for module_name in self.modules.get_available_modules():
            self.modules.enable_module( module_name )

    def select_server(self, index):
        """Make self.servers[index] the active server and cache its admin lists."""
        self.current_server = self.servers[index]

        self.admin = self.current_server['global_admins']
        self.admin_channels = self.current_server['admin_channels']

    def start( self ):
        logging.debug( 'start()' )
        super(Bot, self).start()

    def die( self ):
        """Unload all modules and disconnect cleanly."""
        logging.debug( 'die()' )
        self.modules.unload()
        self.connection.disconnect( 'Bye, cruel world!' )
        #super(Bot, self).die()

    def __process_message(self, message):
        """Strip CR/LF and split a long message into IRC-sized chunks (at most 5)."""
        for char in '\r\n': message = message.replace(char, '')
        MAX_MESSAGE_COUNT = 5
        MAX_LINE_LEN = 256
        m = []
        for i in range(0, len(message), MAX_LINE_LEN):
            if len(m) >= MAX_MESSAGE_COUNT:
                m.append('(message truncated) ...')
                break
            m.append(message[i:i + MAX_LINE_LEN])
        return m
    def notice( self, target, message ):
        """Send a NOTICE, chunked to IRC-safe lengths."""
        for m in self.__process_message(message):
            self.connection.notice(target, m)
    def privmsg( self, target, message ):
        """Send a PRIVMSG, chunked to IRC-safe lengths."""
        for m in self.__process_message(message):
            self.connection.privmsg(target, m)
    def action( self, target, message ):
        """Send a CTCP ACTION, chunked to IRC-safe lengths."""
        for m in self.__process_message(message):
            self.connection.action(target, m)

    def __module_handle(self, handler, **kwargs):
        """Passed the "on_*" handlers through to the modules that support them"""
        handler = 'on_' + handler
        for (_ , module) in self.modules.get_loaded_modules():
            if hasattr(module, handler):
                try:
                    getattr(module, handler)(**kwargs)
                except Exception as e:
                    logging.debug('Module handler %s.%s failed: %s', _, handler, e)

    def __process_command( self, c, e ):
        """Process a message coming from the server."""
        message = e.arguments[0]
        # commands have to start with ! -- startswith also guards against an
        # empty message, which would raise IndexError on message[0]
        if not message.startswith('!'):
            return
        # strip the ! off, and split the message
        args = message[1:].split()
        # cmd is the first item
        cmd = args.pop(0).strip()
        # test for admin
        admin = e.source.userhost in self.admin
        if not admin:
            if e.target in self.admin_channels and e.target in self.channel_ops and e.source.nick in self.channel_ops[ e.target ]:
                admin = True

        # nick is the sender of the message, target is either a channel or the sender.
        source = e.source.nick
        target = e.target if is_channel(e.target) else source

        # see if there is a module that is willing to handle this, and make it so.
        logging.debug( '__process_command (src: %s; tgt: %s; cmd: %s; args: %s; admin: %s)', source, target, cmd, args, admin )

        # handle die outside of module (in case module is dead :( )
        if admin:
            if cmd == 'die':
                self.notice( source, 'Goodbye cruel world!' )
                raise BotExitException
            elif cmd == 'jump':
                self.jump_server()
            elif cmd == 'restart_class':
                raise BotReloadException
            # config commands
            elif cmd == 'get_config' and len( args ) <= 2:
                if len( args ) == 2:
                    try:
                        value = self.get_config( args[0], args[1] )
                        self.notice( source, 'config[{0}][{1}] = {2}'.format( args[0], args[1], value ) )
                    except Exception:
                        self.notice( source, 'config[{0}][{1}] not set'.format( *args ) )
                elif len( args ) == 1:
                    try:
                        values = self.get_config( args[0] )
                        if len( values ) > 0:
                            self.notice( source, 'config[{}]: '.format( args[0] ) + ', '.join( [ '{}: "{}"'.format( k,v ) for ( k, v ) in values.items() ] ) )
                        else:
                            self.notice( source, 'config[{}] is empty'.format( args[0] ) )
                    except Exception:
                        self.notice( source, 'config[{}] not set'.format( args[0] ) )
                else:
                    try:
                        self.notice( source, 'config groups: ' + ', '.join( self.get_config_groups() ) )
                    except Exception as e:
                        self.notice( source, 'No config groups: {}'.format( e ) )
            elif cmd == 'set_config' and len( args ) >= 2:
                if len( args ) >= 3:
                    config_val = ' '.join( args[2:] )
                else:
                    config_val = None
                try:
                    self.set_config( args[0], args[1], config_val )
                    self.notice( source, 'Set config setting' if config_val else 'Cleared config setting' )
                except Exception as e:
                    self.notice( source, 'Failed setting/clearing config setting: {0}'.format( e ) )
            # other base admin commands
            elif cmd == 'raw':
                self.connection.send_raw( ' '.join( args ) )
                return
            elif cmd == 'admins':
                self.notice( source, 'Current operators:' )
                self.notice( source, ' - global: {0}'.format( ' '.join( self.admin ) ) )
                for chan in [ chan for chan in self.admin_channels if chan in self.channel_ops ]:
                    self.notice( source, ' - {0}: {1}'.format( chan, ' '.join( self.channel_ops[ chan ] ) ) )
                return

        if cmd == 'help':
            if len( args ) > 0:
                if args[0] == 'module':
                    if len( args ) < 2:
                        pass
                    elif self.modules.module_is_loaded( args[1] ):
                        module = self.modules.get_module( args[1] )
                        self.notice( target, module.__doc__ )
                else:
                    for ( module_name, module ) in self.modules.get_loaded_modules():
                        if module.has_cmd( args[0] ):
                            self.notice( target, module.get_cmd( args[0] ).__doc__ )
            else:
                self.notice( target, '!help: this help text (send !help <command> for command help, send !help module <module> for module help)' )
                for ( module_name, module ) in [ lst for lst in self.modules.get_loaded_modules() if lst[1].has_commands and not lst[1].admin_only ]:
                    cmds = module.get_cmd_list()
                    self.notice( target, ' * {0}: {1}'.format( module_name, ', '.join( cmds ) if len( cmds ) > 0 else 'No commands' ) )

        elif admin and cmd == 'admin_help':
            if len( args ) > 0:
                for ( module_name, module ) in self.modules.get_loaded_modules():
                    if module.has_admin_cmd( args[0] ):
                        self.notice( source, module.get_admin_cmd( args[0] ).__doc__ )
            else:
                self.notice( source, '!admin_help: this help text (send !admin_help <command> for command help' )
                self.notice( source, '!die:                                   kill the bot' )
                self.notice( source, '!raw:                                   send raw irc command' )
                self.notice( source, '!admins:                                see who are admin' )
                self.notice( source, '!restart_class:                         restart the main Bot class' )
                for ( module_name, module ) in self.modules.get_loaded_modules():
                    cmds = module.get_admin_cmd_list()
                    if len( cmds ) > 0:
                        self.notice( source, ' * {0}: {1}'.format( module_name, ', '.join( cmds ) ) )
        else:
            for ( module_name, module ) in self.modules.get_loaded_modules():
                try:
                    if module.has_cmd( cmd ):
                        lines = module.get_cmd( cmd )(args=args, source=source, target=target, admin=admin)
                        if lines:
                            for line in lines:
                                self.notice( target, line )
                    elif admin and module.has_admin_cmd( cmd ):
                        lines = module.get_admin_cmd(cmd)(args=args, source=source, target=target, admin=admin)
                        if lines:
                            for line in lines:
                                self.notice( source, line )
                except Exception as e:
                    logging.exception( "Module '{0}' handle error: {1}".format( module_name, e ) )

    def on_privmsg(self, c, e):
        """Dispatch a private message to modules, then process it as a command."""
        logging.debug("on_privmsg")

        source = e.source.nick
        target = e.target if is_channel( e.target ) else source
        message = e.arguments[0]

        self.__module_handle('privmsg', source=source, target=target, message=message)
        try:
            self.__process_command( c, e )
        except BotExitException as e:
            raise e
        except BotReloadException as e:
            self.connection.disconnect( "Reloading bot..." )
            self.modules.unload()
            raise e
        except Exception as e:
            logging.exception( 'Error in __process_command: %s', e )

    def on_pubmsg(self, c, e):
        """Channel messages share the private-message handling path."""
        logging.debug("on_pubmsg")
        self.on_privmsg(c, e)

    def on_pubnotice(self, c, e):
        self.on_notice( c, e )
    def on_privnotice(self, c, e):
        self.on_notice(c, e)

    def on_notice(self, c, e):
        """Forward NOTICEs to interested modules."""
        source = e.source
        target = e.target
        message = e.arguments[0]
        logging.debug('notice! source: {}, target: {}, message: {}'.format(source, target, message))
        self.__module_handle('notice', source=source, target=target, message=message)

    def on_join(self, connection, event):
        # Refresh the NAMES list so channel_ops stays current.
        self.connection.names([event.target])
        self.__module_handle('join', connection=connection, event=event)

    def on_part(self, c, e):
        self.connection.names([e.target])

    def on_kick(self, c, e):
        self.connection.names([e.target])

    def on_mode( self, c, e ):
        self.connection.names( [e.target] )

    def on_endofnames(self, c, e):
        """Cache the channel operators after a NAMES reply completes."""
        channel, text = e.arguments
        if channel not in self.channels:
            return
        self.channel_ops[channel] = list(self.channels[channel].opers())

    # def on_nick(self, c, e):
    #     self.connection.names(self.channels.keys())

    def on_nicknameinuse( self, c, e ):
        """Gets called if the server complains about the name being in use. Tries to set the nick to nick + '_'"""
        logging.debug( "on_nicknameinuse" )
        c.nick( c.get_nickname() + "_" )

    def on_welcome(self, connection, event):
        """Join the configured channels once the server greets us."""
        for chan in self.current_server['channels']:
            connection.join( chan )
        self.__module_handle('welcome', connection=connection, event=event)

    def get_config_groups( self ):
        """Return the distinct config group names."""
        resultset = self.db.execute( 'select distinct `group` from config' )
        return [ g for ( g, ) in resultset.fetchall() ]

    def get_config( self, group, key = None, default = None ):
        """gets a config value"""
        logging.info( 'get config %s.%s', group, key )
        if key is None:
            # No key: return the whole group as a dict.
            resultset = self.db.execute( 'select `key`, `value` from config where `group` = :group', { 'group': group } )
            values = {}
            for ( key, value ) in resultset.fetchall():
                values[ key ] = value
            return values
        else:
            resultset = self.db.execute( 'select `value` from config where `group` = :group and `key` = :key', { 'group': group, 'key': key } )
            value = resultset.fetchone()
            if value is None:
                if default is not None:
                    return default
                raise Exception('Value not found')
            return value[0]

    def set_config( self, group, key, value ):
        """sets a config value"""
        logging.info( 'set config %s.%s to "%s"', group, key, value )
        cursor = self.db.cursor()
        data = { 'group': group, 'key': key, 'value': value }
        if value is None:
            # None clears the setting.
            cursor.execute( 'delete from config where `group` = :group and `key` = :key', data )
        else:
            try:
                self.get_config( group, key )
                cursor.execute( 'update config set `value` = :value where `group` = :group and `key` = :key', data )
            except Exception:
                # Not present yet -- insert instead of update.
                cursor.execute( 'insert into config ( `group`, `key`, `value` ) values( :group, :key, :value )', data )
        cursor.close()
        self.db.commit()
Beispiel #7
0
class CrawlerBase:
    """
    爬虫基类,提供通用模块方法的实现,供子类复用
    """

# region 初始化方法
    # __metaclass__ = LogMetaclass

    def __init__(self, pinyin, config_dict, check_dict, callback):
        """
        Initialize object parameters.
        :param pinyin: province name in pinyin
        :param config_dict: module configuration dict
        :param check_dict: content-check config; may contain 'html_check_dict'
            and/or 'json_check_dict'
        :param callback: external callback, invoked once per crawled company
        """
        # Holder for generic, business-agnostic utilities (logging, UA, ...)
        self.holder = HolderUtil(pinyin)
        self.pinyin = pinyin
        # Crawl scheduling is delegated to this controller
        self.crawl_delegate = CrawlerControl(self)
        # Module configuration dict
        self.config_dict = config_dict
        # Web content downloader
        self.downloader = DownLoader(pinyin, self.holder.logging)
        # HTML content checks
        self.html_check_dict = check_dict['html_check_dict'] if check_dict and 'html_check_dict' in check_dict else None
        # JSON content checks
        self.json_check_dict = check_dict['json_check_dict'] if check_dict and 'json_check_dict' in check_dict else None
        # Marker strings used to decide a search-result page found no company
        self.non_company_set = {"无查询结果","未查询到相关记录"}
        # External callback, called once after each company is crawled
        self.callback = callback
        # Parser switches
        self.parse_on = False           # everything except jbxx/gdxx; False = no parsing
        self.parse_jbxx_on = True      # jbxx (basic info) switch
        self.parse_gdxx_on = True       # gdxx (shareholder info) switch
        # Crawl statistics collector
        self.statistic = CrawlerStatic(self.holder.logging)
        # Decides whether annual reports (nbxx) need crawling
        self.nb_judge = NbxxApiControler()
        pass

    def setNonCompanyConfig(self, non_company_set):
        """Override the marker strings used to detect a "no such company" page.

        :param non_company_set: iterable of marker substrings
        :return: None
        """
        self.non_company_set = non_company_set

    def initConfig(self):
        """Initialize module configuration (keyword- and url-based modes).

        Iterates self.config_dict: for each mode, switches the module manager to
        that mode and invokes every registered init function, passing the manager
        when the function accepts a second positional argument.

        Raises:
            Exception: if self.config_dict is empty.
        """
        if not self.config_dict:
            raise Exception("配置列表为空,请检查!")
        # Module manager keeps per-mode module registrations.
        self.module_manager = ModuleManager()
        self.holder.logging.info("加载模块配置信息")
        for mode in self.config_dict:
            self.module_manager.switchToMode(mode)
            for init_function in self.config_dict[mode]:
                # inspect.getargspec was deprecated and removed in Python 3.11;
                # getfullargspec is the drop-in replacement for .args here.
                varnames = inspect.getfullargspec(init_function).args
                if len(varnames) == 2:
                    # (self, manager)-style initializer.
                    init_function(self.module_manager)
                else:
                    init_function()

    def init(self):
        '''
        Crawl initialization; runs once before every crawl.
        :return:
        '''
        self.holder.init()
        # value dictionary: used for passing value between functions
        self.value_dict = {'ua':self.holder.ua}
        # all company results
        self.result_list = list()
        # company dictionary: used for storing one company value between functions
        self.result_dict = dict()
        # all company pages
        self.page_list = list()
        # one company html page
        self.page_dict = dict()
        # snapshot the middle values for sub modules
        self.value_dict_snap = dict()
        # re-initialize the downloader before every crawl
        self.downloader.firstInit()
        # seed access report for this crawl run
        self.report = SeedAccessReport(0, 0, SeedAccessType.ERROR)
        # restore the initial module state before every crawl
        self.initConfig()

# endregion

#region rowkey 计算

    def setRowKey(self, map_dict=None):
        """Compute the rowkey, falling back to the default field mapping when none is given."""
        fallback = {'名称':'company_name','注册号':'company_zch','信用代码':'company_zch'}
        return self.defaultRowKey(map_dict or fallback)

    def defaultRowKey(self, map_dict=None):
        """Build page_dict['rowkey_dict'] from the first company record matching map_dict.

        :param map_dict: maps field-name substrings to rowkey keys
        :return: True when at least one mapping was found, else False
        """
        # Nothing to do without company results or a mapping.
        if 'company' not in self.result_dict or not map_dict:
            return False
        rowkey = dict()
        for record in self.result_dict['company']:
            if not record:
                continue
            for field, value in record.items():
                for pattern, target in map_dict.items():
                    # Substring match: e.g. '名称' matches '企业名称'.
                    if pattern in field:
                        rowkey[target] = value
            # Stop at the first record that yields any mapping.
            if rowkey:
                break
        self.page_dict['rowkey_dict'] = rowkey
        return bool(rowkey)

#endregion

# region 爬行入口方法 crawl crawl_url
    def crawl(self, company_key):
        """Crawl company information by search keyword."""
        self.init()
        # Switch to keyword-based crawling mode.
        self.module_manager.switchToMode(CrawlerRunMode.COMPANY_KEY)
        self.holder.logging.info(u"通过关键词(%s)开始抓取信息" % company_key)
        stripped = company_key.strip()
        self.value_dict['company_key'] = stripped
        self.value_dict['search_company'] = stripped
        return self._delegateCrawl(CrawlerRunMode.COMPANY_KEY)

    def crawlUrl(self, company_url, company_name):
        """Crawl company information by company name and detail-page url."""
        self.init()
        # Switch to url-based crawling mode.
        self.module_manager.switchToMode(CrawlerRunMode.COMPANY_URL)
        self.holder.logging.info(u"通过公司名(%s)和url(%s)开始抓取信息" % (company_name, company_url))
        self.value_dict['search_company'] = company_name
        self.value_dict['company_url'] = company_url
        return self._delegateCrawl(CrawlerRunMode.COMPANY_URL)

    def _delegateCrawl(self, model):
        """Run the delegated crawl with up to 5 retries on ERROR results.

        :param model: crawl mode (CrawlerRunMode.*) restored between retries
        :return: the final SeedAccessReport
        """
        # Keep a pristine copy of the input values so each retry starts clean.
        v_dict = copy.deepcopy(self.value_dict)
        retry_times = 1
        while retry_times <= 5:
            self.crawl_delegate.crawl()
            # Aggregate per-company crawl status into the seed report.
            self.seedReport()
            self.holder.logging.info(u"本次抓取结果类型:%s" % SeedAccessType.description(self.report.access_type))
            if self.report.access_type == SeedAccessType.ERROR:
                self.holder.logging.info(u"抓取失败,开始第 %s 次重试!" % retry_times)
            else:
                break
            retry_times += 1
            self.init()
            # init() resets module state, so the crawl mode must be restored.
            self.module_manager.switchToMode(model)
            self.value_dict = copy.deepcopy(v_dict)
        self.statistic.statistic(self.report, retry_times-1)
        self.statistic.description()
        return self.report

# endregion

# region 模块方法

    def visitHomePage(self, module):
        """Fetch the site home page and feed it to the configured output parsers.

        :param module:
        :return:
        """
        web = self.fetchWebContent(module, u"访问首页,期望输出参数 %s")
        if web.body:
            self.parseOutput(module.outputs, web.body)
        else:
            self.holder.logging.warn(u"获取 (%s) 信息失败" % module.name)

    def visitValidateCode(self, module):
        """Fetch the captcha image and solve it via yzm_util; stores the solved
        text in value_dict['yzm'] and the image path in value_dict['img_path'].

        :param module:
        :return:
        """
        web = self.fetchWebContent(module, u"访问验证码,期望输出参数 %s", is_pic=True)
        yzm_type = None
        if self.holder.debug == 0:
            # In non-debug mode the solver is keyed by the province pinyin.
            yzm_type = self.holder.pinyin
        if not web.body:
            self.holder.logging.error(u"获取的验证码图片为空!")
            return
        url = module.getInputByType(InputType.URL, self.value_dict, self.holder.logging)
        try:
            (yzm, code_id, is_report_error, recChar, img_path) = yzm_util.parse_yzm(url, web.body, 5000, yzm_max_len=6,
                                                                                    type=yzm_type, holder=self.holder)
        except Exception as e:
            # Captcha solving failed -- rotate the proxy before re-raising.
            self.downloader.changeProxy()
            raise Exception(e)
        self.value_dict['yzm'] = yzm
        self.value_dict['img_path'] = img_path

    def visitSearchList(self, module):
        """Fetch the company search-result list page and parse its outputs.
        Flags "no such company" and blocked/violation responses on the report.

        :param module:
        :return:
        """
        web = self.fetchWebContent(module, u"访问公司列表,期望输出参数 %s")
        # print("搜索结果列表页:%s" % company_list_html)
        if not web.body:
            self.holder.logging.warn(u"获取公司列表信息失败")
            return
        # "No such company" detection via the configured marker strings.
        for keyword in self.non_company_set:
            if keyword in web.body:
                self.holder.logging.warn(u"无此公司!")
                self.report.access_type = SeedAccessType.NON_COMPANY
                return
        if web.access_type == WebAccessType.TOO_OFTEN:
            # Rate-limited by the site; blacklist the current identity/proxy.
            self.holder.logging.warning(u"访问过于频繁,可能已被网站禁止访问!!!")
            self.downloader.insertBlack()
            return
        elif web.access_type == WebAccessType.ACCESS_VIOLATION:
            self.holder.logging.warning(u"非法访问!!!")
            return
        self.parseOutput(module.outputs, web.body)
        pass
            
    def visitTopInfo(self, module):
        """Fetch the page-top info block and store the raw html; no table parsing here.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的Top信息,期望输出参数 %s")
        self.appendWebContent(u'top_html', web)
        if not web:
            return
        # Make the raw html available to downstream modules.
        self.value_dict['html'] = web.body
        self.parseOutput(module.outputs, web.body)

    def visitJbxx(self, module):
        """Fetch the basic-info (jbxx) page, optionally parse it, and expose results.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的基本信息,期望输出参数 %s")
        self.appendWebContent(u'jbxx_html', web)
        if not web:
            return
        if self.parse_jbxx_on:
            self.parseHtmlTable(u"解析(%s)的基本信息")
        # Expose parsed company data to downstream modules, when present.
        if 'company' in self.result_dict:
            self.value_dict['company'] = self.result_dict['company']
        self.parseOutput(module.outputs, web.body)

    def visitGdxx(self, module):
        """Fetch the shareholder-info (gdxx) page and accumulate parsed rows.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的股东信息,期望输出参数 %s")
        self.appendWebContent(u'gdxx_html', web)
        if not web:
            return
        self.value_dict['html'] = web.body
        gdxx_list = self.parseHtmlTable(u"解析(%s)的股东信息") if self.parse_gdxx_on else None
        if not gdxx_list:
            return
        # Accumulate across multiple gdxx pages.
        combined = self.value_dict.get('gdxx_list', [])
        combined.extend(gdxx_list)
        self.value_dict['gdxx_list'] = combined

    def visitGdxq(self, module):
        """Fetch the shareholder-detail (gdxq) page and parse it when enabled.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的股东详细信息,期望输出参数 %s")
        self.appendWebContent(u'gdxq_html', web)
        if web and self.parse_on:
            self.parseGdxq()

    def visitBgxx(self, module):
        """Fetch the change-record (bgxx) page and parse it when enabled.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的变更信息,期望输出参数 %s")
        self.appendWebContent(u'bgxx_html', web)
        if web and self.parse_on:
            self.parseHtmlTable(u"解析(%s)的变更信息")

    def visitBaxx(self, module):
        """Fetch the filing-record (baxx) page and parse it when enabled.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的备案信息,期望输出参数 %s")
        self.appendWebContent(u'baxx_html', web)
        if web and self.parse_on:
            self.parseHtmlTable(u"解析(%s)的备案信息")
    
    def visitFzjg(self, module):
        """Fetch the branch-office (fzjg) page and parse it when enabled.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的分支机构信息,期望输出参数 %s")
        self.appendWebContent(u"fzjg_html", web)
        if web and self.parse_on:
            self.parseHtmlTable(u"解析(%s)的分支机构信息")

    def visitXzcf(self, module):
        """Fetch the administrative-penalty (xzcf) page and parse it when enabled.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的行政处罚信息,期望输出参数 %s")
        self.appendWebContent(u"xzcf_html", web)
        if web and self.parse_on:
            self.parseHtmlTable(u"解析(%s)的行政处罚信息")

    def visitQynbList(self, module):
        """Fetch the annual-report (qynb) list page and parse its outputs.

        :param module:
        :return:
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的企业年报列表,期望输出参数 %s")
        if web:
            self.parseOutput(module.outputs, web.body)

    def visitQynb(self, module):
        """
        Fetch one annual-report (qynb) page, archive it under a key derived
        from the report year, and parse outputs / table data.
        (Original docstring said "administrative penalties" — copy-paste slip.)
        :param module: crawl-module definition for this step
        :return: None
        """
        web = self.fetchSpecificCompany(module, u"访问 (%s) 的企业年报信息,期望输出参数 %s")
        # assumes 'nb_name' was placed in value_dict by the list step — KeyError otherwise
        key = u"qynb_%s_html" % self.value_dict['nb_name']
        self.appendWebContent(key, web)
        if not web: return
        self.parseOutput(module.outputs, web.body)
        if self.parse_on:
            self.parseHtmlTable(u"解析(%s)的企业年报信息")

    def resultCollect(self, module):
        """
        Collect crawl results: map the raw 'company' records through
        ParserMapper, store them as 'company_mapped', then deliver.
        :param module: crawl-module definition for this step
        :return: None
        """
        if self.parse_on and 'company' in self.result_dict:
            raw_records = self.result_dict['company']
            self.result_dict['company_mapped'] = ParserMapper.doMap(mapper.transform, raw_records)
        self.resultDelivery(module)

    def resultDelivery(self, module):
        """
        Deliver one company's results to the external callback:
        1. clean the intermediate page set
        2. tag the page-fetch status
        3. archive page/result dicts and invoke the callback
        :param module: crawl-module definition for this step
        :return: None
        """
        company_mapped = self.result_dict.get('company_mapped')
        self.cleanWebContents()
        wrapped_pages = self.wrapReturnObject()
        self.page_list.append(self.page_dict)
        self.page_dict = dict()
        self.result_list.append(self.result_dict)
        self.result_dict = dict()
        self.callback(wrapped_pages, company_mapped)

    def visitTopInfoJson(self, module):
        """
        Fetch the page-top JSON blob and archive it under 'top_json'.
        :param module: crawl-module definition for this step
        :return: None
        """
        top_web = self.fetchJson(module, u"访问 (%s) 的Top信息,期望输出参数 %s")
        self.appendWebContent(u'top_json', top_web)

    def visitJbxxJson(self, module):
        """
        Fetch the basic-info (jbxx) JSON, archive it under 'jbxx_json',
        and parse it when jbxx parsing is enabled.
        :param module: crawl-module definition for this step
        :return: None
        """
        web = self.fetchJson(module, u"访问 (%s) 的基本信息,期望输出参数 %s")
        self.appendWebContent(u'jbxx_json', web)
        if not self.parse_jbxx_on:
            return
        self.parseJson(module)

    def visitGdxxJson(self, module):
        """
        Fetch (or reuse pre-extracted) shareholder (gdxx) JSON, archive it
        under 'gdxx_json', and parse it into 'gdxx_list' when enabled.
        :param module: crawl-module definition for this step
        :return: None
        """
        body = None
        if module.web_content:
            # the content may have been produced by a parent module and
            # stashed in value_dict; otherwise web_content is the literal body
            body = self.value_dict.get(module.web_content, module.web_content)
            web = WebContent(status_code=200, body=body, content_type=WebContentType.JSON)
        else:
            web = self.fetchJson(module, u"访问 (%s) 的股东信息,期望输出参数 %s")
        self.appendWebContent(u'gdxx_json', web)
        # When the JSON came from a parent module but its body is None, the parent
        # simply found nothing to extract — not an error state (e.g. 长白山森工集团
        # 安图林业有限公司安林物流中心分公司).
        if module.web_content and body is None:
            return
        if self.parse_gdxx_on:
            self.value_dict['gdxx_list'] = self.parseJson(module, web.body)

    def visitGdxqJson(self, module):
        """
        Fetch the shareholder-detail (gdxq) JSON and archive it under 'gdxq_json'.
        :param module: crawl-module definition for this step
        :return: None
        """
        detail_web = self.fetchJson(module, u"访问 (%s) 的股东详情信息,期望输出参数 %s")
        self.appendWebContent(u'gdxq_json', detail_web)

    def visitBgxxJson(self, module):
        """
        Fetch (or reuse pre-extracted) change-record (bgxx) JSON, archive it
        under 'bgxx_json', and parse it when enabled.
        :param module: crawl-module definition for this step
        :return: None
        """
        if module.web_content:
            # content may come pre-extracted from a parent module via value_dict
            body = self.value_dict.get(module.web_content, module.web_content)
            web = WebContent(status_code=200, body=body, content_type=WebContentType.JSON)
        else:
            web = self.fetchJson(module, u"访问 (%s) 的变更信息,期望输出参数 %s")
        self.appendWebContent(u'bgxx_json', web)
        if self.parse_on:
            self.parseJson(module, web.body)

    def visitBaxxJson(self, module):
        """
        Fetch the filing-record (baxx) JSON, archive it under 'baxx_json',
        and parse it when enabled.
        :param module: crawl-module definition for this step
        :return: None
        """
        web = self.fetchJson(module, u"访问 (%s) 的备案信息,期望输出参数 %s")
        self.appendWebContent(u'baxx_json', web)
        if not self.parse_on:
            return
        self.parseJson(module)

    def visitFzjgJson(self, module):
        """
        Fetch the branch-office (fzjg) JSON, archive it under 'fzjg_json',
        and parse it when enabled.
        :param module: crawl-module definition for this step
        :return: None
        """
        web = self.fetchJson(module, u"访问 (%s) 的分支机构信息,期望输出参数 %s")
        self.appendWebContent(u'fzjg_json', web)
        if not self.parse_on:
            return
        self.parseJson(module)

    def visitXzcfJson(self, module):
        """
        Fetch the administrative-penalty (xzcf) JSON, archive it under
        'xzcf_json', and parse it when enabled.
        :param module: crawl-module definition for this step
        :return: None
        """
        web = self.fetchJson(module, u"访问 (%s) 的行政处罚信息,期望输出参数 %s")
        self.appendWebContent(u'xzcf_json', web)
        if not self.parse_on:
            return
        self.parseJson(module)

    def visitQynbJson(self, module):
        """
        Fetch one annual-report (qynb) JSON page, archived under a key
        derived from the report year, and parse it when enabled.
        :param module: crawl-module definition for this step
        :return: None
        """
        web = self.fetchJson(module, u"访问 (%s) 的企业年报信息,期望输出参数 %s")
        # assumes 'nb_name' was placed in value_dict by the list step
        self.appendWebContent(u"qynb_%s_json" % self.value_dict['nb_name'], web)
        if not self.parse_on:
            return
        self.parseJson(module)

    def getWebHtml(self, module):
        """
        Generic module step: fetch an html page and expose the result through
        value_dict['html'] / value_dict['web'].

        Side effects: resets those keys first, pushes module cookies to the
        downloader, sleeps per module config after the request, classifies the
        page body, and extracts the module's declared outputs from it.
        :param module: crawl-module definition (url, headers, outputs, ...)
        :return: None (callers read value_dict)
        """
        self.value_dict['html'] = None
        self.value_dict['web'] = None
        url, headers, method, post_data = module.getHttpInput(self.value_dict, self.holder.logging)
        if not url:
            self.holder.logging.warn(u"缺少url参数")
            return None
        encoding = module.getInputByType(InputType.ENCODING, self.value_dict, self.holder.logging)
        accept_code = module.getInputByType(InputType.STATUS_CODE, self.value_dict, self.holder.logging)
        self.holder.logging.info(u"访问%s,获取输出参数 %s" % (url, module.outputsDescription()))
        self.setCookie(module)
        web = crawler_util.request(downloader=self.downloader,
                                   url=url,
                                   method=method,
                                   headers=headers,
                                   data=post_data,
                                   encoding=encoding,
                                   ua=self.holder.ua,
                                   use_proxy=module.use_proxy,
                                   holder=self.holder,
                                   accept_code=accept_code)
        # per-module throttle sleep
        crawler_util.moduleSleep(module, self.holder)
        self.htmlContentCheck(web)
        module.detectWebContent(web=web, log=self.holder.logging)
        self.value_dict['html'] = web.body if web else None
        self.value_dict['web'] = web
        if web and web.body:
            self.parseOutput(module.outputs, web.body)
        # NOTE(review): htmlContentCheck already ran above; this second call is
        # redundant unless detectWebContent/parseOutput mutate web — confirm.
        self.htmlContentCheck(web)

    def getJson(self, module):
        """
        Generic module step: fetch a json page, decode its body, and expose
        the result through value_dict['json'] / value_dict['web'].
        :param module: crawl-module definition (url, headers, outputs, ...)
        :return: the fetched WebContent, or None when no url was configured
        """
        self.value_dict['json'] = None
        self.value_dict['web'] = None
        search_company = self.value_dict.get('search_company', '')
        self.holder.logging.info(u"访问json信息[company_key=%s],获取输出参数 %s" % (search_company, module.outputsDescription()))
        url, headers, method, post_data = module.getHttpInput(self.value_dict, self.holder.logging)
        if not url:
            self.holder.logging.warn(u"缺少url参数")
            return None
        encoding = module.getInputByType(InputType.ENCODING, self.value_dict, self.holder.logging)
        accept_code = module.getInputByType(InputType.STATUS_CODE, self.value_dict, self.holder.logging)
        self.setCookie(module)
        web = crawler_util.request(downloader=self.downloader,
                                   url=url,
                                   method=method,
                                   headers=headers,
                                   data=post_data,
                                   encoding=encoding,
                                   ua=self.holder.ua,
                                   use_proxy=module.use_proxy,
                                   holder=self.holder,
                                   accept_code=accept_code)
        # per-module throttle sleep
        crawler_util.moduleSleep(module, self.holder)
        # NOTE(review): web is dereferenced unconditionally below — presumably
        # crawler_util.request never returns None here; confirm.
        web.content_type = WebContentType.JSON
        self.jsonContentCheck(web)
        module.detectWebContent(web=web, log=self.holder.logging)
        body = web.body if web.body else ''
        self.holder.logging.info(u"本次json抓取结果:\n"+body)
        if body:
            json_data = json.loads(web.body)
            self.value_dict['json'] = json_data
        self.value_dict['web'] = web
        return web

# endregion

# region 抓取页面内容

    def fetchWebContent(self, module, prompt_info, is_pic=False):
        """
        Fetch a pre-search-list page (e.g. entry/captcha pages) and expose it
        through value_dict['html'] / value_dict['web'].
        :param module: crawl-module definition (url, headers, outputs, ...)
        :param prompt_info: log-message template taking the outputs description
        :param is_pic: True when fetching an image (e.g. a captcha)
        :return: the fetched WebContent, or None when no usable url exists
        """
        self.value_dict["html"] = None
        self.value_dict['web'] = None
        self.holder.logging.info(prompt_info % module.outputsDescription())
        url, headers, method, post_data = module.getHttpInput(self.value_dict, self.holder.logging)
        if not url:
            self.holder.logging.warn(u"缺少url参数")
            return None
        # a NONE_TYPE url marker means "legitimately nothing to fetch"
        elif url == OutputType.NONE_TYPE:
            return None
        encoding = module.getInputByType(InputType.ENCODING, self.value_dict, self.holder.logging)
        accept_code = module.getInputByType(InputType.STATUS_CODE, self.value_dict, self.holder.logging)
        self.setCookie(module)
        web = crawler_util.request(downloader=self.downloader,
                                   url=url,
                                   method=method,
                                   headers=headers,
                                   data=post_data,
                                   encoding=encoding,
                                   ua=self.holder.ua,
                                   is_pic=is_pic,
                                   use_proxy=module.use_proxy,
                                   holder=self.holder,
                                   accept_code=accept_code)
        # per-module throttle sleep
        crawler_util.moduleSleep(module, self.holder)
        self.htmlContentCheck(web)
        # the first module is passed as the redo target for content detection
        redo_module = self.module_manager.getFirstModule()
        module.detectWebContent(web=web, redo_module=redo_module.module_id, log=self.holder.logging)
        self.value_dict['html'] = web.body if web else None
        self.value_dict['web'] = web
        return web

    def fetchSpecificCompany(self, module, prompt_info):
        """
        Fetch a page belonging to the currently selected company and expose it
        through value_dict['html'] / value_dict['web'].
        :param module: crawl-module definition (url, headers, outputs, ...)
        :param prompt_info: log template taking (company, outputs description)
        :return: the fetched WebContent, or None when no usable url exists
        """
        self.value_dict["html"] = None
        self.value_dict['web'] = None
        search_company = self.value_dict.get('search_company', '')
        self.holder.logging.info(prompt_info % (search_company, module.outputsDescription()))
        url, headers, method, post_data = module.getHttpInput(self.value_dict, self.holder.logging)
        if not url:
            self.holder.logging.warn(u"缺少url参数")
            return None
        # some sources legitimately yield an empty url (e.g. Guangdong-Shenzhen credit site)
        elif url == OutputType.NONE_TYPE:
            return None
        encoding = module.getInputByType(InputType.ENCODING, self.value_dict, self.holder.logging)
        accept_code = module.getInputByType(InputType.STATUS_CODE, self.value_dict, self.holder.logging)
        self.setCookie(module)
        web = crawler_util.request(downloader=self.downloader,
                                   url=url,
                                   method=method,
                                   headers=headers,
                                   data=post_data,
                                   encoding=encoding,
                                   ua=self.holder.ua,
                                   use_proxy=module.use_proxy,
                                   holder=self.holder,
                                   accept_code=accept_code)
        # per-module throttle sleep
        crawler_util.moduleSleep(module, self.holder)
        self.htmlContentCheck(web)
        module.detectWebContent(web=web, log=self.holder.logging)
        self.value_dict['html'] = web.body if web else None
        self.value_dict['web'] = web
        return web

    def fetchJson(self, module, prompt_info):
        """
        Log the fetch prompt for the current company, then delegate the
        actual json retrieval to getJson.
        :param module: crawl-module definition for this step
        :param prompt_info: log template taking (company, outputs description)
        :return: whatever getJson returns (WebContent or None)
        """
        self.value_dict['json'] = None
        company = self.value_dict.get('search_company', '')
        self.holder.logging.info(prompt_info % (company, module.outputsDescription()))
        return self.getJson(module)

    def setCookie(self, module):
        """Push a module-configured cookie into the downloader, if any."""
        cookie = module.getInputByType(InputType.COOKIE, self.value_dict, self.holder.logging)
        if not cookie:
            return
        self.downloader.cookieUpdate(cookie)

# endregion

# region 解析模块输出、html页面、json页面及股东详情信息

    def parseOutput(self, outputs, html):
        """
        Extract each declared module output from *html* into value_dict,
        via xpath when available, else via regex.
        :param outputs: the module's declared output descriptors
        :param html: page content to extract from
        :return: None (results land in value_dict keyed by output name)
        """
        if not html or not outputs:
            return
        tree = etree.HTML(html)
        for output in outputs:
            # NOTE(review): `if tree` relies on lxml element truthiness
            # (element has children), not an is-not-None test — confirm intended.
            if tree and output.xpath:
                if output.type == OutputType.LIST:
                    result = tree.xpath(output.xpath)
                else:
                    result = "".join(tree.xpath(output.xpath))
            elif output.regex:
                if output.type == OutputType.LIST:
                    result = re.findall(output.regex, html)
                else:
                    result = "".join(re.findall(output.regex, html))
            else:
                continue
            # auto-merge same-named list results instead of overwriting
            if output.name in self.value_dict and isinstance(self.value_dict[output.name], list) and isinstance(result, list):
                self.value_dict[output.name].extend(result)
            else:
                self.value_dict[output.name] = result

    def parseHtmlTable(self, prompt_info, should_collect_result=True):
        """
        Parse the current html table layout into key/value records.
        :param prompt_info: log template taking the company name
        :param should_collect_result: append records into result_dict['company']
               (False for shareholder details, which update existing records)
        :return: list of parsed records
        :raises Exception: when no html page is available in value_dict
        """
        self.holder.logging.info(prompt_info % self.value_dict.get('search_company', ''))

        self.result_dict.setdefault('company', list())
        html = self.value_dict.get('html')
        if not html:
            raise Exception(u"未获取到html页面")
        info_list = TableParseUtil(html).parse()
        self.holder.logging.info(u"本次模块解析结果:\n %s", json.dumps(info_list))
        if should_collect_result:
            self.result_dict['company'].extend(info_list)
        return info_list

    def parseJson(self, module, json_obj=None):
        """
        Parse a json payload into company records via JsonParseUtil.
        :param module: supplies mapper_config for the json parser
        :param json_obj: fallback payload used only when value_dict lacks 'json'
        :return: list of parsed records, or None when nothing usable was found
        """
        if 'json' in self.value_dict:
            # NOTE(review): a 'json' entry always wins, even when json_obj was
            # passed explicitly — confirm that precedence is intended.
            json_obj = self.value_dict['json']
        # this test must not be simplified: empty list and None mean different things
        elif json_obj is None:
            # raise Exception("未获取到json页面")  # some sites legitimately lack sections (e.g. 长白山森工集团安图林业有限公司安林物流中心分公司 has no filing/branch data at all)
            self.holder.logging.error(u"未获取到json页面!!!")
            return None
        if isinstance(json_obj, basestring):
            json_obj = json.loads(json_obj)
        if not json_obj:
            if isinstance(json_obj, list):
                self.holder.logging.warn(u"成功得到了json页面内容,但json体为空!")
            else:
                self.holder.logging.error(u"未获取到json页面!!!")
            return None
        parser = JsonParseUtil()
        info_list = parser.parse(json_obj, module.mapper_config)
        if not info_list:
            return None
        if 'company' not in self.result_dict:
            self.result_dict['company'] = list()
        self.result_dict['company'].extend(info_list)
        self.holder.logging.info(u"本次模块解析结果:\n %s", json.dumps(info_list))
        return info_list

    def parseGdxq(self):
        """
        Parse shareholder-detail records and attach the first one to the
        matching shareholder record (gdxx_rcd) in value_dict, either by
        replacing an existing dict-valued field or under a derived
        '<prefix>.详情' key.
        :return: None
        """
        gdxq_list = self.parseHtmlTable(u"解析(%s)的股东详情信息", False)

        if not gdxq_list or len(gdxq_list) == 0:
            self.holder.logging.info(u"未获取到股东详情信息")
            return
        if 'gdxx_rcd' not in self.value_dict:
            return
        gdxx_rcd = self.value_dict['gdxx_rcd']
        if not gdxx_rcd or not isinstance(gdxx_rcd, dict):
            return
        for key in gdxx_rcd:
            try:
                # SECURITY(review): eval() on scraped field values executes
                # arbitrary expressions from untrusted page data — consider
                # ast.literal_eval instead; confirm callers before changing.
                if isinstance(eval(gdxx_rcd[key]), dict):
                    gdxx_rcd[key] = gdxq_list[0]
                    return
            except Exception as e:
                self.holder.logging.warn(e.message)
        # no dict-valued field: derive a dotted key prefix from existing keys
        key = ''
        for rcd_key in gdxx_rcd.keys():
            if '.' not in rcd_key:
                continue
            keys = rcd_key.split('.')
            key = ''
            if len(keys) >= 2:
                for i in range(0, len(keys)-1):
                    key += keys[i]+'.'
            if key:
                break
        key += u'详情'
        gdxx_rcd[key] = gdxq_list[0]

# endregion

#region 中间结果状态的保存与恢复

    def snapshot(self, snap_id):
        """
        Store a deep copy of the current intermediate state (value_dict)
        under *snap_id* so it can be restored later.
        :param snap_id: identifier for the stored state
        :return: None
        """
        state_copy = copy.deepcopy(self.value_dict)
        self.value_dict_snap[snap_id] = state_copy

    def recoverFromSnapshot(self, snap_id):
        """
        Restore value_dict from a previously stored snapshot.
        :param snap_id: identifier of the snapshot to restore
        :return: None (logs a warning when the snapshot does not exist)
        """
        if snap_id and snap_id in self.value_dict_snap:
            self.value_dict = self.value_dict_snap[snap_id]
        else:
            self.holder.logging.warning("snap id %s not exist!!" % snap_id)

#endregion

# region Web页面内容检查

    def htmlContentCheck(self, web):
        """Classify *web* using the html keyword configuration."""
        check_dict = self.html_check_dict
        self.WebKeywordCheck(web, check_dict)

    def jsonContentCheck(self, web):
        """Classify *web* using the json keyword configuration."""
        check_dict = self.json_check_dict
        self.WebKeywordCheck(web, check_dict)

    def WebKeywordCheck(self, web, check_dict):
        """
        Classify a fetched page by scanning its body for configured keywords.
        :param web: WebContent whose access_type may be rewritten
        :param check_dict: keyword -> WebAccessType mapping (later entries win)
        :return: web when the keyword scan ran, else None
        """
        if web.access_type != WebAccessType.OK:
            return
        if not web.body:
            web.access_type = WebAccessType.NO_CONTENT
            return
        # (removed dead branch: access_type is known to equal OK at this point,
        #  so re-assigning OK when it tested falsy could never change anything)
        if not check_dict:
            return
        for keyword, access_type in check_dict.items():
            if keyword in web.body:
                # later configured keywords override earlier classifications
                web.access_type = access_type
                self.holder.logging.info(u"页面因包含 '%s' 被识别为类型 %s" % (keyword, WebAccessType.description(access_type)))
        return web

# endregion

# region WebContent相关处理
    def appendWebContent(self, name, web):
        """
        Append a fetched page to the list stored under *name* in page_dict.
        :param name: page-set key (e.g. u'jbxx_json')
        :param web: the WebContent to record (may be None)
        :return: None
        """
        self.page_dict.setdefault(name, list()).append(web)

    def cleanWebContents(self):
        """
        1. Drop None entries and duplicate-request retries from page_dict.
        2. Derive this company's overall fetch status (page_dict['status']).
        :return: None
        """
        if not self.page_dict:
            self.page_dict['status'] = CompanyAccessType.ERROR
            return
        success_num = 0
        failed_num = 0
        for key in self.page_dict:
            values = self.page_dict[key]
            if not isinstance(values, list):
                continue
            if not values:
                failed_num += 1
                continue
            # dedupe by request md5, scanning backwards so in-place deletion
            # is safe; the last-fetched copy of each request is kept
            req_md5_set = set()
            i = len(values)-1
            while i >= 0:
                val = values[i]
                if not val:
                    del values[i]
                elif val.req_md5 in req_md5_set:
                    del values[i]
                else:
                    req_md5_set.add(val.req_md5)
                    # shareholder-detail pages are exempt from the status check for now
                    if val.status_code >= 400 and key != u'gdxq_html':
                        failed_num += 1
                    else:
                        success_num += 1
                i -= 1
        # drop keys whose value became empty
        self.page_dict = dict(filter(lambda item: item[1], self.page_dict.items()))
        if success_num > 0 and failed_num == 0:
            self.page_dict['status'] = CompanyAccessType.OK
        elif success_num > 0:
            self.page_dict['status'] = CompanyAccessType.INCOMPLETE
        else:
            self.page_dict['status'] = CompanyAccessType.ERROR

    def wrapReturnObject(self):
        """
        Deep-copy page_dict and convert every WebContent entry to a plain
        dictionary so the result can be handed to the external callback.
        :return: dict of page-name -> list of serialized pages (+ status keys)
        """
        # once every subclass calls existQynbList, this rowkey fallback can go
        if 'rowkey_dict' not in self.page_dict:
            if not self.setRowKey():
                self.holder.logging.error(u"提取rowkey参数出错!")
        html_copy = copy.deepcopy(self.page_dict)
        for page_key, page_val in html_copy.items():
            if isinstance(page_val, list):
                html_copy[page_key] = [w.toDictionary() for w in page_val if isinstance(w, WebContent)]
        return html_copy

# endregion
    def bypassQynb(self):
        """
        Decide whether annual reports need fetching and which years exist.
        :return: True when the annual-report section can be skipped entirely;
                 also stores 'qynb_should_visit' / 'qynb_has_years' in value_dict
        """
        # annual-report fetching depends on the rowkey, so set it here
        if not self.setRowKey():
            self.holder.logging.error(u"提取rowkey参数出错!")
            should_visit, has_years = True, set()
        else:
            rowkey = self.page_dict['rowkey_dict']
            should_visit, has_years = self.nb_judge.visitJudgement(company_name=rowkey['company_name'],
                                                                   company_zch=rowkey['company_zch'])
        self.value_dict['qynb_should_visit'] = should_visit
        self.value_dict['qynb_has_years'] = has_years
        return not should_visit

    def filterQynbList(self, nb_list):
        """
        Prune the annual-report list in place: drop everything when reports
        should not be visited, otherwise drop entries without a detectable
        year or whose year was already fetched.
        :param nb_list: report entries; each must be a tag containing the year
        :return: None (nb_list is modified in place)
        """
        if not self.value_dict.get('qynb_should_visit', ''):
            del nb_list[:]
            return
        fetched_years = self.value_dict.get('qynb_has_years', set())
        for node in list(nb_list):
            years = re.findall('\d{4}', ''.join(node.xpath('text()')))
            if not years or years[0] in fetched_years:
                nb_list.remove(node)

    def yzmSave(self, yzm, img_path):
        """
        Persist a solved captcha sample; subclasses call this as needed.
        :param yzm: the captcha answer
        :param img_path: path of the captcha image
        :return: None
        """
        record_success(self.pinyin, yzm, img_path, self.holder)

    def getMonitorMiddleValues(self, module):
        """
        Collect the intermediate values this module declares as monitored.
        :param module: crawl-module definition (monitor_values attribute)
        :return: dict of monitored name -> current value (None when absent),
                 or None when the module monitors nothing
        """
        if not module.monitor_values:
            return None
        return {name: self.value_dict.get(name) for name in module.monitor_values}

    def seedReport(self):
        """
        Summarize the seed's crawl outcome: count per-company page statuses
        from page_list and derive the seed-level access type.
        :return: None (mutates self.report)
        """
        try:
            if self.report.access_type == SeedAccessType.NON_COMPANY or self.report.access_type == SeedAccessType.NO_VALID_COMPANY:
                return
            for page_dict in self.page_list:
                if not page_dict or 'status' not in page_dict:
                    self.report.failed_num += 1
                elif page_dict['status'] == CompanyAccessType.OK:
                    self.report.success_num += 1
                else:
                    self.report.failed_num += 1
            if self.report.success_num > 0 and self.report.failed_num == 0:
                self.report.access_type = SeedAccessType.OK
            elif self.report.success_num > 0:
                self.report.access_type = SeedAccessType.INCOMPLETE
            elif self.report.access_type == SeedAccessType.NO_TARGET_SOURCE:
                return
            else:
                self.report.access_type = SeedAccessType.ERROR
        except Exception as e:
            # str(e) instead of e.message: .message is absent on many exception
            # types (and removed in Python 3), which would raise AttributeError
            # inside this handler and mask the original error
            self.holder.logging.error(str(e))
class Lockenwickler(QtGui.QMainWindow):
    def __init__(self, parent=None):
        """
        Build the Lockenwickler main window: Qt UI, console, Ogre rendering,
        editor dialogs/docks, the 5ms update timer, and settings restoration.
        Order matters: setupUi -> console -> Ogre -> dialogs -> docks -> timer.
        :param parent: optional parent widget
        """
        QtGui.QWidget.__init__(self, parent)

#        pixmap = QPixmap("media/icons/lockenwickler_provisorium.png")
#        splash = QSplashScreen(pixmap, Qt.WindowStaysOnTopHint)
#        splash.setMask(pixmap.mask())
#        splash.showMessage("Starting...")
#        splash.show() 

        self.setupUi()

        # console must exist before setupOgre: it registers the Ogre log listener
        self.consoleWindow = ConsoleWindow(False,  self)

        self.setupOgre()

        self.prefDialog = PreferencesDialog(self)
        self.objectPropertyWin = ObjectPropertyWin(self.OgreMainWinSceneMgr, self.gocManager, self)
        self.moduleExplorerWin = ModuleExplorer(self)
        self.modelSelectionDialog = ModelSelectionDialog(self.ogreRoot, self)
        self.materialSelectionDialog = MaterialSelectionDialog(self.ogreRoot, self)
        self.moduleManager.modelSelectionDialog = self.modelSelectionDialog
        self.moduleManager.materialSelectionDialog = self.materialSelectionDialog
        self.moduleDirectoryViewWin = ModuleDirectoryView(self)
        
        triggerManager = TriggerManager()
        
        self.gameObjectClassView = GameObjectClassView(self.moduleManager.gocManager)

        self.createDockWindows()

        # drive the editor update loop at ~5ms intervals
        self.mainTimer = QtCore.QTimer(self)
        self.mainTimer.connect(self.mainTimer, QtCore.SIGNAL("timeout()"), self.update)
        self.mainTimer.start(5)

        # restore window geometry/docks and the module config path; when the
        # stored path is invalid, show the preferences dialog first
        settings = QtCore.QSettings()
        self.restoreGeometry(settings.value("MainWindow/Geometry").toByteArray())
        self.restoreState(settings.value("MainWindow/DockWindows").toByteArray())
        if not self.prefDialog.setCfgPath(settings.value("Preferences/moduleCfgPath").toString()):
            self.prefDialog.show()
            self.moduleManager.moduleCfgPath = self.prefDialog.moduleCfgPath

        else:
            self.moduleManager.moduleCfgPath = self.prefDialog.moduleCfgPath
            
        self.prefDialog.setExternalEditorPath(str(settings.value("Preferences/externalEditorPath").toString()))
        
        if self.prefDialog.moduleCfgPath is not None:
            self.moduleDirectoryViewWin.modulesPath = self.prefDialog.moduleCfgPath.replace("modules.cfg", "")

        
        self.moduleManager.setModuleExplorer(self.moduleExplorerWin)
        self.moduleManager.setModuleDirView(self.moduleDirectoryViewWin)
        self.moduleManager.setPropertyWindow(self.objectPropertyWin)
        self.moduleManager.setContextMenuCallback(self.onContextMenuCallback)
        
        self.setWindowIcon(QIcon("media/icons/lockenwickler_provisorium_small.png"))
        self.setWindowTitle("Rastullahs Lockenwickler")
        
        self.editorSetupFinished = False
        
#        splash.finish(self)

    def createAction(self, text, slot=None, shortcut=None, icon=None, tip=None, checkable=False, signal="triggered()"):
        """
        Factory for QAction objects: wires up icon, shortcut, tooltip/status
        tip, slot connection and checkability in one call.
        Note the explicit `is not None` tests: callers pass "" deliberately.
        :return: the configured QAction
        """
        act = QtGui.QAction(text, self)
        if icon is not None:
            act.setIcon(QtGui.QIcon("media/icons/%s" % icon))
        if shortcut is not None:
            act.setShortcut(shortcut)
        if tip is not None:
            act.setToolTip(tip)
            act.setStatusTip(tip)
        if slot is not None:
            self.connect(act, QtCore.SIGNAL(signal), slot)
        act.setCheckable(checkable)
        return act

    def addActions(self, target, actions):
        """
        Add each action to *target* (menu/toolbar); None entries become
        separators.
        :param target: object exposing addAction/addSeparator
        :param actions: iterable of QActions and/or None markers
        """
        for entry in actions:
            if entry is None:
                target.addSeparator()
            else:
                target.addAction(entry)

    def setupUi(self):
        self.setObjectName("MainWindow")

        self.centralwidget = QtGui.QWidget(self)
        self.centralwidget.setObjectName("centralwidget")

        self.hboxlayout = QtGui.QHBoxLayout(self.centralwidget)
        self.hboxlayout.setContentsMargins(0, 0, 0, 0)
        self.hboxlayout.setObjectName("hboxlayout")

        self.gridlayout = QtGui.QGridLayout()
        self.gridlayout.setObjectName("gridlayout")
        self.gridlayout.setContentsMargins(0, 0, 0, 0)
        
        self.menubar = QtGui.QMenuBar(self)
        self.menubar.setObjectName("menubar")

        self.menuFile = QtGui.QMenu(self.menubar)
        self.menuFile.setObjectName("menuFile")

        self.menuEdit = QtGui.QMenu(self.menubar)
        self.menuEdit.setObjectName("menuEdit")

        self.menuView = QtGui.QMenu(self.menubar)
        self.menuView.setObjectName("menuView")
        self.setMenuBar(self.menubar)


        self.statusbar = QtGui.QStatusBar(self)
        self.statusbar.setObjectName("statusbar")
        self.setStatusBar(self.statusbar)

#####################################
        self.actionNeu =self.createAction("&New Module",  self.actionNewSlot,  QKeySequence.New,  "filenew.png",  "New Module")
        self.actionNeu.setObjectName("actionNeu")

        self.actionOpen = self.createAction("&Open Module",  self.actionOpenSlot,  QKeySequence.Open,  "fileopen.png",  "Open Module")
        self.actionOpen.setObjectName("actionOpen")
        
        self.actionSave = self.createAction("&Save",  self.actionSaveSlot,  QKeySequence.Save,  "filesave.png",  "Save Module")
        self.actionSave.setObjectName("actionSave")
        
        self.actionRunModule = self.createAction("&Save and Run",  self.actionRunModuleSlot,  "Alt+R",  "fileexport.png",  "Save And Run Module")
        self.actionRunModule.setObjectName("actionRunModule")

        self.actionClose = self.createAction("Quit",  self.actionQuitSlot,  "Alt+Q",  "exit.png",  "Quit")
        self.actionClose.setObjectName("actionQuit")
#####################################


#####################################
        self.actionDelete = self.createAction("Delete",  self.actionDeleteSlot,  QKeySequence.Delete,  "editdelete.png",  "Delete")
        self.actionDelete.setObjectName("actionDelete")

        self.actionCopy = self.createAction("Copy",  self.actionCopySlot,  QKeySequence.Copy,  "editcopy.png",  "Copy")
        self.actionCopy.setObjectName("actionCopy")

        self.actionCut = self.createAction("Cut",  self.actionCutSlot,  QKeySequence.Cut,  "editcut.png",  "Cut")
        self.actionCut.setObjectName("actionCut")

        self.actionPaste = self.createAction("Paste",  self.actionPasteSlot,  QKeySequence.Paste,  "editpaste.png",  "Paste")
        self.actionPaste.setObjectName("actionPaste")

        self.actionSelect = self.createAction("&Select",  self.actionSelectSlot,  "Space",  "cursor.png",  "Move selected object")
        self.actionSelect.setObjectName("actionSelect")

        self.actionMove = self.createAction("&Move",  self.actionMoveSlot,  "g",  "move.png",  "Move selected object")
        self.actionMove.setObjectName("actionMove")

        self.actionRotate = self.createAction("&Rotate",  self.actionRotateSlot,  "r",  "rotate.png",  "Rotate selected object")
        self.actionRotate.setObjectName("actionRotate")

        self.actionScale = self.createAction("&Scale",  self.actionScaleSlot,  "x",  "resizecol.png",  "Scale selected object")
        self.actionScale.setObjectName("actionScale")

        self.actionOneClickEntityPlacement = self.createAction("&OneClickEntityPlacement",  self.actionOneClickEntityPlacementSlot,  "",  "resizecol.png",  "Add an Entity just by a click")
        self.actionOneClickEntityPlacement.setObjectName("actionOneClickEntityPlacement")
        self.actionOneClickEntityPlacement.setCheckable(True)

#####################################
#####################################
        self.actionSceneExplorer = self.createAction("&Scene Exlporer",  self.toggleModuleExplorer,  "Alt+E",  "view_tree.png",  "Module Explorer",  False)
        self.actionSceneExplorer.setObjectName("actionSceneExplorer")
        
        self.actionModuleDirView = self.createAction("&Directory Explorer",  self.toggleModuleDirView,  "Alt+D",  "view_tree.png",  "Module Directory Explorer",  False)
        self.actionModuleDirView.setObjectName("actionDirectoryExplorer")
        
        self.actionPreferences = self.createAction("&Preferences",  self.togglePreferencesWindow,  None,  "configure.png",  "Lockenwickler Preferences",  False)
        self.actionPreferences.setObjectName("actionPreferences")

        self.actionProperty_Window = self.createAction("Pr&operty Window",  self.togglePropertyWindow,  "Alt+P",  "unsortedlist1.png",  "Property Window")
        self.actionProperty_Window.setObjectName("actionProperty_Window")

        self.actionObject_Selection = self.createAction("&Model Preview Window",  self.toggleModelPreviewWindow,  "Alt+O",  "tux.png",  "Model Preview")
        self.actionObject_Selection.setObjectName("actionObject_Selection")
        
        self.actionMaterial_Selection = self.createAction("Material &Preview Window",  self.toggleMaterialPreviewWindow,  "Alt+M",  "colors.png",  "Material Preview")
        self.actionMaterial_Selection.setObjectName("actionMaterial_Selection")

        self.actionGameObjectClass_Selection = self.createAction("&Game Object Class Preview Window",  self.toggleGameObjectViewWindow,  "Ctrl+G",  "multirow.png",  "GameObjectClass Preview")
        self.actionGameObjectClass_Selection.setObjectName("actionObject_Selection")

        self.actionConsole_Window = self.createAction("&Console Window",  self.toggleConsoleWindow,  "Alt+C",  "console.png",  "Console Window")
        self.actionConsole_Window.setObjectName("actionConsole_Window")
        
        self.actionTerrainTools_Window = self.createAction("&Terrain Tools",  self.toggleTerrainToolsWindow,  "Alt+T",  "terrain_small.png",  "Console Window")
        self.actionTerrainTools_Window.setObjectName("actionTerrainTools_Window")
        
        self.actionToggleViewportGrid = self.createAction("&Toggle Grid",  self.toggleViewportGrid,  "Alt+G",  "console.png",  "Toggle Viewport Grid")
        self.actionToggleViewportGrid.setObjectName("actionToggleViewportGrid")

#####################################
#####################################


        self.menuFile.addAction(self.actionNeu)
        self.menuFile.addAction(self.actionOpen)
        self.menuFile.addAction(self.actionSave)
        self.menuFile.addAction(self.actionRunModule)
        self.menuFile.addAction(self.actionClose)

        self.menuEdit.addAction(self.actionSelect)
        self.menuEdit.addAction(self.actionMove)
        self.menuEdit.addAction(self.actionRotate)
        self.menuEdit.addAction(self.actionScale)
        self.menuEdit.addSeparator()
        self.menuEdit.addAction(self.actionDelete)
        self.menuEdit.addAction(self.actionCopy)
        self.menuEdit.addAction(self.actionCut)
        self.menuEdit.addAction(self.actionPaste)
        self.menuEdit.addSeparator()
        self.menuEdit.addAction(self.actionOneClickEntityPlacement)
        

        self.menuView.addAction(self.actionSceneExplorer)
        self.menuView.addAction(self.actionModuleDirView)
        self.menuView.addAction(self.actionPreferences)
        self.menuView.addAction(self.actionProperty_Window)
        self.menuView.addAction(self.actionObject_Selection)
        self.menuView.addAction(self.actionMaterial_Selection)
        self.menuView.addAction(self.actionGameObjectClass_Selection)
        self.menuView.addAction(self.actionConsole_Window)
        self.menuView.addAction(self.actionTerrainTools_Window)
        self.menuView.addAction(self.actionToggleViewportGrid)
        
        self.menubar.addAction(self.menuFile.menuAction())
        self.menubar.addAction(self.menuEdit.menuAction())
        self.menubar.addAction(self.menuView.menuAction())

        self.retranslateUi()
        QtCore.QMetaObject.connectSlotsByName(self)

    def retranslateUi(self):
        """Apply (re-)translated captions to the window, menus and actions."""
        translate = QtGui.QApplication.translate
        encoding = QtGui.QApplication.UnicodeUTF8
        self.setWindowTitle(translate("MainWindow", "MainWindow", None, encoding))
        self.menuFile.setTitle(translate("MainWindow", "File", None, encoding))
        self.menuEdit.setTitle(translate("MainWindow", "Edit", None, encoding))
        self.menuView.setTitle(translate("MainWindow", "View", None, encoding))
        self.actionNeu.setText(translate("MainWindow", "New Module", None, encoding))
        self.actionMove.setText(translate("MainWindow", "Move", None, encoding))
        self.actionRotate.setText(translate("MainWindow", "Rotate", None, encoding))
        self.actionSceneExplorer.setText(translate("MainWindow", "Module Explorer", None, encoding))
        self.actionPreferences.setText(translate("MainWindow", "Preferences", None, encoding))
        self.actionProperty_Window.setText(translate("MainWindow", "Property Window", None, encoding))
        self.actionObject_Selection.setText(translate("MainWindow", "Object Selection", None, encoding))
        self.actionClose.setText(translate("MainWindow", "Quit", None, encoding))
        self.actionConsole_Window.setText(translate("MainWindow", "Console Window", None, encoding))

    def setupOgre(self, pluginCfgPath="./Plugins.cfg", ogreCfgPath="./ogre.cfg", logPath="./ogre.log"):
        """Create and wire up the Ogre rendering backend for the editor.

        :param pluginCfgPath: default plugin config path; overwritten below with
            a platform-specific file, so the passed-in value is effectively ignored.
        :param ogreCfgPath: path to the Ogre renderer configuration file.
        :param logPath: path for Ogre's log output.
        """
        # The plugin list differs per platform, so the default argument is
        # always replaced with a platform-specific file here.
        if platform.system() == "Windows":
            pluginCfgPath="./Plugins-windows.cfg"
        else:
            pluginCfgPath="./Plugins-linux.cfg"

        root = og.Root(pluginCfgPath, ogreCfgPath, logPath)
        self.ogreRoot = root

        # Try the saved renderer config first, fall back to the interactive
        # dialog; if the user cancels both there is nothing to render with.
        if  not self.ogreRoot.restoreConfig() and not self.ogreRoot.showConfigDialog():
            sys.exit('Quit from Config Dialog')

        # False: do not auto-create a render window; the Qt widgets provide it.
        root.initialise(False)

        self.pivotRenderQueueListener = PivotRenderQueueListener()
        self.OgreMainWinSceneMgr = self.ogreRoot.createSceneManager(og.ST_GENERIC, "OgreMainWinSceneMgr")
        # NOTE(review): component values > 1.0 over-brighten the scene —
        # presumably intended for editor lighting; confirm.
        self.OgreMainWinSceneMgr.ambientLight = og.ColourValue(4, 4, 4)
        self.OgreMainWinSceneMgr.addRenderQueueListener(self.pivotRenderQueueListener)
        
        # All managers share the single scene manager created above.
        self.moduleName = ""
        self.myTerrainManager = MyTerrainManager(self.OgreMainWinSceneMgr)
        self.moduleManager = ModuleManager(self.ogreRoot,  self.OgreMainWinSceneMgr)
        self.moduleManager.myTerrainManager = self.myTerrainManager
        self.gocManager = self.moduleManager.gocManager
        
        # Embed the Ogre render widget into the main window's layouts.
        self.ogreMainWindow = OgreMainWindow.OgreMainWindow(self.moduleManager,  root,  self.OgreMainWinSceneMgr,  self)
        self.gridlayout.addWidget(self.ogreMainWindow,0,0,1,1)
        self.hboxlayout.addLayout(self.gridlayout)
        self.setCentralWidget(self.centralwidget)
        
        self.myTerrainManager.ogreMainWindow = self.ogreMainWindow
        
        # Mirror Ogre's default log into the editor's console window.
        oglog = og.LogManager.getSingleton().getDefaultLog()
        oglog.addListener(self.consoleWindow.lockenLog)

    def finishEditorSetup(self):
        """Perform the one-time resource/pivot setup; safe to call repeatedly."""
        if self.editorSetupFinished:
            return
        resourceManager = og.ResourceGroupManager.getSingleton()
        resourceManager.addResourceLocation("./media", "FileSystem", "General", False)
        resourceManager.initialiseAllResourceGroups()

        # The transform pivot starts hidden until an object is selected.
        self.moduleManager.pivot = Pivot(self.OgreMainWinSceneMgr)
        self.moduleManager.pivot.hide()
        self.editorSetupFinished = True
        
    def update(self):
        """Render one frame; on Linux additionally repaint the embedded render widgets."""
        self.ogreRoot.renderOneFrame()
        if platform.system() != "Linux":
            return
        self.ogreMainWindow.updateRenderWindow()
        self.modelSelectionDialog.updateRenderWindow()
        self.materialSelectionDialog.updateRenderWindow()

    def actionOpenSlot(self):
        """Finish deferred editor setup, then show the 'load module' dialog."""
        self.finishEditorSetup()
        self.moduleManager.openLoadModuleDialog()

    def actionNewSlot(self):
        """Open the 'new module' wizard as a modal dialog."""
        wizard = NewModuleWizard(self.moduleManager, self)
        wizard.exec_()
        
    def actionSaveSlot(self):
        """Persist the currently loaded module via the module manager."""
        self.moduleManager.save()
        
    def actionRunModuleSlot(self):
        """Save the current module and launch it in the Rastullah engine (Windows only).

        The engine executable is looked up next to the configured modules.cfg;
        if it is missing, the action is silently a no-op.
        """
        self.moduleManager.save()
        if platform.system() != "Windows":
            return
        workingDir = self.prefDialog.moduleCfgPath.replace("/modules/modules.cfg", "")
        executable = os.path.join(workingDir, "Rastullah.exe").replace("/", "\\")
        if os.path.isfile(executable):
            # Keyword arguments instead of ten positionals: only bufsize,
            # close_fds and cwd deviated from Popen's (Python 2) defaults.
            subprocess.Popen([executable, "--module", self.moduleManager.mainModule.name],
                             bufsize=0, close_fds=False, cwd=workingDir)

        
    def actionQuitSlot(self):
        """Request the main window to close."""
        self.close()

    def actionDeleteSlot(self):
        """Delegate object deletion to the module manager."""
        self.moduleManager.deleteObjects()

    def actionCopySlot(self):
        """Delegate object copying to the module manager."""
        self.moduleManager.copyObjects()

    def actionCutSlot(self):
        """Delegate object cutting to the module manager."""
        self.moduleManager.cutObjects()

    def actionPasteSlot(self):
        """Paste objects via the module manager, positioned along the current camera-to-viewport ray."""
        self.moduleManager.pasteObjects(self.ogreMainWindow.getCameraToViewportRay())

    def actionSelectSlot(self):
        """Switch to plain selection mode by hiding the transform pivot."""
        self.moduleManager.pivot.hide()

    def actionMoveSlot(self):
        """Put the transform pivot into move (translate) mode."""
        self.moduleManager.pivot.setMoveMode()

    def actionRotateSlot(self):
        """Put the transform pivot into rotate mode."""
        self.moduleManager.pivot.setRotateMode()

    def actionScaleSlot(self):
        """Put the transform pivot into scale mode."""
        self.moduleManager.pivot.setScaleMode()

    def actionOneClickEntityPlacementSlot(self):
        """Sync one-click entity placement with the checkable action's state."""
        self.moduleManager.setOneClickEntityPlacement(self.actionOneClickEntityPlacement.isChecked())

    def togglePreferencesWindow(self):
        """Show the preferences dialog if hidden, otherwise hide it."""
        self.prefDialog.setVisible(self.prefDialog.isHidden())

    def toggleModelPreviewWindow(self):
        """Show the model preview dock if hidden, otherwise hide it."""
        self.modelSelectionDock.setVisible(self.modelSelectionDock.isHidden())
    
    def toggleMaterialPreviewWindow(self):
        """Show the material preview dock if hidden, otherwise hide it."""
        self.materialSelectionDock.setVisible(self.materialSelectionDock.isHidden())

    def toggleGameObjectViewWindow(self):
        """Show the game-object-class dock if hidden, otherwise hide it."""
        self.gameObjectClassViewDock.setVisible(self.gameObjectClassViewDock.isHidden())

    def toggleModuleExplorer(self):
        """Show the module explorer dock if hidden, otherwise hide it."""
        self.moduleExplorerDock.setVisible(self.moduleExplorerDock.isHidden())
            
    def toggleModuleDirView(self):
        """Show the module directory view dock if hidden, otherwise hide it."""
        self.moduleDirectoryViewDock.setVisible(self.moduleDirectoryViewDock.isHidden())

    def togglePropertyWindow(self):
        """Show the property dock if hidden, otherwise hide it."""
        self.propertyDock.setVisible(self.propertyDock.isHidden())

    def toggleConsoleWindow(self):
        """Show the console dock if hidden, otherwise hide it."""
        self.consoleDock.setVisible(self.consoleDock.isHidden())
            
    def toggleTerrainToolsWindow(self):
        """Show the terrain tools dock if hidden, otherwise hide it."""
        self.myTerrainManagerDock.setVisible(self.myTerrainManagerDock.isHidden())

    def toggleViewportGrid(self):
        """Delegate viewport grid toggling to the Ogre main window."""
        self.ogreMainWindow.toggleViewportGrid()

    def createDockWindows(self):
        """Create all dock widgets and toolbars and attach them to the main window.

        Wraps the pre-created child windows (model/material/game-object-class
        selection, properties, module explorer, directory view, terrain tools,
        console) in QDockWidgets; several docks are tabified to share space.
        """
        # Model preview dock (right side); tab parent for material and goc docks.
        self.modelSelectionDock = QtGui.QDockWidget(self.tr("Models"), self)
        self.modelSelectionDock.setObjectName("ModelSelectionDockWindow")
        self.modelSelectionDock.setAllowedAreas(QtCore.Qt.LeftDockWidgetArea | QtCore.Qt.RightDockWidgetArea)
        self.modelSelectionDock.setWidget(self.modelSelectionDialog)
        self.addDockWidget(QtCore.Qt.RightDockWidgetArea, self.modelSelectionDock)
        
        self.materialSelectionDock = QtGui.QDockWidget(self.tr("Materials"), self)
        self.materialSelectionDock.setObjectName("MaterialSelectionDockWindow")
        self.materialSelectionDock.setAllowedAreas(QtCore.Qt.LeftDockWidgetArea | QtCore.Qt.RightDockWidgetArea)
        self.materialSelectionDock.setWidget(self.materialSelectionDialog)
        self.addDockWidget(QtCore.Qt.RightDockWidgetArea, self.materialSelectionDock)
        self.tabifyDockWidget(self.modelSelectionDock, self.materialSelectionDock)
        
        self.gameObjectClassViewDock = QtGui.QDockWidget(self.tr("GameObjectClasses"), self)
        self.gameObjectClassViewDock.setObjectName("GameObjectClassView")
        self.gameObjectClassViewDock.setAllowedAreas(QtCore.Qt.LeftDockWidgetArea | QtCore.Qt.RightDockWidgetArea)
        self.gameObjectClassViewDock.setWidget(self.gameObjectClassView)
        self.addDockWidget(QtCore.Qt.RightDockWidgetArea, self.gameObjectClassViewDock)
        self.tabifyDockWidget(self.modelSelectionDock, self.gameObjectClassViewDock)
        
        # Property dock (left side), tabified with the module explorer below.
        self.propertyDock = QtGui.QDockWidget(self.tr("Properties"), self)
        self.propertyDock.setObjectName("PropertyDockWindow")
        self.propertyDock.setAllowedAreas(QtCore.Qt.LeftDockWidgetArea | QtCore.Qt.RightDockWidgetArea)
        self.propertyDock.setWidget(self.objectPropertyWin)
        self.addDockWidget(QtCore.Qt.LeftDockWidgetArea, self.propertyDock)

        self.moduleExplorerDock = QtGui.QDockWidget(self.tr("Module Explorer"), self)
        self.moduleExplorerDock.setObjectName("ModuleExplorerDockWindow")
        self.moduleExplorerDock.setAllowedAreas(QtCore.Qt.LeftDockWidgetArea | QtCore.Qt.RightDockWidgetArea)
        self.moduleExplorerDock.setWidget(self.moduleExplorerWin)
        self.addDockWidget(QtCore.Qt.LeftDockWidgetArea, self.moduleExplorerDock)
        self.tabifyDockWidget(self.moduleExplorerDock, self.propertyDock)
        
        # Directory view may dock on any edge.
        self.moduleDirectoryViewDock = QtGui.QDockWidget(self.tr("Module Directory View"), self)
        self.moduleDirectoryViewDock.setObjectName("ModuleDirectoryViewDockWindow")
        self.moduleDirectoryViewDock.setAllowedAreas(QtCore.Qt.LeftDockWidgetArea | QtCore.Qt.RightDockWidgetArea | QtCore.Qt.TopDockWidgetArea | QtCore.Qt.BottomDockWidgetArea)
        self.moduleDirectoryViewDock.setWidget(self.moduleDirectoryViewWin)
        self.addDockWidget(QtCore.Qt.LeftDockWidgetArea, self.moduleDirectoryViewDock)
        
        # The terrain manager supplies its own, pre-built dock widget.
        self.myTerrainManagerDock = self.myTerrainManager.getDockWidget(self)
        
        self.consoleDock = QtGui.QDockWidget(self.tr("Console"), self)
        self.consoleDock.setObjectName("ConsoleDockWindow")
        self.consoleDock.setAllowedAreas(QtCore.Qt.BottomDockWidgetArea | QtCore.Qt.TopDockWidgetArea)
        self.consoleDock.setWidget(self.consoleWindow)
        self.addDockWidget(QtCore.Qt.BottomDockWidgetArea, self.consoleDock)

        # File toolbar mirrors the File menu's actions.
        self.fileToolBar = self.addToolBar("File Toolbar")
        self.fileToolBar.setObjectName("FileToolBar")
        self.fileToolBar.setAllowedAreas(QtCore.Qt.TopToolBarArea | QtCore.Qt.BottomToolBarArea)
        self.fileToolBar.addAction(self.actionNeu)
        self.fileToolBar.addAction(self.actionOpen)
        self.fileToolBar.addAction(self.actionSave)
        self.fileToolBar.addAction(self.actionRunModule)
        self.fileToolBar.addAction(self.actionClose)
        self.addToolBar(QtCore.Qt.TopToolBarArea, self.fileToolBar)

        # Transformation toolbar: select/move/rotate/scale modes.
        self.moveToolBar = self.addToolBar("Transformation Bar")
        self.moveToolBar.setObjectName("TransformationBar")
        self.moveToolBar.setAllowedAreas(QtCore.Qt.TopToolBarArea | QtCore.Qt.BottomToolBarArea)
        self.moveToolBar.addAction(self.actionSelect)
        self.moveToolBar.addAction(self.actionMove)
        self.moveToolBar.addAction(self.actionRotate)
        self.moveToolBar.addAction(self.actionScale)
        self.addToolBar(QtCore.Qt.TopToolBarArea, self.moveToolBar)

    def keyPressEvent(self,  event):
        """Forward non-auto-repeat key presses to the Ogre render window."""
        if not event.isAutoRepeat():
            self.ogreMainWindow.keyPressEvent(event)

    def keyReleaseEvent(self,  event):
        """Forward non-auto-repeat key releases to the Ogre render window."""
        if event.isAutoRepeat():
            return
        self.ogreMainWindow.keyReleaseEvent(event)

    def onContextMenuCallback(self, actions, menus):
        """Build and show the context menu at the cursor position.

        The standard edit actions come first, followed by the caller-supplied
        submenus and actions."""
        menu = QMenu("My Menu!!")
        for builtinAction in (self.actionDelete, self.actionCopy, self.actionCut, self.actionPaste):
            menu.addAction(builtinAction)
        menu.addSeparator()

        for subMenu in menus:
            menu.addMenu(subMenu)
        for extraAction in actions:
            menu.addAction(extraAction)

        menu.exec_(QCursor.pos())

    def connectActionButtons(self):
        """No-op: no additional action wiring is performed here."""
        pass

    def saveOnClose(self):
        """Hook for a save-confirmation dialog during shutdown.

        The interactive prompt below is commented out, so the method currently
        always permits the close by returning True."""
#        reply = QtGui.QMessageBox.question(self,  "Rastullahs Lockenwickler - Unsaved Chages",  "Save unsaved changes?",  QtGui.QMessageBox.Yes|QtGui.QMessageBox.No|QtGui.QMessageBox.Cancel)
#        if reply == QtGui.QMessageBox.Cancel:
#            return False
#        if reply == QtGui.QMessageBox.Yes:
#            print""
#            #TODO: implement save here
        return True

    def closeEvent(self,  event):
        """Persist window geometry, dock layout and preferences on close.

        If saveOnClose() vetoes the shutdown, the close event is ignored and
        the window stays open."""
        if self.saveOnClose():
            settings = QtCore.QSettings()
            settings.setValue("Preferences/moduleCfgPath", QtCore.QVariant(self.prefDialog.lineEdit.text()))
            settings.setValue("MainWindow/Geometry",  QtCore.QVariant(self.saveGeometry()))
            # NOTE(review): "MainWIndow" is a typo, but it is also the key the
            # state is stored under; renaming it would orphan previously saved
            # layouts — check the matching restore code before fixing.
            settings.setValue("MainWIndow/DockWindows",  QtCore.QVariant(self.saveState()))
            settings.setValue("Preferences/externalEditorPath",  QtCore.QVariant(self.prefDialog.externalTextAppLineEdit.text()))
            #self.ogreRoot.shutdown()
        else:
            event.ignore()
Beispiel #9
0
def main():
    """Estimate multivariate Value-at-Risk by sampling points on hyperellipsoids.

    Loads GARCH volatility and correlation estimates, computes a VaR estimate
    per time step and confidence level, prints the result table, and stores it
    as pickle and CSV.
    """
    preprocesser = PreProcessor()
    mm = ModuleManager()

    def generate_random_points_on_hyperellipsoid(vol_data,
                                                 cor_data,
                                                 alpha_vec=None,
                                                 n_sample=int(1e4),
                                                 dim=30):
        """For each time step, sample points on the alpha-level hyperellipsoid
        of the covariance matrix and derive a VaR estimate per confidence level.

        :param vol_data: per-step asset volatilities (rows = time steps)
        :param cor_data: per-step correlation vectors (rows = time steps)
        :param alpha_vec: confidence levels; defaults to [0.9, 0.95, 0.975, 0.99]
        :param n_sample: number of random points per hyperellipsoid
        :param dim: number of assets
        :return: DataFrame with one VaR column per confidence level."""
        # Bugfix: the previous np.array(...) default was a mutable default
        # argument shared across calls; build the default freshly instead.
        if alpha_vec is None:
            alpha_vec = np.array([0.9, 0.95, 0.975, 0.99])
        header = alpha_vec
        result = pd.DataFrame(columns=header)
        for i in range(vol_data.shape[0]):
            start_time = time.time()
            var_estimates = []
            vol_mat = np.diag(vol_data.iloc[i, :])
            cor_mat = preprocesser.construct_correlation_matrix(
                corr_vec=cor_data.iloc[i, :], n=dim)
            H = preprocesser.construct_covariance_matrix(vol_matrix=vol_mat,
                                                         corr_matrix=cor_mat)
            r = np.random.randn(H.shape[0], n_sample)
            # u contains random points on the unit hypersphere
            u = r / np.linalg.norm(r, axis=0)
            # Hoisted out of the alpha loop: the matrix square root does not
            # depend on alpha and is the most expensive step here.
            H_sqrt = sqrtm(H)
            for alpha in alpha_vec:
                y = np.sqrt(chi2.ppf(q=alpha, df=dim))
                # Transform points on the unit hypersphere to the hyperellipsoid
                xrandom = H_sqrt.dot(np.sqrt(y) * u)
                # NOTE(review): despite the variable name this takes the MAX of
                # the absolute equally-weighted averages — confirm intended.
                xrandom_min = np.max(
                    np.abs(np.array([np.mean(x) for x in xrandom.T])))
                var_estimates.append(xrandom_min)
            # Append this step's estimates as a new row.
            result = pd.merge(result,
                              pd.DataFrame(np.asarray(var_estimates).reshape(
                                  1, -1),
                                           columns=header),
                              how='outer')
            print((i, time.time() - start_time))
        return result

    ##################################################################################################################
    ###                                      Multivariate Quantile Computation                                     ###
    ##################################################################################################################
    dim = 30
    vol_data = mm.load_data(
        'multivariate_analysis/volatilities_garch_norm_DJI30_2000_2001.pkl')
    #cor_data = mm.load_data('multivariate_analysis/cor_DCC_mvnorm_DJI30_1994_1995.pkl')
    cor_data = mm.load_data(
        'multivariate_analysis/pearson/pearson_cor_estimates/cor_knn5_pearson_10_DJI30_2000_2001.pkl'
    )

    result = generate_random_points_on_hyperellipsoid(vol_data=vol_data,
                                                      cor_data=cor_data)
    print(result)
    #mm.save_data('multivariate_analysis/VaR/var_dcc_mvnorm_1994_1995_nsample_1e6.pkl', result)
    #mm.transform_pickle_to_csv('multivariate_analysis/VaR/var_dcc_mvnorm_1994_1995_nsample_1e6.pkl')
    mm.save_data(
        'multivariate_analysis/VaR/var_knn5_pearson_garch_2000_2001_nsample_1e5_sqrt_chi2.pkl',
        result)
    mm.transform_pickle_to_csv(
        'multivariate_analysis/VaR/var_knn5_pearson_garch_2000_2001_nsample_1e5_sqrt_chi2.pkl'
    )
Beispiel #10
0
 def __init__(self):
     """Initializer PreProcessor object."""
     # NOTE(review): stray, mis-indented duplicate of PreProcessor.__init__
     # below (paste artifact from example concatenation). TechnicalAnalyzer
     # and ModuleManager are project classes not visible in this chunk.
     self.ta = TechnicalAnalyzer()
     self.mm = ModuleManager()
Beispiel #11
0
class PreProcessor(object):
    """Preprocessor class. This class has the responsibility to preprocess the data. More specifically, the class
    has the task of simulating random correlated asset paths in the bivariate case. Additionally, the class has the
    responsibility for estimating the uncertainty in the output variable through a bootstrap resampling procedure."""
    def __init__(self):
        """Initializer PreProcessor object."""
        # Collaborators: correlation estimation helpers and data persistence.
        self.ta = TechnicalAnalyzer()
        self.mm = ModuleManager()

    def simulate_random_correlation_ar(self, T, a0, a1):
        """Simulate a random correlation process with highly persistent time-varying correlations following an
           auto-regressive process. Add noise with ar process
        :param T: simulation length
        :param a0:
        :param a1:
        :return: random_corr: correlation process following specified dynamics."""
        eps = 1e-5
        random_corr = np.empty(T)
        random_corr[0] = a0 / (1 - a1)  # initialise random correlation process
        for t in range(1, T):
            eta = np.random.normal(0, 0.2)
            random_corr[t] = np.maximum(
                -1 + eps,
                np.minimum(1 - eps, a0 + a1 * random_corr[t - 1] + eta))
        return random_corr

    def simulate_correlated_asset_paths(self, corr_vector, vol_matrix, T):
        """Simulate asset paths with specified time-varying correlation dynamics.
        :param corr_vector: time-varying correlation vector
        :param vol_matrix: volatility matrix
        :param T: simulation length
        :return: correlated_asset_paths: simulated asset paths with specified correlation dynamics."""
        if corr_vector.ndim == 1:
            size = 2
        else:
            size = corr_vector.shape[1]  # no of columns, i.e. no of assets
        z = np.random.normal(
            0, 1,
            (T,
             size))  # T-by-number of assets draws from N(0,1) random variable
        correlated_asset_paths = np.empty([
            T, size
        ])  # initialise Txsize dimensional array for correlated asset paths
        for t, rho in enumerate(corr_vector):
            corr_matrix = self.construct_correlation_matrix(rho, size)
            cov_matrix = self.construct_covariance_matrix(
                vol_matrix, corr_matrix)
            cholesky_factor = self.cholesky_factorization(
                cov_matrix)  # Cholesky decomposition
            correlated_asset_paths[t] = np.dot(
                cholesky_factor,
                z[t].transpose())  # Generating Y_t = H_t^(0.5) * z_t
        return correlated_asset_paths

    def construct_correlation_matrix(self, corr_vec, n):
        """Method for constructing time-varying correlation matrix given a time-varying correlations vector.
        :param corr_vec: time-varying correlation vector
        :param n: dimension correlation matrix
        :return corr_matrix: time-varying correlation matrix"""
        corr_triu = np.zeros((n, n))
        iu1 = np.triu_indices(
            n, 1
        )  # returns indices for upper-triangular matrix with diagonal offset of 1
        corr_triu[
            iu1] = corr_vec  # Assign vector correlations to corresponding upper-triangle matrix indices
        corr_matrix = corr_triu + corr_triu.T + np.eye(
            n)  # Transform upper-triangular matrix into symmetric matrix
        return corr_matrix

    def construct_covariance_matrix(self, vol_matrix, corr_matrix):
        """Method for constructing time-varying covariance matrix given a time-varying correlations matrix and asset
        volatility vector.
        :param vol_matrix: diagonal matrix containing asset volatilities
        :param corr_matrix: time-varying correlation matrix
        :return: cov_matrix: time-varying covariance matrix."""
        cov_matrix = np.dot(vol_matrix, np.dot(corr_matrix, vol_matrix))
        return cov_matrix

    def cholesky_factorization(self, cov_matrix):
        """Method for matrix decomposition through Cholesky factorization. The Cholesky factorization states that every
        symmetric positive definite matrix A has a unique factorization A = LL' where L is a lower-triangular matrix and
        L' is its conjugate transpose.
        :param cov_matrix: time-varying positive definite covariance matrix
        :return: cholesky_factor: cholesky decomposition lower-triangular matrix L such that LL' = cov_matrix"""
        cholesky_factor = np.linalg.cholesky(cov_matrix)
        return cholesky_factor

    def determinant_LU_factorization(self, corr_vec, n):
        """Method for determining the determinant of a given matrix. Determinants are computed using
        LU factorization.
        :param corr_vec: time-varying correlation vector
        :param n: dimension correlation matrix
        :return: determinant."""
        cor_matrix = self.construct_correlation_matrix(corr_vec, n)
        det = np.linalg.det(cor_matrix)
        return det

    def generate_bivariate_dataset(self,
                                   ta,
                                   simulated_data_process,
                                   dt,
                                   proxy_type='pearson',
                                   T=500):
        """Method for generating a bivariate dataset with proxies moving window correlation estimates for covariate set
        and true correlation as the output variables.
        :param ta: technical analyzer object
        :param simulated_data_process: bivariate asset process with predefined correlation dynamics.
        :param dt: window length
        :param proxy_type: type definition of proxy for estimates of true correlation
        :param T: length test set
        :return: datasets with true correlation and proxy for output variable."""
        # Bugfix: compare strings with '==' rather than 'is'. Identity checks
        # against a literal only work via CPython interning; they fail for
        # equal-but-not-identical strings and raise a SyntaxWarning on 3.8+.
        if proxy_type == 'pearson':
            pearson_estimates = ta.moving_window_correlation_estimation(
                simulated_data_process.iloc[:, :2], dt)
            # Feature set consists of lagged asset price and mw correlation estimate, e.g. x_t = MW_t-1
            dataset = simulated_data_process.iloc[:, :2].shift(
                periods=1, axis='index')  # Dataframe
            dataset['MW_t-1'] = pearson_estimates.shift(periods=1,
                                                        axis='index')
            dataset_proxy = dataset.copy()  # copy feature matrix
            # Dataset with true correlations as target variable and proxies
            dataset['rho_true'] = simulated_data_process['rho']
            dataset_proxy['rho_proxy'] = pearson_estimates
        else:  # Kendall as proxy
            kendall_estimates = ta.moving_window_correlation_estimation(
                simulated_data_process.iloc[:, :2], dt, proxy_type='kendall')
            # Feature set consists of lagged asset price and kendall correlation estimate, e.g. x_t = kendall_t-1
            dataset = simulated_data_process.iloc[:, :2].shift(
                periods=1, axis='index')  # Dataframe
            dataset['Kendall_t-1'] = kendall_estimates.shift(periods=1,
                                                             axis='index')
            dataset_proxy = dataset.copy()  # copy feature matrix
            # Dataset with true correlations as target variable and proxies
            dataset['rho_true'] = simulated_data_process['rho']
            dataset_proxy['rho_proxy'] = kendall_estimates
        return dataset, dataset_proxy

    def generate_multivariate_dataset(self,
                                      ta,
                                      data,
                                      dt,
                                      proxy_type='pearson'):
        """Generate a multivariate dataset whose covariates and output variable
        are moving-window correlation estimates (as approximations of the true
        correlations).

        :param ta: technical analyzer object
        :param data: dataframe with log returns
        :param dt: window length
        :param proxy_type: proxy used for the true-correlation estimates
        :return: dataset with approximated covariates and output variable."""
        proxies = ta.moving_window_correlation_estimation(data, dt,
                                                          proxy_type=proxy_type)
        # Covariates: lagged correlation estimates plus the lagged minimum and
        # maximum asset returns.
        features = proxies.shift(periods=1, axis='index')
        features['r_min'] = np.min(data, axis=1).shift(periods=1, axis='index')
        features['r_max'] = np.max(data, axis=1).shift(periods=1, axis='index')
        # Join the covariates with the (unlagged) proxy targets.
        return pd.concat([features, proxies], axis=1, join='inner')

    def bootstrap_moving_window_estimate(self,
                                         data,
                                         delta_t,
                                         T=500,
                                         reps=1000,
                                         ciw=99,
                                         proxy_type='pearson'):
        """Measure the estimation uncertainty associated to the correlation coefficients when
        moving window estimates are used for approximating true correlations.

        :param data: dataset used for the task of bootstrap resampling
        :param delta_t: window length for moving window estimates of Pearson correlation coefficient
        :param T: length of test set
        :param reps: number of bootstrap samples
        :param ciw: confidence interval width (in percent)
        :param proxy_type: type definition of proxy for estimates of true correlation
            ('pearson', 'emw' or 'kendall')
        :return: tuple (rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates)
        :raises ValueError: if proxy_type is not one of the supported proxies."""
        # Validate once up front and fail fast: the original compared strings with
        # 'is' (unreliable for non-interned strings, SyntaxWarning on modern Python)
        # and silently printed an error per bootstrap rep, returning all-NaN results.
        if proxy_type not in ('pearson', 'emw', 'kendall'):
            raise ValueError(
                'Please, choose an option from the supported set of proxies for true correlations (Pearson '
                'moving window or Kendall moving window')
        assets_price = data.tail(T + delta_t - 1).iloc[:, :-1]
        assets_price.reset_index(drop=True, inplace=True)
        rho_estimates = np.full(T, np.nan)
        sd_rho_estimates = np.full(
            T, np.nan)  # bootstrapped standard error of rho estimates
        lower_percentiles = np.full(
            T, np.nan)  # array containing lower percentile values
        upper_percentiles = np.full(
            T, np.nan)  # array containing upper percentile values
        p_low = (100 - ciw) / 2
        p_high = 100 - p_low
        # The exponential weights depend only on delta_t: compute them once
        # instead of once per bootstrap replication (loop-invariant hoist).
        if proxy_type == 'emw':
            w = self.ta.exponential_weights(delta_t, delta_t / 3)

        for j, t in enumerate(range(delta_t, T + delta_t)):
            sampling_data = np.asarray(assets_price.iloc[t - delta_t:t, :])
            # Bootstrap resampling procedure:
            # draw sample of size delta_t by randomly extracting time units with uniform probability, with replacement.
            rho_bootstrapped = np.full(reps, np.nan)
            for rep in range(reps):
                indices = np.random.randint(low=0,
                                            high=sampling_data.shape[0],
                                            size=delta_t)
                sample = sampling_data[indices]
                if proxy_type == 'emw':
                    # Weighted moving window estimate: re-normalize the drawn weights to one.
                    weight_vec_raw = w[indices]
                    sum_w = np.sum(weight_vec_raw)
                    weight_vec_norm = [i / sum_w for i in weight_vec_raw]
                    rho_bootstrapped[rep] = \
                        self.ta.pearson_weighted_correlation_estimation(sample[:, 0], sample[:, 1], delta_t,
                                                                        weight_vec_norm)
                elif proxy_type == 'pearson':
                    rho_bootstrapped[rep] = pearsonr(sample[:, 0],
                                                     sample[:, 1])[0]
                else:  # 'kendall' (guaranteed by the validation above)
                    rho_bootstrapped[rep] = kendalltau(sample[:, 0],
                                                       sample[:, 1])[0]
            lower, upper = np.nanpercentile(rho_bootstrapped, [p_low, p_high])
            lower_percentiles[j] = lower
            upper_percentiles[j] = upper
            rho_estimates[j] = np.nanmean(rho_bootstrapped)
            sd_rho_estimates[j] = np.nanstd(rho_bootstrapped)
        return rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates

    def bootstrap_learner_estimate(self,
                                   data,
                                   T=500,
                                   reps=1000,
                                   ciw=99,
                                   model='knn',
                                   n_neighbors=5):
        """Measure the estimation uncertainty associated to the correlation coefficients when a
        learner model is used for approximating true correlations.

        :param data: dataset used for the task of bootstrap resampling (not modified)
        :param T: length of test set
        :param reps: number of bootstrap samples
        :param ciw: confidence interval width (in percent)
        :param model: learner model, 'knn' (nearest neighbour) or 'rf' (random forest)
        :param n_neighbors: number of multivariate neighbours for the knn learner
        :return: tuple (rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates)
        :raises ValueError: if model is not one of the supported learners."""
        # Validate once up front and fail fast: the original compared strings with
        # 'is' (unreliable for non-interned strings) and silently printed an error
        # per bootstrap rep, returning all-NaN results.
        if model not in ('knn', 'rf'):
            raise ValueError(
                'Please, choose an option from the supported set of learner algorithms (nearest neighbour, '
                'random forest)')
        rho_estimates = np.full(T, np.nan)
        sd_rho_estimates = np.full(
            T, np.nan)  # bootstrapped standard error of rho estimates
        lower_percentiles = np.full(
            T, np.nan)  # array containing lower percentile values
        upper_percentiles = np.full(
            T, np.nan)  # array containing upper percentile values
        p_low = (100 - ciw) / 2
        p_high = 100 - p_low
        # Work on a trimmed copy: the original dropped the first 251 rows in place,
        # mutating the caller's DataFrame as a hidden side effect.
        data = data.iloc[251:].reset_index(drop=True)
        t_train_init = data.shape[0] - T  # e.g. 1000 for T = 500

        for j, t in enumerate(
                range(t_train_init,
                      data.shape[0])):  # j = {0, T-1}, t = {t_train_init, ...}
            sampling_data = np.asarray(data.iloc[:t, :])
            x_test = np.asarray(data.iloc[t, 0:-1])  # features at t+1
            # Bootstrap resampling procedure:
            # draw sample of size train_set by randomly extracting time units with uniform probability, with replacement
            rho_bootstrapped = np.full(reps, np.nan)
            for rep in range(reps):
                indices = np.random.randint(low=0, high=t, size=t)
                # Resampled train set, split into feature matrix and response vector.
                sample = sampling_data[indices]
                X_train = sample[:, 0:-1]
                y_train = sample[:, -1]
                # Obtain estimation uncertainty in correlation estimation rho_t via bootstrap:
                if model == 'knn':
                    # Honour the n_neighbors argument (the original hard-coded 5,
                    # silently ignoring the parameter).
                    knn = KNeighborsRegressor(n_neighbors=n_neighbors)
                    rho_bootstrapped[rep] = knn.fit(X_train, y_train).predict(
                        x_test.reshape(1, -1))
                else:  # 'rf' (guaranteed by the validation above)
                    rf = RandomForestRegressor(n_jobs=1,
                                               n_estimators=10,
                                               max_features=1).fit(
                                                   X_train, y_train)
                    rho_bootstrapped[rep] = rf.predict(x_test.reshape(1, -1))
            lower, upper = np.nanpercentile(rho_bootstrapped, [p_low, p_high])
            lower_percentiles[j] = lower
            upper_percentiles[j] = upper
            rho_estimates[j] = np.nanmean(rho_bootstrapped)
            sd_rho_estimates[j] = np.nanstd(rho_bootstrapped)
        return rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates

    def mse_knn_sensitivity_analysis(self,
                                     proxy_type='pearson',
                                     output_type='true'):
        """Create a dataframe containing information on the MSE decomposition (squared bias,
        variance, MSE) as a function of different parameterizations for the knn learner model.

        :param proxy_type: type of moving window estimator used as covariate.
        :param output_type: output variable true correlation or proxy.
        :return: dataframe indexed by number of neighbours; rows with no result stay NaN."""
        rho_bias_squared = np.full(1001, np.nan)
        rho_var_vec = np.full(1001, np.nan)
        rho_mse_vec = np.full(1001, np.nan)
        path = 'bivariate_analysis/%s_cor/mse_results_%s_cor/' % (output_type,
                                                                  output_type)

        # k in {5, 10, 25, 50}: one pickle per k, row 10 holds (MSE, bias^2, variance).
        for k in (5, 10, 25, 50):
            mse_knn = self.mm.load_data(path + 'mse_knn%i_%s_%s_cor.pkl' %
                                        (k, proxy_type, output_type))
            rho_mse_vec[k], rho_bias_squared[k], rho_var_vec[k] = mse_knn.iloc[
                10, :]

        # k in {100, 200, ..., 1000}: one combined pickle, row i holds the result for k = 100*i.
        mse_knn_100_to_1000 = self.mm.load_data(
            path + 'mse_knn100_to_1000_%s_%s_cor.pkl' %
            (proxy_type, output_type))
        for i in range(1, 11):
            k = 100 * i
            rho_mse_vec[k], rho_bias_squared[k], rho_var_vec[
                k] = mse_knn_100_to_1000.iloc[i, :]

        # Dataframe with information on MSE decomposition as a function of different learner parameterizations
        data_frame = pd.DataFrame({
            'bias_squared': rho_bias_squared,
            'variance': rho_var_vec,
            'MSE': rho_mse_vec
        })
        return data_frame

    def mse_rf_sensitivity_analysis(self,
                                    rho_true,
                                    proxy_type='pearson',
                                    output_type='true',
                                    type='trees'):
        """Create a dataframe containing information on the MSE decomposition (squared bias,
        variance, MSE) as a function of different parameterizations for the rf learner model,
        and save it to disk.

        :param rho_true: vector containing true correlation (used only for type='trees')
        :param proxy_type: type of moving window estimator used as covariate.
        :param output_type: output variable true correlation or proxy.
        :param type: sensitivity dimension — 'trees' (number of trees) or anything else for
            the number of covariates. (Name shadows the builtin but is kept for
            backward compatibility with existing callers.)
        :return: dataframe."""
        # '==' replaces the original 'is' identity comparison on a string literal,
        # which is unreliable for non-interned strings.
        if type == 'trees':
            rho_bias_squared = np.full(1001, np.nan)
            rho_var_vec = np.full(1001, np.nan)
            # MSE decomposition per forest size: bias^2 from the estimates against the
            # true correlation, variance from the bootstrapped standard errors.
            for tree in [10, 100, 300, 600, 1000]:
                data = self.mm.load_data(
                    'bivariate_analysis/%s_cor/%s/results_rf_%s_%s_cor/'
                    'rf%i_%s_10_estimate_uncertainty_rep_100_%s_corr.pkl' %
                    (output_type, proxy_type, proxy_type, output_type, tree,
                     proxy_type, output_type))
                rho_estimates = data['Rho_estimate']
                rho_bias_squared[tree] = np.mean(
                    np.power(rho_estimates - rho_true, 2))
                rho_var_vec[tree] = np.power(np.mean(data['std rho estimate']),
                                             2)
            rho_mse_vec = rho_bias_squared + rho_var_vec  # elementwise MSE = bias^2 + variance
            filename_save = 'mse_rf_%s_%s_cor_sensitivity_analysis_trees.pkl' % (
                proxy_type, output_type)
        else:
            rho_bias_squared = np.full(4, np.nan)
            rho_var_vec = np.full(4, np.nan)
            rho_mse_vec = np.full(4, np.nan)
            mse_rf300_1_to_3 = self.mm.load_data(
                'bivariate_analysis/%s_cor/mse_results_%s_cor/'
                'mse_rf300_1_to_3_%s_%s_cor.pkl' %
                (output_type, output_type, proxy_type, output_type))
            # Row i of the pickle holds (MSE, bias^2, variance) for i covariates.
            for i in range(1, 4):
                rho_mse_vec[i], rho_bias_squared[i], rho_var_vec[
                    i] = mse_rf300_1_to_3.iloc[i, :]
            filename_save = 'mse_rf_%s_%s_cor_sensitivity_analysis_covariates.pkl' % (
                proxy_type, output_type)
        # Dataframe with information on MSE decomposition as a function of different learner parameterizations
        data_frame = pd.DataFrame({
            'bias_squared': rho_bias_squared,
            'variance': rho_var_vec,
            'MSE': rho_mse_vec
        })
        self.mm.save_data(
            'bivariate_analysis/%s_cor/mse_results_%s_cor/' %
            (output_type, output_type) + filename_save, data_frame)
        return data_frame
Beispiel #12
0
    def __init__(self):
        """Initialise the core object: create the manager instances and mark it active."""
        # The module manager receives a back-reference to this object
        # (how ModuleManager uses it is defined elsewhere — TODO confirm).
        self.module_manager = ModuleManager(self)
        self.thread_manager = ThreadManager()

        # Activity flag; presumably cleared to request shutdown — verify against the run loop.
        self.active = True
Beispiel #13
0
def main():

    preprocesser = PreProcessor()
    mm = ModuleManager()
    ta = TechnicalAnalyzer()


    ##################################################################################################################
    ###     Asset path simulation using Cholesky Factorization and predefined time-varying correlation dynamics    ###
    ################## ###############################################################################################
    """
    T = 1751
    a0 = 0.1
    a1 = 0.8
    random_corr = preprocesser.simulate_random_correlation_ar(T, a0, a1)
    # Simple volatility matrix with randomly chosen volatilities for illustration purposes
    vol_matrix = np.array([[0.08, 0],
                           [0, 0.1]])
    correlated_asset_paths = preprocesser.simulate_correlated_asset_paths(random_corr, vol_matrix, T)
    data = pd.DataFrame(correlated_asset_paths)
    data['rho'] = random_corr
    mm.save_data('/bivariate_analysis/correlated_sim_data.pkl', data)
    # Figure
    correlated_asset_paths = mm.load_data('bivariate_analysis/correlated_sim_data.pkl')
    correlated_asset_paths = correlated_asset_paths.tail(500)
    correlated_asset_paths.reset_index(drop=True, inplace=True)
    plt.plot(correlated_asset_paths.iloc[:, 0], label='$y_{1,t}$', linewidth=1, color='black')
    plt.plot(correlated_asset_paths.iloc[:, 1], label='$y_{2,t}$', linewidth=1, linestyle='--', color='blue')
    plt.plot(correlated_asset_paths.iloc[:, -1], label='$\\rho_t$', linewidth=1, color='red')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 500)
    plt.ylim(-0.5, 1)
    plt.show()
    """
    ##################################################################################################################
    ###     Estimation uncertainty in Pearson and Kendall correlation coefficient using moving window estimates    ###
    ##################################################################################################################
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    delta_t = [21] #np.arange(3, 252)        # 3, 4, 5, 6, 7, 8, 9, 10, 21, 42, 63, 84, 126, 251
    proxy_type = ['pearson']  # kendall ['mw', 'emw', 'kendall']
    ciw = 99

    """
    for dt, proxy_type in [(x, y) for x in delta_t for y in proxy_type]:
        start_time = time.time()
        print('(%s, %i)' % (proxy_type, dt))
        rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates = \
        preprocesser.bootstrap_moving_window_estimate(data=simulated_data_process, delta_t=dt, T=T, ciw=ciw,
                                                      proxy_type=proxy_type)
        data_frame = pd.DataFrame({'Percentile_low': lower_percentiles, 'Percentile_up': upper_percentiles,
                                   'std rho estimate': sd_rho_estimates, 'Rho_estimate': rho_estimates})
        filename = '%s_%i_estimate_uncertainty.pkl' % (proxy_type, dt)
        mm.save_data('bivariate_analysis/' + filename, data_frame)
        print("%s: %f" % ('Execution time:', (time.time() - start_time)))
    """
    """
    # Figures
    for dt, proxy_type in [(x, y) for x in delta_t for y in proxy_type]:
        data = mm.load_data('bivariate_analysis/results_%s/%s_%i_estimate_uncertainty.pkl' % (proxy_type, proxy_type, dt))
        rho_estimates = data['Rho_estimate']
        lower_percentiles = data['Percentile_low']
        upper_percentiles = data['Percentile_up']
        plt.figure()
        plt.plot(simulated_data_process['rho'], label='true correlation', linewidth=1, color='black')
        plt.plot(rho_estimates, label='%s correlation' % proxy_type.upper(), linewidth=1, color='red')
        plt.plot((upper_percentiles-lower_percentiles)-1, label='%d%% interval (bootstrap)'
                                                                % ciw, linewidth=1, color='magenta')
        #plt.plot(lower_percentiles, label='%d%% interval (bootstrap)' % ciw, linewidth=1, color='magenta')
        #plt.plot(upper_percentiles, label="", linewidth=1, color='magenta')
        plt.xlabel('observation')
        plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                   edgecolor='black')
        plt.xlim(0, T)
        plt.yticks(np.arange(-1, 1.00000001, 0.2))
        plt.ylim(-1, 1)
        plt.show()
    """
    ##################################################################################################################
    ###       Mean squared error of Pearson and Kendall correlation coefficient using moving window estimates      ###
    ##################################################################################################################
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    rho_true = simulated_data_process.tail(T).iloc[:, -1]
    rho_true.reset_index(drop=True, inplace=True)
    delta_t_min, delta_t_max = 3, 252
    delta_t = np.arange(3, 252)  # dt = {[3, 10], 21, 42, 63, 126, 251}  (and 84 possibly)
    proxy_type = ['pearson', 'emw', 'kendall']  # run proxies individually otherwise one saves dataframe over other.
    rho_bias_squared = np.full(delta_t_max, np.nan)
    rho_var_vec = np.full(delta_t_max, np.nan)
    """
    # Create dataframe with (interpolated) mse results, squared bias, variance for varying window sizes
    for proxy_type, dt in [(x, y) for x in proxy_type for y in delta_t]:
        print('%s, %i' % (proxy_type, dt))
        data = mm.load_data('bivariate_analysis/%s_%i_estimate_uncertainty.pkl'
                            % (proxy_type, dt))
        rho_estimates = data['Rho_estimate']
        rho_bias_squared[dt] = np.mean(np.power(rho_estimates - rho_true, 2))
        rho_var_vec[dt] = np.power(np.mean(data['std rho estimate']), 2)

    rho_mse_vec = np.array([np.sum(pair) for pair in zip(rho_bias_squared, rho_var_vec)])
    data_frame = pd.DataFrame({'bias_squared': rho_bias_squared, 'variance': rho_var_vec,
                               'MSE': rho_mse_vec})
    filename = 'mse_%s.pkl' % proxy_type
    mm.save_data('bivariate_analysis/' + filename, data_frame)
    """
    """
    # Kendall correlation estimate 
        for col1, col2, in IT.combinations(simulated_data_process.columns[:-1], 2):
            def my_tau(idx):
                df_tau = simulated_data_process[[col1, col2]].iloc[idx+len(simulated_data_process)-T-dt+1]
                return kendalltau(df_tau[col1], df_tau[col2])[0]
            kendall_estimates = pd.rolling_apply(np.arange(T+dt-1), dt, my_tau)
        mse_kendall_vec[dt - 1] = mean_squared_error(rho_true, kendall_estimates[-T:])
    mm.save_data('/bivariate_analysis/mse_kendall_true_corr.pkl', mse_kendall_vec)
    print("%s: %f" % ('Execution time:', (time.time() - start_time)))
    """
    """
    # Load MSE data Pearson/ Kendall
    mse_pearson_vec = mm.load_data('bivariate_analysis/mse_pearson.pkl')
    mse_kendall_vec = mm.load_data('bivariate_analysis/mse_kendall.pkl')
    """
    """
    # Figure without interpolation MSE 
    plt.figure(1)
    plt.plot(mse_pearson_vec['MSE'], label='Pearson', color='indigo', linewidth=1)
    plt.plot(mse_kendall_vec['MSE'], label='Kendall', color='aquamarine', linewidth=1, linestyle='--')
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=5, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 250)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.6)
    plt.show()
    """
    """
    # Figure without interpolation MSE decomposition 
    plt.figure(2)
    plt.plot(mse_kendall_vec['bias_squared'], label='Squared Bias', color='blue', linewidth=1)
    plt.plot(mse_kendall_vec['variance'], label='Variance', color='red', linewidth=1)
    plt.plot(mse_kendall_vec['MSE'], label='MSE', color='black', linestyle='--', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=5, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 250)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.6)
    plt.show()
    """
    """
    # Variance in MSE window sizes
    var_mse_pearson = np.nanvar(mse_pearson_vec['MSE']); print('mse_pearson_var: %f' % var_mse_pearson)
    var_mse_kendall = np.nanvar(mse_kendall_vec['MSE']); print('mse_kendall_var: %f' % var_mse_kendall)

    # Max-min in MSE window sizes
    print('mse_pearson_min_max: (%f, %f)' % (np.nanmin(mse_pearson_vec['MSE']), np.nanmax(mse_pearson_vec['MSE'])))
    print('mse_kendall_min_max: (%f, %f)' % (np.nanmin(mse_kendall_vec['MSE']), np.nanmax(mse_kendall_vec['MSE'])))
    """

    ##################################################################################################################
    ###                         Minimum Determinant Pearson and Kendall Moving Window                              ###
    ##################################################################################################################
    # Get information on the minimum determinants over all corrlation estimates for all window sizes [3, 100]
    delta_t = range(3, 101)
    det_min_vec = np.full(101, np.nan)
    proxy_type = 'pearson'
    """
    for dt in delta_t:
        # Load data Pearson/ Kendall
        det_data_vec = np.full(501, np.nan)
        filename = '%s_%i_estimate_uncertainty.pkl' % (proxy_type, dt)
        data = mm.load_data('bivariate_analysis/results_%s/%s' % (proxy_type, filename))
        # Compute determinants for every dataset
        for i, rho in enumerate(data['Rho_estimate']):
            det_data_vec[i+1] = preprocesser.determinant_LU_factorization(rho, 2)
        det_min_vec[dt] = np.nanmin(det_data_vec)
    mm.save_data('bivariate_analysis/determinant_min_%s.pkl' % proxy_type, det_min_vec)
    """
    """
    # Plot minimum determinants of Pearson and Kendal Moving Window estimates of correlation
    det_min_pearson = mm.load_data('bivariate_analysis/determinant_min_pearson.pkl')
    det_min_kendall = mm.load_data('bivariate_analysis/determinant_min_kendall.pkl')
    plt.figure(1)
    plt.plot(det_min_pearson, label='Pearson', linewidth=1, color='orange')
    plt.plot(det_min_kendall, label='Kendall', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('minimum det($R_t)$')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=2, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(-0.1, 1.1, 0.1))
    plt.ylim(-0.1, 1)
    plt.show()
    """

    ##################################################################################################################
    ###                                          Dataset creation                                                  ###
    ##################################################################################################################
    # Pearson and Kendall correlation moving window estimates as covariate and true correlation or moving window
    # estimate as proxy for output variable
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    delta_t_min = 5
    delta_t_max = 6
    proxy_type = ['kendall']     # ['pearson', 'emw', 'kendall']
    """
    start_time = time.time()
    for dt, proxy_type in [(x,y) for x in range(delta_t_min, delta_t_max) for y in proxy_type]:
        print('(%i, %s)' % (dt, proxy_type))
        dataset, dataset_proxy = \
            preprocesser.generate_bivariate_dataset(ta, simulated_data_process, dt, proxy_type=proxy_type)
        mm.save_data('/bivariate_analysis/true_cor/%s/data/dataset_%s_%d.pkl' % (proxy_type, proxy_type, dt), dataset)
        mm.save_data('/bivariate_analysis/proxy_cor/%s/data/dataset_%s_%d.pkl' % (proxy_type, proxy_type, dt), dataset_proxy)
    print("%s: %f" % ('Execution time:', (time.time() - start_time)))
    """
    ##################################################################################################################
    ###    Estimation uncertainty in Pearson and Kendall correlation coefficient using machine learner estimates   ###
    ##################################################################################################################
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    rho_true = simulated_data_process.tail(T).iloc[:, -1]
    rho_true.reset_index(drop=True, inplace=True)
    ciw = 99
    reps = 1000
    delta_t = [21]   # dt = {[3, 10], 21, 42, 63, 126, 251}  (and 84 possibly)
    model = ['knn']  # k-nearest neighbour: 'knn', random forest: 'rf'
    proxy_type = ['pearson', 'kendall']
    output_type = ['true', 'proxy']
    n_neighbours = [5]

    """
    for dt, proxy_type, model, k, output_type in [(x, y, z, k, o) for x in delta_t for y in proxy_type
                                     for z in model for k in n_neighbours for o in output_type]:
        start_time = time.time()
        print('(%i, %s, %s, %i)' % (dt, proxy_type, model, k))
        dataset = mm.load_data('bivariate_analysis/%s_cor/%s/data/dataset_mw_%i.pkl' % (output_type, proxy_type, dt))
        rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates = \
        preprocesser.bootstrap_learner_estimate(data=dataset, reps=reps, model=model, n_neighbors=k)
        data_frame = pd.DataFrame({'Percentile_low': lower_percentiles, 'Percentile_up': upper_percentiles,
                                   'std rho estimate': sd_rho_estimates, 'Rho_estimate': rho_estimates})
        filename = '%s5_%s_%i_estimate_uncertainty_%s_corr.pkl' % (model, proxy_type, dt, output_type)
        mm.save_data('bivariate_analysis/%s_cor/%s/results_%s_%s_%s_cor/' % (output_type, proxy_type, model, proxy_type,
                                                                             output_type) + filename, data_frame)
        print("%s: %f" % ('Execution time', (time.time() - start_time)))
    """
    """
    # Figure with bootstrap uncertainty Nearest Neighbors
    for dt, proxy_type in [(x, y) for x in delta_t for y in proxy_type]:
        print('(%s, %i)' % (proxy_type, dt))
        data = mm.load_data('bivariate_analysis/proxy_cor/%s/results_knn_%s_proxy_cor/'
                            'knn5_%s_%i_estimate_uncertainty_proxy_corr.pkl' % (proxy_type, proxy_type, proxy_type, dt))
        rho_estimates = data['Rho_estimate']
        lower_percentiles = data['Percentile_low']
        upper_percentiles = data['Percentile_up']
        plt.figure()
        plt.plot(simulated_data_process['rho'], label='true correlation', linewidth=1, color='black')
        plt.plot(rho_estimates, label='KNN correlation', linewidth=1, color='red')
        plt.plot((upper_percentiles - lower_percentiles) - 1, label='%d%% interval (bootstrap)' % ciw,
                 linewidth=1, color='magenta')
        plt.xlabel('observation')
        plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                   edgecolor='black')
        plt.xlim(0, T)
        plt.yticks(np.arange(-1, 1.00000001, 0.2))
        plt.ylim(-1, 1)
        plt.show()
    """
    """
    # Figure with bootstrap uncertainty Random Forest
    for proxy_type, output_type in [(x, y) for x in proxy_type for y in output_type]:
        filename = 'rf10_%s_21_estimate_uncertainty_rep_1000_%s_corr.pkl' % (proxy_type, output_type)
        print(filename)
        data = mm.load_data('bivariate_analysis/%s_cor/%s/results_rf_%s_%s_cor/%s' % (output_type, proxy_type,
                                                                                      proxy_type, output_type, filename))
        rho_estimates = data['Rho_estimate']
        lower_percentiles = data['Percentile_low']
        upper_percentiles = data['Percentile_up']
        plt.figure(1)
        plt.plot(simulated_data_process['rho'], label='true correlation', linewidth=1, color='black')
        plt.plot(rho_estimates, label='RF correlation', linewidth=1, color='red')
        plt.plot((upper_percentiles - lower_percentiles) - 1, label='%d%% interval (bootstrap)' % ciw,
                 linewidth=1, color='magenta')
        #plt.plot(lower_percentiles, label='%d%% interval (bootstrap)' % ciw, linewidth=1, color='magenta')
        #plt.plot(upper_percentiles, label="", linewidth=1, color='magenta')
        plt.xlabel('observation')
        plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                   edgecolor='black')
        plt.xlim(0, T)
        plt.yticks(np.arange(-1, 1.1, 0.2))
        plt.ylim(-1, 1)
        plt.show()
     """

    ##################################################################################################################
    ###        Mean squared error of Pearson/Kendall correlation coefficient using machine learner estimates       ###
    ##################################################################################################################

    # Ground truth: last T observations (final column) of the simulated correlation
    # process, re-indexed 0..T-1 so it aligns with the estimate series loaded below.
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    rho_true = simulated_data_process.tail(T).iloc[:, -1]
    rho_true.reset_index(drop=True, inplace=True)
    ciw = 99  # confidence-interval width in percent
    reps = 1000  # bootstrap repetitions
    delta_t = [10]   #  range(3, 101)   # dt = {[3, 10], 21, 42, 63, 126, 251}  (and 84 possibly)
    model = ['rf']  # k-nearest neighbour: 'knn', random forest: 'rf'
    proxy_type = ['pearson']
    output_type = ['true']
    n_neighbour = [10, 100, 300, 600, 1000]  # 5, 10, 25, 50, 100, len_train, IDW
    # Result vectors indexed directly by neighbour/tree count (0..1000); slots that
    # are never filled stay NaN so downstream plotting can mask them out.
    rho_bias_squared = np.full(1001, np.nan)
    rho_var_vec = np.full(1001, np.nan)
    rho_mse_vec = np.full(1001, np.nan)

    # Disabled (run once): assemble squared bias / variance / MSE per configuration
    # and persist the dataframe used by the sensitivity analysis further down.
    # NOTE(review): filename pattern says 'rep_100' here while the determinant
    # section below uses 'rep_1000' — confirm which repetition count is intended.
    """
    # Create dataframe with (interpolated) mse results, squared bias, variance for varying window lengths
    for model, n_neighbour, proxy_type, dt, output_type in [(w, k, x, y, z) for w in model for k in n_neighbour for
                                                            x in proxy_type for y in delta_t for z in output_type]:
        filename = '%s%i_%s_%i_estimate_uncertainty_rep_100_%s_corr.pkl' % (model, n_neighbour, proxy_type, dt, output_type)
        print(filename)
        data = mm.load_data('bivariate_analysis/%s_cor/%s/results_%s_%s_%s_cor/' % (output_type, proxy_type, model,
                                                                                    proxy_type, output_type) + filename)
        rho_estimates = data['Rho_estimate']
        rho_bias_squared[n_neighbour] = np.mean(np.power(rho_estimates-rho_true, 2))
        rho_var_vec[n_neighbour] = np.power(np.mean(data['std rho estimate']), 2)

    rho_mse_vec = np.array([np.sum(pair) for pair in zip(rho_bias_squared, rho_var_vec)])
    data_frame = pd.DataFrame({'bias_squared': rho_bias_squared, 'variance': rho_var_vec,
                               'MSE': rho_mse_vec})
    filename_save = 'mse_%s_%s_%s_cor_sensitivity_analysis_trees.pkl' % (model, proxy_type, output_type)
    print(filename_save)
    mm.save_data('bivariate_analysis/%s_cor/mse_results_%s_cor/' % (output_type, output_type) + filename_save, data_frame)
    """



    ## Load MSE data Pearson/ Kendall
    # Baseline MSE curves of the plain rolling Pearson/Kendall estimators (no learner).
    mse_pearson_vec = mm.load_data('bivariate_analysis/mse_pearson.pkl')
    mse_kendall_vec = mm.load_data('bivariate_analysis/mse_kendall.pkl')

    ## Load MSE data KNN
    # True Correlation
    # KNN(k) results with the true correlation as response. Per the plot labels
    # below, 'len_train' is plotted as KNN(unif) and 'IDW' as KNN(idw)
    # (inverse-distance weighting) — presumably different neighbour weightings;
    # confirm against the KNN training code.
    mse_knn5_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn5_pearson_true_cor.pkl')
    mse_knn10_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn10_pearson_true_cor.pkl')
    mse_knn25_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn25_pearson_true_cor.pkl')
    mse_knn50_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn50_pearson_true_cor.pkl')
    mse_knn100_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn100_pearson_true_cor.pkl')
    mse_knn_len_train_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_len_train_pearson_true_cor.pkl')
    mse_knn_IDW_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_IDW_pearson_true_cor.pkl')

    mse_knn5_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn5_kendall_true_cor.pkl')
    mse_knn10_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn10_kendall_true_cor.pkl')
    mse_knn25_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn25_kendall_true_cor.pkl')
    mse_knn50_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn50_kendall_true_cor.pkl')
    mse_knn100_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn100_kendall_true_cor.pkl')
    mse_knn_len_train_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_len_train_kendall_true_cor.pkl')
    mse_knn_IDW_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_IDW_kendall_true_cor.pkl')

    # Proxy Correlation
    mse_knn5_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn5_pearson_proxy_cor.pkl')
    mse_knn10_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn10_pearson_proxy_cor.pkl')
    mse_knn25_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn25_pearson_proxy_cor.pkl')
    mse_knn50_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn50_pearson_proxy_cor.pkl')
    mse_knn100_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn100_pearson_proxy_cor.pkl')

    mse_knn_len_train_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_len_train_pearson_proxy_cor.pkl')
    mse_knn_IDW_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_IDW_pearson_proxy_cor.pkl')

    # Kendall-covariate proxy results: only the 5-neighbour, uniform and IDW
    # variants exist here (no 10/25/50/100 counterparts on disk, apparently).
    mse_knn5_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn5_kendall_proxy_cor.pkl')
    mse_knn_len_train_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_len_train_kendall_proxy_cor.pkl')
    mse_knn_IDW_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_IDW_kendall_proxy_cor.pkl')

    ## Load MSE data RF
    # True Correlation
    # RF(n) = random forest with n estimators (trees).
    mse_rf10_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_pearson_true_cor.pkl')
    mse_rf100_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf100_pearson_true_cor.pkl')
    mse_rf300_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf300_pearson_true_cor.pkl')
    mse_rf1000_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf1000_pearson_true_cor.pkl')


    mse_rf10_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_kendall_true_cor.pkl')
    mse_rf100_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf100_kendall_true_cor.pkl')
    mse_rf300_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf300_kendall_true_cor.pkl')
    mse_rf1000_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf1000_kendall_true_cor.pkl')

    # Proxy Correlation
    mse_rf10_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_rf10_pearson_proxy_cor.pkl')

    mse_rf10_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_rf10_kendall_proxy_cor.pkl')



    # Figure without interpolation MSE
    # Disabled plot: MSE vs. window length for the baseline Pearson estimator and
    # selected KNN/RF learners (proxy-correlation response).
    """
    plt.figure(1)
    plt.plot(mse_pearson_vec['MSE'], label='Pearson', color='indigo', linewidth=1)
    #plt.plot(mse_kendall_vec['MSE'], label='Kendall', color='cyan', linestyle='--', linewidth=1)
    plt.plot(mse_knn5_pearson_proxy['MSE'], label='KNN(5)-Pearson', linewidth=1, color='brown')
    #plt.plot(mse_knn5_kendall_proxy['MSE'], label='KNN(5)-Kendall', linewidth=1, color='xkcd:azure')
    #plt.plot(mse_knn10_pearson_proxy['MSE'], label='KNN(10)', linewidth=1)
    #plt.plot(mse_knn25_pearson_proxy['MSE'], label='KNN(25)', linewidth=1)
    #plt.plot(mse_knn50_pearson_proxy['MSE'], label='KNN(50)', linewidth=1)
    plt.plot(mse_knn100_pearson_proxy['MSE'], label='KNN(100)', linewidth=1)
    plt.plot(mse_knn_IDW_pearson_proxy['MSE'], label='KNN(idw)-Pearson', color='black', linewidth=1)
    plt.plot(mse_rf10_pearson_proxy['MSE'], label='RF(10)', linewidth=1)
    #plt.plot(mse_knn_IDW_kendall_true['MSE'], label='KNN_kendall_idw', linewidth=1, color='xkcd:azure')
    #plt.plot(mse_knn_len_train_pearson_true['MSE'], label='KNN_pearson_len_train', linewidth=1)
    #plt.plot(mse_knn_len_train_pearson_proxy['MSE'], label='KNN_pearson_len_train', color='black', linewidth=1)
    #plt.plot(mse_knn_IDW_pearson_proxy['MSE'], label='KNN_pearson_IDW', color='black', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=7, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.60)
    plt.show()
    """
    # Figure without interpolation MSE decomposition
    # Disabled plot: squared-bias / variance / MSE decomposition vs. window length
    # for KNN(idw) with Kendall covariates (true-correlation response).
    """
    plt.figure(2)
    plt.plot(mse_knn_IDW_kendall_true['bias_squared'], label='Squared Bias', color='blue', linewidth=1)
    plt.plot(mse_knn_IDW_kendall_true['variance'], label='Variance', color='red', linewidth=1)
    plt.plot(mse_knn_IDW_kendall_true['MSE'], label='MSE', color='black', linestyle='--', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.31, 0.02))
    plt.ylim(0, 0.2)
    plt.show()

    """
    # Figure with interpolation MSE decomposition sensitivity analysis
    # Disabled (run once): compute and persist the KNN neighbour-count sensitivity
    # dataframes — first block for the true-correlation response, second for proxy.
    """
    mse_knn_pearson_true_cor_sa = preprocesser.mse_knn_sensitivity_analysis()
    mm.save_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_pearson_true_cor_sensitivity_analysis.pkl',
                 mse_knn_pearson_true_cor_sa)
    mse_knn_kendall_true_cor_sa = preprocesser.mse_knn_sensitivity_analysis(proxy_type='kendall')
    mm.save_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_kendall_true_cor_sensitivity_analysis.pkl',
                 mse_knn_kendall_true_cor_sa)
    """
    """
    mse_knn_pearson_proxy_cor_sa = preprocesser.mse_knn_sensitivity_analysis(output_type='proxy')
    mm.save_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_pearson_proxy_cor_sensitivity_analysis.pkl',
                 mse_knn_pearson_proxy_cor_sa)
    mse_knn_kendall_proxy_cor_sa = preprocesser.mse_knn_sensitivity_analysis(proxy_type='kendall', output_type='proxy')
    mm.save_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_kendall_proxy_cor_sensitivity_analysis.pkl',
                 mse_knn_kendall_proxy_cor_sa)

    """
    """
    plt.figure(3)
    xs = np.arange(1001)
    s1mask = np.isfinite(mse_knn_pearson_proxy_cor_sa['bias_squared'])
    s2mask = np.isfinite(mse_knn_pearson_proxy_cor_sa['variance'])
    s3mask = np.isfinite(mse_knn_pearson_proxy_cor_sa['MSE'])
    plt.plot(xs[s1mask], mse_knn_pearson_proxy_cor_sa['bias_squared'][s1mask], label='Squared Bias', color='blue', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s2mask], mse_knn_pearson_proxy_cor_sa['variance'][s2mask], label='Variance', color='red', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s3mask], mse_knn_pearson_proxy_cor_sa['MSE'][s3mask], label='MSE', color='black', linestyle='--', linewidth=1, marker='.')

    plt.xlabel('number of neighbours')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                  edgecolor='black')
    plt.xlim(0, 100)
    plt.xticks([5, 10, 25, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000])
    plt.yticks(np.arange(0, 0.21, 0.02))
    plt.ylim(0, 0.2)
    plt.show()
    """
    """
    # Variance in MSE window sizes for KNN with Pearson/ Kendall as covariates.
    # True Correlation
    #var_mse_knn5_pearson_true = np.nanvar(mse_knn5_pearson_true['MSE']); print('mse_knn5_pearson_var: %.8f' % var_mse_knn5_pearson_true)
    #var_mse_knn5_kendall_true = np.nanvar(mse_knn5_kendall_true['MSE']); print('mse_knn5_kendall_var: %.8f' % var_mse_knn5_kendall_true)
    #var_mse_knn_len_train_pearson_true = np.nanvar(mse_knn_len_train_pearson_true['MSE']); print('mse_knn_len_train_pearson_var: %.13f' % var_mse_knn_len_train_pearson_true)
    #var_mse_knn_IDW_pearson_true = np.nanvar(mse_knn_IDW_pearson_true['MSE']); print('mse_knn_IDW_pearson_var: %.9f' % var_mse_knn_IDW_pearson_true)
    #var_mse_knn_len_train_kendall_true = np.nanvar(mse_knn_len_train_kendall_true['MSE']); print('mse_knn_len_train_pearson_var: %f' % var_mse_knn_len_train_kendall_true)
    #var_mse_knn_IDW_kendall_true = np.nanvar(mse_knn_IDW_kendall_true['MSE']); print('mse_knn_IDW_pearson_var: %f' % var_mse_knn_IDW_kendall_true)
    # Proxy Correlation
    #var_mse_knn5_pearson_proxy = np.nanvar(mse_knn5_pearson_proxy['MSE']); print('mse_knn5_pearson_proxy_var: %.6f' % var_mse_knn5_pearson_proxy)
    #var_mse_knn5_kendall_proxy = np.nanvar(mse_knn5_kendall_proxy['MSE']); print('mse_knn5_kendall_proxy_var: %.6f' % var_mse_knn5_kendall_proxy)
    #var_mse_knn_len_train_pearson_proxy = np.nanvar(mse_knn_len_train_pearson_proxy['MSE']); print('mse_knn_len_train_pearson_proxy_var: %.8f' % var_mse_knn_len_train_pearson_proxy)
    #var_mse_knn_len_train_kendall_proxy = np.nanvar(mse_knn_len_train_kendall_proxy['MSE']); print('mse_knn_len_train_kendall_proxy_var: %.9f' % var_mse_knn_len_train_kendall_proxy)
    #var_mse_knn_IDW_pearson_proxy = np.nanvar(mse_knn_IDW_pearson_proxy['MSE']); print('mse_knn_IDW_pearson_proxy_var: %.8f' % var_mse_knn_IDW_pearson_proxy)
    #var_mse_knn_IDW_kendall_proxy = np.nanvar(mse_knn_IDW_kendall_proxy['MSE']); print('mse_knn_IDW_kendall_proxy_var: %.8f' % var_mse_knn_IDW_kendall_proxy)

    # Max-min in MSE window sizes for KNN with Pearson/ Kendall as covariates.
    # True Correlation
    #print('mse_knn5_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_pearson_true['MSE']), np.nanmax(mse_knn5_pearson_true['MSE'])))
    #print('mse_knn5_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_kendall_true['MSE']), np.nanmax(mse_knn5_kendall_true['MSE'])))
    #print('mse_knn_len_train_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_pearson_true['MSE']), np.nanmax(mse_knn_len_train_pearson_true['MSE'])))
    #print('mse_knn_IDW_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_pearson_true['MSE']), np.nanmax(mse_knn_IDW_pearson_true['MSE'])))
    #print('mse_knn_len_train_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_kendall_true['MSE']), np.nanmax(mse_knn_len_train_kendall_true['MSE'])))
    #print('mse_knn_IDW_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_kendall_true['MSE']), np.nanmax(mse_knn_IDW_kendall_true['MSE'])))
    # Proxy Correlation
    #print('mse_knn5_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_pearson_proxy['MSE']), np.nanmax(mse_knn5_pearson_proxy['MSE'])))
    #print('mse_knn5_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_kendall_proxy['MSE']), np.nanmax(mse_knn5_kendall_proxy['MSE'])))
    #print('mse_knn_len_train_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_pearson_proxy['MSE']), np.nanmax(mse_knn_len_train_pearson_proxy['MSE'])))
    #print('mse_knn_len_train_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_kendall_proxy['MSE']), np.nanmax(mse_knn_len_train_kendall_proxy['MSE'])))
    #print('mse_knn_IDW_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_pearson_proxy['MSE']), np.nanmax(mse_knn_IDW_pearson_proxy['MSE'])))
    #print('mse_knn_IDW_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_kendall_proxy['MSE']), np.nanmax(mse_knn_IDW_kendall_proxy['MSE'])))
    """
    """
    # Variance in MSE window sizes for RF with Pearson/ Kendall as covariates.
    # True Correlation
    #var_mse_rf10_pearson_true = np.nanvar(mse_rf10_pearson_true['MSE']); print('var_mse_rf10_pearson_true: %.8f' % var_mse_rf10_pearson_true)
    #var_mse_rf10_kendall_true = np.nanvar(mse_rf10_kendall_true['MSE']); print('var_mse_rf10_kendall_true: %.8f' % var_mse_rf10_kendall_true)
    # Proxy Correlation
    var_mse_rf10_pearson_proxy = np.nanvar(mse_rf10_pearson_proxy['MSE']); print('var_mse_rf10_pearson_proxy: %.6f' % var_mse_rf10_pearson_proxy)
    var_mse_rf10_kendall_proxy = np.nanvar(mse_rf10_kendall_proxy['MSE']); print('var_mse_rf10_kendall_proxy: %.6f' % var_mse_rf10_kendall_proxy)

    # Max-min in MSE window sizes for RF with Pearson/ Kendall as covariates.
    # True Correlation
    #print('mse_rf10_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_pearson_true['MSE']), np.nanmax(mse_rf10_pearson_true['MSE'])))
    #print('mse_rf10_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_kendall_true['MSE']), np.nanmax(mse_rf10_kendall_true['MSE'])))
    
    # Proxy Correlation
    print('mse_rf10_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_pearson_proxy['MSE']), np.nanmax(mse_rf10_pearson_proxy['MSE'])))
    print('mse_rf10_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_kendall_proxy['MSE']), np.nanmax(mse_rf10_kendall_proxy['MSE'])))
    """
    """
    # Figure without interpolation MSE
    plt.figure(4)
    plt.plot(mse_knn10_pearson_proxy['MSE'], label='KNN(10)-Pearson', linewidth=1)
    plt.plot(mse_pearson_vec['MSE'], label='Pearson', color='indigo', linewidth=1)
    #plt.plot(mse_kendall_vec['MSE'], label='Kendall', color='cyan', linestyle='--', linewidth=1)
    plt.plot(mse_knn_IDW_pearson_proxy['MSE'], label='KNN(idw)-Pearson', color='black', linewidth=1)
    plt.plot(mse_knn100_pearson_proxy['MSE'], label='KNN(100)-Pearson', color='red', linewidth=1)
    plt.plot(mse_rf10_pearson_proxy['MSE'], label='RF(10)-Pearson', color='goldenrod', linewidth=1)
    #plt.plot(mse_rf10_kendall_proxy['MSE'], label='RF(10)-Kendall', color='xkcd:teal', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.13), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.6)
    plt.show()
    """

    # Figure without interpolation MSE decomposition
    # Disabled plot: figure 5 — bias/variance/MSE decomposition for RF(10) with
    # Kendall covariates (proxy response); also loads decision-tree and extra
    # RF(10) variants that are only used in the commented-out line.
    """
    mse_dt_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_dt_pearson_true_cor.pkl')
    mse_rf10_2_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_2_pearson_true_cor.pkl')
    mse_rf10_3_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_3_pearson_true_cor.pkl')

    plt.figure(5)
    plt.plot(mse_rf10_kendall_proxy['bias_squared'], label='Squared Bias', color='blue', linewidth=1)
    plt.plot(mse_rf10_kendall_proxy['variance'], label='Variance', color='red', linewidth=1)
    plt.plot(mse_rf10_kendall_proxy['MSE'], label='MSE', color='black', linestyle='--', linewidth=1)
    #plt.plot(mse_dt_pearson_true, label='dt_squared_bias', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.61, 0.02))
    plt.ylim(0, 0.3)
    plt.show()
    """
    """
    # Figure with interpolation MSE decomposition sensitivity analysis number of covariates
    mse_rf_pearson_true_cor_sa = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf300_1_to_3_pearson_true_cor.pkl')
    plt.figure(3)
    xs = np.arange(4)
    s1mask = np.isfinite(mse_rf_pearson_true_cor_sa['bias_squared'])
    s2mask = np.isfinite(mse_rf_pearson_true_cor_sa['variance'])
    s3mask = np.isfinite(mse_rf_pearson_true_cor_sa['MSE'])
    plt.plot(xs[s1mask], mse_rf_pearson_true_cor_sa['bias_squared'][s1mask], label='Squared Bias', color='blue', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s2mask], mse_rf_pearson_true_cor_sa['variance'][s2mask], label='Variance', color='red', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s3mask], mse_rf_pearson_true_cor_sa['MSE'][s3mask], label='MSE', color='black', linestyle='--', linewidth=1, marker='.')

    plt.xlabel('number of covariates')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                  edgecolor='black')
    plt.xlim(0, 3)
    plt.xticks([0, 1, 2, 3])
    plt.yticks(np.arange(0, 0.21, 0.02))
    plt.ylim(0, 0.2)
    plt.show()
    """
    """
    # Figure with interpolation MSE decomposition sensitivity analysis number of trees
    mse_rf_pearson_true_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(rho_true=rho_true)
    mse_rf_kendall_true_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(
        rho_true=rho_true, proxy_type='kendall', output_type='true', type='trees')

    mse_rf_pearson_proxy_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(rho_true=rho_true, output_type='proxy')
    mse_rf_kendall_proxy_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(
        rho_true=rho_true, proxy_type='kendall', output_type='proxy', type='trees')
    plt.figure(4)
    xs = np.arange(1001)
    s1mask = np.isfinite(mse_rf_kendall_true_cor_sa_trees['bias_squared'])
    s2mask = np.isfinite(mse_rf_kendall_true_cor_sa_trees['variance'])
    s3mask = np.isfinite(mse_rf_kendall_true_cor_sa_trees['MSE'])
    plt.plot(xs[s1mask], mse_rf_kendall_true_cor_sa_trees['bias_squared'][s1mask], label='Squared Bias', color='blue',
             linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s2mask], mse_rf_pearson_true_cor_sa_trees['variance'][s2mask], label='Variance', color='red', linestyle='-',
             linewidth=1, marker='.')
    plt.plot(xs[s3mask], mse_rf_pearson_true_cor_sa_trees['MSE'][s3mask], label='MSE', color='black', linestyle='--',
             linewidth=1, marker='.')
    plt.xlabel('number of estimators')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 1000)
    plt.xticks([10, 100, 300, 600, 1000])
    plt.yticks(np.arange(0, 0.21, 0.02))
    plt.ylim(0, 0.2)
    plt.show()
    """
    ##################################################################################################################
    ###                                   Minimum Determinant Learning Algorithms                                       ###
    ##################################################################################################################
    # Rho_estimate
    # Get information on the minimum determinants over all correlation estimates for all window sizes [3, 100]
    delta_t = range(3, 101)   # window sizes; rebinds the earlier list-valued delta_t
    det_min_vec = np.full(101, np.nan)  # indexed by window size dt; slots 0-2 stay NaN
    # Rebind the earlier list-valued configuration names to single settings.
    proxy_type = 'pearson'
    output_type = 'true'
    learner = 'rf'

    # Disabled (run once): for each window size, compute the determinant of every
    # estimated 2x2 correlation matrix and persist the per-window minimum — a
    # negative minimum would flag a non-PSD estimate.
    """                    
    for dt in delta_t:
        # Load data Pearson/ Kendall
        det_data_vec = np.full(501, np.nan)
        filename = '%s10_%s_%i_estimate_uncertainty_rep_1000_%s_corr.pkl' % (learner, proxy_type, dt, output_type)
        print(filename)
        data = mm.load_data('bivariate_analysis/%s_cor/%s/results_%s_%s_%s_cor/%s'
                            % (output_type, proxy_type, learner, proxy_type, output_type, filename))
        # Compute determinants for every dataset
        for i, rho in enumerate(data['Rho_estimate']):
            det_data_vec[i+1] = preprocesser.determinant_LU_factorization(rho, 2)
        det_min_vec[dt] = np.nanmin(det_data_vec)
    filename_save = 'determinant_min_%s10_%s_%s_cor.pkl' % (learner, proxy_type, output_type)
    mm.save_data('bivariate_analysis/%s_cor/det_results_%s_cor/%s' % (output_type, output_type, filename_save), det_min_vec)
    """

    # Plot minimum determinants of KNN estimates of correlation
    # True Cor
    det_min_knn5_pearson = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn5_pearson_true_cor.pkl')
    det_min_knn5_kendall = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn5_kendall_true_cor.pkl')
    det_min_knn_len_train_pearson = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_len_train_pearson_true_cor.pkl')
    det_min_knn_len_train_kendall = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_len_train_kendall_true_cor.pkl')
    det_min_knn_IDW_pearson = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_IDW_pearson_true_cor.pkl')
    det_min_knn_IDW_kendall = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_IDW_kendall_true_cor.pkl')
    # Proxy Cor
    det_min_knn5_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn5_pearson_proxy_cor.pkl')
    det_min_knn5_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn5_kendall_proxy_cor.pkl')
    det_min_knn_len_train_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_len_train_pearson_proxy_cor.pkl')
    det_min_knn_len_train_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_len_train_kendall_proxy_cor.pkl')
    det_min_knn_IDW_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_IDW_pearson_proxy_cor.pkl')
    det_min_knn_IDW_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_IDW_kendall_proxy_cor.pkl')

    """
    plt.figure(1)
    plt.plot(det_min_knn_IDW_pearson_proxy, label='KNN(idw)-Pearson', linewidth=1, color='orange')
    plt.plot(det_min_knn_IDW_kendall_proxy, label='KNN(idw)-Kendall', linewidth=1)
    plt.plot(det_min_knn_len_train_pearson_proxy, label='KNN(unif)-Pearson', linewidth=1)
    plt.plot(det_min_knn_len_train_kendall_proxy, label='KNN(unif)-Kendall', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('minimum det($R_t)$')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=2, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(-0.1, 1.1, 0.1))
    plt.ylim(-0.1, 1)
    plt.show()
    """
    """