Example #1
    def __init__(self, red, key, user):
        self.key = key
        self.red = red

        data = json.loads(user)
        self.product_id = data.get('product_id')
        self.url = data.get('url')
        self.email = data.get('email')
        self.guid = data.get('guid')
        self.spider_name = 'tb_comment'
        self.spargs = data

        self.sql = SqlHelper()
        self.spargs['red'] = self.red
        self.spargs['sql'] = self.sql

        if not os.path.exists('log'):
            os.makedirs('log')

        configure_logging(install_root_handler = False)
        logging.basicConfig(
                filename = 'log/%s.log' % self.product_id,
                format = '%(levelname)s %(asctime)s: %(message)s',
                level = logging.DEBUG
        )
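Note: every example on this page depends on a project-specific SqlHelper class whose definition is not shown. Purely for orientation, here is a minimal sketch of the interface these snippets appear to rely on; the method names query/query_one/execute come from the calls in the examples, but the MySQLdb-backed body and the connection arguments are assumptions, not any project's actual implementation. Example #21 clearly uses a different variant (its constructor takes a config object and execute returns rows), so treat this only as a rough stand-in.

import MySQLdb


class SqlHelper(object):
    # minimal stand-in for the interface the snippets call (assumed, not the real class)
    def __init__(self, host='localhost', user='root', passwd='', db='test'):
        self.conn = MySQLdb.connect(host=host, user=user, passwd=passwd,
                                    db=db, charset='utf8')

    def execute(self, command):
        # run a statement that modifies data and commit it
        cursor = self.conn.cursor()
        cursor.execute(command)
        self.conn.commit()

    def query(self, command):
        # return all rows of a SELECT as tuples
        cursor = self.conn.cursor()
        cursor.execute(command)
        return cursor.fetchall()

    def query_one(self, command):
        # return the first row of a SELECT, or None if nothing matches
        cursor = self.conn.cursor()
        cursor.execute(command)
        return cursor.fetchone()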
Example #2
    def __init__(self, *a, **kw):
        super(RecipeDetail, self).__init__(*a, **kw)

        self.dir_name = 'log/%s' % self.name
        self.sql = SqlHelper()
        self.init()
        utils.make_dir(self.dir_name)
Example #3
    def GET(self):
        try:
            sql = SqlHelper()

            inputs = web.input()
            name = inputs.get('name')

            proxy = Proxy()
            proxy.set_value(
                ip=inputs.get('ip'),
                port=inputs.get('port'),
                country=inputs.get('country', None),
                anonymity=inputs.get('anonymity', None),
                https=inputs.get('https', 'no'),
                speed=inputs.get('speed', -1),
                source=inputs.get('source', name),
            )

            utils.sql_insert_proxy(sql, name, proxy)

            command = "SELECT ip FROM {0} WHERE ip={1} AND port={2}".format(
                name, inputs.get('ip'), inputs.get('port'))
            res = sql.query_one(command)
            return res is None
        except:
            pass

        return False
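Note: the SELECT in Example #3 builds its SQL by string interpolation, which is fragile and injection-prone. Whether SqlHelper accepts bound parameters is not visible from these snippets, so purely as a sketch against a raw DB-API connection (MySQLdb assumed, the helper name proxy_exists is invented for the example):

import MySQLdb


def proxy_exists(conn, table, ip, port):
    # the table name cannot be bound as a parameter, so the caller must validate it
    command = "SELECT ip FROM {0} WHERE ip=%s AND port=%s".format(table)
    cursor = conn.cursor()
    cursor.execute(command, (ip, port))
    return cursor.fetchone() is not None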
Example #4
    def __init__(self, *a, **kw):
        super(GameUrls, self).__init__(*a, **kw)

        self.dir_game = 'log/%s' % self.name
        self.sql = SqlHelper()
        self.init()

        utils.make_dir(self.dir_game)
Example #5
    def __init__(self, *a, **kw):
        super(BaseSpider, self).__init__(*a, **kw)

        self.urls = []
        self.headers = {}
        self.timeout = 10

        self.sql = SqlHelper()

        self.dir_log = 'log/proxy/%s' % self.name
Example #6
    def __init__(self, *a, **kw):
        super(GameInfo, self).__init__(*a, **kw)

        self.dir_game = 'log/%s' % self.name
        self.sql = SqlHelper()
        self.init()

        utils.make_dir(self.dir_game)

        self.error_count = 0
Example #7
    def __init__(self, name=None, **kwargs):
        super(Validator, self).__init__(name, **kwargs)
        self.sql = SqlHelper()

        self.dir_log = 'log/validator/%s' % self.name
        self.timeout = 10

        self.urls = []
        self.headers = None
        self.success_mark = ''
Example #8
def randitem(spargs):
    guid = spargs.get('guid', 0)
    utils.push_redis(guid, 0, '正在随机产生商品链接', save_to_mysql=False)

    url = 'https://diviner.taobao.com/diviner?p=610009&callback=jsonpCallbackMoreGood&lid=1&uuid=122270672' \
          '.1492415671516609876050.1492415672.1492415672.1492415672.1&pin=&lim=100&ec=utf-8&_=1492415813682'
    headers = {
        'Host': 'diviner.taobao.com',
        'Referer': 'https://www.taobao.com/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0'
    }
    cookies = {
        '__jda': '122270672.1492415671516609876050.1492415672.1492415672.1492415672.1',
        '__jdb': '122270672.1.1492415671516609876050|1.1492415672',
        '__jdc': '122270672',
        '__jdv': '122270672|direct|-|none|-|1492415671524',
        '__jdu': '1492415671516609876050',
    }

    r = requests.get(url=url, headers=headers, cookies=cookies, timeout=20)
    pattern = re.compile(r'"sku":(\d+),', re.S)
    ids = re.findall(pattern, r.text)
    id = random.choice(ids)

    url = 'https://item.taobao.com/%s.html' % str(id)
    utils.push_redis(guid,
                     0,
                     '生成商品链接:<a href="%s" target="_blank">%s' % (url, url),
                     save_to_mysql=False)

    sql = SqlHelper()
    command = "SELECT id FROM {table} WHERE id={product_id}". \
        format(table = config.tb_item_table, product_id = id)
    result = sql.query_one(command)

    # if the product is not already in the database, crawl it now
    if result is None:
        cmd = 'cd {dir};python manage.py real_time_analysis -a name={name} -a guid={guid} ' \
              '-a product_id={product_id} -a url={url};'. \
            format(url = str(url), name = 'tb', dir = settings.BASE_DIR, guid = guid,
                   product_id = id)
        subprocess.Popen(cmd, shell=True)
    else:
        # already in the database, so read the stored analysis results directly
        command = "SELECT * FROM {0} WHERE product_id={1} ORDER BY id". \
            format(config.analysis_item_table, id)
        result = sql.query(command)
        for res in result:
            utils.push_redis(guid, res[1], res[2], res[3], save_to_mysql=False)
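Note: the pattern in Example #8 pulls sku ids out of a JSONP-style response and then picks one at random. A tiny self-contained illustration of that step (the response fragment below is invented purely for the demo):

import random
import re

# invented stand-in for r.text; only the '"sku":<digits>,' shape matters here
sample = 'jsonpCallbackMoreGood({"goods":[{"sku":3995645,"p":1},{"sku":4336056,"p":2}]})'
ids = re.findall(r'"sku":(\d+),', sample)
print(ids)                  # ['3995645', '4336056']
print(random.choice(ids))   # one id, as randitem() does before building the item URL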
Example #9
    def __init__(self):
        super(Crawler, self).__init__()
        self.album_prefix = 'https://mm.taobao.com/self/album/open_album_list.htm?_charset=utf-8&user_id%20={0}&page={1}'
        self.image_prefix = 'https://mm.taobao.com/album/json/get_album_photo_list.htm?user_id={0}&album_id={1}&page={2}'
        self.image_pattern = re.compile('''img.*290x10000.jpg''', re.U)
        self.image_name_pattern = re.compile('''"picId":"(.*?)"''', re.U)
        self.model_pattern = re.compile(
            '''<a class="lady-name" href="(.*?)".*>(.*?)</a>''', re.U)
        self.album_pattern = re.compile('''.*album_id=(.*?)&.*''', re.U)
        self.links = []
        self.ids = []
        self.names = []
        self.sql = SqlHelper()
Example #10
    def GET(self):
        try:
            sql = SqlHelper()
            inputs = web.input()
            name = inputs.get('name')
            ip = inputs.get('ip')
            command = "DELETE FROM {0} WHERE ip=\'{1}\'".format(name, ip)
            sql.execute(command)

            command = "SELECT ip FROM {0} WHERE ip=\'{1}\'".format(name, ip)
            res = sql.query_one(command)
            return res is None
        except:
            pass
        return False
Example #11
    def __init__(self, name=None, **kwargs):
        super(JDSpider, self).__init__(name, **kwargs)
        self.product_id = kwargs.get('product_id', -1)
        self.log('product_id:%s' % self.product_id)
        self.item_table = 'item_%s' % self.product_id
        self.product_page = '%s_page' % self.product_id

        self.log_dir = 'log/%s' % self.product_id
        self.is_record_page = False
        if self.is_record_page:
            utils.make_dir(self.log_dir)

        self.sql = SqlHelper()
        self.red = redis.StrictRedis(host=config.redis_host,
                                     port=config.redis_part,
                                     db=config.redis_db,
                                     password=config.redis_pass)
Example #12
    def handle(self, *args, **options):
        reload(sys)
        sys.setdefaultencoding('utf-8')
        os.chdir(sys.path[0])

        spargs = utils.arglist_to_dict(options['spargs'])

        if not os.path.exists('log'):
            os.makedirs('log')

        configure_logging(install_root_handler=False)
        logging.basicConfig(filename='log/%s.log' % spargs.get('user_id'),
                            format='%(levelname)s %(asctime)s: %(message)s',
                            level=logging.ERROR)

        guid = spargs.get('guid', '0')
        user_id = spargs.get('user_id', '0')

        logging.warn('user_id: %s' % user_id)
        if guid == '0' or user_id == '0':
            utils.log('分析数据传入参数不对,接收到的参数为: spargs:%s' % spargs)
            utils.push_redis(guid=guid,
                             user_id=user_id,
                             info='分析数据传入参数不对,接收到的参数为:%s' % spargs)
            utils.push_redis(guid=guid, user_id=user_id, info='finish')
            return

        utils.log('开始分析:%s' % spargs)
        sql = SqlHelper()
        red = redis.StrictRedis(host=config.redis_host,
                                port=config.redis_part,
                                db=config.redis_db,
                                password=config.redis_pass)
        spargs['sql'] = sql
        spargs['red'] = red

        # run the spider
        logging.warn(spargs)
        runspider(spargs)

        # start the analysis
        logging.warn(spargs)
        analysis = RealTimeAnalysis(**spargs)
        analysis.run()
Example #13
    def GET(self):
        try:
            sql = SqlHelper()

            inputs = web.input()
            name = inputs.get('name')
            command = "SELECT * FROM {0}".format(name)
            result = sql.query(command)
            data = [{
                'ip': item[1],
                'port': item[2],
                'speed': item[6]
            } for item in result]
            data = json.dumps(data, indent=4)
            return data
        except:
            pass

        return []
Example #14
    def GET(self):
        try:
            sql = SqlHelper()
            inputs = web.input()
            name = inputs.get('name')
            anonymity = inputs.get('anonymity', None)
            https = inputs.get('https', None)
            order = inputs.get('order', 'speed')
            sort = inputs.get('sort', 'asc')
            count = inputs.get('count', 100)

            command = ''
            if anonymity is None and https is None:
                command = "SELECT * FROM {name} ORDER BY {order} {sort} LIMIT {count}". \
                    format(name = name, order = order, sort = sort, count = count)
            elif anonymity is not None and https is None:
                command = "SELECT * FROM {name} WHERE anonymity=\'{anonymity}\' ORDER BY {order} {sort} " \
                          "LIMIT {count}". \
                    format(name = name, anonymity = anonymity, order = order, sort = sort, count = count)
            elif anonymity is None and https is not None:
                command = "SELECT * FROM {name} WHERE https=\'{https}\' ORDER BY {order} {sort} LIMIT {count}". \
                    format(name = name, https = https, order = order, sort = sort, count = count)
            elif anonymity is not None and https is not None:
                command = "SELECT * FROM {name} WHERE anonymity=\'{anonymity}\' AND https=\'{https}\' ORDER BY " \
                          "{order} {sort} limit {count}". \
                    format(name = name, anonymity = anonymity, https = https, order = order, sort = sort, count = count)
            result = sql.query(command)
            data = [{
                'id': item[0],
                'ip': item[1],
                'port': item[2],
                'anonymity': item[4],
                'https': item[5],
                'speed': item[6],
                'save_time': str(item[8])
            } for item in result]

            data = json.dumps(data, indent=4)
            return data
        except Exception, e:
            utils.log('select exception msg:%s' % e)
            pass
Example #15
    def GET(self):
        try:
            sql = SqlHelper()

            inputs = web.input()
            name = inputs.get('name')
            anonymity = inputs.get('anonymity', None)
            https = inputs.get('https', None)
            sort = inputs.get('sort', 'speed')
            count = inputs.get('count', 100)

            command = ''
            if anonymity is None and https is None:
                command = "SELECT * FROM {0} ORDER BY {1} LIMIT {2}".format(
                    name, sort, count)
            elif anonymity is not None and https is None:
                command = "SELECT * FROM {0} WHERE anonymity=\'{1}\' ORDER BY {2} LIMIT {3}". \
                    format(name, anonymity, sort, count)
            elif anonymity is None and https is not None:
                command = "SELECT * FROM {0} WHERE https=\'{1}\' ORDER BY {2} LIMIT {3}". \
                    format(name, https, sort, count)
            elif anonymity is not None and https is not None:
                command = "SELECT * FROM {0} WHERE anonymity=\'{1}\' AND https=\'{2}\' ORDER BY {3} limit {4}". \
                    format(name, anonymity, https, sort, count)

            result = sql.query(command)
            data = [{
                'ip': item[1],
                'port': item[2],
                'speed': item[6]
            } for item in result]
            data = json.dumps(data, indent=4)
            return data
        except:
            pass

        return []
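Note: Examples #13-#15 are web.py GET handlers that return rows from a proxy table as a JSON string. A client could consume such an endpoint roughly as sketched below; the host, route, and the example value for name are placeholders, and only the query-parameter names (name, anonymity, https, sort, count) come from the handlers above:

import json

import requests

# hypothetical deployment address and route; adjust to wherever the web.py app is mounted
resp = requests.get('http://127.0.0.1:8080/select',
                    params={'name': 'example_table', 'https': 'yes',
                            'sort': 'speed', 'count': 20},
                    timeout=10)
for proxy in json.loads(resp.text):
    print('%s:%s  speed:%s' % (proxy['ip'], proxy['port'], proxy['speed']))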
Example #16
    def __init__(self, *a, **kwargs):
        super(AssetStoreSpider, self).__init__(*a, **kwargs)

        # directory where downloaded plugins are stored
        self.dir_plugins = 'Plugins/'
        self.dir_all = self.dir_plugins + 'all'

        utils.make_dir(self.dir_plugins)
        utils.make_dir(self.dir_all)

        # a list of all plugins
        self.plugin_list = []

        self.sql = SqlHelper()
        self.table_name = config.assetstore_table_name

        self.priority_adjust = 2

        # Unity version
        self.unity_version = ''

        # request headers
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Connection': 'keep-alive',
            'Host': 'www.assetstore.unity3d.com',
            'Referer': 'https://www.assetstore.unity3d.com/en/',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 Firefox/50.0',
            'X-Kharma-Version': self.unity_version,
            'X-Requested-With': 'UnityAssetStore',
            'X-Unity-Session': '26c4202eb475d02864b40827dfff11a14657aa41',
        }

        self.init()
Example #17
if __name__ == '__main__':
    if not os.path.exists('log'):
        os.makedirs('log')

    if not os.path.exists('temp'):
        os.makedirs('temp')

    reload(sys)
    sys.setdefaultencoding('utf-8')

    logging.basicConfig(filename='log/job.log',
                        format='%(levelname)s %(asctime)s: %(message)s',
                        level=logging.DEBUG)

    sql = SqlHelper()
    red = redis.StrictRedis(host='localhost', port=6379, db=10)

    init()

    wx = MyWXBot()
    t1 = threading.Thread(target=wx.run_wx)
    t2 = threading.Thread(target=wx.user_query_job)
    t3 = threading.Thread(target=wx.crawl_boss_job)
    t4 = threading.Thread(target=wx.crawl_lagou_job)
    t5 = threading.Thread(target=wx.crawl_liepin_job)
    t1.start()
    t2.start()
    t3.start()
    t4.start()
    t5.start()
Example #18
    def __init__(self):
        self.sql = SqlHelper()

        self.weather_table_name = config.weather_table
        self.user_table_name = config.user_table
Example #19
def runspider(request):
    data = {
        'status': 'failure',
        'guid': '0',
        'info': '',
    }

    try:
        # the production environment uses a POST request
        url = request.POST.get('url')
        force = request.POST.get('force', 'false')
        pattern = re.compile('user-rate-')
        urls = re.split(pattern, url)
        user_id = urls[1]
        pattern = re.compile(r'\w+', re.S)
        user_id = re.search(pattern, user_id).group()
        sql = SqlHelper()

        utils.log('user_id:%s' % user_id)

        if 'rate.taobao.com' in url and user_id is not None:
            data['status'] = 'success'
            data['guid'] = '%s_%s' % (random.randint(1000000000000, 9999999999999),
                                      random.randint(100, 999))
            data['info'] = '成功接收数据,正在为您抓取并分析数据,精彩稍候呈现'

            command = "SELECT id FROM {table} WHERE id={user_id}". \
                format(table = config.tb_item_table, user_id = user_id)
            result = sql.query_one(command)

            if result is None:
                name = 'tb_comment'
                cmd = 'cd {dir};python manage.py real_time_analysis -a name={name} -a guid={guid} ' \
                      '-a user_id={user_id} -a url={url};'. \
                    format(url = str(url), name = name, dir = settings.BASE_DIR, guid = data.get('guid'),
                           user_id = user_id)

                logging.warn(cmd)
                subprocess.Popen(cmd, shell=True)
            else:
                if force == 'false':
                    utils.log('数据库中存在数据,从数据库中取出分析结果')
                    command = "SELECT * FROM {0} WHERE user_id={1} ORDER BY id". \
                        format(config.analysis_item_table, user_id)
                    result = sql.query(command)
                    for res in result:
                        utils.push_redis(data.get('guid'),
                                         res[1],
                                         res[2],
                                         res[3],
                                         save_to_mysql=False)
                else:
                    command = "DELETE FROM {0} WHERE produce_id={1}".format(
                        config.analysis_item_table, user_id)
                    sql.execute(command)
                    # re-analyse the data
                    cmd = 'cd {dir};python manage.py analysis -a url={url} -a name={name} -a guid={guid} -a ' \
                          'user_id={user_id};'. \
                        format(url = url, name = 'tb', dir = settings.BASE_DIR, guid = data.get('guid'),
                               user_id = user_id)

                    subprocess.Popen(cmd, shell=True)
        else:
            data['info'] = '传入网址有误,请检查后重新输入,请输入以下格式的网址:\n%s' % \
                           'https://rate.taobao.com/user-rate-UvGv0MFc0vFILvgTT.htm'
    except Exception, e:
        logging.error('run spider exception:%s' % e)
        data['info'] = '出现错误,错误原因:%s' % e
Example #20
File: views.py  Project: ztg498/jd_analysis
def runspider(request):
    data = {
        'status': 'failure',
        'guid': '0',
        'info': '',
    }

    try:
        # the production environment uses a POST request
        url = request.POST.get('url')
        force = request.POST.get('force', 'false')
        pattern = re.compile(r'\d+', re.S)
        product_id = re.search(pattern, url).group()
        sql = SqlHelper()

        utils.log('product_id:%s' % product_id)

        if 'item.jd.com' in url and product_id is not None:
            data['status'] = 'success'
            data['guid'] = str(uuid.uuid4())
            data['info'] = '成功接收数据,正在为您抓取并分析数据,精彩稍候呈现'

            command = "SELECT id FROM {table} WHERE id={product_id}". \
                format(table = config.jd_item_table, product_id = product_id)
            result = sql.query_one(command)

            if result is None:
                name = 'jd'
                cmd = 'cd {dir};python manage.py real_time_analysis -a name={name} -a guid={guid} ' \
                      '-a product_id={product_id} -a url={url};'. \
                    format(url = str(url), name = name, dir = settings.BASE_DIR, guid = data.get('guid'),
                           product_id = product_id)

                subprocess.Popen(cmd, shell=True)
            else:
                if force == 'false':
                    utils.log('数据库中存在数据,从数据库中取出分析结果')
                    command = "SELECT * FROM {0} WHERE product_id={1} ORDER BY id". \
                        format(config.analysis_item_table, product_id)
                    result = sql.query(command)
                    for res in result:
                        utils.push_redis(data.get('guid'),
                                         res[1],
                                         res[2],
                                         res[3],
                                         save_to_mysql=False)
                else:
                    command = "DELETE FROM {0} WHERE produce_id={1}".format(
                        config.analysis_item_table, product_id)
                    sql.execute(command)
                    # re-analyse the data
                    cmd = 'cd {dir};python manage.py analysis -a url={url} -a name={name} -a guid={guid} -a ' \
                          'product_id={product_id};'. \
                        format(url = url, name = 'jd', dir = settings.BASE_DIR, guid = data.get('guid'),
                               product_id = product_id)

                    subprocess.Popen(cmd, shell=True)
        else:
            data['info'] = '传入网址有误,请检查后重新输入,请输入以下格式的网址:\n%s' % \
                           'https://item.jd.com/3995645.html'
    except Exception, e:
        logging.error('run spider exception:%s' % e)
        data['info'] = '出现错误,错误原因:%s' % e
Example #21
def main(n):
    _tc = TimeCnt()
    _tc.cnt_time()
    #model_labels_2 = model_labels_2
    #model_labels = model_labels
    #model_columns_base = model_columns_base
    #print("> this is sql test")
    #shandong_ = SqlHelper(Config_map_shandong)
    #guangxi_ = SqlHelper(Config_guangxi)
    #storm_shandong_= SqlHelper(Configstormshandong)
    #storm_110_ = SqlHelper(Configooo)
    _tc.cnt_time()
    today = todayStr()

    #from load_mysqL_from_localcpk import load_mongodb_conn
    #bond_risk_ = load_mysqL_from_localcpk.load_mongodb_conn()
    mysql_bond_risk_ = SqlHelper(Config_bond_risk)
    #cursor = bond_risk_.find()
    #for i in cursor:
        #print(i)
        #pdb.set_trace()
    #    timeFormat(i['date_input'])
    label_120_ = mysql_bond_risk_.execute("select label from middleTable where (to_days(now()) - to_days(date)>=%d);"% n)
    compname_120_ = mysql_bond_risk_.execute("select compname from middleTable where (to_days(now()) - to_days(date)>=%d);"% n)
    #label_120_ = mysql_bond_risk_.execute("select label from middleTable where (to_days(now()) - to_days(date_input) <=720 and to_days(now()) - to_days(date)<=%d);"% n)
    #_tc.cnt_time()
    #compname_120_ = mysql_bond_risk_.execute("select compname from middleTable where (to_days(now()) - to_days(date_input) <=720 and to_days(now()) - to_days(date)<=%d);"% n)
    #label_120_, compname_120_ = get_label_time_window(bond_risk_, n, n-120)
    label_lst_ = list(set(label_120_))
    _tc.cnt_time()
    compname_lst_ = list(set(compname_120_))
    _dic = get_label_120(mysql_bond_risk_, compname_lst_, label_lst_, n)
    _tc.cnt_time()
    _panel = pd.Panel(_dic)
    _panel = _panel.fillna(0.0)
    #filter_data_by_time(bond_risk_, today)
    _tc.cnt_time()

    df_4_model = pd.DataFrame(index=compname_lst_, columns = model_labels + model_labels_2)
    df_4_model = df_4_model.fillna(0.0)
    df_4_model = df_4_model.astype(np.float64)
    _index = list(df_4_model.index)
    _columns = model_labels
    _columns_2 = model_labels_2
    #print("> ready to get data")
    _cnt = 0
    for i in _index:
        _cnt+=1
        if _cnt % 100 ==1:
            pass
            #print(">>>> !!! handle the,", i, _cnt)
        #if _cnt > 300:
            #break
            #print("> handle the,", i, _cnt)
        for c in _columns:
            df_4_model.loc[i,c] = cell_fill(_panel, i,c)
        df_4_model.loc[i, "企业名称"] = i
        df_4_model.loc[i, "发布日期"] = datetime.datetime.now()
        df_4_model.loc[i, "credit_recent"] = 0
        df_4_model.loc[i, "credit_ago"] = 0
        df_4_model.loc[i, "credit_trend"] = 0

        df_4_model.loc[i, "60"] = _panel[i].loc[60,:].sum()
        df_4_model.loc[i, "120"] = _panel[i].loc[120,:].sum()
        df_4_model.loc[i, "180"] = _panel[i].loc[180,:].sum()
        df_4_model.loc[i, "债券风险60"] = group_cnt_key_word("债券风险60",i,_panel)
        df_4_model.loc[i, "债券风险120"] = group_cnt_key_word("债券风险120",i,_panel)
        df_4_model.loc[i, "债券风险180"] = group_cnt_key_word("债券风险180",i,_panel)
        df_4_model.loc[i, "个人风险60"] = group_cnt_key_word("个人风险60",i,_panel)
        df_4_model.loc[i, "个人风险120"] = group_cnt_key_word("个人风险120",i,_panel)
        df_4_model.loc[i, "个人风险180"] = group_cnt_key_word("个人风险180",i,_panel)
        df_4_model.loc[i, "财务风险60"] = group_cnt_key_word("财务风险60",i,_panel)
        df_4_model.loc[i, "财务风险120"] = group_cnt_key_word("财务风险120",i,_panel)
        df_4_model.loc[i, "财务风险180"] = group_cnt_key_word("财务风险180",i,_panel)
        df_4_model.loc[i, "经营风险60"] = group_cnt_key_word("经营风险60",i,_panel)
        df_4_model.loc[i, "经营风险120"] = group_cnt_key_word("经营风险120",i,_panel)
        df_4_model.loc[i, "经营风险180"] = group_cnt_key_word("经营风险180",i,_panel)
        df_4_model.loc[i, "行业风险60"] = group_cnt_key_word("行业风险60",i,_panel)
        df_4_model.loc[i, "行业风险120"] = group_cnt_key_word("行业风险120",i,_panel)
        df_4_model.loc[i, "行业风险180"] = group_cnt_key_word("行业风险180",i,_panel)
        df_4_model.loc[i, "企业风险60"] = group_cnt_key_word("企业风险60",i,_panel)
        df_4_model.loc[i, "企业风险120"] = group_cnt_key_word("企业风险120",i,_panel)
        df_4_model.loc[i, "企业风险180"] = group_cnt_key_word("企业风险180",i,_panel)
        #df_4_model = df_4_model.applymap(lambda x : np.NaN if x==0 else x)
        df_4_model.loc[i, "sub120_60"] = df_4_model.loc[i, "120"] - df_4_model.loc[i, "60"]
        df_4_model.loc[i, "sub180_120"] = df_4_model.loc[i, "180"] - df_4_model.loc[i, "120"]
        #df_4_model = df_4_model.applymap(lambda x : np.NaN if x==-1 else x)
        #df_4_model = df_4_model.applymap(lambda x : np.NaN if x==0 else x)

    _x = df_4_model.drop(["企业名称","发布日期","Label"],1)
    _z = pd.read_csv("/home/siyuan/bond_risk/_z.csv").drop(["Unnamed: 0","发布日期","Label"],1)
    #_z.index = _z["企业名称"]
    _z = _z.drop("企业名称", axis=1)
    _x.columns = list(_z.columns)
    # !! filter
    #_x = _x[(_x["sub120_60"]>0) & (_x["60"]>0)]
    #_x = _x[(_x["60"]>0)]
    _x = _x[(_x["120"]>0)]
    train_separator = len(_x.index)
    #print(train_separator)
    _pred_data = pd.concat([_x, _z], axis=0)

    _pred_data = set_dummy(_pred_data, False)
    # output predict label
    bst = xgb.Booster()
    bst.load_model("/home/siyuan/data/xgb.model")
    #pdb.set_trace()

    #_lz = pd.read_csv("/home/siyuan/bond_risk/_z.csv")["Label"]
    result_ = predict(bst, _pred_data, _pred_data.iloc[1])
    dict_ = dict(zip(list(_pred_data.index), result_))
    dict_res = dict(zip(list(_pred_data.index)[:train_separator], result_[:train_separator]))
    #dict_res = dict(zip(list(_pred_data.index), result_))

    #print(collections.Counter(list(result_)))
    #print(collections.Counter(list(result_)[:train_separator]))
    cnt = 0
    #pdb.set_trace()
    #print(dict_res)
    for i in dict_res.keys():
        #sql_ = "INSERT INTO resultTable VALUES('', '%s', CURTIME(), '%s');"%(i,str(format(dict_res[i],'.9e')))
        sql_ = "INSERT INTO resultTable VALUES('', '%s', CURTIME(), '%s');"%(i,str(format(dict_res[i],'.9e')))
        #print(sql_)
        sql_res_ = mysql_bond_risk_.execute(sql_)
        #print(sql_res_)
        cnt+=1
    pdb.set_trace()
    mysql_bond_risk_.connect.commit()