Example #1
def department_count_spider():
    department_count_list = []
    department_count = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='department_count',
        field_list=('spider_date', 'up_date', 'name', 'list_time', 'buy_time',
                    'buy_sum', 'sell_time', 'sell_sum'))
    month_ago = department_count.date_ago(30)
    page = 1
    while True:
        try:
            json_data = department_count.get_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/TraderStatistic/pagesize=50,page={page},sortRule=-1,sortType=,startDate={month_ago},endDate={department_count.spider_date},gpfw=0,js=var%20data_tab_1.html?rt=25754789',
                'GB2312')
            data_list = department_count.json_to_py(json_data,
                                                    deal=True)['data']
        except Exception:  # network or JSON errors: skip this page
            print(f'Failed to fetch page {page}')
            page += 1
            continue
        if not data_list or page == 500:
            break
        print(f'Page {page}')
        for data in data_list:
            department_count.up_date = department_count.spider_date
            department_count.name = data['SalesName']
            if department_count.name not in department_count_list:
                department_count_list.append(department_count.name)
            else:
                print(f'{department_count.name} - duplicate, skipped')
                continue
            sql = f'select name from department_count where name="{department_count.name}" and spider_date="{department_count.spider_date}"'
            same_data = department_count.sql_search(sql)
            if same_data:
                department_count.sql_search(
                    f'delete from department_count where name="{department_count.name}" and spider_date="{department_count.spider_date}"'
                )
                print(
                    f'Re-crawling {department_count.spider_date}-{department_count.name}'
                )
            department_count.list_time = department_count.to_null(
                data['UpCount'])
            department_count.buy_time = department_count.to_null(
                data['BCount'])
            department_count.buy_sum = department_count.to_null(
                data['SumActBMoney'])
            department_count.sell_time = department_count.to_null(
                data['SCount'])
            department_count.sell_sum = department_count.to_null(
                data['SumActSMoney'])
            department_count.data_save()
            print(
                f'Brokerage branch ranking stats: {department_count.up_date}-{department_count.name} imported'
            )
        page += 1
    department_count.spider_end()
    print('end: brokerage branch ranking stats')
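Note: the spiders in this listing splice values straight into their SELECT and DELETE statements with f-strings. SuperSpider's internals are not shown, but if sql_search() sits on a standard DB-API driver such as PyMySQL (an assumption), the same dedup-and-replace step can use placeholders so the driver handles quoting. A minimal sketch; delete_same_day_row and conn are illustrative names, not part of SuperSpider:

# Hypothetical sketch: parameterized dedup, assuming a PyMySQL
# connection rather than SuperSpider's sql_search() helper.
import pymysql

def delete_same_day_row(conn, name, spider_date):
    with conn.cursor() as cur:
        # %s placeholders avoid manual quoting and SQL injection
        cur.execute(
            'select name from department_count '
            'where name=%s and spider_date=%s',
            (name, spider_date))
        if cur.fetchone():
            cur.execute(
                'delete from department_count '
                'where name=%s and spider_date=%s',
                (name, spider_date))
            conn.commit()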
Example #2
def stock_count_spider():
    stock_count_list = []
    stock_count = SuperSpider(host='47.102.40.81',
                              passwd='Abc12345',
                              db='bryframe',
                              table_name='stock_count',
                              field_list=('spider_date', 'up_date', 'code',
                                          'name', 'list_time', 'buy_sum',
                                          'sell_sum', 'buy_amount'))
    month_ago = stock_count.date_ago(30)
    page = 1
    while True:
        try:
            json_data = stock_count.get_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/StockStatistic/pagesize=50,page={page},sortRule=-1,sortType=,startDate={month_ago},endDate={stock_count.spider_date},gpfw=0,js=var%20data_tab_3.html?rt=25754758',
                'GB2312')
            data_list = stock_count.json_to_py(json_data, deal=True)['data']
        except Exception:  # network or JSON errors: skip this page
            print(f'Failed to fetch page {page}')
            page += 1
            continue
        if not data_list or page == 500:
            break
        print(f'Page {page}')
        for data in data_list:
            stock_count.up_date = data['Tdate']
            stock_count.code = data['SCode']
            stock_count.name = data['SName']
            if (stock_count.up_date, stock_count.code) not in stock_count_list:
                stock_count_list.append(
                    (stock_count.up_date, stock_count.code))
            else:
                print(
                    f'{stock_count.up_date}-{stock_count.code}-{stock_count.name} - duplicate, skipped'
                )
                continue
            sql = f'select code from stock_count where code="{stock_count.code}" and spider_date="{stock_count.spider_date}" and up_date="{stock_count.up_date}"'
            same_data = stock_count.sql_search(sql)
            if same_data:
                stock_count.sql_search(
                    f'delete from stock_count where code="{stock_count.code}" and spider_date="{stock_count.spider_date}" and up_date="{stock_count.up_date}"'
                )
                print(
                    f'Re-crawling {stock_count.spider_date}-{stock_count.code}-{stock_count.name}'
                )
            stock_count.list_time = stock_count.to_null(data['SumCount'])
            stock_count.buy_sum = stock_count.to_null(data['Bmoney'])
            stock_count.sell_sum = stock_count.to_null(data['Smoney'])
            stock_count.buy_amount = stock_count.to_null(data['JmMoney'])
            stock_count.data_save()
            print(
                f'Per-stock Dragon-Tiger list stats: {stock_count.up_date}-{stock_count.code}-{stock_count.name} imported'
            )
        page += 1
    stock_count.spider_end()
    print('end: per-stock Dragon-Tiger list stats')
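All four spiders share the same paging loop: fetch a page, bump the counter and retry on failure, stop on an empty page or at page 500. One subtlety: the 500-page cap is only checked after a successful fetch, so a permanently failing endpoint would loop past it. A sketch of the pattern as a reusable generator; fetch_page and iter_pages are illustrative names, not SuperSpider methods:

# Illustrative refactor of the shared paging loop; fetch_page stands
# in for the get_html + json_to_py pair used in the examples.
def iter_pages(fetch_page, max_page=500):
    page = 1
    while page <= max_page:       # cap checked on every iteration
        try:
            data_list = fetch_page(page)
        except Exception:
            print(f'Failed to fetch page {page}')
            page += 1
            continue
        if not data_list:         # empty page marks the end of data
            return
        yield page, data_list
        page += 1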
Example #3
def department_track_spider():
    department_track = SuperSpider(
        host='47.102.40.81',
        passwd='Abc12345',
        db='bryframe',
        table_name='department_track',
        field_list=('spider_date', 'up_date', 'code', 'name', 'list_time',
                    'buy_sum', 'buy_time', 'sell_time', 'buy_amount',
                    'up_down'))
    month_ago = department_track.date_ago(30)
    page = 1
    while True:
        try:
            json_data = department_track.use_requests_to_html(
                f'http://data.eastmoney.com/DataCenter_V3/stock2016/JgStatistic/pagesize=50,page={page},sortRule=-1,sortType=,startDate={month_ago},endDate={department_track.spider_date},gpfw=0,js=var%20data_tab_3.html?rt=25754592',
                'GB2312')
            data_list = department_track.json_to_py(json_data,
                                                    deal=True)['data']
        except Exception:  # network or JSON errors: skip this page
            print(f'Failed to fetch page {page}')
            page += 1
            continue
        if not data_list or page == 500:
            break
        print(f'Page {page}')
        for data in data_list:
            department_track.up_date = department_track.spider_date
            department_track.code = data['SCode']
            department_track.name = data['SName']
            department_track.list_time = department_track.to_null(
                data['UPCount'])
            department_track.buy_sum = department_track.to_null(
                data['JGBMoney'])
            department_track.buy_time = department_track.to_null(
                data['JGBCount'])
            department_track.sell_time = department_track.to_null(
                data['JGSCount'])
            department_track.buy_amount = department_track.to_null(
                data['JGPBuy'])
            department_track.up_down = department_track.to_null(
                data['RChange1M'])
            department_track.data_save()
            print(
                f'Institutional seat trade tracking: {department_track.up_date}-{department_track.code}-{department_track.name} imported'
            )
        page += 1
    department_track.spider_end()
    print('end: institutional seat trade tracking')
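This example fetches with use_requests_to_html instead of get_html; either way the endpoint returns a JSONP-style body (note js=var%20data_tab_3 in the URL), which json_to_py(..., deal=True) presumably unwraps before parsing. A sketch of what that fetch-and-unwrap pair might look like with requests; fetch_json is a hypothetical helper, not SuperSpider's actual implementation:

# Assumed behavior only: decode the GB2312 body, strip the
# 'var data_tab_3=' wrapper, then parse the remaining JSON.
import json
import re
import requests

def fetch_json(url, encoding='GB2312', timeout=10):
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    resp.encoding = encoding    # eastmoney serves GB2312-encoded pages
    body = re.sub(r'^var\s+\w+\s*=\s*', '', resp.text).rstrip(';')
    return json.loads(body)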
Example #4
def bonus_data_spider():
    bonus_data = SuperSpider(
        host='139.224.115.44',
        passwd='A9Vg+Dr*nP^fR=1V',
        db='bryframe3',
        table_name='bonus_data',
        field_list=('spider_date', 'bonus_report_date', 'code', 'name',
                    'cash_bonus_rate', 'transfer_rate', 'plan_announce_date',
                    'stock_register_date', 'remove_date', 'plan_scheduler',
                    'latest_announce_date'))
    date_list = bonus_data.data_search(
        'http://data.eastmoney.com/yjfp/201812.html',
        '//select[@id="sel_bgq"]/option/text()', 'gb2312')
    year_ago_datetime = bonus_data.to_datetime(bonus_data.date_ago(365))
    date_list2 = []
    for aim_date in date_list:
        if year_ago_datetime <= bonus_data.to_datetime(str(aim_date)):
            date_list2.append(aim_date)
        else:
            break
    for use_date in date_list2:
        bonus_data.bonus_report_date = use_date
        page = 1
        while True:
            print(f'Page {page}')
            try:
                json_data = bonus_data.get_html(
                    f'http://data.eastmoney.com/DataCenter_V3/yjfp/getlist.ashx?js=var%20aTnZIWfZ&pagesize=50&page={page}&sr=-1&sortType=YAGGR&mtk=%C8%AB%B2%BF%B9%C9%C6%B1&filter=(ReportingPeriod=^{use_date}^)&rt=51742239',
                    'GB2312')
                data_list = bonus_data.json_to_py(json_data, deal=True)['data']
            except Exception:  # network or JSON errors: skip this page
                print(f'Failed to fetch page {page}')
                page += 1
                continue
            if not data_list or page == 500:
                break
            for data in data_list:
                bonus_data.code = data['Code']
                bonus_data.name = data['Name']
                bonus_data.latest_announce_date = bonus_data.to_null(
                    data['NoticeDate'][:10])
                sql = f'select code from bonus_data where code="{bonus_data.code}" and spider_date="{bonus_data.spider_date}" and latest_announce_date="{bonus_data.latest_announce_date}"'
                same_data = bonus_data.sql_search(sql)
                if same_data:
                    bonus_data.sql_search(
                        f'delete from bonus_data where code="{bonus_data.code}" and spider_date="{bonus_data.spider_date}" and latest_announce_date="{bonus_data.latest_announce_date}"'
                    )
                    print(
                        f'重新爬取-{bonus_data.spider_date}-{bonus_data.code}-{bonus_data.name}'
                    )
                bonus_data.plan_announce_date = bonus_data.to_null(
                    data['ResultsbyDate'][:10])
                bonus_data.stock_register_date = bonus_data.to_null(
                    data['GQDJR'][:10])
                bonus_data.remove_date = bonus_data.to_null(data['CQCXR'][:10])
                bonus_data.plan_scheduler = data['ProjectProgress']
                group_data = data['AllocationPlan']
                try:
                    # e.g. '派1.5' = cash dividend per 10 shares
                    bonus_data.cash_bonus_rate = '10' + next(bonus_data.re_find(
                        r'派[\d\.]+', group_data)).group() + '元(含税)'
                except StopIteration:  # plan has no cash-dividend clause
                    bonus_data.cash_bonus_rate = 'null'
                try:
                    # e.g. '转4' = shares transferred from capital reserve
                    transfer_rate1 = next(bonus_data.re_find(
                        r'转[\d\.]+', group_data)).group()
                except StopIteration:
                    transfer_rate1 = ''
                try:
                    # e.g. '送3' = bonus shares per 10 shares
                    transfer_rate2 = next(bonus_data.re_find(
                        r'送[\d\.]+', group_data)).group()
                except StopIteration:
                    transfer_rate2 = ''
                if not transfer_rate1 and not transfer_rate2:
                    bonus_data.transfer_rate = 'null'
                else:
                    bonus_data.transfer_rate = '10' + transfer_rate2 + transfer_rate1
                bonus_data.data_save()
                print(
                    f'{bonus_data.bonus_report_date}-{bonus_data.code}-{bonus_data.name} imported'
                )
            page += 1
    bonus_data.spider_end()
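The AllocationPlan field is a Chinese plan string such as '10转4送3派1.5元', and the three regexes above pick out the 派 (cash dividend), 转 (transfer), and 送 (bonus-share) clauses. A worked example with the standard re module in place of SuperSpider.re_find, which is assumed here to be a finditer-style helper; 'plan' is sample data:

# Worked example of the allocation-plan parsing above.
import re

plan = '10转4送3派1.5元(含税)'
cash = next(re.finditer(r'派[\d\.]+', plan), None)
zhuan = next(re.finditer(r'转[\d\.]+', plan), None)
song = next(re.finditer(r'送[\d\.]+', plan), None)

cash_bonus_rate = '10' + cash.group() + '元(含税)' if cash else 'null'
if song or zhuan:
    transfer_rate = ('10' + (song.group() if song else '')
                     + (zhuan.group() if zhuan else ''))
else:
    transfer_rate = 'null'
print(cash_bonus_rate, transfer_rate)  # 10派1.5元(含税) 10送3转4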