class FundMapping(object):
    def __init__(self):
        self._collection = Mongodb('192.168.250.200', 27017, 'fund', 'base_fund')
        self._url = 'http://fund.csrc.gov.cn/web/classification_show.organization'

    def get_fund_mapping(self):
        # sub_code, sub_name, main_code, main_name
        sub_to_main_mapping = []
        html = requests.get(self._url, timeout=30.0).content
        document = PyQuery(unicode(html, 'utf-8'))

        fund_blocks = [document.items('.aa'), document.items('.dd')]
        for each_block in fund_blocks:
            for class_tag in each_block:
                items_list = [item.text() for item in class_tag.items('td')]
                sub_to_main_mapping.append((items_list[1], items_list[3]))
        return dict(sub_to_main_mapping)

    def update_to_mongo(self):
        fund_mapping = self.get_fund_mapping()

        for item in self._collection.query(kwargs={'code': 1}).sort([('_id', 1)]):
            key = item['code'][:6]
            main_fund_code = fund_mapping.get(key)
            if main_fund_code is not None:
                regex = re.compile(r'{0}'.format(main_fund_code))
                main_fund_sid = self._collection.get({'code': regex}, {'sid': 1})
                print 'main:', main_fund_sid
                _main = (main_fund_sid or {}).get('sid', '')
                self._collection.update({'_id': item['_id']}, setdata={'main': _main})
def third_update():
    coll_in = Mongodb('192.168.251.95', 27017, 'news', 'announcement_hk_chz')
    coll_cat = Mongodb('192.168.251.95', 27017, 'ada', 'dict_announce_catalog_hk')
    coll_secu = Mongodb('192.168.251.95', 27017, 'ada', 'base_stock')
    kt = 0
    cdctuo = ThirdUpdate().main()
    cd_dt_cat_tit_url_ori = cdctuo if cdctuo else []
    for codes, dt, cat, title, url, cat_origin in cd_dt_cat_tit_url_ori:
        kt += 1
        for code in codes:
            secu = get_secu(code, coll_secu)
            if secu and not coll_in.get({'sid': url, 'secu.0.cd': secu[0]['cd']}, {'title': 1}):
                print 'kt:', kt, '|', code, '|',  dt, '|', url, '\n|', title

                try:
                    hk_data = post_dict(secu, dt, cat, title, url, cat_origin, coll_cat)
                    coll_in.insert(hk_data)
                except Exception as e:
                    print 'Error:', e.message

                # 创建索引
                # inds_mon = coll_in.get({'sid': url}, {'title': 1})
                # ind_url = "http://192.168.250.205:17081/indexer/services/indexes/delta.json?" \
                #           "indexer=announce_hkz&taskids="
                # if inds_mon:
                #     jdata = BaseDownloadHtml().get_html(ind_url + str(inds_mon['_id']))[0]
                #     if json.loads(jdata)['code'] == 200:
                #         print '\tcreate index is ok!\n\n'
    coll_in.disconnect()
    coll_cat.disconnect()
    coll_secu.disconnect()
Beispiel #3
0
    def main(self, query=None):
        if query is None:
            query_date = [str(datetime.date.today())]
        else:
            query_date = query

        flag = False
        min_date = min(query_date)
        coll = Mongodb('192.168.251.95', 27017, 'news', 'research_report_def')
        url = 'http://datainterface.eastmoney.com//EM_DataCenter/js.aspx?'
        query_string = 'type=SR&sty=GGSR&ps=50&p=%s&mkt=0&stat=0&cmd=2&code=&rt='
        for page in range(1, 20):
            py_data = json.loads(self.get_html(url + query_string % str(page), encoding=True)[1:-1])
            for data in py_data:
                code, agency = data['secuFullCode'][:6], data['insName']
                date_time, url_info_code = data['datetime'][:10], data['infoCode']
                report_url = 'http://data.eastmoney.com/report/%s/%s.html' % (date_time.replace('-', ''), url_info_code)

                if date_time in query_date:
                    src = self.rr_research_org_code(agency) or ''  # get src
                    secu = self.base_stock_code(code) or ''  # get secu
                    if coll.get({'url': report_url}, {'titl': 1}) is None:
                        try:
                            now_html = self.get_html(report_url, encoding=True)
                            title = self.remove_tag(self.__title.findall(now_html)[0])
                            content = self.remove_tag(self.__content.findall(now_html)[0])

                            to_data = {
                                'url': report_url, 'titl': {'szh': title, 'en': ''}, 'bio': {'en': '', 'szh': content},
                                'rdt': date_time, 'upu': '', 'typ': '30001', 'stat': 1, 'upt': datetime.datetime.now(),
                                'crt': datetime.datetime.now(),
                            }

                            to_data.update({'src': src, 'secu': secu})
                            if not src or not secu:
                                vn_src = '' if src else agency
                                vn_secu = '' if secu else code
                                to_data['vn'] = '^'.join([vn_src, vn_secu])
                            else:
                                to_data['vn'] = None
                            coll.insert(to_data)
                            print '[%s  %s FROM %s] -->>> Now insert mongodb!' % (code, date_time, agency)
                        except Exception as e:
                            print 'title: %s, url: %s' % (data['title'], report_url), 'Error:', e
                    else:
                        print '[%s  %s FROM %s] -->>> mongodb table is existed' % (code, date_time, agency)
                elif date_time < min_date:
                    flag = True
                    break
            if flag:
                break
        coll.disconnect()
Beispiel #4
0
    def main(self):
        if not self._validity:
            print "SZX this is Saturday or Monday!"
            return 0

        coll_in = Mongodb("192.168.251.95", 27017, "ada", "base_margin_trading")
        coll_stock = Mongodb("192.168.251.95", 27017, "ada", "base_stock")
        coll_fund = Mongodb("192.168.251.95", 27017, "fund", "base_fund")

        url = "http://www.szse.cn/szseWeb/FrontController.szse?randnum=&"
        t = lambda v: "%.4f" % float(v)
        for page in range(1, 30):
            break_point = False
            html = self.get_html(url + self._query_string.format(self._query_date, page), encoding=True)
            for it in self.extract(html):
                # print it[0], it[1], it[2], it[3], it[4], it[5], it[6]
                break_point = True
                secu_cd = secu_code(it[0], coll_stock, coll_fund)
                fiba_bre = szx_fiba_bre(secu_cd, coll_in, self._query_date)
                sema_bre = szx_sema_bre(secu_cd, coll_in, self._query_date)

                # 本日融资偿还额 = 前日融资余额 + 本日融资买入- 本日融资余额(元) (fi.re = fi.ba(上期) + fi.bu - fi.ba)
                # 融券偿还量 = 融券卖出量 + 融券余量(上期) - 融券余量 (se.re = se.so + se.ma(上期) - se.ma)
                szx_fs_data = {
                    "secu": secu_cd or it[0],
                    "date": self._query_date,
                    "total": t(it[6]),
                    "stat": 2,
                    "typ": "szx",
                    "crt": datetime.now(),
                    "fi": {"ba": t(it[2]), "bu": t(it[1]), "re": t(float(it[1]) + fiba_bre - float(it[2]))},
                    "se": {
                        "ba": t(it[5]),
                        "ma": t(it[4]),
                        "so": t(it[3]),
                        "re": t(float(it[3]) + sema_bre - float(it[4])),
                    },
                    "upt": datetime.now(),
                }
                print szx_fs_data
                if not coll_in.get({"secu": secu_cd or it[0], "date": self._query_date, "typ": "szx"}):
                    print coll_in.insert(szx_fs_data)

            if not break_point:
                break
            print u"szx [%s] 融资融券交易明细 day update: %d page done!" % (self._query_date, page)
            # break

        coll_in.disconnect()
        coll_stock.disconnect()
        coll_fund.disconnect()
Beispiel #5
0
    def insert_db(self, total_data):
        coll_in = Mongodb('192.168.251.95', 27017, 'ada', 'base_margin_trading')
        coll_stock = Mongodb('192.168.251.95', 27017, 'ada', 'base_stock')
        coll_fund = Mongodb('192.168.251.95', 27017, 'fund', 'base_fund')
        sql_db = MySQLClient("192.168.251.95", "python_team", "python_team", "ada-fd")

        print '\tnow start to insert mongodb, waiting......'
        d = (lambda v: '%.4f' % float(v))
        for pdt in total_data:
            # 信用交易日期	标的证券代码	标的证券简称	本日融资余额(元)	本日融资买入额(元)
            # 本日融资偿还额(元) 本日融券余量	本日融券卖出量	本日融券偿还量
            secu_cd = secu_code(pdt[1], coll_stock, coll_fund)
            trade_date = '-'.join([pdt[0][:4], pdt[0][4:6], pdt[0][6:]])
            uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, ''.join(self._valid(pdt)).encode('u8')))

            data = {
                'secu': secu_cd or pdt[1], 'date': trade_date, 'total': d(int(pdt[3])), 'stat': 2,
                'typ': 'sha', 'crt': datetime.now(), 'uuid': uid,
                'fi': {
                    'ba': d(pdt[3]),
                    'bu': d(pdt[4]),
                    're': d(pdt[5])
                },
                'se': {
                    'ba': '0.0000',
                    'ma': d(pdt[6]),
                    'so': d(pdt[7]),
                    're': d(pdt[8])
                },
                'upt': datetime.now()
            }

            if coll_in.get({'uuid': uid,  'typ': 'sha'}, {'secu': 1}):
                continue
            elif secu_cd is None:
                coll_in.insert(data)
            else:
                seba = sha_seba(secu_cd, pdt[6], trade_date, sql_db)
                if seba is not None:
                    data['total'] = d(int(pdt[3]) + seba)
                    data['se']['ba'] = d(seba)
                    coll_in.insert(data)

        coll_in.disconnect()
        coll_stock.disconnect()
        sql_db.disconnect()
        print '\tinsert all done!'
def update():
    coll_in = Mongodb('192.168.251.95', 27017, 'news', 'announcement_hk_chz')
    coll_cat = Mongodb('192.168.251.95', 27017, 'ada', 'dict_announce_catalog_hk')
    coll_secu = Mongodb('192.168.251.95', 27017, 'ada', 'base_stock')
    count = 0
    for code, query in codes_date:
        ktt = 0
        count += 1
        validate(code, query)
        print '[%s-->>%s,%s]' % (count, code, query), ':waiting few minutes......\n'
        dctu = PoskUpdate(code, query).main()  # codes, date, cat, title, url
        for codes, dt, cat, title, url, cat_origin in dctu:
            ktt += 1
            print '\t[%s ->> ktt:%s]' % (code, ktt), '|', codes, '|', dt, '|', title, '|', url

            for code_ in codes:
                secu = get_secu(code_, coll_secu)
                print 'secu:', secu
                if secu and not coll_in.get({'sid': url}, {'title': 1}):
                    try:
                        hk_data = post_dict(secu, dt, cat, title, url, cat_origin, coll_cat)
                        coll_in.insert(hk_data)
                    except Exception as e:
                        print '\t[%s] |%s|upload error: %s!' % (code_, dt, e.message)

                    # inds_mon = coll_in.get({'sid': url}, {'title': 1})
                    # ind_url = "http://192.168.250.205:17081/indexer/services/indexes/delta.json?" \
                    #           "indexer=announce_hkz&taskids="
                    # if inds_mon:  # 创建索引
                    #     jdata = BaseDownloadHtml().get_html(ind_url + str(inds_mon['_id']))[0]
                    #     if json.loads(jdata)['code'] == 200:
                    #         print '\tcreate index is ok!\n\n'

                if ktt % 80 == 0:
                        sleep(2 * 60)

    coll_in.disconnect()
    coll_cat.disconnect()
    coll_secu.disconnect()
Beispiel #7
0
    def main(self):
        if not self._validity:
            print 'SZX this is Saturday or Monday!'
            return 0

        coll_in = Mongodb('192.168.251.95', 27017, 'ada',
                          'base_margin_trading')
        coll_stock = Mongodb('192.168.251.95', 27017, 'ada', 'base_stock')
        coll_fund = Mongodb('192.168.251.95', 27017, 'fund', 'base_fund')

        url = 'http://www.szse.cn/szseWeb/FrontController.szse?randnum=&'
        t = lambda v: '%.4f' % float(v)
        for page in range(1, 30):
            break_point = False
            html = self.get_html(
                url + self._query_string.format(self._query_date, page),
                encoding=True)
            for it in self.extract(html):
                # print it[0], it[1], it[2], it[3], it[4], it[5], it[6]
                break_point = True
                secu_cd = secu_code(it[0], coll_stock, coll_fund)
                fiba_bre = szx_fiba_bre(secu_cd, coll_in, self._query_date)
                sema_bre = szx_sema_bre(secu_cd, coll_in, self._query_date)

                # 本日融资偿还额 = 前日融资余额 + 本日融资买入- 本日融资余额(元) (fi.re = fi.ba(上期) + fi.bu - fi.ba)
                # 融券偿还量 = 融券卖出量 + 融券余量(上期) - 融券余量 (se.re = se.so + se.ma(上期) - se.ma)
                szx_fs_data = {
                    'secu': secu_cd or it[0],
                    'date': self._query_date,
                    'total': t(it[6]),
                    'stat': 2,
                    'typ': 'szx',
                    'crt': datetime.now(),
                    'fi': {
                        'ba': t(it[2]),
                        'bu': t(it[1]),
                        're': t(float(it[1]) + fiba_bre - float(it[2]))
                    },
                    'se': {
                        'ba': t(it[5]),
                        'ma': t(it[4]),
                        'so': t(it[3]),
                        're': t(float(it[3]) + sema_bre - float(it[4]))
                    },
                    'upt': datetime.now()
                }
                print szx_fs_data
                if not coll_in.get({
                        'secu': secu_cd or it[0],
                        'date': self._query_date,
                        'typ': 'szx'
                }):
                    print coll_in.insert(szx_fs_data)

            if not break_point:
                break
            print u'szx [%s] 融资融券交易明细 day update: %d page done!' % (
                self._query_date, page)
            # break

        coll_in.disconnect()
        coll_stock.disconnect()
        coll_fund.disconnect()