Example #1
0
def lhb():
    """Fetch today's LHB trade-report details and store them as CSV and SQL.

    For every date in the date list (currently only today) the codes returned
    by ``get_lhblist`` are fetched one at a time through ``get_lhbdetail`` and
    concatenated.  The combined frame is written to ``./data/lhb/<date>.csv``
    and appended to the ``lhb`` table of schema ``stockdata``.  Failures are
    collected and written to ``<path()>/data/lhb/error.csv``.

    :return: None
    """
    print("LHB:正在获取成交回报信息...")
    today = datetime.date.today()
    # Only today is processed; the list keeps the loop ready for a back-fill
    # over several dates.
    list_date = [today]
    errorlist = []
    for date in list_date:
        date_str = str(date)
        df_lhbdetail = pd.DataFrame()
        # Reset per date: if get_lhblist() raises before the inner loop runs,
        # the handler below would otherwise hit an unbound (or stale) `code`.
        code = None
        try:
            lhb_list = get_lhblist(date_str, proxy=0)
            print(date_str, len(lhb_list))
            if len(lhb_list) == 0:
                # An empty list is recorded as a (date, 0) error entry.
                errorlist.append((date_str, 0))
            for code in lhb_list:
                tmp_lhbdetail = get_lhbdetail(code, date_str, proxy=0)
                df_lhbdetail = pd.concat((tmp_lhbdetail, df_lhbdetail))
                # Randomised pause of 1.0-1.1 s between detail requests.
                sleep(random() / 10 + 1)
        except Exception as e:
            # `code` is the code being fetched when the failure happened, or
            # None when the list request itself failed.
            errorlist.append((date_str, code, e))
        # Whatever was collected before a failure is still persisted.
        df_lhbdetail.to_csv('./data/lhb/' + date_str + '.csv', encoding='utf-8')
        try:
            df_lhbdetail.to_sql('lhb', conn(), schema='stockdata',
                                if_exists='append', index=True,
                                chunksize=10000)
        except Exception as e:
            print(e)

    df_error = pd.DataFrame(errorlist)
    df_error.to_csv(path() + '/data/lhb/error.csv')
    print("LHB:更新完毕!")
Example #2
0
def analysis():
    """Parse the ``unusual()`` feed and insert the last five minutes of rows.

    The JSON payload returned by ``unusual()`` is a list of comma-separated
    records.  After 15:00:00 local time the full day's records are also
    archived as a gzip-compressed JSON file under ``<path()>/data/unusual/``.
    Only rows whose timestamp is within the last five minutes are inserted
    into the ``unusual`` table.

    :return: DataFrame of the inserted rows with columns
        ``datetime, code, type, data, goodorbad``.
    """
    today = datetime.date.today()
    cutoff = datetime.datetime.today() - datetime.timedelta(minutes=5)

    records = []
    for raw in json.loads(unusual()):
        fields = re.split(",", raw)
        # The first field carries one trailing character that is dropped.
        fields[0] = fields[0][:-1]
        records.append(fields)

    df = pd.DataFrame(
        records,
        columns=['code', 'name', 'time', 'tcode', 'type', 'data', 'goodorbad'])

    payload = df.to_json(orient='records', force_ascii=False).encode('utf-8')
    after_cutoff_time = int(time.strftime("%H%M%S")) > 150000
    if after_cutoff_time:
        archive_name = path() + "/data/unusual/" + str(today) + ".jz"
        with open(archive_name, 'wb') as f:
            f.write(gzip.compress(payload, compresslevel=9))

    # Combine today's date with the per-row time string into a timestamp.
    df['date'] = str(today)
    df['datetime'] = pd.to_datetime(df['date'] + " " + df['time'])
    df = df[['datetime', 'code', 'type', 'data', 'goodorbad']]
    df = df[df['datetime'] >= cutoff]

    engine = conn()
    sql_update = "insert ignore into `unusual`(`datetime`, `code`, `type`, `data`, `goodorbad`) value(%s,%s,%s,%s,%s)"
    for stamp, code, kind, detail, verdict in df.values:
        engine.execute(sql_update, (str(stamp), code, kind, detail, verdict))

    return df
Example #3
0
    def amorank(self):
        """Rank ``self.df_dayline`` by ``amo`` and append the result to ``usefuldata``.

        Each row gets a 1-based rank by descending ``amo``, the change against
        the most recent earlier rank stored in ``usefuldata`` (NaN when there
        is none), the ``percentage`` from ``calc.adjfactor`` and a tag saying
        which of the two lists from ``calc.tamodel`` contain the code.

        :return: DataFrame with columns
            ``code, date, amorank, araise, percentage, taresult``.
        """
        ranked = self.df_dayline.sort_values(by=['amo'], ascending=False)
        ranked['amorank'] = pd.Series(np.arange(len(ranked['date'])) + 1,
                                      index=ranked.index)
        ranked = ranked[['code', 'date', 'amorank']].reset_index(drop=True)

        rows = []
        df_list = calc.adjfactor(self)
        taresultlist1, taresultlist2 = calc.tamodel(self)
        print("CALC:正在按成交额进行排序...")

        # NOTE(review): the query is built by string interpolation; the values
        # come from self.df_dayline - confirm they are trusted.
        previous_rank_sql = (
            "select `amorank` from `usefuldata` WHERE `code` ='%s' and `date`<'%s' and `amorank` is NOT NULL"
            " ORDER BY `date` DESC LIMIT 0,1")

        for i in range(len(ranked)):
            code = ranked['code'][i]
            date = ranked['date'][i]
            fAmorank = ranked['amorank'][i]

            df_refar = pd.read_sql(previous_rank_sql % (code, date), self.con)
            if df_refar.empty:
                ARaise = np.nan
            else:
                ARaise = fAmorank - df_refar.values[0][0]

            matching = df_list[df_list['code'] == code]
            percentage = matching['percentage'].values[0]

            in_first = code in taresultlist1
            in_second = code in taresultlist2
            if in_first:
                taresult = '1,2' if in_second else '1'
            else:
                taresult = '2' if in_second else '0'

            rows.append([code, date, fAmorank, ARaise, percentage, taresult])

        result = pd.DataFrame(rows,
                              columns=[
                                  'code', 'date', 'amorank', 'araise',
                                  'percentage', 'taresult'
                              ])
        print("CALC:正在将成交量信息写入数据库...")
        errorlist = []
        try:
            result.to_sql('usefuldata',
                          self.con,
                          schema='stockdata',
                          if_exists='append',
                          index=False,
                          chunksize=10000)
        except Exception as e:
            print(e)
            errorlist.append(e)

        # The error report is written even when it is empty.
        pd.DataFrame(errorlist).to_csv(path() + '/error/amorank.csv')
        return result
Example #4
0
def mo(pages, proxy=0):
    """Scrape managerial-ownership change records and insert today's rows.

    Each requested page is downloaded from the eastmoney data interface,
    parsed into a frame, saved to
    ``<path()>/data/managerial_ownership/<page>.csv`` and merged.  After
    de-duplication only rows dated today or later are inserted into the
    ``managerial`` table with ``INSERT IGNORE``.

    :param pages: iterable of page numbers to request (1000 records each).
    :param proxy: forwarded unchanged to ``myspyder``.
    :return: list of page numbers whose download or parsing failed.
    """
    # A Timestamp (not a datetime.date) so the comparison with the
    # datetime64 column below is valid on every pandas version.
    today = pd.Timestamp(datetime.date.today())
    raw_columns = [
        '变动比例', '董监高人员姓名', 'code', '变动人', '持股种类',
        '日期', '变动股数', '变动后持股数', '成交均价', '名称',
        '变动人与董监高的关系', 11, '变动方式', '变动金额', '职务', 15
    ]
    kept_columns = [
        'code', '日期', '变动人', '持股种类', '变动股数', '变动后持股数', '成交均价',
        '变动人与董监高的关系', '变动方式', '变动金额', '职务', '变动比例', '董监高人员姓名'
    ]
    sql_update_managerial = "INSERT IGNORE INTO `managerial`(`code`, `日期`, `变动人`, `持股种类`, `变动股数`, " \
                            "`变动后持股数`, `成交均价`, `变动人与董监高的关系`, `变动方式`, `变动金额`, `职务`," \
                            " `变动比例`, `董监高人员姓名`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    error = []
    frames = []
    for page in pages:
        print("MO:Page:", page)
        try:
            url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=GG&sty=GGMX&p=%s&ps=1000" % (
                page)
            # The response is wrapped in one leading and one trailing
            # character, which are stripped before parsing.
            html = myspyder(url, proxy=proxy).content.decode('utf-8')[1:-1]
            sleep(random() / 10 + 3)
            records = [
                line.split(",") for line in re.findall(r'\"([^"]+)\"', html)
            ]
            page_frame = pd.DataFrame(records, columns=raw_columns)[kept_columns]
            page_frame.to_csv(path() + "/data/managerial_ownership/" +
                              str(page) + ".csv",
                              encoding='utf-8')
            frames.append(page_frame)
        except Exception as e:
            print(e)
            error.append(page)

    if not frames:
        # Every page failed: there is no '日期' column to filter on and
        # nothing to insert.
        return error

    # Later pages first, matching the order in which rows were merged before.
    df = pd.concat(frames[::-1], ignore_index=True).drop_duplicates()
    df['日期'] = pd.to_datetime(df['日期'])
    df = df[df['日期'] >= today]

    engine = conn()
    for elem in df.values:
        engine.execute(sql_update_managerial, [str(value) for value in elem])
    return error
Example #5
0
def stcn_news():
    """
    Collect news items from kuaixun.stcn.com and insert unseen ones into ``news``.

    Links stored during the last two days are loaded first so that items
    already present are not written again.  Every error met while fetching or
    inserting is written to ``<path()>/error/update_news.csv``.

    :return: None
    """
    import datetime

    lastday = datetime.date.today() - datetime.timedelta(days=2)
    sql_check = "select `link` from `news` where `datetime`>='%s'" % (lastday)

    engine = conn()
    # Links from the last two days, used to reduce the number of writes.
    linklist = pd.read_sql(sql_check, engine)['link'].values

    url = "http://kuaixun.stcn.com/index_%s.shtml"
    source = 'stcn.com'
    urllist = [url % (page_id) for page_id in range(1, 2)]

    newsresult = pd.DataFrame()
    errorlist = []
    for page_url in urllist:
        try:
            fetched = get_news(page_url, proxy=0)
            newsresult = pd.concat((fetched, newsresult), ignore_index=True)
            sleep(random() / 10 + 1)
        except Exception as e:
            print(e)
            errorlist.append(e)

    sql_update = "insert ignore INTO `news`(`source`, `type`, `title`, `link`, `datetime`) VALUES (%s,%s,%s,%s,%s)"
    for j in range(len(newsresult)):
        try:
            stype = newsresult['type'][j]
            title = newsresult['title'][j]
            link = newsresult['link'][j]
            published = newsresult['datetime'][j]
            if link in linklist:
                continue
            engine.execute(sql_update,
                           (source, stype, title, link, str(published)))
        except Exception as e:
            print(e)
            errorlist.append(e)

    pd.DataFrame(errorlist).to_csv(path() + '/error/update_news.csv')
Example #6
0
def update_capitalchange(ser):
    """
    Insert capital-change rows newer than the latest stored ``变动日期``.

    Rows are read from ``<path()>/data/dzhdata/capitalchange.csv``; the latest
    stored date is always read through ``localconn()``.

    :param ser: "server", "local", or any other value for both (server first)
    :return: list of ``(first field of the row, exception)`` tuples
    """
    sql_last = "select `变动日期` from `capitalchange` ORDER BY `变动日期` DESC  limit 1"
    lastdate = pd.read_sql(sql_last, localconn()).values[0][0]

    Errorlist = []
    sqli = ("INSERT IGNORE INTO `capitalchange`(`股票代码`, `变动日期`, `变动原因`, `总股本_变动`, `流通A股_变动`, "
            "`流通B股_变动`, `总股本_前值`, `流通A股_前值`, `流通B股_前值`, `总股本`, `流通A股`, `流通B股`) VALUES "
            "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")

    frame = pd.read_csv(path() + "/data/dzhdata/capitalchange.csv",
                        dtype='object')
    frame['变动日期'] = pd.to_datetime(frame['变动日期'])
    pending = frame[frame['变动日期'] > lastdate].values

    # Connection factories to write through, in the order they are used.
    if ser == "server":
        openers = (serverconn,)
    elif ser == "local":
        openers = (localconn,)
    else:
        openers = (serverconn, localconn)

    for record in pending:
        try:
            param = [str(field) for field in record]
            # A fresh connection is opened for each row and target; a failure
            # on the first target skips the remaining one for that row.
            for opener in openers:
                db = opener()
                cur = db.cursor()
                cur.execute(sqli, tuple(param))
                db.commit()
        except Exception as e:
            print(param[0], e)
            Errorlist.append((param[0], e))
    return Errorlist
Example #7
0
# Columns of the `financial` table in the order used by the INSERT below.
# CSV rows are bound positionally, so the CSV layout presumably follows the
# same order - verify against the export.
_FINANCIAL_COLUMNS = (
    "名称", "报表日期", "代码", "摊薄每股收益", "净资产收益率", "每股经营活动现金流量",
    "每股净资产", "每股资本公积金", "每股未分配利润", "每股主营收入", "扣除非经常损益每股收益",
    "货币资金", "交易性金融资产", "应收票据", "应收账款", "预付款项", "应收利息", "应收股利",
    "其他应收款", "应收关联公司款", "存货", "消耗性生物资产", "一年内到期的非流动资产",
    "其他流动资产", "流动资产合计", "可供出售金融资产", "持有至到期投资", "长期应收款",
    "长期股权投资", "投资性房地产", "固定资产", "在建工程", "工程物资", "固定资产清理",
    "生产性生物资产", "油气资产", "无形资产", "开发支出", "商誉", "长期待摊费用",
    "递延所得税资产", "其他非流动资产", "非流动资产合计", "资产总计", "短期借款",
    "交易性金融负债", "应付票据", "应付账款", "预收账款", "应付职工薪酬", "应交税费",
    "应付利息", "应付股利", "其他应付款", "应付关联公司款", "一年内到期的非流动负债",
    "其他流动负债", "流动负债合计", "长期借款", "应付债券", "长期应付款", "专项应付款",
    "预计负债", "递延所得税负债", "其他非流动负债", "非流动负债合计", "负债合计",
    "实收资本或股本", "资本公积", "库存股", "盈余公积", "未分配利润", "外币报表折算差额",
    "非正常经营项目收益调整", "股东权益合计不含少数股东权益", "少数股东权益",
    "股东权益合计含少数股东权益", "负债和股东权益合计", "营业收入", "营业成本",
    "营业税金及附加", "销售费用", "管理费用", "堪探费用", "财务费用z", "资产减值损失",
    "公允价值变动净收益", "投资收益", "对联合营企业的投资收益", "影响营业利润的其他科目",
    "营业利润", "补贴收入", "营业外收入", "营业外支出", "非流动资产处置净损失",
    "影响利润总额的其他科目", "利润总额", "所得税费用", "影响净利润的其他科目",
    "净利润含少数股东损益", "净利润不含少数股东损益", "少数股东损益",
    "销售商品、提供劳务收到的现金", "收到的税费返还", "收到的其他与经营活动有关的现金",
    "经营活动现金流入小计", "购买商品、接受劳务支付的现金", "支付给职工以及为职工支付的现金",
    "支付的各项税费", "支付的其他与经营活动有关的现金", "经营活动现金流出小计",
    "经营活动产生的现金流量净额", "收回投资所收到的现金", "取得投资收益所收到的现金",
    "处置固定、无形和其他长期资产收回的现金净额", "处置子公司及其他营业单位收到的现金净额",
    "收到的其他与投资活动有关的现金", "投资活动现金流入小计",
    "购建固定资产、无形资产和其他长期资产支付的现金", "投资所支付的现金",
    "取得子公司及其他营业单位支付的现金净额", "支付其他与投资活动有关的现金",
    "投资活动现金流出小计", "投资活动产生的现金流量净额", "吸收投资所收到的现金",
    "子公司吸收少数股东权益性投资收到的现金", "取得借款收到的现金",
    "收到其他与筹资活动有关的现金", "筹资活动现金流入小计", "偿还债务支付的现金",
    "分配股利、利润或偿付利息支付的现金", "子公司支给付少数股东的股利、利润",
    "支付其他与筹资活动有关的现金", "筹资活动现金流出小计", "筹资活动产生的现金流量净额",
    "汇率变动对现金的影响", "其他原因对现金的影响", "现金及现金等价物净增加额",
    "期初现金及现金等价物余额", "期末现金及现金等价物余额", "净利润", "加:资产减值准备",
    "固定资产折旧、油气资产折耗、生产性生物资产折旧", "无形资产摊销", "长期待摊费用摊销",
    "处置固定资产、无形资产和其他长期资产的损失", "固定资产报废损失", "公允价值变动损失",
    "财务费用l", "投资损失", "递延所得税资产减少", "递延所得税负债增加", "存货的减少",
    "经营性应收项目的减少", "经营性应付项目的增加", "其他", "债务转为资本",
    "一年内到期的可转换公司债券", "融资租入固定资产", "现金的期末余额", "现金的期初余额",
    "现金等价物的期末余额", "现金等价物的期初余额",
    "流动比率", "速动比率", "现金比率", "负债权益比率", "股东权益比率1", "股东权益对负债比率",
    "权益乘数", "长期债务与营运资金比", "长期负债比率1", "利息支付倍数", "股东权益与固定资产比",
    "固定资产对长期负债比", "有形净值债务率", "清算价值比率", "债务保障率", "现金流量比率",
    "每股有形资产净值", "每股营运资金", "债务总额EBITDA", "营业周期", "存货周转天数",
    "应收账款周转天数", "流动资产周转天数", "总资产周转天数", "存货周转率", "应收账款周转率",
    "流动资产周转率", "固定资产周转率", "总资产周转率", "净资产周转率", "股东权益周转率",
    "营运资金周转率", "存货同比增长率", "应收帐款同比增长率", "主营业务收入增长率",
    "营业利润增长率", "利润总额增长率", "净利润增长率", "净资产增长率", "流动资产增长率",
    "固定资产增长率", "总资产增长率", "摊薄每股收益增长率", "每股净资产增长率",
    "每股经营性现金流量增长率", "三年平均净资收益率", "总资产净利润率", "投入资本回报率ROIC",
    "成本费用利润率", "营业利润率", "主营业务成本率", "销售净利率", "总资产报酬率",
    "销售毛利率", "三项费用比重", "营业费用率", "管理费用率", "财务费用率", "非主营比重",
    "营业利润比重", "每股息税折旧摊销前利润", "每股息税前利润EBIT", "EBITDA主营业务收入",
    "资产负债率", "股东权益比率", "长期负债比率", "股东权益与固定资产比率",
    "负债与所有者权益比率", "长期资产与长期资金比率", "资本化比率", "资本固定化比率",
    "固定资产比重", "经营现金净流量对销售收入比率", "资产的经营现金流量回报率",
    "经营现金净流量与净利润的比率", "经营现金净流量对负债比率", "每股营业现金流量",
    "每股经营活动现金流量净额", "每股投资活动产生现金流量净额", "每股筹资活动产生现金流量净额",
    "每股现金及现金等价物净增加额", "现金流量满足率", "现金营运指数",
)


def update_financial(filename, conn=None):
    """Insert every row of a financial CSV export into the ``financial`` table.

    The file ``<path()>/data/dzhfinancial/<filename>.csv`` is read as GBK text
    with every column kept as ``object`` and each row is written with
    ``INSERT IGNORE``.  A failing row is recorded and the import continues.

    :param filename: CSV file name without directory and without ``.csv``.
    :param conn: connection to write through; when omitted a connection from
        ``localconn()`` is opened at call time rather than once at import.
    :return: list of the exceptions raised by failing rows.
    """
    if conn is None:
        conn = localconn()

    file_path = path() + "/data/dzhfinancial/" + filename + ".csv"
    df_data = pd.read_csv(file_path, encoding='gbk', dtype='object')

    # One placeholder per column, so the two lists cannot drift apart.
    sql = "INSERT IGNORE INTO `financial`(%s) VALUES (%s)" % (
        ", ".join("`%s`" % name for name in _FINANCIAL_COLUMNS),
        ",".join(["%s"] * len(_FINANCIAL_COLUMNS)),
    )

    errorlist = []
    for row in df_data.values:
        # The connection's context manager is entered once per row, exactly
        # as before, so each row keeps its own scope.
        with conn as con:
            try:
                con.execute(sql, tuple(row))
            except Exception as e:
                errorlist.append(e)
                print(e)

    return errorlist
Example #8
0
def update_dzhcontrol(ser):
    """
    Update controller and shareholder fields of ``basedata`` from control.csv.

    Every row of ``<path()>/data/dzhdata/control.csv`` is printed and then
    applied as an UPDATE keyed on ``证券代码``.

    :param ser: "server", "local", or any other value for both (server first)
    :return: list of ``(证券代码, exception)`` tuples for rows that failed
    """
    Errorlist = []
    sqli = "UPDATE `basedata` SET `实际控制人名称` = %s, `实际控制人类型` = %s,`央企控制人名称`=%s,`控股股东名称`=%s,`控股股东类型`=%s WHERE `basedata`.`证券代码` = %s ;"

    # Connection factories to write through, in the order they are used.
    if ser == "server":
        openers = (serverconn,)
    elif ser == "local":
        openers = (localconn,)
    else:
        openers = (serverconn, localconn)

    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for correct parsing of quoted fields.
    with open(path() + "/data/dzhdata/control.csv", encoding='utf-8',
              newline='') as f:
        for row in csv.DictReader(f):
            try:
                print(row['证券代码'], row['实际控制人名称'], row['实际控制人类型'],
                      row['央企控制人名称'], row['控股股东名称'], row['控股股东类型'])
                params = (row['实际控制人名称'], row['实际控制人类型'],
                          row['央企控制人名称'], row['控股股东名称'],
                          row['控股股东类型'], row['证券代码'])
                # A fresh connection is opened per row and target; a failure
                # on the first target skips the remaining one for that row.
                for opener in openers:
                    db = opener()
                    cur = db.cursor()
                    cur.execute(sqli, params)
                    db.commit()
            except Exception as e:
                # .get() keeps the handler from raising when the key column
                # itself is what is missing.
                print(row.get('证券代码'), e)
                Errorlist.append((row.get('证券代码'), e))
    return Errorlist
Example #9
0
def update_embasedata(stocklist, proxy=0):
    """Refresh ``basedata`` rows from two eastmoney pages per stock.

    For every entry of ``stocklist`` the ``gp3.php`` and ``gp30.php`` pages
    are downloaded; the text of all elements with ``width=880`` from the first
    page and the first ``<p>`` element of the second (``style`` attribute
    removed) become the values of one UPDATE.

    :param stocklist: table indexable as ``stocklist['证券代码'][i]`` and
        ``stocklist['证券简称'][i]`` for ``i`` in ``range(len(stocklist))``.
    :param proxy: forwarded unchanged to ``myspyder``.
    :return: DataFrame of failures with columns ``证券代码, 证券简称, error``;
        the same frame is written to ``<path()>/error/update_basedata.csv``.
    """
    sqli = ("UPDATE  `basedata` SET  `证券简称` = %s,  `公司名称` = %s,  `英文名称` = %s, "
            " `曾用名` = %s,  `公司简介` = %s,  `成立日期` = %s,  `工商登记号` = %s,  `注册资本` = %s,  `法人代表` = %s, "
            " `所属证监会行业` = %s,  `员工总数` = %s,  `总经理` = %s,  `董事会秘书` = %s,  `省份` = %s,  `城市` = %s,  "
            "`注册地址` = %s,  `办公地址` = %s,  `邮编` = %s,  `电话` = %s,  `传真` = %s,  `电子邮件` = %s,  "
            "`公司网站` = %s,  `审计机构` = %s,  `法律顾问` = %s,  `经营分析` = %s,  `简史` = %s,  `核心题材` = %s "
            "WHERE `basedata`.`证券代码` = %s")
    failures = []
    engine = conn()
    total = len(stocklist)

    for i in range(total):
        code = stocklist['证券代码'][i]
        # Codes starting with "6" get the suffix "01", all others "02".
        symbol = code + ("01" if code[0] == "6" else "02")
        sName = stocklist['证券简称'][i]
        try:
            Intr = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp3.php?code=%s' % (symbol),
                proxy=proxy).content
            Conc = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp30.php?code=%s' %
                (symbol),
                proxy=proxy).content
            IntrSoup = bs(Intr, 'html5lib')
            ConcSoup = bs(Conc, 'html5lib')

            stockdata = [sName]
            stockdata.extend(
                txt_pre(tr.text.strip())
                for tr in IntrSoup.find_all(width=880))
            point = ConcSoup.p
            del point['style']
            stockdata.append(str(point))
            # The WHERE value is the symbol without its two-character suffix.
            stockdata.append(symbol[:-2])

            print("BASEDATA:", symbol[:-2], sName, i + 1, "/", total,
                  round((i + 1) / (total) * 100, 2))

            engine.execute(sqli, tuple(stockdata))
            sleep((random() / 10 + 1))
        except Exception as e:
            print(symbol[:-2], sName, e)
            failures.append((symbol[:-2], sName, e))

    Errorlist = pd.DataFrame(failures, columns=['证券代码', '证券简称', 'error'])
    Errorlist.to_csv(path() + '/error/update_basedata.csv')
    return Errorlist
Example #10
0
def update_stocklist(ser='both'):
    """Update short name and pinyin abbreviation for names not yet stored.

    Rows come from ``<path()>/data/dzhdata/stocklist.csv`` (column 0 is used
    as the stock code, column 1 as the short name).  A row is written only
    when its short name is absent from the ``证券简称`` column read through
    ``localconn()``.  Database errors are not caught here.

    :param ser: "server", "local", or any other value for both (server first)
    :return: the error list, which this function never fills
    """
    table = pd.read_csv(path() + "/data/dzhdata/stocklist.csv",
                        dtype='object').values
    known_names = pd.read_sql("select * from `stocklist`",
                              localconn())['证券简称'].values
    Errorlist = []

    # Connection factories to write through, in the order they are used.
    if ser == "server":
        openers = (serverconn,)
    elif ser == "local":
        openers = (localconn,)
    else:
        openers = (serverconn, localconn)

    for row in table:
        if row[1] in known_names:
            continue

        print(row[1])
        sql = "update `stocklist` set `证券简称`=%s,`拼音缩写`=%s WHERE `证券代码`=%s"
        pinyin = getpinyin(row[1])
        # For names containing '银行' the abbreviation uses 'YH' instead of 'YX'.
        if '银行' in row[1]:
            pinyin = pinyin.replace('YX', 'YH')
        param = [row[1], pinyin, row[0]]
        print(param)

        # A fresh connection is opened for each row and target.
        for opener in openers:
            db = opener()
            cur = db.cursor()
            cur.execute(sql, tuple(param))
            db.commit()
    return Errorlist
Example #11
0
                cur.execute(sql, tuple(param))
                conn2.commit()
            # except Exception as e:
            #     print(param[0], e)
            #     Errorlist.append((param[0], e))
    return Errorlist


if __name__ == '__main__':
    # Script entry point: convert the exports with gbk_to_utf8(), then run the
    # enabled update steps.  Each step's error list is saved as a CSV under
    # <path()>/error/ and printed.  Disabled steps are kept commented out.
    gbk_to_utf8()
    # concept_errorlist = update_dzhconcept(ser='both') # check the export is complete; check file encoding
    # dfErrorList = pd.DataFrame( concept_errorlist)
    # dfErrorList.to_csv(path() + '/error/dzhconcept.csv')
    # print(dfErrorList)
    # control_errorlist = update_dzhcontrol(ser='both') # check the export is complete; check file encoding
    # dfErrorList2 = pd.DataFrame( control_errorlist)
    # dfErrorList2.to_csv(path() + '/error/dzhcontrol.csv')
    # print(dfErrorList2)
    capitalchange_errorlist = update_capitalchange(ser='both')  # before running: rename headers, drop fields, fix number format
    dfErrorList3 = pd.DataFrame(capitalchange_errorlist)
    dfErrorList3.to_csv(path() + '/error/capitalchange.csv')
    print(dfErrorList3)
    # buyback_errorlist = update_buyback(ser='both') # rename headers, drop fields, fix number format, check file encoding
    # dfErrorList4 = pd.DataFrame(buyback_errorlist)
    # dfErrorList4.to_csv(path() + '/error/update_buyback.csv')
    # print(dfErrorList4)
    # incentive_errorlist = update_incentive(ser='both') # rename headers, drop fields, fix number format, check file encoding
    # dfErrorList5 = pd.DataFrame(incentive_errorlist)
    # dfErrorList5.to_csv(path() + '/error/update_incentive.csv')
    # print(dfErrorList5)
    update_stocklist(ser='both')
Example #12
0
def update_incentive(ser):
    """
    Append incentive-plan rows newer than the latest stored announcement date.

    Rows are read from ``<path()>/data/dzhdata/incentive.csv``; the latest
    stored ``薪酬委员会预案公告日`` is always read through ``localconn()``.
    Newer rows are appended with ``DataFrame.to_sql``, local target first.

    :param ser: "local", "server" or "both"; any other value writes nothing
    :return: the error list, which this function never fills
    """
    sql_last = "select `薪酬委员会预案公告日` from `incentive` ORDER BY `薪酬委员会预案公告日` DESC  limit 1"
    lastdate = pd.read_sql(sql_last, localconn()).values[0][0]
    Errorlist = []
    # Row-wise INSERT statement; not referenced by the bulk append below.
    sqli = "INSERT IGNORE INTO `incentive`(`股票代码`, `本期计划制定年度`, `本期计划激励次数`, `方案进度`, `激励标的物`, " \
           "`标的股票来源`, `激励总数_万`, `激励总数占当时总股本的比例`, `计划授权授予股票价格`, `本期计划有效期_年`, " \
           "`股权激励授予条件说明`, `薪酬委员会预案公告日`, `董事会修订方案日`, `股东大会通过日`, `独立财务顾问`, `律师事务所`," \
           " `备注`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    date_column = '薪酬委员会预案公告日'
    table = pd.read_csv(path() + "/data/dzhdata/incentive.csv", dtype='object')
    table[date_column] = pd.to_datetime(table[date_column])
    table = table[table[date_column] > lastdate]

    # NOTE(review): `flavor` was dropped from DataFrame.to_sql in later pandas
    # releases - confirm the pandas version this runs on.
    targets = (
        (("local", "both"), localconn),
        (("server", "both"), serverconn),
    )
    for accepted, opener in targets:
        if ser in accepted:
            table.to_sql('incentive',
                         opener(),
                         flavor='mysql',
                         schema='stockdata',
                         index=False,
                         if_exists='append')
    return Errorlist
Example #13
0
def get_forecast(proxy=0, lastday=0, update=1):
    """
    Download earnings-forecast records and insert them into ``forecast``.

    Four report dates (Q1-Q4, derived from the reference day) are requested
    from the eastmoney data interface.  The parsed rows are merged,
    de-duplicated and written with ``INSERT IGNORE``; the ``变动幅度`` text is
    also split on ``~`` into a lower and an upper bound.

    :param proxy: forwarded to ``myspyder`` (0 = no IP proxy, 1 = use one)
    :param lastday: number of days the reference day lies before today
    :param update: 1 inserts only rows dated on or after the reference day;
        any other value inserts every downloaded row
    :return: DataFrame with a single ``error`` column, also written to
        ``<path()>/error/update_forecast.csv``
    """
    errorList = []
    today = datetime.date.today() - datetime.timedelta(days=lastday)
    print("FORECAST:", today)
    iyear = today.year
    imonth = today.month
    List_stock = get_stocklist()
    # Page size: the stock count rounded to thousands, plus one thousand.
    iLong = int((round(len(List_stock) / 1000, 0) + 1) * 1000)

    # Early in the year the previous year's report dates are requested.
    Q4 = datetime.date(iyear - 1 if imonth <= 2 else iyear, 12, 31).isoformat()
    Q3 = datetime.date(iyear - 1 if imonth < 8 else iyear, 9, 30).isoformat()
    Q2 = datetime.date(iyear - 1 if imonth < 5 else iyear, 6, 30).isoformat()
    Q1 = datetime.date(iyear, 3, 31).isoformat()

    raw_columns = ['code', '股票简称', '业绩变动', '变动幅度', '预告类型', '同期净利润',
                   '预喜预悲', 'date', '财报日期']
    kept_columns = ['code', '业绩变动', '变动幅度', '预告类型', '同期净利润', 'date',
                    '财报日期']

    frames = []
    for Quarter in [Q1, Q2, Q3, Q4]:
        url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=YJYG&fd=%s&st=4&sr=true&p=1&ps=%s' \
              % (Quarter, iLong)
        # Up to three attempts while the body is exactly "error!".
        content = "error!"
        times_retry = 3
        while content == "error!" and times_retry != 0:
            content = myspyder(url, proxy=proxy).content.decode('utf-8')
            times_retry -= 1
        print("FORECAST:", Quarter, "数据抓取完毕,正在对数据进行处理...")
        try:
            records = [item.split(",")
                       for item in re.findall(r'"(.*?)"', content)]
            fctable = pd.DataFrame(records, columns=raw_columns)
            frames.append(fctable[kept_columns])
        except Exception as e:
            errorList.append(e)

    if frames:
        # ignore_index gives every row a unique label; the per-quarter frames
        # all start at 0, which made label-based row access ambiguous.
        df_forecast = pd.concat(frames, ignore_index=True).drop_duplicates()
    else:
        # No quarter could be parsed: keep the schema so the steps below run.
        df_forecast = pd.DataFrame(columns=kept_columns)

    # Unparseable dates become NaT instead of aborting the whole update.
    df_forecast['date'] = pd.to_datetime(df_forecast['date'], errors='coerce')
    df_forecast['财报日期'] = pd.to_datetime(df_forecast['财报日期'],
                                         errors='coerce')
    df_forecast = df_forecast.sort_values('date', ascending=False)
    print("FORECAST: 数据处理完毕,正在更新数据库...")

    if update == 1:
        # Compare against a Timestamp; rows with NaT dates drop out here.
        df_forecast = df_forecast[df_forecast['date'] >= pd.Timestamp(today)]

    engine = conn()
    sql_update = "INSERT IGNORE INTO `forecast`(`code`, `date`, `业绩变动`, `变动幅度`, `预告类型`, `同" \
                 "期净利润`, `财报日期`,`上限`,`下限`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    # Positional iteration: row labels are no longer contiguous after the
    # sort and the filter above.
    for row in df_forecast[kept_columns].itertuples(index=False, name=None):
        try:
            Scode, Schange, Spercent, Stype, Sprofit, date_value, rep_value = row
            Sdate = None if pd.isnull(date_value) else str(date_value)
            Srepdate = None if pd.isnull(rep_value) else str(rep_value)

            # "a~b" gives lower a and upper b; a single value is the upper
            # bound only; an empty text gives no bounds.
            ul = Spercent.split("~")
            if len(ul) == 2:
                upper = ul[1].replace('%', '')
                lower = ul[0].replace('%', '')
            elif len(ul) == 1 and ul[0] != '':
                upper = ul[0].replace('%', '')
                lower = None
            else:
                upper = None
                lower = None

            param = (Scode, Sdate, Schange, Spercent, Stype, Sprofit,
                     Srepdate, upper, lower)
            engine.execute(sql_update, param)
        except Exception as e:
            errorList.append(e)

    dfErrorList = pd.DataFrame({'error': errorList})
    dfErrorList.to_csv(path() + '/error/update_forecast.csv')
    print("FORECAST: 更新数据库完毕!")
    return dfErrorList
Example #14
0
            pinyin = getpinyin(name) if '银行' not in name else getpinyin(
                name).replace('YX', 'YH')
            market = '上海证券交易所' if code[0] == '6' else '深圳证券交易所'
            if code not in stocklist or code in ipocheck:
                try:
                    sql_xg = "INSERT ignore INTO `stocklist`(`证券代码`, `证券简称`, `上市市场`,`拼音缩写`) VALUES (%s,%s,%s,%s)"
                    sql_ipo = "update `basedata` set `首发日期`=%s ,`首发价格`=%s WHERE `证券代码`=%s"

                    engine.execute(sql_xg, (code, name, market, pinyin))
                    # conn.commit()
                    engine.execute(sql_ipo, (str(ipodate), ipoprice, code))
                    # conn.commit()

                    print("STOCKLIST:", code, ":更新成功!")
                except Exception as e:
                    print("STOCKLIST:", code, ":更新失败!", e)
                    Errorlist.append(code)

        output = str(datetime.date.today()) + (" 更新完成!" if len(Errorlist) == 0
                                               else " 更新出错!请检查!")
        print("STOCKLIST:", output)
        return Errorlist
    except:
        return ['数据获取失败...']


if __name__ == "__main__":
    # Script entry point: run the stock-list update without a proxy and save
    # whatever it returns as the error report CSV under <path()>/error/.
    error = update_stocklist(proxy=0)
    df_error = pd.DataFrame(error)
    df_error.to_csv(path() + '/error/update_stocklist.csv')
Example #15
0
def update_buyback(ser):
    """Append new share-buyback rows from the exported CSV to `buyback`.

    Reads ``<path()>/data/dzhdata/buyback.csv``, keeps only rows whose
    `董事会通过日` is later than the newest `董事会通过日` already stored in
    the local `buyback` table, and appends them to the selected database(s).
    When the local table is empty, every CSV row is loaded.

    :param ser: "local", "server" or "both"; any other value writes nothing
    :return: list of errors (currently always empty; database errors
             propagate to the caller as exceptions)
    """
    date_col = '董事会通过日'
    float_cols = ['回购资金上限_CNY', '回购价格上限_CNY', '回购股份预计_万',
                  '占实际流通股', '占总股本']
    columns = [
        '证劵代码', '方案进度', '董事会通过日', '股东大会通过日', '国资委通过日',
        '证监会通过日', '回购资金上限_CNY', '回购价格上限_CNY', '回购股份预计_万',
        '占总股本', '占实际流通股', '股份种类', '回购资金来源', '回购股份方式',
        '回购股份实施期限', '备注'
    ]

    sql_last = "select `董事会通过日` from `buyback` ORDER BY `董事会通过日` DESC  limit 1"
    last_rows = pd.read_sql(sql_last, localconn())
    Errorlist = []

    table = pd.read_csv(path() + "/data/dzhdata/buyback.csv", dtype='object')
    table[date_col] = pd.to_datetime(table[date_col])
    # An empty `buyback` table yields no "last date"; in that case keep every
    # CSV row instead of failing with IndexError on values[0][0].
    if not last_rows.empty:
        lastdate = last_rows.values[0][0]
        table = table[table[date_col] > lastdate]

    # Relabel the CSV columns positionally with the names used for the insert.
    # Going through .values drops the dtypes, so the date and numeric columns
    # are converted again afterwards.
    table = pd.DataFrame(table.values, columns=columns)
    table[date_col] = pd.to_datetime(table[date_col])
    for col in float_cols:
        table[col] = table[col].astype('float')

    print(table.dtypes)
    print(table)

    # Local first, then server, matching the original write order.
    targets = []
    if ser == "local" or ser == "both":
        targets.append(localconn)
    if ser == "server" or ser == "both":
        targets.append(serverconn)

    for connect in targets:
        # NOTE(review): `flavor` was deprecated and later removed from
        # DataFrame.to_sql in newer pandas releases. It is kept here because
        # dropping it is only safe if localconn()/serverconn() return
        # SQLAlchemy connectables -- confirm against the pinned pandas version.
        table.to_sql('buyback',
                     connect(),
                     flavor='mysql',
                     schema='stockdata',
                     index=False,
                     if_exists='append')

    return Errorlist
Example #16
0
def news_content():
    """Download and store the body text for news rows that still lack it.

    Selects rows of `news` whose `content` is NULL, or whose title contains
    "更新中" and whose `datetime` is no older than five days. For each row the
    article id is taken from the last path segment of `link`, the full text
    is requested from the app.stcn.com fulltext endpoint, and `content` is
    updated when the downloaded text differs from the stored one.

    Per-link failures are collected and written to
    ``<path()>/error/update_newscontent.csv``; nothing is returned.
    """
    engine = conn()
    # Lookback cutoff for titles still marked as "updating".
    cutoff = datetime.date.today() - datetime.timedelta(days=5)

    sql_news_null = text(
        "SELECT * FROM `news` WHERE `content` IS NULL OR (`title` LIKE :ud and `datetime`>=:dt)"
    )
    df_listurl = pd.read_sql(sql_news_null,
                             engine,
                             params={
                                 "ud": "%更新中%",
                                 "dt": str(cutoff)
                             })

    # Raw string: the pattern is unchanged, but no longer relies on invalid
    # string escape sequences such as "\|".
    marker_pattern = re.compile(r"\|STCNTTTP\|.+\|STCNTTTP\|")
    sql_update_newscontent = "update `news` set `content`=%s WHERE `link`=%s"
    errorlist = []

    rows = zip(df_listurl['link'].values,
               df_listurl['title'].values,
               df_listurl['content'].values)
    for link, title, stored_content in rows:
        # Throttle requests to the remote site.
        time.sleep(random.random() / 10 + 3)
        try:
            # Article id = last URL path segment up to its first dot. Done
            # inside the try so a NULL or malformed link is recorded as an
            # error instead of aborting the whole run.
            newsid = link.rsplit("/", 1)[-1].split(".", 1)[0]
            newurl = "http://app.stcn.com/?app=article&controller=article&action=fulltext&contentid=%s" % (
                newsid)

            # The slice drops the first and the last two characters before
            # JSON parsing -- presumably a wrapper around the JSON payload;
            # TODO confirm against a live response.
            html = myspyder(newurl, proxy=0).content.decode('utf-8')[1:-2]
            newscontent = json.loads(html)['content']
            # Remove the |STCNTTTP|...|STCNTTTP| marked span from the text.
            newscontent = "".join(marker_pattern.split(newscontent))

            if newscontent != stored_content:
                print("NEW:", title)
                engine.execute(sql_update_newscontent, (newscontent, link))

        except Exception as e:
            errorlist.append((link, e))

    df_errorlist = pd.DataFrame(errorlist, columns=['link', 'error'])
    df_errorlist.to_csv(path() + '/error/update_newscontent.csv')
Example #17
0
def spo(proxy=0):
    """Fetch recent secondary-offering (SPO) records and insert them into `spo_done`.

    Source:
    http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=1000&st=5

    The response is parsed into a table, reduced to the columns stored in
    `spo_done`, de-duplicated, and filtered to rows whose `发行日期` falls
    within the last 100 days. The filtered rows are dumped to
    ``<path()>/data/spo_done/spo_<cutoff date>.csv`` and inserted with
    INSERT IGNORE; a field equal to '-' is stored as NULL.

    :param proxy: passed through to ``myspyder``; set proxy=1 to use a proxy,
                  proxy=0 (default) otherwise
    :return: list of exceptions raised during the insert phase (empty on
             success); the first failure stops the remaining inserts
    """
    print("SPO: Running...")
    errorlist = []
    # Only offerings issued within the last 100 days are kept.
    cutoff = datetime.date.today() - datetime.timedelta(days=100)
    url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=1000&st=5"
    html = myspyder(url, proxy=proxy).content
    # SECURITY(review): eval() executes whatever the remote endpoint returns.
    # Kept to preserve parsing of the current payload format, but it should be
    # replaced with a non-executing parser (e.g. ast.literal_eval or json)
    # once the exact response shape is confirmed.
    table = eval(html.decode('utf-8'))
    rows = [ele.split(",") for ele in table]

    df_spo = pd.DataFrame(rows, columns=['code', 'name', '发行方式', '发行总数', '发行价格', '现价', '发行日期', '增发上市日期',
                                         '8', '增发代码', '网上发行', '中签号公布日', '中签率', '13', '14', '15', '16'])
    df_spo = df_spo[['code', 'name', '发行方式', '发行总数', '发行价格', '发行日期', '增发上市日期', '增发代码', '网上发行',
                     '中签号公布日', '中签率']]
    df_spo = df_spo.drop_duplicates()

    df_spo['发行日期'] = df_spo['发行日期'].astype('datetime64[ns]')
    # Compare against a Timestamp: newer pandas versions reject ordering
    # comparisons between datetime64 data and a plain datetime.date.
    spo = df_spo[df_spo['发行日期'] >= pd.Timestamp(cutoff)]
    spo.to_csv(path() + '/data/spo_done/spo_' + str(cutoff) + '.csv', encoding='utf-8')

    sql_update_spo = "INSERT IGNORE INTO `spo_done`(`code`, `name`, `发行方式`, `发行总数`, `发行价格`, " \
                     "`发行日期`, `增发上市日期`, `增发代码`, `网上发行`, `中签号公布日`, `中签率`) VALUES" \
                     " (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    engine = conn()
    try:
        for elem in spo.values:
            # '-' marks a missing value in the feed; store it as NULL.
            params = [None if value == '-' else str(value) for value in elem]
            engine.execute(sql_update_spo, params)
        print("SPO: Done!")
    except Exception as e:
        print("SPO:", e)
        errorlist.append(e)
    return errorlist
Example #18
0
            result=engine.execute(sql_update_spo,params)
                # result.close()
                # conn.commit()
                # spo.to_sql('spo_done',localconn(),flavor='mysql',schema='stockdata',if_exists='append',
                #            index=False,chunksize=10000)
            # if ser == 'server' or ser == 'both':
                # conns = serverconn()
                # curs = conns.cursor()
                # results=conns.execute(sql_update_spo, params)
                # results.close()
                # conns.commit()
                # spo.to_sql('spo_done',serverconn(),flavor='mysql',schema='stockdata',if_exists='append',
                #            index=False,chunksize=10000)
        # if ser == 'local' or ser == 'both':
        #     conn.close()
        # if ser == 'server' or ser == 'both':
        #     conns.close()
        print("SPO: Done!")
    except Exception as e:
        print("SPO:",e)
        errorlist.append(e)
    return errorlist

""" 
"http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=50&st=5"
"""
if __name__ == '__main__':
    # Script entry point: run the SPO update without a proxy and write the
    # errors it returned to a CSV report.
    report = pd.DataFrame(spo(proxy=0))
    report.to_csv(path()+'/error/update_spo.csv')