Beispiel #1
0
    def exec_mask(self):
        df = self.__result
        # total count
        total = len(df)
        r = None
        for k, mask in self.stat.items():
            self.stat[k] = mask.sum() / total
            if r is None:
                r = mask
            else:
                r = r & mask
            print('{}\t:{}'.format(k, r.sum()))
        df = df[r]

        self.stat['total'] = total
        self.stat['all'] = len(df)
        self.stat['all_ratio'] = len(df) / total

        df_stat = pd.DataFrame(self.stat, index=pd.Series([self.__y]))
        df_stat.insert(0, 'm', self.__m)
        df_stat.insert(0, 'y', self.__y)

        df.to_sql(self.standard_table_name,
                  get_engine(),
                  index=False,
                  if_exists='append')
        df_stat.to_sql(self.standard_start_table_name,
                       get_engine(),
                       index=False,
                       if_exists='append')

        self.stat = df_stat
        self.__result = df
Beispiel #2
0
    def delete(self):
        sqls = """delete from his where TABLE_name in ('balancesheet','income','cashflow','fina_indicator','dividend','one_fina','one_index','all');
truncate table balancesheet;
truncate table income;
truncate table cashflow;
truncate table fina_indicator;
truncate table dividend;
truncate table dividend_stat;""".split('\n')
        for sql in sqls:
            get_engine().execute(sql)
Beispiel #3
0
def loop_codes():
    param = get_calc_param()
    # codes = pd.read_sql_query("select DISTINCT ts_code from i_data where debt_to_assets < 50 ts_code = '600177.SH' ;", get_engine())
    codes = pd.read_sql_query("""select DISTINCT ts_code from i_data   ;""",
                              get_engine())
    mps = []
    for i, row in codes.iterrows():
        t = my_calc(row['ts_code'], param)
        mps.append(t)
    df = pd.DataFrame(mps)
    df.to_sql('i_calc', get_engine(), index=False, if_exists='replace')
Beispiel #4
0
def get_index_distinct_codes(index_code):
    sql = "select DISTINCT(con_code) from index_weight where index_code = '{}'"
    sql = sql.format(index_code)
    df = pd.read_sql_query(sql, get_engine())
    if not len(df):
        print('error index_code:', index_code)
    return df
Beispiel #5
0
def get_nincome_roe_pe_meta(ts_code):
    sql_abc = """select 
	s.ts_code,f.y,f.m,f.end_date,
	i.n_income_attr_p ,
	f.roe,
	m.close ,m.pe,m.pe_ttm,
	d.close as c2 ,d.pe p2 ,d.pe_ttm pt2
from 
	stock_basic s,fina_indicator f, daily_basic_month m ,daily_basic d,income i
where 
	s.ts_code = f.ts_code and f.ts_code = m.ts_code and m.ts_code = d.ts_code and d.ts_code = i.ts_code
	and f.y = m.y  and m.y = i.y 
	and f.m = m.m  and m.m = i.m
	and s.ts_code = '{}'
	and f.y > 2010 
order by f.y ,f.m ;""".format(ts_code)
    all_abc = pd.read_sql_query(sql_abc, get_engine())
    df = all_abc[all_abc.m == 3]
    df['yl'] = df['y']
    df = df.set_index(['y'])
    for i in range(1, 5):
        t = all_abc[all_abc.m == 3 * i]
        t = t.set_index(['y'])
        df['n{}'.format(i * 3)] = t['n_income_attr_p']
        df['r{}'.format(i * 3)] = t['roe']
        df['pe{}'.format(i * 3)] = t['pe']
        df['pe_ttm{}'.format(i * 3)] = t['pe_ttm']

    return df
Beispiel #6
0
def get_analyse_collection(y, m, pe_thresthold, year_start):
    sql = """select
        # l.name,
      l.ts_code,l.list_status,l.list_date,l.delist_date,
      b.y,b.m,
      b.total_share,b.total_liab,b.total_cur_assets,b.total_assets,b.total_ncl,b.total_hldr_eqy_inc_min_int,
      
      b.intan_assets,b.r_and_d,b.goodwill,b.lt_amor_exp,b.defer_tax_assets,b.total_hldr_eqy_exc_min_int ,
      
      f.dt_eps,f.eps,f.current_ratio,f.quick_ratio,f.tangible_asset,
      m.close,m.pe,1/m.pe as ep,m.dv_ratio,m.total_mv,
      d.stk_div,d.cash_div
from
      stock_list l,
      stock_balancesheet b,
      stock_fina_indicator f,
      stock_month_matrix_basic m,
      stock_dividend d
where
      l.ts_code = b.ts_code and l.list_date <= b.end_date and l.list_status = 'L' and 
      b.ts_code = f.ts_code and b.y = f.y and b.m = f.m and 
      b.ts_code = d.ts_code and b.y = d.y and 
      b.ts_code = m.ts_code and b.y = m.y and b.m =m.m and m.pe > 0 and 
            # b.total_hldr_eqy_inc_min_int is not null and 
		    b.y ={} and
			b.m ={} and 
			m.pe <= {}  and
            l.list_date <= '{}-12-31'
					
			;"""
    sql = sql.format(y, m, pe_thresthold, year_start)
    df = pd.read_sql_query(sql, get_engine())
    if not len(df):
        print('error sql:')
    return df
Beispiel #7
0
    def _update_ts(self):
        cal_date = self._get_nearest_cal_date()
        if cal_date is None:
            return
        trade_date = cal_date.strftime('%Y%m%d')
        df = get_pro().daily_basic(ts_code='', trade_date=trade_date)

        dtype = {
            'ts_code': VARCHAR(length=10),
            'trade_date': DATE(),
            'close': FLOAT(),
            'y': INT(),
            'm': INT(),
            'turnover_rate': FLOAT(),
            'turnover_rate_f': FLOAT(),
            'volume_ratio': FLOAT(),
            'pe': FLOAT(),
            'pe_ttm': FLOAT(),
            'pb': FLOAT(),
            'ps': FLOAT(),
            'ps_ttm': FLOAT(),
            'dv_ratio': FLOAT(),
            'dv_ttm': FLOAT(),
            'total_share': FLOAT(),
            'float_share': FLOAT(),
            'free_share': FLOAT(),
            'total_mv': FLOAT(),
            'circ_mv': FLOAT()
        }
        df.to_sql(self.get_table_name(),
                  get_engine(),
                  dtype=dtype,
                  index=False,
                  if_exists='append')
Beispiel #8
0
def init_stock_price_monthly(ts_code, force=None):
    table_name = 'stock_price_monthly'

    if not need_pull_check(ts_code, table_name, force):
        print('need not 2 pull {} -> {}'.format(table_name, ts_code))
        return
    else:
        print('start 2 pull {} -> {} .'.format(table_name, ts_code))

    df = get_pro().monthly(
        ts_code=ts_code,
        fields='ts_code,trade_date,open,high,low,close,vol,amount')
    if not len(df):
        return

    df_add_y_m(df, 'trade_date')
    dtype = {
        'ts_code': VARCHAR(length=10),
        'trade_date': DATE(),
        'y': INT,
        'm': INT,
        'open': DECIMAL(precision=8, scale=2),
        'high': DECIMAL(precision=8, scale=2),
        'low': DECIMAL(precision=8, scale=2),
        'close': DECIMAL(precision=8, scale=2),
        'vol': BIGINT(),
        'amount': BIGINT()
    }
    df.to_sql(table_name,
              get_engine(),
              dtype=dtype,
              index=False,
              if_exists='append')
Beispiel #9
0
def get_trade_date(y, m):
    sql = '''select * from trade_cal where y ={} and m = {};'''
    sql = sql.format(y, m)
    df = pd.read_sql_query(sql, get_engine())
    first = None if len(df) == 0 else df['first'][0]
    last = None if len(df) == 0 else df['last'][0]
    return first, last
Beispiel #10
0
    def _init_ts(self):
        y_start = 1990

        __pool = ThreadPoolExecutor(max_workers=MULTIPLE,
                                    thread_name_prefix="test_")
        fs = []
        i = 0
        for y_i in range(31)[::-1]:
            y = y_start + y_i
            first, last = dao.get_trade_date(y, 0)
            if not first:
                continue
            print("{}-{}".format(y, 0))
            first = first.strftime('%Y%m%d')
            last = last.strftime('%Y%m%d')
            f1 = __pool.submit(get_pro().index_weight,
                               index_code=self.biz_code,
                               start_date=first,
                               end_date=first)
            f2 = __pool.submit(get_pro().index_weight,
                               index_code=self.biz_code,
                               start_date=last,
                               end_date=last)
            fs.append(f1)
            fs.append(f2)
            i += 2
            if i > 197:
                print('198次后休息60秒')
                time.sleep(60)
                i = 0

        df = None
        for f2 in fs:
            temp_df = f2.result()
            if len(temp_df):
                if df is None:
                    df = temp_df
                else:
                    df = df.append(temp_df, ignore_index=True)

        df_add_y_m(df, 'trade_date')

        dtype = {
            'index_code': VARCHAR(length=10),
            'con_code': VARCHAR(length=10),
            'y': INT,
            'm': INT,
            'trade_date': DATE(),
            'weight': DECIMAL(precision=10, scale=6)
        }

        df = df.reindex(
            columns='index_code,con_code,y,m,trade_date,weight'.split(','))

        df.to_sql(self.get_table_name(),
                  get_engine(),
                  dtype=dtype,
                  index=False,
                  if_exists='append')
Beispiel #11
0
def get_list_all(limit=None, sql=None):
    sql = ("select * from stock_list " +
           ('' if limit is None else 'limit {}'.format(limit)) +
           ';') if sql is None else sql
    df = pd.read_sql_query(sql, get_engine())
    if not len(df):
        print('error sql:')
    return df
Beispiel #12
0
def init_month_matrix_basic():
    table_name = 'stock_month_matrix_basic'
    sql = 'select * from trade_date where m != 0 ;'
    yms = pd.read_sql_query(sql, get_engine())

    df = None
    for i, row in yms.iterrows():
        first_trade_date_str = row['first'].strftime('%Y%m%d')
        last_last_date_str = row['last'].strftime('%Y%m%d')
        data = __pro.daily_basic(ts_code='', trade_date=last_last_date_str)
        print(last_last_date_str)
        if df is None:
            df = data
        else:
            df = df.append(data)
    df_add_y_m(df, 'trade_date')
    df.reset_index(drop=True)
    df = df.iloc[::-1]
    dtype = {
        'ts_code': VARCHAR(length=10),
        'trade_date': DATE(),
        'close': FLOAT(),
        'y': INT(),
        'm': INT(),
        'turnover_rate': FLOAT(),
        'turnover_rate_f': FLOAT(),
        'volume_ratio': FLOAT(),
        'pe': FLOAT(),
        'pe_ttm': FLOAT(),
        'pb': FLOAT(),
        'ps': FLOAT(),
        'ps_ttm': FLOAT(),
        'dv_ratio': FLOAT(),
        'dv_ttm': FLOAT(),
        'total_share': FLOAT(),
        'float_share': FLOAT(),
        'free_share': FLOAT(),
        'total_mv': FLOAT(),
        'circ_mv': FLOAT()
    }
    df.to_sql(table_name,
              get_engine(),
              dtype=dtype,
              index=False,
              if_exists='append')
Beispiel #13
0
    def load(self):
        # load market
        load_market = f"""select ts_code,y,m,close from daily_basic_month 
                            where ts_code = '{self.ts_code}' and {self.end} >= y and y>= {self.start}  
                                    and m = 6 order by y ,m;"""
        load_premium = f"""select * from dividend_stat where ts_code = '{self.ts_code}' and {self.end} >= y and y>= {self.start}  """

        market_df = pd.read_sql_query(load_market, get_engine())
        premium_df = pd.read_sql_query(load_premium, get_engine())

        if len(market_df) < (self.end - self.start + 1):
            raise LookupError('Not A Good Data Sample.')

        premium_dict = {}
        for i, row in premium_df.iterrows():
            premium_dict[row['y']] = row
        self._market = market_df
        self._premium = premium_dict
Beispiel #14
0
 def _delete_ts(self):
     if self.operate == CTL_OPERATE_TRUNCATE:
         sql = 'drop table if exists {}'.format(self.get_table_name())
     elif self.operate == CTL_OPERATE_APPEND:
         sql = "delete from {} where {} = '{}'".format(
             self.get_table_name(), self.biz_col_name, self.biz_code)
     get_engine().execute(sql)
     logging.info('==delete_ts ' + sql)
     if self.calc_table:
         if self.operate == CTL_OPERATE_TRUNCATE:
             sql = 'drop table if exists {}'.format(self.get_table_name() +
                                                    self.calc_table)
         elif self.operate == CTL_OPERATE_APPEND:
             sql = "delete from {} where {} = '{}'".format(
                 self.get_table_name() + self.calc_table, self.biz_col_name,
                 self.biz_code)
         get_engine().execute(sql)
         logging.info('==delete_ts ' + sql)
Beispiel #15
0
def get_fina(ts_code, start, end, m):
    if ts_code is str:
        sql = """select * from stock_fina_indicator where ts_code = '{}' and y between {} and {} and m = {} order by y desc"""
        sql = sql.format(ts_code, start, end, m)
    else:
        sql = """select ts_code,end_date,y,m,eps,dt_eps from stock_fina_indicator where ts_code in ({}) and y between {} and {} and m = {}  order by y desc"""
        sql = sql.format(','.join(["'%s'" % item for item in ts_code]), start,
                         end, m)
    df = pd.read_sql_query(sql, get_engine())
    return df
Beispiel #16
0
    def _init_ts(self):
        pass
        if self.biz_code == 'tangchao':
            con_codes = '600519.SH,002304.SZ,002415.SZ,002027.SZ,000596.SZ'.split(
                ',')
        elif self.biz_code == 'manxl':
            con_codes = '600519.SH,002304.SZ,002415.SZ,002027.SZ,000596.SZ'.split(
                ',')

        sql_template = "INSERT INTO index_weight VALUES ('{}', '{}', {}, {}, '{}-{}-31', 0 );"

        sqls = []
        for con_code in con_codes:
            sqls.append(
                sql_template.format(self.biz_code, con_code, 2020, 12, 2020,
                                    12))

        for sql in sqls:
            get_engine().execute(sql)
Beispiel #17
0
    def _get_con_codes(self):
        sql = """select * from index_weight where index_code = '{}'
                and trade_date = (select max(trade_date) from  index_weight where index_code = '{}')""".format(
            self.biz_code,
            self.biz_code,
        )

        df = pd.read_sql_query(sql, get_engine())
        if len(df) == 0:
            raise Exception('index not initialed.')
        return df['con_code']
Beispiel #18
0
def need_pull_check(code, table_name, force=None, condition_column='ts_code'):
    if force is None:
        sql = "select count(*) from {} where {} = '{}';".format(
            table_name, condition_column, code)
        # con = get_engine().connect()
        try:
            # size = get_engine().execute(sql).fetchone()[0]
            df = pd.read_sql_query(sql, get_engine())
            size = df.iloc[0, 0]
        except Exception as e:
            if 'f405' == e.code:
                return True
            else:
                print(e)
                exit(4)
        # finally:
        #     con.close()

        return False if size > 0 else True
    else:
        if 'delete' == force:
            sql = "delete from {} where {} = '{}';".format(
                table_name, condition_column, code)
        elif 'drop' == force:
            sql = "drop table {};".format(table_name)
        else:
            print('need_pull_check {} force flag error:{}'.format(
                table_name, force))
            exit(4)

        try:
            r = pd.read_sql_query(sql, get_engine()).iloc[0, 0]
            print('force clean {} rows 4 {}'.format(r.rowcount, code))
        except Exception as e:
            if 'f405' == e.code or 'e3q8' == e.code:
                return True
            else:
                print(e)
                exit(4)

        return True
Beispiel #19
0
    def _get_nearest_cal_date(self):

        if time.localtime().tm_hour < 19:
            di = 1
        else:
            di = 0
        sql = f'select cal_date from trade_cal_detail  where cal_date  <= curdate() - {di} order by cal_date desc limit 1;'
        df = pd.read_sql_query(sql, get_engine())
        cal_date = df.iloc[0]['cal_date']
        if self.his is not None and cal_date <= self.his.end_date:
            return None
        return cal_date
Beispiel #20
0
def get_standard_stat(y, m):
    try:
        sql = 'select * from standard_stat where y = {} and m = {};'
        sql = sql.format(y, m)
        df = pd.read_sql_query(sql, get_engine())
    except Exception as e:
        if 'f405' == e.code:
            return None
        else:
            print(e)
            return None
    return df
Beispiel #21
0
def get_balancesheet_df(ts_code, flag=None, y=None):
    if flag == 'assert':
        if not y:
            sql = balancesheet_get_stock_info_his_by_ts_code.format(**locals())
        else:
            sql = balancesheet_get_stock_industry_info.format(**locals())
    elif flag == 'liab':
        if not y:
            sql = balancesheet_get_stock_info_his_by_ts_code_2.format(
                **locals())
        else:
            sql = balancesheet_get_stock_industry_info_2.format(**locals())
    else:
        raise AttributeError('flag neither assert nor liab.')

    df = pd.read_sql_query(sql, get_engine())

    if 'y' in df.columns:
        df = df.set_index(['y'])
    elif 'ts_code' in df.columns:
        print('mark')
        df.loc[df[df['ts_code'] == ts_code].index[0],
               ['ts_code']] = ts_code + '(*)'
        df = df.set_index(['ts_code'])
    else:
        raise AttributeError('Nether y nor ts_code must in dataframe!')

    column_list = df.columns.values.tolist()
    last_col = column_list[len(column_list) - 1]

    for col in df.columns.values.tolist():
        # delete nan
        df[col] = df[col].apply(lambda x: 0 if x != x or x is None else x)
        # calc last_col
        if last_col != col:
            df[last_col] = df[last_col] - df[col]

    sm = df.sum(axis=1)
    flag1 = sm != 0
    flag2 = df[df.columns[len(df.columns) - 1]] != 0
    sm = sm[flag1 & flag2]
    df = df[flag1 & flag2]

    if len(df) == 0:
        return None

    for col in df.columns.values.tolist():
        df[col] = round(df[col] * 100 / sm, 2)

    return df
Beispiel #22
0
def init_table_indexes():
    sql = "SELECT TABLE_NAME FROM information_schema.TABLES where table_schema = '{}' and table_type = 'BASE TABLE';"
    sql = sql.format(SCHEMA)
    df = pd.read_sql_query(sql, get_engine())
    target_list = set([
        'end_date', 'index_code', 'con_code', 'ts_code', 'trade_date', 'y',
        'm', 't', 'list_status', 'list_date', 'delist_date', 'total_share',
        'total_liab', 'total_cur_assets', 'total_assets', 'dt_eps',
        'current_ratio', 'quick_ratio', 'close', 'stk_div', 'cash_div'
    ])

    sql_tpl_create_index = 'create index idx_{}_{} on {} ({});'
    e = get_engine()
    for table_name in df.iloc[:, 0]:
        sql_index = 'show index from {};'.format(table_name)
        sql_columns = 'show columns from {};'.format(table_name)
        df_index = pd.read_sql_query(sql_index, get_engine())
        df_columns = pd.read_sql_query(sql_columns, get_engine())
        df_index = set(df_index['Column_name'].values.tolist())
        df_columns = set(df_columns['Field'].values.tolist())
        target_columns = target_list & (df_columns - df_index)

        for column in target_columns:
            sql = sql_tpl_create_index.format(table_name, column, table_name,
                                              column)
            print(sql)
            e.execute(sql)

    print('-' * 32, 'add index')
    df = df[df.TABLE_NAME != 'trade_date_detail']
    df = df[df.TABLE_NAME != 'stock_dividend_detail']

    key_list = ['index_code', 'con_code', 'ts_code', 'cal_date', 'y', 'm']
    prim_keys = set(key_list)
    sql_prim = "select * from information_schema.TABLE_CONSTRAINTS  where CONSTRAINT_SCHEMA = '{}' and table_name = '{}';"
    sql_add_prim = 'alter table {}.{} add primary key({});'
    for table_name in df.iloc[:, 0]:
        sql = sql_prim.format(SCHEMA, table_name)
        prim = pd.read_sql_query(sql, get_engine())
        if len(prim) > 0:
            continue
        sql_columns = 'show columns from {};'.format(table_name)
        df_columns = pd.read_sql_query(sql_columns, get_engine())
        df_columns = set(df_columns['Field'].values.tolist())

        p = prim_keys & df_columns
        key_temp = key_list[:]
        for key in key_list:
            if not key in p:
                key_temp.remove(key)

        if key_temp:
            sql = sql_add_prim.format(SCHEMA, table_name, ','.join(key_temp))
            print(sql)
            # print(table_name, key_temp)
            e.execute(sql)
Beispiel #23
0
def init_fund_nav(ts_code, force=None):
    table_name = 'fund_nav'

    if not need_pull_check(ts_code, table_name, force):
        print('need not 2 pull {} -> {}'.format(table_name, ts_code))
        return
    else:
        print('start 2 pull {} -> {} .'.format(table_name, ts_code))

    df = get_pro().fund_nav(ts_code=ts_code)
    if not len(df):
        return

    df_add_y_m(df, 'end_date')

    df.to_sql(table_name, get_engine(), index=False, if_exists='append')
Beispiel #24
0
def get_pe_low(y, m, percent):
    sql = """select *from (
		select 
				@row_num:=@row_num+1 as row_num ,
        a.*
        
    from 
			(select CASE when pe is null then 9999 else pe end as pea  from {}.stock_month_matrix_basic where y = {} and m ={} order by pea asc) a,
			(select @row_num:=0) b  
    order by 
        pea
) base
where 
    base.row_num <= (@row_num*{})
		order by pea desc limit 1"""
    sql = sql.format(SCHEMA, y, m, percent)
    df = pd.read_sql_query(sql, get_engine())
    return df.iloc[0, 1]
Beispiel #25
0
    def _init_ts(self):
        print('start init list...')
        fileds = 'ts_code,symbol,name,area,industry,fullname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs'
        d_l = get_pro().query(self.interface,
                              exchange='',
                              list_status='L',
                              fields=fileds)
        print('L', len(d_l))
        d_d = get_pro().query(self.interface,
                              exchange='',
                              list_status='D',
                              fields=fileds)
        print('D', len(d_d))
        d_p = get_pro().query(self.interface,
                              exchange='',
                              list_status='P',
                              fields=fileds)
        print('P', len(d_p))
        df = d_l.append(d_d).append(d_p)
        print('all size:', len(df))

        dtype = {
            'ts_code': VARCHAR(length=10),
            'symbol': VARCHAR(length=8),
            'name': VARCHAR(length=20),
            'area': VARCHAR(length=10),
            'industry': VARCHAR(length=32),
            'fullname': VARCHAR(length=32),
            'market': VARCHAR(length=10),
            'exchange': VARCHAR(length=10),
            'curr_type': VARCHAR(length=5),
            'list_status': VARCHAR(length=1),
            'list_date': DATE(),
            'delist_date': DATE(),
            'is_hs': VARCHAR(length=1)
        }

        df.to_sql(self.get_table_name(),
                  get_engine(),
                  dtype=dtype,
                  index=False,
                  if_exists='replace')
Beispiel #26
0
def kkk():
    sql = """
    select 
        b.y,b.total_hldr_eqy_inc_min_int as e,
        i.n_income as r,
        i.n_income /total_hldr_eqy_exc_min_int as roe,
        d.cash_div_tax*d.base_share as s
    from 
        balancesheet b,income i , fina_indicator f,dividend d
    where 1=1
        and b.ts_code = i.ts_code and i.ts_code = f.ts_code and f.ts_code = d.ts_code
        and b.y = i.y and i.y = f.y and f.y	= d.y
        and b.m = i.m and i.m = f.m
        and b.ts_code = '{}' and b.y between {} and {} and b.m = 12 
        order by b.y asc;
    
    """.format(TEST_TS_CODE_GSYH, 2006, 2010)

    df = pd.read_sql_query(sql, get_engine())
    return df
Beispiel #27
0
def init_stock_list_all():
    print('start init list...')
    fileds = 'ts_code,symbol,name,area,industry,fullname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs'
    d_l = __pro.stock_basic(exchange='', list_status='L', fields=fileds)
    print('L', len(d_l))
    d_d = __pro.stock_basic(exchange='', list_status='D', fields=fileds)
    print('D', len(d_d))
    d_p = __pro.stock_basic(exchange='', list_status='P', fields=fileds)
    print('P', len(d_p))
    df = d_l.append(d_d).append(d_p)
    print('all size:', len(df))

    dtype = {
        'ts_code': VARCHAR(length=10),
        'symbol': VARCHAR(length=8),
        'name': VARCHAR(length=20),
        'area': VARCHAR(length=10),
        'industry': VARCHAR(length=32),
        'fullname': VARCHAR(length=32),
        'market': VARCHAR(length=10),
        'exchange': VARCHAR(length=10),
        'curr_type': VARCHAR(length=5),
        'list_status': VARCHAR(length=1),
        'list_date': DATE(),
        'delist_date': DATE(),
        'is_hs': VARCHAR(length=1)
    }

    # df.reset_index(drop=True)
    # df = df.reindex(columns='ts_code,end_date,ex_date,div_proc,stk_div,cash_div'.split(','))
    df.to_sql('stock_list',
              get_engine(),
              dtype=dtype,
              index=False,
              if_exists='replace')

    print('finished init list.')
Beispiel #28
0
def init_fina_indicator(ts_code, force=None):
    table_name = 'stock_fina_indicator'

    if not need_pull_check(ts_code, table_name, force):
        print('need not 2 pull {} -> {}'.format(table_name, ts_code))
        return
    else:
        print('start 2 pull {} -> {} .'.format(table_name, ts_code))

    dtype = {
        'ts_code': VARCHAR(length=10),
        'ann_date': DATE(),
        'end_date': DATE(),
        'y': INT(),
        'm': INT(),
        'eps': FLOAT(),
        'dt_eps': FLOAT(),
        'total_revenue_ps': FLOAT(),
        'revenue_ps': FLOAT(),
        'capital_rese_ps': FLOAT(),
        'surplus_rese_ps': FLOAT(),
        'undist_profit_ps': FLOAT(),
        'extra_item': FLOAT(),
        'profit_dedt': FLOAT(),
        'gross_margin': FLOAT(),
        'current_ratio': FLOAT(),
        'quick_ratio': FLOAT(),
        'cash_ratio': FLOAT(),
        'invturn_days': FLOAT(),
        'arturn_days': FLOAT(),
        'inv_turn': FLOAT(),
        'ar_turn': FLOAT(),
        'ca_turn': FLOAT(),
        'fa_turn': FLOAT(),
        'assets_turn': FLOAT(),
        'op_income': FLOAT(),
        'valuechange_income': FLOAT(),
        'interst_income': FLOAT(),
        'daa': FLOAT(),
        'ebit': FLOAT(),
        'ebitda': FLOAT(),
        'fcff': FLOAT(),
        'fcfe': FLOAT(),
        'current_exint': FLOAT(),
        'noncurrent_exint': FLOAT(),
        'interestdebt': FLOAT(),
        'netdebt': FLOAT(),
        'tangible_asset': FLOAT(),
        'working_capital': FLOAT(),
        'networking_capital': FLOAT(),
        'invest_capital': FLOAT(),
        'retained_earnings': FLOAT(),
        'diluted2_eps': FLOAT(),
        'bps': FLOAT(),
        'ocfps': FLOAT(),
        'retainedps': FLOAT(),
        'cfps': FLOAT(),
        'ebit_ps': FLOAT(),
        'fcff_ps': FLOAT(),
        'fcfe_ps': FLOAT(),
        'netprofit_margin': FLOAT(),
        'grossprofit_margin': FLOAT(),
        'cogs_of_sales': FLOAT(),
        'expense_of_sales': FLOAT(),
        'profit_to_gr': FLOAT(),
        'saleexp_to_gr': FLOAT(),
        'adminexp_of_gr': FLOAT(),
        'finaexp_of_gr': FLOAT(),
        'impai_ttm': FLOAT(),
        'gc_of_gr': FLOAT(),
        'op_of_gr': FLOAT(),
        'ebit_of_gr': FLOAT(),
        'roe': FLOAT(),
        'roe_waa': FLOAT(),
        'roe_dt': FLOAT(),
        'roa': FLOAT(),
        'npta': FLOAT(),
        'roic': FLOAT(),
        'roe_yearly': FLOAT(),
        'roa2_yearly': FLOAT(),
        'roe_avg': FLOAT(),
        'opincome_of_ebt': FLOAT(),
        'investincome_of_ebt': FLOAT(),
        'n_op_profit_of_ebt': FLOAT(),
        'tax_to_ebt': FLOAT(),
        'dtprofit_to_profit': FLOAT(),
        'salescash_to_or': FLOAT(),
        'ocf_to_or': FLOAT(),
        'ocf_to_opincome': FLOAT(),
        'capitalized_to_da': FLOAT(),
        'debt_to_assets': FLOAT(),
        'assets_to_eqt': FLOAT(),
        'dp_assets_to_eqt': FLOAT(),
        'ca_to_assets': FLOAT(),
        'nca_to_assets': FLOAT(),
        'tbassets_to_totalassets': FLOAT(),
        'int_to_talcap': FLOAT(),
        'eqt_to_talcapital': FLOAT(),
        'currentdebt_to_debt': FLOAT(),
        'longdeb_to_debt': FLOAT(),
        'ocf_to_shortdebt': FLOAT(),
        'debt_to_eqt': FLOAT(),
        'eqt_to_debt': FLOAT(),
        'eqt_to_interestdebt': FLOAT(),
        'tangibleasset_to_debt': FLOAT(),
        'tangasset_to_intdebt': FLOAT(),
        'tangibleasset_to_netdebt': FLOAT(),
        'ocf_to_debt': FLOAT(),
        'ocf_to_interestdebt': FLOAT(),
        'ocf_to_netdebt': FLOAT(),
        'ebit_to_interest': FLOAT(),
        'longdebt_to_workingcapital': FLOAT(),
        'ebitda_to_debt': FLOAT(),
        'turn_days': FLOAT(),
        'roa_yearly': FLOAT(),
        'roa_dp': FLOAT(),
        'fixed_assets': FLOAT(),
        'profit_prefin_exp': FLOAT(),
        'non_op_profit': FLOAT(),
        'op_to_ebt': FLOAT(),
        'nop_to_ebt': FLOAT(),
        'ocf_to_profit': FLOAT(),
        'cash_to_liqdebt': FLOAT(),
        'cash_to_liqdebt_withinterest': FLOAT(),
        'op_to_liqdebt': FLOAT(),
        'op_to_debt': FLOAT(),
        'roic_yearly': FLOAT(),
        'total_fa_trun': FLOAT(),
        'profit_to_op': FLOAT(),
        'q_opincome': FLOAT(),
        'q_investincome': FLOAT(),
        'q_dtprofit': FLOAT(),
        'q_eps': FLOAT(),
        'q_netprofit_margin': FLOAT(),
        'q_gsprofit_margin': FLOAT(),
        'q_exp_to_sales': FLOAT(),
        'q_profit_to_gr': FLOAT(),
        'q_saleexp_to_gr': FLOAT(),
        'q_adminexp_to_gr': FLOAT(),
        'q_finaexp_to_gr': FLOAT(),
        'q_impair_to_gr_ttm': FLOAT(),
        'q_gc_to_gr': FLOAT(),
        'q_op_to_gr': FLOAT(),
        'q_roe': FLOAT(),
        'q_dt_roe': FLOAT(),
        'q_npta': FLOAT(),
        'q_opincome_to_ebt': FLOAT(),
        'q_investincome_to_ebt': FLOAT(),
        'q_dtprofit_to_profit': FLOAT(),
        'q_salescash_to_or': FLOAT(),
        'q_ocf_to_sales': FLOAT(),
        'q_ocf_to_or': FLOAT(),
        'basic_eps_yoy': FLOAT(),
        'dt_eps_yoy': FLOAT(),
        'cfps_yoy': FLOAT(),
        'op_yoy': FLOAT(),
        'ebt_yoy': FLOAT(),
        'netprofit_yoy': FLOAT(),
        'dt_netprofit_yoy': FLOAT(),
        'ocf_yoy': FLOAT(),
        'roe_yoy': FLOAT(),
        'bps_yoy': FLOAT(),
        'assets_yoy': FLOAT(),
        'eqt_yoy': FLOAT(),
        'tr_yoy': FLOAT(),
        'or_yoy': FLOAT(),
        'q_gr_yoy': FLOAT(),
        'q_gr_qoq': FLOAT(),
        'q_sales_yoy': FLOAT(),
        'q_sales_qoq': FLOAT(),
        'q_op_yoy': FLOAT(),
        'q_op_qoq': FLOAT(),
        'q_profit_yoy': FLOAT(),
        'q_profit_qoq': FLOAT(),
        'q_netprofit_yoy': FLOAT(),
        'q_netprofit_qoq': FLOAT(),
        'equity_yoy': FLOAT(),
        'rd_exp': FLOAT(),
        'update_flag': VARCHAR(length=1)
    }
    columns = 'ts_code,ann_date,end_date,eps,dt_eps,total_revenue_ps,revenue_ps,capital_rese_ps,surplus_rese_ps,undist_profit_ps,extra_item,profit_dedt,gross_margin,current_ratio,quick_ratio,cash_ratio,invturn_days,arturn_days,inv_turn,ar_turn,ca_turn,fa_turn,assets_turn,op_income,valuechange_income,interst_income,daa,ebit,ebitda,fcff,fcfe,current_exint,noncurrent_exint,interestdebt,netdebt,tangible_asset,working_capital,networking_capital,invest_capital,retained_earnings,diluted2_eps,bps,ocfps,retainedps,cfps,ebit_ps,fcff_ps,fcfe_ps,netprofit_margin,grossprofit_margin,cogs_of_sales,expense_of_sales,profit_to_gr,saleexp_to_gr,adminexp_of_gr,finaexp_of_gr,impai_ttm,gc_of_gr,op_of_gr,ebit_of_gr,roe,roe_waa,roe_dt,roa,npta,roic,roe_yearly,roa2_yearly,roe_avg,opincome_of_ebt,investincome_of_ebt,n_op_profit_of_ebt,tax_to_ebt,dtprofit_to_profit,salescash_to_or,ocf_to_or,ocf_to_opincome,capitalized_to_da,debt_to_assets,assets_to_eqt,dp_assets_to_eqt,ca_to_assets,nca_to_assets,tbassets_to_totalassets,int_to_talcap,eqt_to_talcapital,currentdebt_to_debt,longdeb_to_debt,ocf_to_shortdebt,debt_to_eqt,eqt_to_debt,eqt_to_interestdebt,tangibleasset_to_debt,tangasset_to_intdebt,tangibleasset_to_netdebt,ocf_to_debt,ocf_to_interestdebt,ocf_to_netdebt,ebit_to_interest,longdebt_to_workingcapital,ebitda_to_debt,turn_days,roa_yearly,roa_dp,fixed_assets,profit_prefin_exp,non_op_profit,op_to_ebt,nop_to_ebt,ocf_to_profit,cash_to_liqdebt,cash_to_liqdebt_withinterest,op_to_liqdebt,op_to_debt,roic_yearly,total_fa_trun,profit_to_op,q_opincome,q_investincome,q_dtprofit,q_eps,q_netprofit_margin,q_gsprofit_margin,q_exp_to_sales,q_profit_to_gr,q_saleexp_to_gr,q_adminexp_to_gr,q_finaexp_to_gr,q_impair_to_gr_ttm,q_gc_to_gr,q_op_to_gr,q_roe,q_dt_roe,q_npta,q_opincome_to_ebt,q_investincome_to_ebt,q_dtprofit_to_profit,q_salescash_to_or,q_ocf_to_sales,q_ocf_to_or,basic_eps_yoy,dt_eps_yoy,cfps_yoy,op_yoy,ebt_yoy,netprofit_yoy,dt_netprofit_yoy,ocf_yoy,roe_yoy,bps_yoy,assets_yoy,eqt_yoy,tr_yoy,or_yoy,q_gr_yoy,q_gr_qoq,q_sales_yoy,q_sales_qoq,q_op_yoy,q_op_qoq,q_profit_yoy,q_profit_qoq,q_netprofit_yoy,q_netprofit_qoq,equity_yoy,rd_exp,update_flag'
    df = __pro.fina_indicator(ts_code=ts_code,
                              start_date='19901201',
                              end_date='20210101',
                              columns=columns)

    if len(df) == 0:
        print('=' * 32, 'code:{}'.format(ts_code))
        print('error exit middle')
        exit(4)

    # clean
    # df = df.drop_duplicates(["end_date"], keep="first")
    df = drop_more_nan_row(df, 'end_date')

    df_add_y_m(df, 'end_date')

    df.reset_index(drop=True)

    df.to_sql(table_name,
              get_engine(),
              dtype=dtype,
              index=False,
              if_exists='append')
Beispiel #29
0
def init_cashflow(ts_code, force=None):
    table_name = 'stock_cashflow'

    if not need_pull_check(ts_code, table_name, force):
        print('need not 2 pull {} -> {}'.format(table_name, ts_code))
        return
    else:
        print('start 2 pull {} -> {} .'.format(table_name, ts_code))

    dtype = {
        'ts_code': VARCHAR(length=10),
        'ann_date': DATE(),
        'f_ann_date': DATE(),
        'y': INT(),
        'm': INT(),
        'end_date': DATE(),
        'comp_type': VARCHAR(length=1),
        'report_type': VARCHAR(length=1),
        'net_profit': BIGINT(),
        'finan_exp': BIGINT(),
        'c_fr_sale_sg': BIGINT(),
        'recp_tax_rends': BIGINT(),
        'n_depos_incr_fi': BIGINT(),
        'n_incr_loans_cb': BIGINT(),
        'n_inc_borr_oth_fi': BIGINT(),
        'prem_fr_orig_contr': BIGINT(),
        'n_incr_insured_dep': BIGINT(),
        'n_reinsur_prem': BIGINT(),
        'n_incr_disp_tfa': BIGINT(),
        'ifc_cash_incr': BIGINT(),
        'n_incr_disp_faas': BIGINT(),
        'n_incr_loans_oth_bank': BIGINT(),
        'n_cap_incr_repur': BIGINT(),
        'c_fr_oth_operate_a': BIGINT(),
        'c_inf_fr_operate_a': BIGINT(),
        'c_paid_goods_s': BIGINT(),
        'c_paid_to_for_empl': BIGINT(),
        'c_paid_for_taxes': BIGINT(),
        'n_incr_clt_loan_adv': BIGINT(),
        'n_incr_dep_cbob': BIGINT(),
        'c_pay_claims_orig_inco': BIGINT(),
        'pay_handling_chrg': BIGINT(),
        'pay_comm_insur_plcy': BIGINT(),
        'oth_cash_pay_oper_act': BIGINT(),
        'st_cash_out_act': BIGINT(),
        'n_cashflow_act': BIGINT(),
        'oth_recp_ral_inv_act': BIGINT(),
        'c_disp_withdrwl_invest': BIGINT(),
        'c_recp_return_invest': BIGINT(),
        'n_recp_disp_fiolta': BIGINT(),
        'n_recp_disp_sobu': BIGINT(),
        'stot_inflows_inv_act': BIGINT(),
        'c_pay_acq_const_fiolta': BIGINT(),
        'c_paid_invest': BIGINT(),
        'n_disp_subs_oth_biz': BIGINT(),
        'oth_pay_ral_inv_act': BIGINT(),
        'n_incr_pledge_loan': BIGINT(),
        'stot_out_inv_act': BIGINT(),
        'n_cashflow_inv_act': BIGINT(),
        'c_recp_borrow': BIGINT(),
        'proc_issue_bonds': BIGINT(),
        'oth_cash_recp_ral_fnc_act': BIGINT(),
        'stot_cash_in_fnc_act': BIGINT(),
        'free_cashflow': BIGINT(),
        'c_prepay_amt_borr': BIGINT(),
        'c_pay_dist_dpcp_int_exp': BIGINT(),
        'incl_dvd_profit_paid_sc_ms': BIGINT(),
        'oth_cashpay_ral_fnc_act': BIGINT(),
        'stot_cashout_fnc_act': BIGINT(),
        'n_cash_flows_fnc_act': BIGINT(),
        'eff_fx_flu_cash': BIGINT(),
        'n_incr_cash_cash_equ': BIGINT(),
        'c_cash_equ_beg_period': BIGINT(),
        'c_cash_equ_end_period': BIGINT(),
        'c_recp_cap_contrib': BIGINT(),
        'incl_cash_rec_saims': BIGINT(),
        'uncon_invest_loss': BIGINT(),
        'prov_depr_assets': BIGINT(),
        'depr_fa_coga_dpba': BIGINT(),
        'amort_intang_assets': BIGINT(),
        'lt_amort_deferred_exp': BIGINT(),
        'decr_deferred_exp': BIGINT(),
        'incr_acc_exp': BIGINT(),
        'loss_disp_fiolta': BIGINT(),
        'loss_scr_fa': BIGINT(),
        'loss_fv_chg': BIGINT(),
        'invest_loss': BIGINT(),
        'decr_def_inc_tax_assets': BIGINT(),
        'incr_def_inc_tax_liab': BIGINT(),
        'decr_inventories': BIGINT(),
        'decr_oper_payable': BIGINT(),
        'incr_oper_payable': BIGINT(),
        'others': BIGINT(),
        'im_net_cashflow_oper_act': BIGINT(),
        'conv_debt_into_cap': BIGINT(),
        'conv_copbonds_due_within_1y': BIGINT(),
        'fa_fnc_leases': BIGINT(),
        'end_bal_cash': BIGINT(),
        'beg_bal_cash': BIGINT(),
        'end_bal_cash_equ': BIGINT(),
        'beg_bal_cash_equ': BIGINT(),
        'im_n_incr_cash_equ': BIGINT()
    }

    df = __pro.cashflow(ts_code=ts_code,
                        start_date='19901201',
                        end_date='20210101')

    # clean
    # df = df.drop_duplicates(["end_date"], keep="first")
    df = drop_more_nan_row(df, 'end_date')

    df_add_y_m(df, 'end_date')

    df.reset_index(drop=True)

    df.to_sql(table_name,
              get_engine(),
              dtype=dtype,
              index=False,
              if_exists='append')
Beispiel #30
0
def init_income(ts_code, force=None):
    table_name = 'stock_income'

    if not need_pull_check(ts_code, table_name, force):
        print('need not 2 pull {} -> {}'.format(table_name, ts_code))
        return
    else:
        print('start 2 pull {} -> {} '.format(table_name, ts_code))

    dtype = {
        'ts_code': VARCHAR(length=10),
        'ann_date': DATE(),
        'f_ann_date': DATE(),
        'y': INT(),
        'm': INT(),
        'end_date': DATE(),
        'report_type': VARCHAR(length=1),
        'comp_type': VARCHAR(length=1),
        'basic_eps': FLOAT(),
        'diluted_eps': FLOAT(),
        'total_revenue': FLOAT(),
        'revenue': FLOAT(),
        'int_income': FLOAT(),
        'prem_earned': FLOAT(),
        'comm_income': FLOAT(),
        'n_commis_income': FLOAT(),
        'n_oth_income': FLOAT(),
        'n_oth_b_income': FLOAT(),
        'prem_income': FLOAT(),
        'out_prem': FLOAT(),
        'une_prem_reser': FLOAT(),
        'reins_income': FLOAT(),
        'n_sec_tb_income': FLOAT(),
        'n_sec_uw_income': FLOAT(),
        'n_asset_mg_income': FLOAT(),
        'oth_b_income': FLOAT(),
        'fv_value_chg_gain': FLOAT(),
        'invest_income': FLOAT(),
        'ass_invest_income': FLOAT(),
        'forex_gain': FLOAT(),
        'total_cogs': FLOAT(),
        'oper_cost': FLOAT(),
        'int_exp': FLOAT(),
        'comm_exp': FLOAT(),
        'biz_tax_surchg': FLOAT(),
        'sell_exp': FLOAT(),
        'admin_exp': FLOAT(),
        'fin_exp': FLOAT(),
        'assets_impair_loss': FLOAT(),
        'prem_refund': FLOAT(),
        'compens_payout': FLOAT(),
        'reser_insur_liab': FLOAT(),
        'div_payt': FLOAT(),
        'reins_exp': FLOAT(),
        'oper_exp': FLOAT(),
        'compens_payout_refu': FLOAT(),
        'insur_reser_refu': FLOAT(),
        'reins_cost_refund': FLOAT(),
        'other_bus_cost': FLOAT(),
        'operate_profit': FLOAT(),
        'non_oper_income': FLOAT(),
        'non_oper_exp': FLOAT(),
        'nca_disploss': FLOAT(),
        'total_profit': FLOAT(),
        'income_tax': FLOAT(),
        'n_income': FLOAT(),
        'n_income_attr_p': FLOAT(),
        'minority_gain': FLOAT(),
        'oth_compr_income': FLOAT(),
        't_compr_income': FLOAT(),
        'compr_inc_attr_p': FLOAT(),
        'compr_inc_attr_m_s': FLOAT(),
        'ebit': FLOAT(),
        'ebitda': FLOAT(),
        'insurance_exp': FLOAT(),
        'undist_profit': FLOAT(),
        'distable_profit': FLOAT(),
        'update_flag': VARCHAR(length=1)
    }

    df = __pro.income(ts_code=ts_code,
                      start_date='19901201',
                      end_date='20210101')
    # clean
    df = drop_more_nan_row(df, 'end_date')
    # format
    df_add_y_m(df, 'end_date')
    #
    df.reset_index(drop=True)

    df.to_sql(table_name,
              get_engine(),
              dtype=dtype,
              index=False,
              if_exists='append')