Python combine_dataFrame 예제들, JohhnsonUtil.commonTips.combine_dataFrame Python 예제들

예제 #1

0

파일 보기

파일: sina_Market-DurationCXUP.py 프로젝트: fswzb/pyQuant-2

                else:
                    if 'couts' in top_now.columns.values:
                        if not 'couts' in top_all.columns.values:
                            top_all['couts'] = 0
                            top_all['prev_p'] = 0
                    # for symbol in top_now.index:
                    #     if 'couts' in top_now.columns.values:
                    #         top_all.loc[symbol, ct.columns_now] = top_now.loc[symbol, ct.columns_now]
                    #     else:
                    #         # top_now.loc[symbol, 'dff'] = round(
                    #         # ((float(top_now.loc[symbol, 'buy']) - float(
                    #         # top_all.loc[symbol, 'lastp'])) / float(top_all.loc[symbol, 'lastp']) * 100),
                    #         # 1)
                    #         top_all.loc[symbol, ct.columns_now] = top_now.loc[symbol, ct.columns_now]
                    top_all = cct.combine_dataFrame(top_all, top_now, col=None)

                top_dif = top_all.copy()
                log.debug('top_dif:%s' % (len(top_dif)))
                if 'trade' in top_dif.columns:
                    top_dif['buy'] = (map(lambda x, y: y if int(x) == 0 else x,
                                          top_dif['buy'].values,
                                          top_dif['trade'].values))

                # 判断主升
                # log.debug('top_dif:%s'%(len(top_dif)))
                if ct.checkfilter and cct.get_now_time_int(
                ) > 915 and cct.get_now_time_int() < ct.checkfilter_end_timeDu:
                    top_dif = top_dif[top_dif.low > top_dif.llow *
                                      ct.changeRatio]
                    # top_dif = top_dif[top_dif.buy >= top_dif.lhigh * ct.changeRatio]

예제 #2

0

파일 보기

                    #         if status_change:
                    #             # top_all.loc[symbol] = top_now.loc[symbol]
                    #             top_all.loc[symbol, ['name', 'percent', 'dff', 'couts', 'trade', 'high', 'open', 'low', 'ratio', 'volume',
                    #                                  'prev_p']] = top_now.loc[symbol, ['name', 'percent', 'dff', 'couts', 'trade', 'high', 'open', 'low', 'ratio', 'volume',
                    #                                                                    'prev_p']]
                    #         else:
                    #             top_all.loc[symbol, ['percent', 'dff']] = top_now.loc[
                    #                 symbol, ['percent', 'dff']]
                    #             # top_all.loc[symbol, 'trade':] = top_now.loc[symbol, 'trade':]
                    #             top_all.loc[symbol, ['trade', 'high', 'open', 'low', 'ratio', 'volume',
                    #                                  'prev_p']] = top_now.loc[symbol, ['trade', 'high', 'open', 'low', 'ratio', 'volume',
                    #                                                                    'prev_p']]
                    #     else:
                    #         top_all.append(top_now.loc[symbol])
                    top_all = cct.combine_dataFrame(top_all,
                                                    top_now,
                                                    col='couts',
                                                    compare='dff')

                # top_all=top_all.sort_values(by=['dff','percent','couts'],ascending=[0,0,1])
                # top_all=top_all.sort_values(by=['dff','ratio','percent','couts'],ascending=[0,1,0,1])
                # top_all=top_all.sort_values(by=['dff','percent','couts','ratio'],ascending=[0,0,1,1])

                top_bak = top_all
                codelist = top_all.index.tolist()
                if len(codelist) > 0:
                    # log.info('toTDXlist:%s' % len(codelist))
                    # tdxdata = tdd.get_tdx_all_day_LastDF(codelist)
                    # log.debug("TdxLastP: %s %s" % (len(tdxdata), tdxdata.columns.values))
                    # tdxdata.rename(columns={'low': 'llow'}, inplace=True)
                    # tdxdata.rename(columns={'high': 'lhigh'}, inplace=True)
                    # tdxdata.rename(columns={'close': 'lastp'}, inplace=True)

예제 #3

0

파일 보기

파일: tdx_hdf5_api.py 프로젝트: fswzb/pyQuant-2

def write_hdf_db(fname,
                 df,
                 table='all',
                 index=False,
                 baseCount=500,
                 append=True,
                 MultiIndex=False):
    """Combine ``df`` with the stored ``table`` of ``fname`` and rewrite it.

    The frame is re-indexed by its ``code`` column (when present), stamped
    with a ``timel`` column holding the current time, combined with whatever
    is already stored under ``table`` via ``cct.combine_dataFrame`` and then
    written back, replacing the stored table.

    Args:
        fname: name handed to ``SafeHDFStore``; the value ``'powerCompute'``
            additionally enables the stale-timestamp refresh below.
        df: DataFrame to store. When it has no ``code`` column the caller's
            object itself receives the ``timel`` column.
        table: key of the table inside the store; ``None`` skips all store
            access.
        index: when true (and ``MultiIndex`` is false) every index label
            starting with ``'0'`` is replaced by ``str(1000000 - int(label))``.
        baseCount: unused; kept for backward compatibility.
        append: forwarded to ``cct.combine_dataFrame``; when false the
            combined frame gets a fresh ``timel`` stamp.
        MultiIndex: when true the frame is written as-is, with no combining
            and no string coercion of index/object columns.

    Returns:
        The (re-indexed) ``df`` untouched when ``RAMDISK_KEY`` is not below 1,
        otherwise ``True``.
    """
    if 'code' in df.columns:
        df = df.set_index('code')
    time_t = time.time()

    # Writing is disabled unless RAMDISK_KEY < 1; hand the frame back as-is.
    if not RAMDISK_KEY < 1:
        return df

    df['timel'] = time.time()
    if not df.empty and table is not None:
        # Empty list doubles as the "nothing stored yet" marker.
        tmpdf = []
        with SafeHDFStore(fname) as store:
            if store is not None and '/' + table in store.keys():
                tmpdf = store[table]

        # NOTE: the MultiIndex path performs no combining with the stored
        # table; the frame is written unchanged further down.
        if not MultiIndex:
            if index:
                # A list (not a lazy ``map`` object) so that the assignment
                # behaves the same on Python 2 and Python 3.
                df.index = [
                    str(1000000 - int(x)) if x.startswith('0') else x
                    for x in df.index
                ]
            if tmpdf is not None and len(tmpdf) > 0:
                if 'code' in tmpdf.columns:
                    tmpdf = tmpdf.set_index('code')
                if 'code' in df.columns:
                    df = df.set_index('code')
                diff_columns = set(df.columns) - set(tmpdf.columns)
                if len(diff_columns) != 0:
                    log.error("columns diff:%s" % (diff_columns))

                # Rows whose timel ends up below limit_t after combining
                # carry a stamp older than this call.
                limit_t = time.time()
                df['timel'] = limit_t

                df = cct.combine_dataFrame(tmpdf, df, col=None, append=append)

                if not append:
                    df['timel'] = time.time()
                elif fname == 'powerCompute':
                    o_time = df[df.timel < limit_t].timel.tolist()
                    o_time = sorted(set(o_time), reverse=False)
                    if len(o_time) > ct.h5_time_l_count:
                        # Mean age (seconds) of the distinct older stamps.
                        o_time = [time.time() - t_x for t_x in o_time]
                        o_timel = len(o_time)
                        o_time = np.mean(o_time)
                        if o_time > ct.h5_power_limit_time * 1.5:
                            # Too old on average: restamp every row.
                            df['timel'] = time.time()
                            log.error("%s %s o_time:%.1f timel:%s" %
                                      (fname, table, o_time, o_timel))

                log.info("read hdf time:%0.2f" % (time.time() - time_t))
            else:
                log.info("h5 None hdf reindex time:%0.2f" %
                         (time.time() - time_t))

    time_t = time.time()
    if df is not None and not df.empty and table is not None:
        if not MultiIndex:
            # Coerce object-dtype columns and the index to str before the
            # frame is stored in 'table' format.
            dd = df.dtypes.to_frame()
            if 'object' in dd.values:
                dd = dd[dd == 'object'].dropna()
                col = dd.index.tolist()
                log.info("col:%s" % (col))
                df[col] = df[col].astype(str)
            df.index = df.index.astype(str)
        df = df.fillna(0)
        with SafeHDFStore(fname) as h5:
            if h5 is not None:
                # Replace rather than append: drop any existing table first.
                if '/' + table in h5.keys():
                    h5.remove(table)
                h5.put(table,
                       df,
                       format='table',
                       data_columns=True,
                       append=False)
                h5.flush()
            else:
                log.error("HDFile is None,Pls check:%s" % (fname))
    log.info("write hdf time:%0.2f" % (time.time() - time_t))

    return True

예제 #4

0

파일 보기

파일: sina_Market-DurationUp.py 프로젝트: fswzb/pyQuant-2

                            top_all['couts'] = 0
                            top_all['prev_p'] = 0
                    # for symbol in top_now.index:
                    #     if 'couts' in top_now.columns.values:
                    #         top_all.loc[symbol, ct.columns_now] = top_now.loc[symbol, ct.columns_now]
                    #     else:
                    #         top_all.loc[symbol, ct.columns_now] = top_now.loc[symbol, ct.columns_now]

                    # no_index = top_all.drop([inx for inx in top_all.index  if inx not in top_now.index], axis=0)
                    # no_index.drop([col for col in no_index.columns if col in top_now.columns], axis=1,inplace=True)
                    # no_index = no_index.merge(top_now, left_index=True, right_index=True, how='left')
                    # top_all = top_all.drop([inx for inx in top_all.index  if inx in top_now.index], axis=0)
                    # top_all = pd.concat([top_all, no_index],axis=0)
                    # log.info("for loc code :%0.2f"%(time.time()-time_Rt))
                    #
                    top_all = cct.combine_dataFrame(top_all, top_now)

                # top_all = top_all[top_all.buy > 0]
                top_dif = top_all.copy()
                log.debug('top_dif:%s' % (len(top_dif)))
                if 'trade' in top_dif.columns:
                    top_dif['buy'] = (map(lambda x, y: y if int(x) == 0 else x,
                                          top_dif['buy'].values,
                                          top_dif['trade'].values))

                # 判断主升
                # log.debug('top_dif:%s'%(len(top_dif)))
                if ct.checkfilter and cct.get_now_time_int(
                ) > 915 and cct.get_now_time_int() < ct.checkfilter_end_timeDu:
                    top_dif = top_dif[top_dif.low > top_dif.llow *
                                      ct.changeRatio]