def get_required_rows_from_stock_flow():
    """Select the stock-transfer rows from whichever stock flow file exists.

    'other_rzrq.xls' takes precedence when present; otherwise 'rzrq.xls' is
    used. Returns the filtered DataFrame, or None (after printing a notice)
    when neither file exists.
    """
    main_path = 'rzrq.xls'
    other_path = 'other_rzrq.xls'

    if os.path.isfile(other_path):
        print(other_path)
        records = normalize.parse_file(other_path)
        category = records[u'交易类别']
        wanted = (
            (category == u'担保划入')
            | (category == u'送股')
            | (category == u'托管转入')
        )
        selected = records[wanted].copy()
        # Expose the category under the column name used by the other format.
        selected.rename(columns={u'交易类别': u'买卖标志'}, inplace=True)
        return selected

    if os.path.isfile(main_path):
        records = normalize.parse_file(main_path)
        # Both columns are converted to unicode text before the selection so
        # the returned rows carry the converted values.
        records[u'成交编号'] = records[u'成交编号'].astype(unicode)
        records[u'成交日期'] = records[u'成交日期'].astype(unicode)
        flag = records[u'买卖标志']
        deal_id = records[u'成交编号']
        collateral_in = (flag == u'担保划入') & (
            (deal_id == u'担保品提交') | (deal_id == u'担保物提交')
        )
        bonus_shares = flag == u'红股入账'
        return records[collateral_in | bonus_shares]

    print('no stock flow file, skip...')
    return None
def merge(f1, f2):
    """Normalize two input files and write their concatenation to disk.

    Each file is passed to ``normalize.process`` together with its path
    stripped of the extension, the two results are parsed, concatenated, and
    the combined frame (with a fresh index) is written to 'tmp_merged.xls'.

    Args:
        f1: path of the first input file.
        f2: path of the second input file.
    """
    import os  # function-scope import keeps this block self-contained

    # os.path.splitext leaves extension-less names untouched, whereas slicing
    # at rfind('.') == -1 would silently drop the last character.
    bf1 = os.path.splitext(f1)[0]
    bf2 = os.path.splitext(f2)[0]

    of1 = normalize.process(f1, bf1)
    of2 = normalize.process(f2, bf2)

    df1 = normalize.parse_file(of1)
    df2 = normalize.parse_file(of2)

    df_all = pd.concat([df1, df2]).reset_index()
    df_all.to_excel('tmp_merged.xls', encoding='gbk')
def merge_rzrq_cash_and_stock_flow():
    """Merge the cash flow rows with the 'rzrq.xls' stock flow rows.

    Returns:
        The name of the written Excel file, or '' when no cash flow records
        are available.
    """
    result = get_required_rows_from_cash_flow()
    # The helper reports a missing file as the tuple (None, 0); unwrap a tuple
    # result so the None check below actually triggers instead of failing
    # later on tuple item assignment.
    cash_flow_records = result[0] if isinstance(result, tuple) else result
    if cash_flow_records is None:
        return ''

    f1 = 'rzrq.xls'
    stock_flow_records = normalize.parse_file(f1)
    stock_flow_records[u'证券代码'] = u''
    for i, row in stock_flow_records.iterrows():
        stock_name = row[u'证券名称']
        # Rows without a stock name keep the empty code.
        if not pd.isnull(stock_name):
            stock_flow_records.loc[i, u'证券代码'] = get_stock_code(stock_name)
        # Quantities are stored unsigned in the merged output.
        stock_flow_records.loc[i, u'成交数量'] = abs(row[u'成交数量'])

    # Keep each frame's original position as a secondary sort key so rows of
    # the same date stay in their source order.
    stock_flow_records['index'] = stock_flow_records.index
    cash_flow_records['index'] = cash_flow_records.index
    merged_flow_records = pd.concat([stock_flow_records, cash_flow_records])
    sm = merged_flow_records.sort([u'成交日期', 'index']).reset_index()

    rtn_file = u'华泰融资融券账户_merged.xls'
    sm.to_excel(rtn_file, encoding='gbk')
    return rtn_file
def get_required_rows_from_cash_flow():
    """Pick the rows of 'huatai_rzrq_cash_flow.xls' that must be merged.

    Rows whose summary matches ``match_stock`` only feed the stock name/code
    store via ``save_stock_name_code``; rows matching ``match_bank`` or one
    of the known summaries are flagged and returned.

    Returns:
        A copy of the flagged rows as a DataFrame. When the file does not
        exist, a notice is printed and the tuple ``(None, 0)`` is returned.
    """
    f = 'huatai_rzrq_cash_flow.xls'
    if not os.path.isfile(f):
        print('no huatai cash flow file, skip ...')
        return (None, 0)

    flow_records = normalize.parse_file(f)
    flow_records.rename(
        columns={u'日期': u'成交日期', u'资金余额': u'本次金额'}, inplace=True)
    flow_records = flow_records.fillna(0)
    # Net amount of each row: income column minus expense column.
    flow_records[u'发生金额'] = (
        flow_records[u'借方(收入)'] - flow_records[u'贷方(支出)'])
    flow_records['need_merge'] = False

    # Summaries that are merged as-is; built once instead of per row.
    merge_summaries = frozenset([
        u'直接偿还融资利息', u'卖券偿还融资负债', u'卖券偿还融资利息',
        u'直接偿还融资负债', u'直接偿还融资费用', u'融资借款',
        u'卖券偿还融资费用', u'买券偿还融券利息', u'利息归本',
        u'股息红利税补缴',
    ])

    for i, row in flow_records.iterrows():
        zhaiyao = row[u'摘要']

        # Each pattern is matched once and the match object is reused.
        stock_match = re.match(match_stock, zhaiyao)
        if stock_match:
            save_stock_name_code(stock_match.group(3), stock_match.group(2))
            continue

        bank_match = re.match(match_bank, zhaiyao)
        if bank_match:
            # Replace the summary by the operation captured in group 1.
            flow_records.loc[i, u'摘要'] = bank_match.group(1)
            flow_records.loc[i, 'need_merge'] = True
        elif zhaiyao in merge_summaries:
            flow_records.loc[i, 'need_merge'] = True
        else:
            print('unknown row: %s' % row)

    return flow_records[flow_records['need_merge']].copy()
# Rewrite rules for the u'委托类别' column. Each key is the original value;
# each entry is (required u'成交编号' value or None for "any", new value).
_ORDER_TYPE_RULES = {
    u'其他': (u'股息差别税', u'股息差别税'),
    u'转托': (u'转托管转入', u'股份转入'),
    u'ETF申购': (u'现金替代差额', u'ETF申购现金替代差额'),
    u'托管转入': (u'上市流通', u'新股上市流通'),
    u'托管转出': (u'上市转出', u'新股上市转出'),
    u'股票回购': (None, u'东兴股票质押融资'),
    u'股票购回': (None, u'东兴股票质押解除'),
    u'直接还款': (None, u'申购扣款'),
    u'缴款': (u'配股认购', u'配股认购'),
}


def _rewritten_order_type(row):
    """Return the replacement u'委托类别' for *row*, or None if no rule applies."""
    rule = _ORDER_TYPE_RULES.get(row[u'委托类别'])
    if rule is None:
        return None
    required_deal_id, new_type = rule
    if required_deal_id is not None and row[u'成交编号'] != required_deal_id:
        return None
    return new_type


def merge_cash_stock_flow():
    """Merge 'normal.xls' stock flow rows with the cash flow rows.

    The u'委托类别' values of the stock rows are rewritten according to
    ``_ORDER_TYPE_RULES``, the u'成交编号' column is renamed to u'摘要', both
    frames are concatenated and sorted by date and time, and the result is
    written to an Excel file.

    Returns:
        The name of the written Excel file, or '' when no cash flow records
        are available.
    """
    f1 = 'normal.xls'
    stock_flow_records = normalize.parse_file(f1)
    for i, row in stock_flow_records.iterrows():
        if u'成交编号' not in row.index:
            print(row)
        new_type = _rewritten_order_type(row)
        if new_type is not None:
            stock_flow_records.loc[i, u'委托类别'] = new_type
    stock_flow_records.rename(columns={u'成交编号': u'摘要'}, inplace=True)

    (cash_flow_records, init_cash) = get_required_rows_from_cash_flow()
    if cash_flow_records is None:
        return ''

    # Both date columns must share an integer dtype to sort together.
    cash_flow_records[u'成交日期'] = cash_flow_records[u'成交日期'].astype(int)
    stock_flow_records[u'成交日期'] = stock_flow_records[u'成交日期'].astype(int)

    merged_flow_records = pd.concat([cash_flow_records, stock_flow_records])
    sm = merged_flow_records.sort([u'成交日期', u'成交时间']).reset_index()

    # Seed the balance of the first row from the initial cash when missing.
    first_row = sm.iloc[0]
    if pd.isnull(first_row[u'剩余金额']):
        actual_amount = 0
        if not pd.isnull(first_row[u'发生金额']):
            actual_amount = float(first_row[u'发生金额'])
        else:
            sm.loc[0, u'发生金额'] = 0
        sm.loc[0, u'剩余金额'] = init_cash + actual_amount

    sm.rename(columns={u'摘要': u'东兴摘要'}, inplace=True)
    rtn_file = u'东兴普通账户_merged.xls'
    sm.to_excel(rtn_file, encoding='gbk')
    return rtn_file
def pre_process_cash_flow():
    """Load 'dongxing_rzrq_cash_flow.xls' and run every row through parse_operation.

    Returns:
        A ``(records, init_cash)`` tuple: the parsed frame with the four
        added columns, and the value ``get_init_cash_from_cash_flow``
        derives from the first row.
    """
    records = normalize.parse_file('dongxing_rzrq_cash_flow.xls')
    opening_cash = get_init_cash_from_cash_flow(records.iloc[0])

    # Add the stock-related columns with their default values.
    column_defaults = (
        (u'证券代码', ''),
        (u'证券名称', ''),
        (u'成交价格', 0),
        (u'成交数量', 0),
    )
    for column, default in column_defaults:
        records[column] = default

    for idx, record in records.iterrows():
        parse_operation(idx, record, records)

    return (records, opening_cash)
def get_required_rows_from_cash_flow():
    """Extract the BE0100, interest and bank-transfer rows of the cash flow.

    Reads 'dongxing_normal_cash_flow.xls', keeps the rows whose summary
    mentions BE0100, an interest posting or a bank transfer, and fills in the
    stock-style columns for them.

    Returns:
        ``(rows, init_cash)``; ``(None, 0)`` after printing a notice when the
        file does not exist.
    """
    path = 'dongxing_normal_cash_flow.xls'
    if not os.path.isfile(path):
        print('no cash flow file, skip ...')
        return (None, 0)

    all_rows = normalize.parse_file(path)
    init_cash = get_init_cash_from_cash_flow(all_rows.iloc[0])
    del all_rows[u'币种']

    summaries = all_rows[u'摘要'].str
    keep = (
        summaries.contains('BE0100')
        | summaries.contains(u'结息入账,积数:')
        | summaries.contains(u'银行转帐转')
    )
    selected = all_rows[keep].copy()
    selected[u'证券代码'] = ''
    selected[u'证券名称'] = ''
    selected[u'成交价格'] = 0
    selected[u'成交数量'] = 0

    for idx, record in selected.iterrows():
        summary = record[u'摘要']

        if 'BE0100' in summary:
            selected.loc[idx, u'证券代码'] = u'BE0100'
            selected.loc[idx, u'证券名称'] = u'现金宝'
            amount = record[u'发生金额']
            selected.loc[idx, u'成交价格'] = 1
            if re.match(match_be0100_buy, summary):
                selected.loc[idx, u'委托类别'] = u'买入'
                selected.loc[idx, u'成交数量'] = -amount
            elif re.match(match_be0100_sell, summary):
                selected.loc[idx, u'委托类别'] = u'卖出'
                selected.loc[idx, u'成交数量'] = -amount
            elif re.search(match_be0100_hongli, summary):
                selected.loc[idx, u'委托类别'] = u'红利'
            else:
                print('!!! error unknown zhaiyao %s ' % summary)
            continue

        if u'结息入账,积数:' in summary:
            selected.loc[idx, u'委托类别'] = u'利息归本'
        elif u'银行转帐转入' in summary:
            selected.loc[idx, u'委托类别'] = u'银行转入'
        elif u'银行转帐转出' in summary:
            selected.loc[idx, u'委托类别'] = u'银行转出'
        else:
            print('!!!! unknown row %s' % record)

    return (selected, init_cash)
def test():
    """Dump the rows of the Huatai cash flow that match ``match_stock``.

    Reads 'huatai_rzrq_cash_flow.xls', flags every row whose summary matches
    ``match_stock`` and writes the flagged rows to 'is_stock.xls'. When the
    input file does not exist a notice is printed and ``(None, 0)`` is
    returned; otherwise nothing is returned.
    """
    f = 'huatai_rzrq_cash_flow.xls'
    if not os.path.isfile(f):
        print('no huatai cash flow file, skip ...')
        return (None, 0)

    flow_records = normalize.parse_file(f)
    flow_records['is_stock'] = False
    for i, row in flow_records.iterrows():
        # Only the fact that the summary matches matters here; the captured
        # groups are not needed.
        if re.match(match_stock, row[u'摘要']):
            flow_records.loc[i, 'is_stock'] = True

    df = flow_records[flow_records['is_stock'] == True]
    df.to_excel('is_stock.xls', encoding='gbk')