def get_odps_columns(column_names, is_child=False): p_odps_columns = [] for (index, key) in enumerate(column_names): p_odps_columns.append(Column(name=column_names[key], type="string")) if is_child: p_odps_columns.append(Column(name="parentId", type="string")) return p_odps_columns
def testCreateTableWithChineseColumn(self): test_table_name = tn('pyodps_t_tmp_create_table_with_chinese_columns') columns = [ Column(name='序列', type='bigint', comment='注释'), Column(name=u'值', type=u'string', comment=u'注释2'), ] partitions = [ Partition(name='ds', type='string', comment='分区注释'), Partition(name=u'ds2', type=u'string', comment=u'分区注释2'), ] schema = Schema(columns=columns, partitions=partitions) columns_repr = "[<column 序列, type bigint>, <column 值, type string>]" partitions_repr = "[<partition ds, type string>, <partition ds2, type string>]" schema_repr = textwrap.dedent(""" odps.Schema { 序列 bigint # 注释 值 string # 注释2 } Partitions { ds string # 分区注释 ds2 string # 分区注释2 } """).strip() ddl_string_comment = textwrap.dedent(u""" CREATE TABLE `table_name` ( `序列` BIGINT COMMENT '注释', `值` STRING COMMENT '注释2' ) PARTITIONED BY ( `ds` STRING COMMENT '分区注释', `ds2` STRING COMMENT '分区注释2' )""").strip() ddl_string = textwrap.dedent(u""" CREATE TABLE `table_name` ( `序列` BIGINT, `值` STRING ) PARTITIONED BY ( `ds` STRING, `ds2` STRING )""").strip() self.assertEqual(repr(columns), columns_repr) self.assertEqual(repr(partitions), partitions_repr) self.assertEqual(repr(schema).strip(), schema_repr) self.assertEqual(schema.get_table_ddl().strip(), ddl_string_comment) self.assertEqual( schema.get_table_ddl(with_comments=False).strip(), ddl_string) self.odps.delete_table(test_table_name, if_exists=True) table = self.odps.create_table(test_table_name, schema) self.assertSequenceEqual( [to_str(col.name) for col in table.schema.columns], [to_str(col.name) for col in schema.columns]) self.assertSequenceEqual( [to_str(col.comment) for col in table.schema.columns], [to_str(col.comment) for col in schema.columns])
def create_table_example(): from odps.models import Schema, Column, Partition columns = [ Column(name='num', type='bigint', comment='the column'), Column(name='num2', type='double', comment='the column2') ] partitions = [Partition(name='pt', type='string', comment='the partition')] schema = Schema(columns=columns, partitions=partitions) table = o.create_table('my_new_table', schema, if_not_exists=True, lifecycle=7)
def downloaAndUp(self, csvFilename, tableName, sep=",", urlIndex=1, pt=None): """ :param csvFilename: 传入本地csv的路径,必须要有表头 :param tableName: 上传到odps时的表名 :param sep: csv的分隔符 :param urlIndex: url字段的坐标位置 """ print("start upload ...\n") f = open(csvFilename, encoding='utf-8') first_line = f.readlines(1)[0].strip('\n').split(sep) columns = [ Column(name=f"{x}", type='string', comment='the column') for x in first_line ] if pt: partitions = [ Partition(name='pt', type='string', comment='the partition') ] schema = Schema(columns=columns, partitions=partitions) table = self.creat_table(tableName, schema) table.create_partition(f"pt={pt}", if_not_exists=True) with table.open_writer(partition=f"pt={pt}") as writer: for index, line in enumerate(f): print(f"{index} in {tableName} ...") item = line.strip('\n').split(sep) item.append(pt) resp = download(item[urlIndex]) data = resp.text if sys.getsizeof(data) <= 8 * 1024 * 1000: item[urlIndex] = data else: print(f"failed in {item[0]}") writer.write(item) else: schema = Schema(columns=columns) table = self.creat_table(tableName, schema) with table.open_writer(partition=None) as writer: for index, line in enumerate(f): print(f"{index} in {tableName} ...") item = line.strip('\n').split(sep) resp = download(item[urlIndex]) data = resp.text if sys.getsizeof(data) <= 8 * 1024 * 1000: item[urlIndex] = data else: print(f"failed in {item[0]}") writer.write(item) print("\n\n upload finish ...") f.close()
def generate_odps_columns_from_pandas_df(self,df): type_dict = {"int64":'BIGINT','float64':'DOUBLE','object':'STRING'} column_types = df.dtypes odps_columns = [] for c_name in column_types.index: temp_type = type_dict.get(str(c_name),'STRING') if self.verbose: print("Transfer from pandas columns to odps columns:", o_name +'->'+c_name.lower()+':'+'str(column_type[c_name])+' '->'+temp_type) odps_columns.append(Column(name=c_name.lower(),type=temp_type)) return odps_columns
def createTable(tableName): try: columns = [ Column(name='user_id', type='string', comment='用户ID'), Column(name='node_id', type='string', comment='节点号'), Column(name='label_type', type='string', comment='标签类型'), Column(name='label_value', type='string', comment='标签值'), Column(name='precision_value', type='string', comment='准确率'), Column(name='sampleCoverNum', type='string', comment='样本覆盖量'), Column(name='sameLabelTotalNum', type='string', comment='样本同一标签总量'), Column(name='sampleTotalNum', type='string', comment='样板总量'), Column(name='model_name', type='string', comment='模型名称'), Column(name='created_at', type='string', comment='插入时间') ] partitions = [Partition(name='pt', type='string', comment='批次分区')] schema = Schema(columns=columns, partitions=partitions) table = odps.create_table(tableName, schema, if_not_exists=True) return 0 except: logger.warning("creatTable warning", exc_info=True) return -100
def uploadCSV(self, csvFilename, tableName, sep=",", pt=None): """ :param csvFilename: 传入本地csv的路径,必须要有表头 :param tableName: 上传到odps时的表名 :param sep: csv的分隔符 :param pt: 是否创建分区 """ print("start upload ...\n") df = pd.read_csv(csvFilename, sep=sep) shape0 = df.shape[0] columns = [ Column(name=f"{x}", type='string', comment='the column') for x in df.columns ] if pt: partitions = [ Partition(name='pt', type='string', comment='the partition') ] schema = Schema(columns=columns, partitions=partitions) table = self.creat_table(tableName, schema) table.create_partition(f"pt={pt}", if_not_exists=True) table_columns = [i.name for i in table.schema.columns] with table.open_writer(partition=f"pt={pt}") as writer: for index in df.index: print(f"{index+1}/{shape0} in {tableName} ...") item_dict = dict(df.loc[index]) item = [] for field in table_columns[:-1]: item.append(item_dict.get(field, '')) item.append(pt) writer.write(item) else: schema = Schema(columns=columns) table = self.creat_table(tableName, schema) table_columns = [i.name for i in table.schema.columns] with table.open_writer(partition=None) as writer: for index in df.index: print(f"{index+1}/{shape0} in {tableName} ...") item_dict = dict(df.loc[index]) item = [] for field in table_columns[:-1]: item.append(item_dict.get(field, '')) writer.write(item) print("\n\n upload finish ...")
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 先删除之前的表,如果存在的话 odps_basic.delete_table(self.table_name) # 创建schema columns = [ Column(name='CODE', type='string', comment='代码'), Column(name='sec_name', type='string', comment='名称'), Column(name='ipo_date', type='string', comment='上市日期'), Column(name='delist_date', type='string', comment='摘牌日期'), Column(name='backdoor', type='string', comment='是否借壳上市'), Column(name='backdoordate', type='string', comment='借壳上市日期') ] partitions = [ Partition(name='pt', type='string', comment='the partition报告日期') ] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) table_name = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = table_name.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8") as csvfile: # 读取csv文件,返回的是迭代类型 reader = csv.reader(csvfile) columns = [row for row in reader] # print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns[1:]: column_new = [] column_new.append(column[0]) column_new.extend(column[2:4]) if column[4].split(" ")[0] < "1900-01-01": column_new.append("") else: column_new.append(column[4]) column_new.append(column[5]) if column[6].split(" ")[0] < "1900-01-01": column_new.append("") else: column_new.append(column[6]) table_writer.write(column_new) print("The row has to be written: ", column_new) table_writer.close()
from odps.models import Schema, Column, Partition from config import * # config should include flowing configuration # access_id # secret_key # project # endpoint ''' For more information, ref to https://pyodps.readthedocs.io/ ''' o = ODPS(access_id, secret_key, project, endpoint=endpoint) label_col = [Column(name='label', type='bigint')] dense_col = [ Column( name='dense' + str(i), type='double') for i in range(1, 14) ] sparse_col = [Column(name='C' + str(i), type='string') for i in range(14, 40)] columns = label_col + dense_col + sparse_col schema = Schema( columns=columns) # schema = Schema(columns=columns, partitions=partitions) table_name = 'wide_and_deep' print(schema.columns)
with Y_ans0.open_reader() as reader: count1 = reader.count print 'For original table' print count1 for record in reader[0:2]: print record for record in reader[0:count1]: vid = record[0] records.append([ vid, submit.loc[vid, 'sys'], submit.loc[vid, 'dia'], submit.loc[vid, 'tl'], submit.loc[vid, 'hdl'], submit.loc[vid, 'ldl'] ]) columns = [ Column(name='vid', type='String', comment='体检人id'), Column(name='sys', type='bigint', comment='收缩压'), Column(name='dia', type='bigint', comment='舒张压'), Column(name='tl', type='double', comment='甘油三酯'), Column(name='hdl', type='double', comment='高密度脂蛋白胆固醇'), Column(name='ldl', type='double', comment='低密度脂蛋白胆固醇') ] schema = Schema(columns=columns) o.delete_table('meinian_round2_submit_b', if_exists=True) tableS = o.create_table('meinian_round2_submit_b', schema, if_not_exists=True) writer = tableS.open_writer() writer.write(records) writer.close()
def upload_to_odps(self, filename): # 连接odps odps_basic = OdpsClient() # 创建schema columns = [Column(name='CODE', type='string', comment='代码'), Column(name='sec_name', type='string', comment='证券名称'), Column(name='rights_issue_price', type='double', comment='配股价格'), Column(name='rights_issue_ratio', type='double', comment='配股比例'), Column(name='planned_volume', type='double', comment='机会配股数量(万吨)'), Column(name='actual_volume', type='double', comment='实际认购数量(万吨)'), Column(name='subscription_ratio', type='double', comment='认购比例(%)'), Column(name='total_funds_raised', type='double', comment='募资合计(亿元)'), Column(name='rights_issue_expenses', type='double', comment='配售费用(亿元)'), Column(name='actual_funds_raised', type='double', comment='实际木子(亿元)'), Column(name='underwriting_type', type='string', comment='承销方式'), Column(name='lead_underwriter', type='string', comment='主承销商'), Column(name='rights_issue_anncedate', type='string', comment='配股公告日'), Column(name='record_date', type='string', comment='股权登记日'), Column(name='ex_rights_date', type='string', comment='除权日'), Column(name='listing_date', type='string', comment='配股上市日')] schema = Schema(columns=columns) # 创建odps table odps_basic.create_table(self.new_tableName, schema, if_not_exists=True) table_name = odps_basic.get_table(self.new_tableName) # 写入ODPS表 print(filename) table_writer = table_name.open_writer() # 读取csv文件数据写入table with open(self.csv_path + filename, "r", encoding='utf-8-sig') as csvfile: # 读取csv文件,返回的是迭代类型 reader = csv.reader(csvfile) columns = [row for row in reader] # print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns: column_new = [] column_new.extend(column[:2]) for col in column[2:10]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) column_new.extend(column[10:]) # print("The row has to be written: ", column_new[0], column[1]) table_writer.write(column_new) table_writer.close()
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 创建schema columns = [ Column(name='CODE', type='string', comment='代码'), Column(name='BBI', type='double', comment='BBI多空指数'), Column(name='DMA', type='double', comment='DMA平均线差'), Column(name='DMI', type='double', comment='DMI趋向指标'), Column(name='EXPMA', type='double', comment='EXPMA指数平均数'), Column(name='MA', type='double', comment='MA简单移动平均'), Column(name='MACD', type='double', comment='MACD指数平滑移动平均'), Column(name='MTM', type='double', comment='MTM动力指标'), Column(name='PRICEOSC', type='double', comment='PRICESOSC价格震荡指标'), Column(name='SAR', type='double', comment='SAR抛物转向'), Column(name='TRIX', type='double', comment='TRIX三次指数平滑平均'), Column(name='BIAS', type='double', comment='BIAS乘离率'), Column(name='CCI', type='double', comment='CCI顺势指标'), Column(name='DPO', type='double', comment='DPO区间震荡线'), Column(name='KDJ', type='double', comment='KDJ随机指标'), Column(name='slowKD', type='double', comment='SLOWKD慢速KD'), Column(name='ROC', type='double', comment='ROC变动速率'), Column(name='RSI', type='double', comment='RSI相对强弱指标'), Column(name='SI', type='double', comment='SI摆动指标'), Column(name='PVT', type='double', comment='PVT量价趋势指标'), Column(name='SOBV', type='double', comment='SOBV能量潮'), Column(name='WVAD', type='double', comment='WVAD威廉变异离散量'), Column(name='BBIBOLL', type='double', comment='BBIBOLL多空布林线'), Column(name='BOLL', type='double', comment='BOLL布林线'), Column(name='CDP', type='double', comment='CDP逆势操作'), Column(name='ENV', type='double', comment='ENV指标'), Column(name='MIKE', type='double', comment='MIKE麦克指标'), Column(name='vol_ratio', type='double', comment='量比'), Column(name='VMA', type='double', comment='VMA量简单移动平均'), Column(name='VMACD', type='double', comment='VMACD量指数平滑异同平均'), Column(name='VOSC', type='double', comment='VOSC成交量震荡'), Column(name='TAPI', type='double', comment='TAPI加权指数成交值'), Column(name='VSTD', type='double', comment='VSTD成交量标准差'), Column(name='ADTM', type='double', comment='ADTM动态买卖气指标'), Column(name='RC', type='double', comment='RC变化率指数'), Column(name='SRMI', type='double', comment='SRMI MI修正指标'), Column(name='ATR', type='double', comment='ATR真实波幅'), Column(name='STD', type='double', comment='STD标准差'), Column(name='VHF', type='double', comment='VHF纵横指标') ] partitions = [ Partition(name='pt', type='string', comment='the partition报告日期') ] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) tablename = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = tablename.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8-sig") as csvfile: # 读取csv文件,返回的是迭代类型 print("Open file to read ...", self.folder + filename) reader = csv.reader(csvfile) columns = [] for r in reader: columns.append(r) print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns: column_new = [column[0]] for col in column[2:]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) table_writer.write(column_new) table_writer.close()
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 创建schema columns = [ Column(name='CODE', type='string', comment='代码'), Column(name='history_high', type='string', comment='近期创历史新高'), Column(name='history_low', type='string', comment='近期创历史新低'), Column(name='stage_high', type='string', comment='近期创阶段新高'), Column(name='stage_low', type='string', comment='近期创阶段新低'), Column(name='up_days', type='double', comment='连涨天数'), Column(name='down_days', type='double', comment='连跌天数'), Column(name='breakout_ma', type='string', comment='向上有效突破均线'), Column(name='breakdown_ma', type='string', comment='向下有效突破均线'), Column(name='history_high_days', type='double', comment='近期创历史新高次数'), Column(name='history_low_days', type='double', comment='近期创历史新低次数'), Column(name='bull_bear_ma', type='string', comment='均线多空头排列看涨看跌') ] partitions = [ Partition(name='pt', type='string', comment='the partition报告日期') ] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) tablename = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = tablename.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8-sig") as csvfile: # 读取csv文件,返回的是迭代类型 print("Open file to read ...", self.folder + filename) reader = csv.reader(csvfile) columns = [] for r in reader: columns.append(r) print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns: column_new = [column[0]] column_new.extend(column[2:6]) for col in column[6:8]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) column_new.extend(column[8:10]) for col in column[10:12]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) column_new.append(column[-1]) table_writer.write(column_new) table_writer.close()
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 创建schema columns = [ Column(name='CODE', type='string', comment='代码'), Column(name='marginornot', type='string', comment='是否融资融券标的'), Column(name='SHSC', type='string', comment='是否沪港通买入标的'), # Column(name='SHSC2', type='string', comment='是否深港通买入标的'), Column(name='riskwarning', type='string', comment='是否属于风险警示版'), Column(name='industry_sw', type='string', comment='所属申万行业名称-全部明细,包含一级二级三级'), Column(name='industry_swcode', type='string', comment='所属申万行业代码-全部明细,包含一级二级三级'), Column(name='mrg_long_amt', type='double', comment='融资买入额'), Column(name='mrg_long_repay', type='double', comment='融资偿还额'), Column(name='mrg_long_bal', type='double', comment='融资余额'), Column(name='mrg_short_vol', type='double', comment='融券卖出量'), Column(name='mrg_short_vol_repay', type='double', comment='融券偿还量'), Column(name='margin_saletradingamount', type='double', comment='融券卖出额'), Column(name='margin_salerepayamount', type='double', comment='融券偿还额'), Column(name='mrg_short_vol_bal', type='double', comment='融券余量'), Column(name='mrg_short_bal', type='double', comment='融券余额'), Column(name='mrg_bal', type='double', comment='融资融券余额'), Column(name='compindex2_index1', type='string', comment='是否属于重要指数成份-上证50指数'), Column(name='compindex2_index2', type='string', comment='是否属于重要指数成份-上证180指数'), Column(name='compindex2_index3', type='string', comment='是否属于重要指数成份-沪深300指数'), Column(name='compindex2_index4', type='string', comment='是否属于重要指数成份-中证100指数'), Column(name='compindex2_index5', type='string', comment='是否属于重要指数成份-深证100指数'), Column(name='compindex2_index6', type='string', comment='是否属于重要指数成份-中证500指数'), Column(name='compindex2_index7', type='string', comment='是否属于重要指数成份-中证1000指数'), Column(name='is_ST', type='string', comment='是否ST'), Column(name='is_xST', type='string', comment='是否*ST') ] partitions = [ Partition(name='pt', type='string', comment='the partition报告日期') ] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) table_name = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = table_name.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8") as csvfile: # 读取csv文件,返回的是迭代类型 reader = csv.reader(csvfile) columns = [row for row in reader] # print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns[1:]: #columns[1:] column_new = [] column_new.append(column[0]) column_new.extend(column[2:7]) for col in column[7:17]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) column_new.extend(column[17:]) print("The row has to be written: ", column_new[0], column[1]) table_writer.write(column_new) table_writer.close()
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 创建schema columns = [ Column(name='CODE', type='string', comment='代码'), Column(name='cash_recp_sg_and_rs', type='double', comment='销售商品、提供劳务收到的现金'), Column(name='recp_tax_rends', type='double', comment='收到的税费返还'), Column(name='other_cash_recp_ral_oper_act', type='double', comment='收到其他与经营活动有关的现金'), Column(name='net_incr_insured_dep', type='double', comment='保户储金净增加额'), Column(name='net_incr_dep_cob', type='double', comment='客户存款和同业存放款项净增加额'), Column(name='net_incr_loans_central_bank', type='double', comment='向中央银行借款净增加额'), Column(name='net_incr_fund_borr_ofi', type='double', comment='向其他金融机构拆入资金净增加额'), Column(name='net_incr_int_handling_chrg', type='double', comment='收取利息和手续费净增加额'), Column(name='cash_recp_prem_orig_inco', type='double', comment='收到的原保险合同保费取得的现金'), Column(name='net_cash_received_reinsu_bus', type='double', comment='收到的再保业务现金净额'), Column(name='net_incr_disp_tfa', type='double', comment='处置交易性金融资产净增加额'), Column(name='net_incr_disp_fin_assets_avail', type='double', comment='处置可供出售金融资产净增加额'), Column(name='net_incr_loans_other_bank', type='double', comment='拆入资金净增加额'), Column(name='net_incr_repurch_bus_fund', type='double', comment='回购业务资金净增加额'), Column(name='net_cash_from_seurities', type='double', comment='代理买卖证券收到的现金净额'), Column(name='stot_cash_inflows_oper_act', type='double', comment='经营活动现金流入小计'), Column(name='net_incr_lending_fund', type='double', comment='融出资金净增加额'), Column(name='net_fina_instruments_measured_at_fmv', type='double', comment='以公允价值计量且其变动计入当期损益的金融工具净额'), Column(name='cash_pay_goods_purch_serv_rec', type='double', comment='购买商品、接受劳务支付的现金'), Column(name='cash_pay_beh_empl', type='double', comment='支付给职工以及为职工支付的现金'), Column(name='pay_all_typ_tax', type='double', comment='支付的各项税费'), Column(name='other_cash_pay_ral_oper_act', type='double', comment='支付其他与经营活动有关的现金'), Column(name='net_incr_clients_loan_adv', type='double', comment='客户贷款及垫款净增加额'), Column(name='net_incr_dep_cbob', type='double', comment='存放央行和同业款项净增加额'), Column(name='cash_pay_claims_orig_inco', type='double', comment='支付原保险合同赔付款项的现金'), Column(name='handling_chrg_paid', type='double', comment='支付手续费的现金'), Column(name='comm_insur_plcy_paid', type='double', comment='支付保单红利的现金'), Column(name='stot_cash_outflows_oper_act', type='double', comment='经营活动现金流出小计'), Column(name='net_cash_flows_oper_act', type='double', comment='经营活动产生的现金流量净额'), Column(name='cash_recp_disp_withdrwl_invest', type='double', comment='收回投资收到的现金'), Column(name='cash_recp_return_invest', type='double', comment='取得投资收益收到的现金'), Column(name='net_cash_recp_disp_fiolta', type='double', comment='处置固定资产、无形资产和其他长期资产收回的现金净额'), Column(name='net_cash_recp_disp_sobu', type='double', comment='处置子公司及其他营业单位收到的现金净额'), Column(name='other_cash_recp_ral_inv_act', type='double', comment='收到其他与投资活动有关的现金'), Column(name='stot_cash_inflows_inv_act', type='double', comment='投资活动现金流入小计'), Column(name='cash_pay_acq_const_fiolta', type='double', comment='购建固定资产、无形资产和其他长期资产支付的现金'), Column(name='cash_paid_invest', type='double', comment='投资支付的现金'), Column(name='net_incr_pledge_loan', type='double', comment='质押贷款净增加额'), Column(name='net_cash_pay_aquis_sobu', type='double', comment='取得子公司及其他营业单位支付的现金净额'), Column(name='other_cash_pay_ral_inv_act', type='double', comment='支付其他与投资活动有关的现金'), Column(name='stot_cash_outflows_inv_act', type='double', comment='投资活动现金流出小计'), Column(name='net_cash_flows_inv_act', type='double', comment='投资活动产生的现金流量净额'), Column(name='cash_recp_cap_contrib', type='double', comment='吸收投资收到的现金'), Column(name='cash_rec_saims', type='double', comment='子公司吸收少数股东投资收到的现金'), Column(name='cash_recp_borrow', type='double', comment='取得借款收到的现金'), Column(name='other_cash_recp_ral_fnc_act', type='double', comment='收到其他与筹资活动有关的现金'), Column(name='proc_issue_bonds', type='double', comment='发行债券收到的现金'), Column(name='stot_cash_inflows_fnc_act', type='double', comment='筹资活动现金流入小计'), Column(name='cash_prepay_amt_borr', type='double', comment='偿还债务支付的现金'), Column(name='cash_pay_dist_dpcp_int_exp', type='double', comment='分配股利、利润或偿付利息支付的现金'), Column(name='dvd_profit_paid_sc_ms', type='double', comment='子公司支付给少数股东的股利、利润'), Column(name='other_cash_pay_ral_fnc_act', type='double', comment='支付其他与筹资活动有关的现金'), Column(name='stot_cash_outflows_fnc_act', type='double', comment='筹资活动现金流出小计'), Column(name='net_cash_flows_fnc_act', type='double', comment='筹资活动产生的现金流量净额'), Column(name='eff_fx_flu_cash', type='double', comment='汇率变动对现金的影响'), Column(name='net_incr_cash_cash_equ_dm', type='double', comment='现金及现金等价物净增加额'), Column(name='cash_cash_equ_beg_period', type='double', comment='期初现金及现金等价物余额'), Column(name='cash_cash_equ_end_period', type='double', comment='期末现金及现金等价物余额'), Column(name='net_profit_cs', type='double', comment='补充资料-净利润'), Column(name='prov_depr_assets', type='double', comment='资产减值准备'), Column(name='depr_fa_coga_dpba', type='double', comment='固定资产折旧、油气资产折耗、生产性生物资产折旧'), Column(name='amort_intang_assets', type='double', comment='无形资产摊销'), Column(name='amort_lt_deferred_exp', type='double', comment='长期待摊费用摊销'), Column(name='decr_deferred_exp', type='double', comment='待摊费用减少'), Column(name='incr_acc_exp', type='double', comment='预提费用增加'), Column(name='loss_disp_fiolta', type='double', comment='处置固定资产、无形资产和其他长期资产的损失'), Column(name='loss_scr_fa', type='double', comment='固定资产报废损失'), Column(name='loss_fv_chg', type='double', comment='公允价值变动损失'), Column(name='fin_exp_cs', type='double', comment='补充资料-财务费用'), Column(name='invest_loss', type='double', comment='投资损失'), Column(name='decr_deferred_inc_tax_assets', type='double', comment='递延所得税资产减少'), Column(name='incr_deferred_inc_tax_liab', type='double', comment='递延所得税负债增加'), Column(name='decr_inventories', type='double', comment='存货的减少'), Column(name='decr_oper_payable', type='double', comment='经营性应收项目的减少'), Column(name='incr_oper_payable', type='double', comment='经营性应付项目的增加'), Column(name='unconfirmed_invest_loss_cs', type='double', comment='补充资料-未确认的投资损失'), Column(name='others', type='double', comment='其他'), Column(name='im_net_cash_flows_oper_act', type='double', comment='间接法-经营活动产生的现金流量净额'), Column(name='conv_debt_into_cap', type='double', comment='债务转为资本'), Column(name='conv_corp_bonds_due_within_1y', type='double', comment='一年内到期的可转换公司债券'), Column(name='fa_fnc_leases', type='double', comment='融资租入固定资产'), Column(name='end_bal_cash', type='double', comment='现金的期末余额'), Column(name='beg_bal_cash', type='double', comment='现金的期初余额'), Column(name='end_bal_cash_equ', type='double', comment='现金等价物的期末余额'), Column(name='beg_bal_cash_equ', type='double', comment='现金等价物的期初余额'), Column(name='net_incr_cash_cash_equ_im', type='double', comment='间接法-现金及现金等价物净增加额') ] partitions = [ Partition(name='pt', type='string', comment='the partition报告日期') ] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) tablename = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = tablename.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8-sig") as csvfile: # 读取csv文件,返回的是迭代类型 print("Open file to read ...", self.folder + filename) reader = csv.reader(csvfile) columns = [] for r in reader: columns.append(r) print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns: column_new = [column[0]] for col in column[2:]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) table_writer.write(column_new) table_writer.close()
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 创建schema columns = [ Column(name='CODE', type='string', comment='代码'), Column(name='tot_oper_rev', type='double', comment='营业总收入'), Column(name='oper_rev', type='double', comment='营业收入'), Column(name='int_inc', type='double', comment='利息收入'), Column(name='insur_prem_unearned', type='double', comment='已赚保费'), Column(name='handling_chrg_comm_inc', type='double', comment='手续费及佣金收入'), Column(name='tot_prem_inc', type='double', comment='保费业务收入'), Column(name='reinsur_inc', type='double', comment='分保费收入'), Column(name='prem_ceded', type='double', comment='分出保费'), Column(name='unearned_prem_rsrv_withdraw', type='double', comment='提取未到期责任准备金'), Column(name='net_inc_agencybusiness', type='double', comment='代理买卖证券业务净收入'), Column(name='net_inc_underwriting_business', type='double', comment='证券承销业务净收入'), Column(name='net_inc_customerasset_managementbusiness', type='double', comment='受托客户资产管理业务净收入'), Column(name='other_oper_inc', type='double', comment='其他业务收入'), Column(name='net_int_inc', type='double', comment='利息净收入'), Column(name='net_fee_and_commission_inc', type='double', comment='手续费及佣金净收入'), Column(name='net_other_oper_inc', type='double', comment='其他业务净收入'), Column(name='tot_oper_cost', type='double', comment='营业总成本'), Column(name='oper_cost', type='double', comment='营业成本'), Column(name='int_exp', type='double', comment='利息支出'), Column(name='handling_chrg_comm_exp', type='double', comment='手续费及佣金支出'), Column(name='oper_exp', type='double', comment='营业支出'), Column(name='taxes_surcharges_ops', type='double', comment='税金及附加'), Column(name='selling_dist_exp', type='double', comment='销售费用'), Column(name='gerl_admin_exp', type='double', comment='管理费用'), Column(name='fin_exp_is', type='double', comment='财务费用'), Column(name='impair_loss_assets', type='double', comment='资产减值损失'), Column(name='prepay_surr', type='double', comment='退保金'), Column(name='net_claim_exp', type='double', comment='赔付支出净额'), Column(name='net_insur_cont_rsrv', type='double', comment='提取保险责任准备金'), Column(name='dvd_exp_insured', type='double', comment='保单红利支出'), Column(name='reinsurance_exp', type='double', comment='分保费用'), Column(name='claim_exp_recoverable', type='double', comment='摊回赔付支出'), Column(name='Insur_rsrv_recoverable', type='double', comment='摊回保险责任准备金'), Column(name='reinsur_exp_recoverable', type='double', comment='摊回分保费用'), Column(name='other_oper_exp', type='double', comment='其他业务成本'), Column(name='net_inc_other_ops', type='double', comment='其他经营净收益'), Column(name='net_gain_chg_fv', type='double', comment='公允价值变动净收益'), Column(name='net_invest_inc', type='double', comment='投资净收益'), Column(name='inc_invest_assoc_jv_entp', type='double', comment='对联营企业和合营企业的投资收益'), Column(name='net_gain_fx_trans', type='double', comment='汇兑净收益'), Column(name='gain_asset_dispositions', type='double', comment='资产处置收益'), Column(name='other_grants_inc', type='double', comment='其他收益'), # Column(name='opprofit_gap', type='double', comment='营业利润差额(特殊报表科目)'), # Column(name='opprofit_gap_detail', type='double', comment='营业利润差额说明(特殊报表科目)'), Column(name='opprofit', type='double', comment='营业利润'), Column(name='non_oper_rev', type='double', comment='营业外收入'), Column(name='non_oper_exp', type='double', comment='营业外支出'), Column(name='net_loss_disp_noncur_asset', type='double', comment='非流动资产处置净损失'), # Column(name='profit_gap', type='double', comment='利润总额差额(特殊报表科目)'), # Column(name='profit_gap_detail', type='double', comment='利润总额差额说明(特殊报表科目)'), Column(name='tot_profit', type='double', comment='利润总额'), Column(name='tax', type='double', comment='所得税'), Column(name='unconfirmed_invest_loss_is', type='double', comment='利润表-未确认的投资损失'), # Column(name='net_profit_is_gap', type='double', comment='净利润差额(特殊报表科目)'), # Column(name='net_profit_is_gap_detail', type='double', comment='净利润差额说明(特殊报表科目)'), Column(name='net_profit_is', type='double', comment='净利润'), Column(name='net_profit_continued', type='double', comment='持续经营净利润'), Column(name='net_profit_discontinued', type='double', comment='终止经营净利润'), Column(name='minority_int_inc', type='double', comment='少数股东损益'), Column(name='np_belongto_parcomsh', type='double', comment='归属母公司股东的净利润'), Column(name='eps_basic_is', type='double', comment='基本每股收益'), Column(name='eps_diluted_is', type='double', comment='稀释每股收益'), Column(name='other_compreh_inc', type='double', comment='利润表-其他综合收益'), Column(name='tot_compreh_inc', type='double', comment='综合收益总额'), Column(name='tot_compreh_inc_min_shrhldr', type='double', comment='归属于少数股东的综合收益总额'), Column(name='tot_compreh_inc_parent_comp', type='double', comment='归属于母公司普通股东综合收益总额') ] partitions = [ Partition(name='pt', type='string', comment='the partition报告日期') ] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) tablename = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = tablename.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8-sig") as csvfile: # 读取csv文件,返回的是迭代类型 print("Open file to read ...", self.folder + filename) reader = csv.reader(csvfile) columns = [] for r in reader: columns.append(r) print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns: column_new = [column[0]] for col in column[2:]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) table_writer.write(column_new) table_writer.close()
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 创建schema columns = [Column(name='CODE', type='string', comment='代码'), Column(name='eps_basic', type='double', comment='每股收益EPS-基本'), Column(name='eps_diluted', type='double', comment='每股收益EPS-稀释'), Column(name='eps_diluted2', type='double', comment='每股收益EPS-期末股本摊薄'), Column(name='eps_adjust', type='double', comment='每股收益EPS-最新股本摊薄'), Column(name='eps_exbasic', type='double', comment='每股收益EPS-扣除/基本'), Column(name='eps_exdiluted', type='double', comment='每股收益EPS-扣除/稀释'), Column(name='eps_exdiluted2', type='double', comment='每股收益EPS-扣除/期末股本摊薄'), Column(name='bps', type='double', comment='每股净资产BPS'), Column(name='bps_adjust', type='double', comment='每股净资产BPS-最新股本摊薄'), Column(name='bps_new', type='double', comment='每股净资产BPS(最新公告)'), Column(name='ocfps', type='double', comment='每股经营活动产生的现金流量净额'), Column(name='grps', type='double', comment='每股营业总收入'), Column(name='orps', type='double', comment='每股营业收入'), Column(name='surpluscapitalps', type='double', comment='每股资本公积'), Column(name='surplusreserveps', type='double', comment='每股盈余公积'), Column(name='undistributedps', type='double', comment='每股未分配利润'), Column(name='retainedps', type='double', comment='每股留存收益'), Column(name='cfps', type='double', comment='每股现金流量净额'), Column(name='ebitps', type='double', comment='每股息税前利润'), Column(name='fcffps', type='double', comment='每股企业自由现金流量'), Column(name='fcfeps', type='double', comment='每股股东自由现金流量'), Column(name='ebitdaps', type='double', comment='每股EBITDA'), Column(name='roe_avg', type='double', comment='净资产收益率ROE(平均)'), Column(name='roe_basic', type='double', comment='净资产收益率ROE(加权)'), Column(name='roe_diluted', type='double', comment='净资产收益率ROE(摊薄)'), Column(name='roe_deducted', type='double', comment='净资产收益率ROE(扣除/平均)'), Column(name='roe_exbasic', type='double', comment='净资产收益率ROE(扣除/加权)'), Column(name='roe_exdiluted', type='double', comment='净资产收益率ROE(扣除/摊薄)'), Column(name='roe_add', type='double', comment='净资产收益率ROE-增发条件'), Column(name='roa2', type='double', comment='总资产报酬率ROA'), Column(name='roa', type='double', comment='总资产净利率ROA'), Column(name='roic', type='double', comment='投入资本回报率ROIC'), Column(name='ROP', type='double', comment='人力投入回报率(ROP)'), Column(name='roe_yearly', type='double', comment='年化净资产收益率'), Column(name='roa2_yearly', type='double', comment='年化总资产报酬率'), Column(name='roa_yearly', type='double', comment='年化总资产净利率'), Column(name='netprofitmargin', type='double', comment='销售净利率'), Column(name='grossprofitmargin', type='double', comment='销售毛利率'), Column(name='cogstosales', type='double', comment='销售成本率'), Column(name='nptocostexpense', type='double', comment='成本费用利润率'), Column(name='expensetosales', type='double', comment='销售期间费用率'), Column(name='optoebt', type='double', comment='主营业务比率'), Column(name='profittogr', type='double', comment='净利润/营业总收入'), Column(name='optogr', type='double', comment='营业利润/营业总收入'), Column(name='ebittogr', type='double', comment='息税前利润/营业总收入'), Column(name='gctogr', type='double', comment='营业总成本/营业总收入'), Column(name='operateexpensetogr', type='double', comment='销售费用/营业总收入'), Column(name='adminexpensetogr', type='double', comment='管理费用/营业总收入'), Column(name='finaexpensetogr', type='double', comment='财务费用/营业总收入'), Column(name='impairtogr', type='double', comment='资产减值损失/营业总收入'), Column(name='impairtoOP', type='double', comment='资产减值损失/营业利润'), Column(name='ebitdatosales', type='double', comment='EBITDA/营业总收入'), Column(name='operateincometoebt', type='double', comment='经营活动净收益/利润总额'), Column(name='investincometoebt', type='double', comment='价值变动净收益/利润总额'), Column(name='nonoperateprofittoebt', type='double', comment='营业外收支净额/利润总额'), Column(name='taxtoebt', type='double', comment='所得税/利润总额'), Column(name='deductedprofittoprofit', type='double', comment='扣除非经常损益后的净利润/净利润'), Column(name='salescashintoor', type='double', comment='销售商品提供劳务收到的现金/营业收入'), Column(name='ocftoor', type='double', comment='经营活动产生的现金流量净额/营业收入'), Column(name='ocftooperateincome', type='double', comment='经营活动产生的现金流量净额/经营活动净收益'), Column(name='capitalizedtoda', type='double', comment='资本支出/折旧和摊销'), Column(name='ocftocf', type='double', comment='经营活动产生的现金流量净额占比'), Column(name='icftocf', type='double', comment='投资活动产生的现金流量净额占比'), Column(name='fcftocf', type='double', comment='筹资活动产生的现金流量净额占比'), Column(name='ocftosales', type='double', comment='经营性现金净流量/营业总收入'), Column(name='ocftoinveststockdividend', type='double', comment='现金满足投资比率'), Column(name='ocftoop', type='double', comment='现金营运指数'), Column(name='ocftoassets', type='double', comment='全部资产现金回收率'), Column(name='ocftodividend', type='double', comment='现金股利保障倍数'), Column(name='debttoassets', type='double', comment='资产负债率'), Column(name='deducteddebttoassets', type='double', comment='剔除预收款项后的资产负债率'), Column(name='longdebttolongcaptial', type='double', comment='长期资本负债率'), Column(name='longcapitaltoinvestment', type='double', comment='长期资产适合率'), Column(name='assetstoequity', type='double', comment='权益乘数'), Column(name='catoassets', type='double', comment='流动资产/总资产'), Column(name='currentdebttoequity', type='double', comment='流动负债权益比率'), Column(name='ncatoassets', type='double', comment='非流动资产/总资产'), Column(name='longdebttoequity', type='double', comment='非流动负债权益比率'), Column(name='tangibleassetstoassets', type='double', comment='有形资产/总资产'), Column(name='equitytototalcapital', type='double', comment='归属母公司股东的权益/全部投入资本'), Column(name='intdebttototalcap', type='double', comment='带息债务/全部投入资本'), Column(name='currentdebttodebt', type='double', comment='流动负债/负债合计'), Column(name='longdebtodebt', type='double', comment='非流动负债/负债合计'), Column(name='ncatoequity', type='double', comment='资本固定化比率'), Column(name='current', type='double', comment='流动比率'), Column(name='quick', type='double', comment='速动比率'), Column(name='cashratio', type='double', comment='保守速动比率'), Column(name='cashtocurrentdebt', type='double', comment='现金比率'), Column(name='ocftoquickdebt', type='double', comment='现金到期债务比'), Column(name='ocftointerest', type='double', comment='现金流量利息保障倍数'), Column(name='debttoequity', type='double', comment='产权比率'), Column(name='equitytodebt', type='double', comment='归属母公司股东的权益/负债合计'), Column(name='equitytointerestdebt', type='double', comment='归属母公司股东的权益/带息债务'), Column(name='tangassettointdebt', type='double', comment='有形资产/带息债务'), Column(name='tangibleassettodebt', type='double', comment='有形资产/负债合计'), Column(name='tangibleassettonetdebt', type='double', comment='有形资产/净债务'), Column(name='debttotangibleequity', type='double', comment='有形净值债务率'), Column(name='ebitdatodebt', type='double', comment='息税折旧摊销前利润/负债合计'), Column(name='ocftodebt', type='double', comment='经营活动产生的现金流量净额/负债合计'), Column(name='ocftointerestdebt', type='double', comment='经营活动产生的现金流量净额/带息债务'), Column(name='ocftoshortdebt', type='double', comment='经营活动产生的现金流量净额/流动负债'), Column(name='ocftonetdebt', type='double', comment='经营活动产生的现金流量净额/净债务'), Column(name='ocftolongdebt', type='double', comment='经营活动产生的现金流量净额/非流动负债'), Column(name='ocficftocurrentdebt', type='double', comment='非筹资性现金净流量与流动负债的比率'), Column(name='ocficftodebt', type='double', comment='非筹资性现金净流量与负债总额的比率'), Column(name='ebittointerest', type='double', comment='已获利息倍数(EBIT/利息费用)'), Column(name='longdebttoworkingcapital', type='double', comment='长期债务与营运资金比率'), Column(name='longdebttodebt', type='double', comment='长期负债占比'), Column(name='netdebttoev', type='double', comment='净债务/股权价值'), Column(name='interestdebttoev', type='double', comment='带息债务/股权价值'), Column(name='ebitdatointerestdebt', type='double', comment='EBITDA/带息债务'), Column(name='ebitdatointerest', type='double', comment='EBITDA/利息费用'), Column(name='tltoebitda', type='double', comment='全部债务/EBITDA'), Column(name='cashtostdebt', type='double', comment='货币资金/短期债务'), Column(name='turndays', type='double', comment='营业周期'), Column(name='invturndays', type='double', comment='存货周转天数'), Column(name='arturndays', type='double', comment='应收账款周转天数'), Column(name='apturndays', type='double', comment='应付账款周转天数'), Column(name='netturndays', type='double', comment='净营业周期'), Column(name='invturn', type='double', comment='存货周转率'), Column(name='arturn', type='double', comment='应收账款周转率'), Column(name='caturn', type='double', comment='流动资产周转率'), Column(name='operatecaptialturn', type='double', comment='营运资本周转率'), Column(name='faturn', type='double', comment='固定资产周转率'), Column(name='non_currentassetsturn', type='double', comment='非流动资产周转率'), Column(name='assetsturn1', type='double', comment='总资产周转率'), Column(name='turnover_ttm', type='double', comment='总资产周转率(TTM)'), Column(name='apturn', type='double', comment='应付账款周转率'), Column(name='yoyeps_basic', type='double', comment='基本每股收益(同比增长率)'), Column(name='yoyeps_diluted', type='double', comment='稀释每股收益(同比增长率)'), Column(name='yoyocfps', type='double', comment='每股经营活动产生的现金流量净额(同比增长率)'), Column(name='yoy_tr', type='double', comment='营业总收入(同比增长率)'), Column(name='yoy_or', type='double', comment='营业收入(同比增长率)'), Column(name='yoyop', type='double', comment='营业利润(同比增长率)'), Column(name='yoyop2', type='double', comment='营业利润(同比增长率)-2'), Column(name='yoyebt', type='double', comment='利润总额(同比增长率)'), Column(name='yoyprofit', type='double', comment='净利润(同比增长率)'), Column(name='yoynetprofit', type='double', comment='归属母公司股东的净利润(同比增长率)'), Column(name='yoynetprofit_deducted', type='double', comment='归属母公司股东的净利润-扣除非经常损益(同比增长率)'), Column(name='yoyocf', type='double', comment='经营活动产生的现金流量净额(同比增长率)'), Column(name='yoyroe', type='double', comment='净资产收益率(摊薄)(同比增长率)'), Column(name='maintenance', type='double', comment='资本项目规模维持率'), Column(name='yoy_cash', type='double', comment='货币资金增长率'), Column(name='yoy_fixedassets', type='double', comment='固定资产投资扩张率'), Column(name='yoy_equity', type='double', comment='净资产(同比增长率)'), Column(name='yoycf', type='double', comment='现金净流量(同比增长率)'), Column(name='yoydebt', type='double', comment='总负债(同比增长率)'), Column(name='yoy_assets', type='double', comment='总资产(同比增长率)'), Column(name='yoybps', type='double', comment='每股净资产(相对年初增长率)'), Column(name='yoyassets', type='double', comment='资产总计(相对年初增长率)'), Column(name='yoyequity', type='double', comment='归属母公司股东的权益(相对年初增长率)'), Column(name='growth_cagr_tr', type='double', comment='营业总收入复合年增长率'), Column(name='growth_cagr_netprofit', type='double', comment='净利润复合年增长率'), Column(name='growth_gr', type='double', comment='营业总收入(N年,增长率)'), Column(name='growth_gc', type='double', comment='营业总成本(N年,增长率)'), Column(name='growth_or', type='double', comment='营业收入(N年,增长率)'), Column(name='growth_op', type='double', comment='营业利润(N年,增长率)'), Column(name='growth_operateincome', type='double', comment='经营活动净收益(N年,增长率)'), Column(name='growth_investincome', type='double', comment='价值变动净收益(N年,增长率)'), Column(name='growth_ebt', type='double', comment='利润总额(N年,增长率)'), Column(name='growth_profit', type='double', comment='净利润(N年,增长率)'), Column(name='growth_netprofit', type='double', comment='归属母公司股东的净利润(N年,增长率)'), Column(name='growth_netprofit_deducted', type='double', comment='归属母公司股东的净利润-扣除非经常损益(N年,增长率)'), Column(name='growth_ocf', type='double', comment='经营活动产生的现金流量净额(N年,增长率)'), Column(name='growth_assets', type='double', comment='资产总计(N年,增长率)'), Column(name='growth_totalequity', type='double', comment='股东权益(N年,增长率)'), Column(name='growth_equity', type='double', comment='归属母公司股东的权益(N年,增长率)'), Column(name='growth_profittosales', type='double', comment='销售利润率(N年,增长率)'), Column(name='growth_roe', type='double', comment='净资产收益率(N年,增长率)'), Column(name='roe', type='double', comment='净资产收益率ROE'), Column(name='dupont_assetstoequity', type='double', comment='权益乘数(杜邦分析)'), Column(name='assetsturn', type='double', comment='杜邦分析-总资产周转率'), Column(name='dupont_np', type='double', comment='归属母公司股东的净利润/净利润'), # Column(name='profittogr', type='double', comment='净利润/营业总收入'), Column(name='dupont_taxburden', type='double', comment='净利润/利润总额'), Column(name='dupont_intburden', type='double', comment='利润总额/息税前利润'), # Column(name='ebittogr', type='double', comment='息税前利润/营业总收入'), Column(name='workingcapitaltoassets', type='double', comment='营运资本/总资产'), Column(name='retainedearningstoassets', type='double', comment='留存收益/总资产'), Column(name='EBITtoassets', type='double', comment='Z值预警-息税前利润(TTM)/总资产'), Column(name='equitytodebt2', type='double', comment='当日总市值/负债总计'), Column(name='bookvaluetodebt', type='double', comment='股东权益合计(含少数)/负债总计'), Column(name='revenuetoassets', type='double', comment='营业收入/总资产'), Column(name='z_score', type='double', comment='Z值'), Column(name='roe_ttm2', type='double', comment='净资产收益率(TTM)'), Column(name='roa2_ttm2', type='double', comment='总资产报酬率(TTM)'), Column(name='roic2_ttm', type='double', comment='投入资本回报率(TTM)'), Column(name='netprofittoassets', type='double', comment='总资产净利率-不含少数股东损益(TTM)'), Column(name='roic_ttm2', type='double', comment='投入资本回报率ROIC(TTM)'), Column(name='ebittoassets2', type='double', comment='息税前利润(TTM)/总资产'), Column(name='netprofitmargin_ttm2', type='double', comment='销售净利率(TTM)'), Column(name='grossprofitmargin_ttm2', type='double', comment='销售毛利率(TTM)'), Column(name='expensetosales_ttm2', type='double', comment='销售期间费用率(TTM)'), Column(name='profittogr_ttm2', type='double', comment='净利润/营业总收入(TTM)'), Column(name='optogr_ttm2', type='double', comment='营业利润/营业总收入(TTM)'), Column(name='gctogr_ttm2', type='double', comment='营业总成本/营业总收入(TTM)'), Column(name='optoor_ttm', type='double', comment='营业利润/营业收入(TTM)'), Column(name='netprofittoor_ttm', type='double', comment='归属母公司股东的净利润/营业收入(TTM)'), Column(name='operateexpensetogr_ttm2', type='double', comment='销售费用/营业总收入(TTM)'), Column(name='adminexpensetogr_ttm2', type='double', comment='管理费用/营业总收入(TTM)'), Column(name='finaexpensetogr_ttm2', type='double', comment='财务费用/营业总收入(TTM)'), Column(name='taxtoebt_ttm', type='double', comment='税项/利润总额(TTM)'), Column(name='impairtogr_ttm2', type='double', comment='资产减值损失/营业总收入(TTM)'), Column(name='operateincometoebt_ttm2', type='double', comment='经营活动净收益/利润总额(TTM)'), Column(name='investincometoebt_ttm2', type='double', comment='价值变动净收益/利润总额(TTM)'), Column(name='nonoperateprofittoebt_ttm2', type='double', comment='营业外收支净额/利润总额(TTM)'), Column(name='taxtoor_ttm', type='double', comment='营业利润/利润总额(TTM)'), Column(name='ebttoor_ttm', type='double', comment='利润总额/营业收入(TTM)'), Column(name='salescashintoor_ttm2', type='double', comment='销售商品提供劳务收到的现金/营业收入(TTM)'), Column(name='ocftoor_ttm2', type='double', comment='经营活动产生的现金流量净额/营业收入(TTM)'), Column(name='ocftooperateincome_ttm2', type='double', comment='经营活动产生的现金流量净额/经营活动净收益(TTM)'), Column(name='operatecashflowtoop_ttm', type='double', comment='经营活动产生的现金流量净额/营业利润(TTM)'), Column(name='gr_ttm2', type='double', comment='营业总收入(TTM)'), Column(name='gc_ttm2', type='double', comment='营业总成本(TTM)'), Column(name='or_ttm2', type='double', comment='营业收入(TTM)'), Column(name='cost_ttm2', type='double', comment='营业成本-非金融类(TTM)'), Column(name='expense_ttm2', type='double', comment='营业支出-金融类(TTM)'), Column(name='grossmargin_ttm2', type='double', comment='毛利(TTM)'), Column(name='operateexpense_ttm2', type='double', comment='销售费用(TTM)'), Column(name='adminexpense_ttm2', type='double', comment='管理费用(TTM)'), Column(name='finaexpense_ttm2', type='double', comment='财务费用(TTM)'), Column(name='periodexpense_t_ttm', type='double', comment='期间费用(TTM)'), Column(name='interestexpense_ttm', type='double', comment='利息支出(TTM)'), Column(name='minorityinterest_ttm', type='double', comment='少数股东损益(TTM)'), Column(name='impairment_ttm2', type='double', comment='资产减值损失(TTM)'), Column(name='operateincome_ttm2', type='double', comment='经营活动净收益(TTM)'), Column(name='investincome_ttm2', type='double', comment='价值变动净收益(TTM)'), Column(name='op_ttm2', type='double', comment='营业利润(TTM)'), Column(name='nonoperateprofit_ttm2', type='double', comment='营业外收支净额(TTM)'), Column(name='ebit_ttm2', type='double', comment='息税前利润(TTM反推法)'), Column(name='tax_ttm', type='double', comment='所得税(TTM)'), Column(name='ebt_ttm2', type='double', comment='利润总额(TTM)'), Column(name='profit_ttm2', type='double', comment='净利润(TTM)'), Column(name='netprofit_ttm2', type='double', comment='归属母公司股东的净利润(TTM)'), Column(name='deductedprofit_ttm2', type='double', comment='扣除非经常性损益后的净利润(TTM)'), Column(name='ebit2_ttm', type='double', comment='EBIT(TTM)'), Column(name='ebitda_ttm', type='double', comment='EBITDA(TTM反推法)'), Column(name='ebitda2_ttm', type='double', comment='EBITDA(TTM)'), Column(name='salescashin_ttm2', type='double', comment='销售商品提供劳务收到的现金(TTM)'), Column(name='operatecashflow_ttm2', type='double', comment='经营活动现金净流量(TTM)'), Column(name='investcashflow_ttm2', type='double', comment='投资活动现金净流量(TTM)'), Column(name='financecashflow_ttm2', type='double', comment='筹资活动现金净流量(TTM)'), Column(name='cashflow_ttm2', type='double', comment='现金净流量(TTM)'), Column(name='extraordinary', type='double', comment='非经常性损益'), Column(name='deductedprofit', type='double', comment='扣除非经常性损益后的净利润'), Column(name='grossmargin', type='double', comment='毛利'), Column(name='operateincome', type='double', comment='经营活动净收益'), Column(name='investincome', type='double', comment='价值变动净收益'), Column(name='ebit', type='double', comment='EBIT(反推法)'), Column(name='ebitda', type='double', comment='EBITDA(反推法)'), Column(name='ebit2', type='double', comment='EBIT'), Column(name='ebitda2', type='double', comment='EBITDA'), Column(name='researchanddevelopmentexpenses', type='double', comment='研发费用'), Column(name='investcapital', type='double', comment='全部投入资本'), Column(name='workingcapital', type='double', comment='营运资本'), Column(name='networkingcapital', type='double', comment='净营运资本'), Column(name='tangibleasset', type='double', comment='有形资产'), Column(name='retainedearnings', type='double', comment='留存收益'), Column(name='interestdebt', type='double', comment='带息债务'), Column(name='netdebt', type='double', comment='净债务'), Column(name='exinterestdebt_current', type='double', comment='无息流动负债'), Column(name='exinterestdebt_noncurrent', type='double', comment='无息非流动负债'), Column(name='fcff', type='double', comment='企业自由现金流量FCFF'), Column(name='fcfe', type='double', comment='股权自由现金流量FCFE'), Column(name='da_perid', type='double', comment='当期计提折旧与摊销'), Column(name='stm_issuingdate', type='string', comment='定期报告披露日期'), Column(name='stm_predict_issuingdate', type='string', comment='定期报告计划披露日期')] partitions = [Partition(name='pt', type='string', comment='the partition报告日期')] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) tablename = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = tablename.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8-sig") as csvfile: # 读取csv文件,返回的是迭代类型 print("Open file to read ...", self.folder + filename) reader = csv.reader(csvfile) columns = [] for r in reader: columns.append(r) print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns: try: column_new = [float("nan") if (col == 'None' or col == '') else float(col) for col in column[2:-2]] column_new.insert(0, column[0]) column_new.extend(column[-2:]) table_writer.write(column_new) except ValueError as e: print("stock:", column[0], filename) print(e) table_writer.close()
def uploadToOdps(self, filename): odps_basic = OdpsClient() # 创建schema columns = [ Column(name='CODE', type='string', comment='代码'), Column(name='monetary_cap', type='double', comment='货币资金'), Column(name='tradable_fin_assets', type='double', comment='以公允价值计量且其变动计入当期损益的金融资产'), Column(name='derivative_fin_assets', type='double', comment='衍生金融资产'), Column(name='notes_rcv', type='double', comment='应收票据'), Column(name='acct_rcv', type='double', comment='应收账款'), Column(name='oth_rcv', type='double', comment='其他应收款'), Column(name='prepay', type='double', comment='预付款项'), Column(name='dvd_rcv', type='double', comment='应收股利'), Column(name='int_rcv', type='double', comment='应收利息'), Column(name='inventories', type='double', comment='存货'), Column(name='consumptive_bio_assets', type='double', comment='消耗性生物资产'), Column(name='deferred_exp', type='double', comment='待摊费用'), Column(name='hfs_assets', type='double', comment='划分为持有待售的资产'), Column(name='non_cur_assets_due_within_1y', type='double', comment='一年内到期的非流动资产'), Column(name='settle_rsrv', type='double', comment='结算备付金'), Column(name='loans_to_oth_banks', type='double', comment='拆出资金'), Column(name='margin_acct', type='double', comment='融出资金'), Column(name='prem_rcv', type='double', comment='应收保费'), Column(name='rcv_from_reinsurer', type='double', comment='应收分保账款'), Column(name='rcv_from_ceded_insur_cont_rsrv', type='double', comment='应收分保合同准备金'), Column(name='red_monetary_cap_for_sale', type='double', comment='买入返售金融资产'), Column(name='tot_acct_rcv', type='double', comment='应收款项'), Column(name='oth_cur_assets', type='double', comment='其他流动资产'), # Column(name='cur_assets_gap', type='double', comment='流动资产差额(特殊报表科目)'), # Column(name='cur_assets_gap_detail', type='string', comment='流动资产差额说明(特殊报表科目)'), Column(name='tot_cur_assets', type='double', comment='流动资产合计'), Column(name='fin_assets_amortizedcost', type='double', comment='以摊余成本计量的金融资产'), Column(name='fin_assets_chg_compreh_inc', type='double', comment='以公允价值计量且其变动计入其他综合收益的金融资产'), Column(name='fin_assets_avail_for_sale', type='double', comment='可供出售金融资产'), Column(name='held_to_mty_invest', type='double', comment='持有至到期投资'), Column(name='invest_real_estate', type='double', comment='投资性房地产'), Column(name='long_term_eqy_invest', type='double', comment='长期股权投资'), Column(name='long_term_rec', type='double', comment='长期应收款'), Column(name='fix_assets', type='double', comment='固定资产'), Column(name='proj_matl', type='double', comment='工程物资'), Column(name='const_in_prog', type='double', comment='在建工程'), Column(name='fix_assets_disp', type='double', comment='固定资产清理'), Column(name='productive_bio_assets', type='double', comment='生产性生物资产'), Column(name='oil_and_natural_gas_assets', type='double', comment='油气资产'), Column(name='intang_assets', type='double', comment='无形资产'), Column(name='r_and_d_costs', type='double', comment='开发支出'), Column(name='goodwill', type='double', comment='商誉'), Column(name='long_term_deferred_exp', type='double', comment='长期待摊费用'), Column(name='deferred_tax_assets', type='double', comment='递延所得税资产'), Column(name='loans_and_adv_granted', type='double', comment='发放贷款及垫款'), Column(name='oth_non_cur_assets', type='double', comment='其他非流动资产'), # Column(name='non_cur_assets_gap', type='double', comment='非流动资产差额(特殊报表科目)'), # Column(name='non_cur_assets_gap_detail', type='string', comment='非流动资产差额说明(特殊报表科目)'), Column(name='tot_non_cur_assets', type='double', comment='非流动资产合计'), # Column(name='assets_gap', type='double', comment='资产差额(特殊报表科目)'), # Column(name='assets_gap_detail', type='string', comment='资产差额说明(特殊报表科目)'), Column(name='tot_assets', type='double', comment='资产总计'), Column(name='cash_deposits_central_bank', type='double', comment='现金及存放中央银行款项'), Column(name='agency_bus_assets', type='double', comment='代理业务资产'), Column(name='rcv_invest', type='double', comment='应收款项类投资'), Column(name='asset_dep_oth_banks_fin_inst', type='double', comment='存放同业和其它金融机构款项'), Column(name='precious_metals', type='double', comment='贵金属'), Column(name='rcv_ceded_unearned_prem_rsrv', type='double', comment='应收分保未到期责任准备金'), Column(name='rcv_ceded_claim_rsrv', type='double', comment='应收分保未决赔款准备金'), Column(name='rcv_ceded_life_insur_rsrv', type='double', comment='应收分保寿险责任准备金'), Column(name='rcv_ceded_lt_health_insur_rsrv', type='double', comment='应收分保长期健康险责任准备金'), Column(name='insured_pledge_loan', type='double', comment='保户质押贷款'), Column(name='cap_mrgn_paid', type='double', comment='存出资本保证金'), Column(name='independent_acct_assets', type='double', comment='独立账户资产'), Column(name='time_deposits', type='double', comment='定期存款'), Column(name='subr_rec', type='double', comment='应收代位追偿款'), Column(name='mrgn_paid', type='double', comment='存出保证金'), Column(name='seat_fees_exchange', type='double', comment='交易席位费'), Column(name='clients_cap_deposit', type='double', comment='客户资金存款'), Column(name='clients_rsrv_settle', type='double', comment='客户备付金'), Column(name='oth_assets', type='double', comment='其他资产'), Column(name='st_borrow', type='double', comment='短期借款'), Column(name='tradable_fin_liab', type='double', comment='以公允价值计量且其变动计入当期损益的金融负债'), Column(name='notes_payable', type='double', comment='应付票据'), Column(name='acct_payable', type='double', comment='应付账款'), Column(name='adv_from_cust', type='double', comment='预收账款'), Column(name='empl_ben_payable', type='double', comment='应付职工薪酬'), Column(name='taxes_surcharges_payable', type='double', comment='应交税费'), Column(name='tot_acct_payable', type='double', comment='应付款项'), Column(name='int_payable', type='double', comment='应付利息'), Column(name='dvd_payable', type='double', comment='应付股利'), Column(name='oth_payable', type='double', comment='其他应付款'), Column(name='acc_exp', type='double', comment='预提费用'), Column(name='deferred_inc_cur_liab', type='double', comment='递延收益-流动负债'), Column(name='hfs_liab', type='double', comment='划分为持有待售的负债'), Column(name='non_cur_liab_due_within_1y', type='double', comment='一年内到期的非流动负债'), Column(name='st_bonds_payable', type='double', comment='应付短期债券'), Column(name='borrow_central_bank', type='double', comment='向中央银行借款'), Column(name='deposit_received_ib_deposits', type='double', comment='吸收存款及同业存放'), Column(name='loans_oth_banks', type='double', comment='拆入资金'), Column(name='fund_sales_fin_assets_rp', type='double', comment='卖出回购金融资产款'), Column(name='handling_charges_comm_payable', type='double', comment='应付手续费及佣金'), Column(name='payable_to_reinsurer', type='double', comment='应付分保账款'), Column(name='rsrv_insur_cont', type='double', comment='保险合同准备金'), Column(name='acting_trading_sec', type='double', comment='代理买卖证券款'), Column(name='acting_uw_sec', type='double', comment='代理承销证券款'), Column(name='oth_cur_liab', type='double', comment='其他流动负债'), # Column(name='cur_liab_gap', type='double', comment='流动负债差额(特殊报表科目)'), # Column(name='cur_liab_gap_detail', type='string', comment='流动负债差额说明(特殊报表科目)'), Column(name='tot_cur_liab', type='double', comment='流动负债合计'), Column(name='lt_borrow', type='double', comment='长期借款'), Column(name='bonds_payable', type='double', comment='应付债券'), Column(name='lt_payable', type='double', comment='长期应付款'), Column(name='lt_empl_ben_payable', type='double', comment='长期应付职工薪酬'), Column(name='specific_item_payable', type='double', comment='专项应付款'), Column(name='provisions', type='double', comment='预计负债'), Column(name='deferred_tax_liab', type='double', comment='递延所得税负债'), Column(name='deferred_inc_non_cur_liab', type='double', comment='递延收益-非流动负债'), Column(name='oth_non_cur_liab', type='double', comment='其他非流动负债'), # Column(name='non_cur_liab_gap', type='double', comment='非流动负债差额(特殊报表科目)'), # Column(name='non_cur_liab_gap_detail', type='string', comment='非流动负债差额说明(特殊报表科目)'), Column(name='tot_non_cur_liab', type='double', comment='非流动负债合计'), # Column(name='liab_gap', type='double', comment='负债差额(特殊报表科目)'), # Column(name='liab_gap_detail', type='string', comment='负债差额说明(特殊报表科目)'), Column(name='tot_liab', type='double', comment='负债合计'), Column(name='liab_dep_oth_banks_fin_inst', type='double', comment='同业和其它金融机构存放款项'), Column(name='agency_bus_liab', type='double', comment='代理业务负债'), Column(name='cust_bank_dep', type='double', comment='吸收存款'), Column(name='claims_payable', type='double', comment='应付赔付款'), Column(name='dvd_payable_insured', type='double', comment='应付保单红利'), Column(name='deposit_received', type='double', comment='存入保证金'), Column(name='insured_deposit_invest', type='double', comment='保户储金及投资款'), Column(name='unearned_prem_rsrv', type='double', comment='未到期责任准备金'), Column(name='out_loss_rsrv', type='double', comment='未决赔款准备金'), Column(name='life_insur_rsrv', type='double', comment='寿险责任准备金'), Column(name='lt_health_insur_v', type='double', comment='长期健康险责任准备金'), Column(name='independent_acct_liab', type='double', comment='独立账户负债'), Column(name='prem_received_adv', type='double', comment='预收保费'), Column(name='pledge_loan', type='double', comment='质押借款'), Column(name='st_finl_inst_payable', type='double', comment='应付短期融资款'), Column(name='oth_liab', type='double', comment='其他负债'), Column(name='derivative_fin_liab', type='double', comment='衍生金融负债'), Column(name='cap_stk', type='double', comment='实收资本(或股本)'), Column(name='other_equity_instruments', type='double', comment='其他权益工具'), Column(name='other_equity_instruments_PRE', type='double', comment='其他权益工具:优先股'), Column(name='cap_rsrv', type='double', comment='资本公积金'), Column(name='surplus_rsrv', type='double', comment='盈余公积金'), Column(name='undistributed_profit', type='double', comment='未分配利润'), Column(name='tsy_stk', type='double', comment='库存股'), Column(name='other_compreh_inc_bs', type='double', comment='其他综合收益'), Column(name='special_rsrv', type='double', comment='专项储备'), Column(name='prov_nom_risks', type='double', comment='一般风险准备'), Column(name='cnvd_diff_foreign_curr_stat', type='double', comment='外币报表折算差额'), Column(name='unconfirmed_invest_loss_bs', type='double', comment='未确认的投资损失'), # Column(name='shrhldr_eqy_gap', type='double', comment='股东权益差额(特殊报表科目)'), # Column(name='shrhldr_eqy_gap_detail', type='string', comment='其他股东权益差额说明(特殊报表科目)'), Column(name='eqy_belongto_parcomsh', type='double', comment='归属母公司股东的权益'), Column(name='minority_int', type='double', comment='少数股东权益'), Column(name='tot_equity', type='double', comment='所有者权益合计'), # Column(name='liab_shrhldr_eqy_gap', type='double', comment='负债及股东权益差额(特殊报表科目)'), # Column(name='liab_shrhldr_eqy_gap_detail', type='string', comment='负债及股东权益差额说明(特殊报表科目)'), Column(name='tot_liab_shrhldr_eqy', type='double', comment='负债及股东权益总计') ] partitions = [ Partition(name='pt', type='string', comment='the partition报告日期') ] schema = Schema(columns=columns, partitions=partitions) odps_basic.create_table(self.table_name, schema) tablename = odps_basic.get_table(self.table_name) # 写入ODPS表 print(filename) pt_date = filename.strip(".csv") partitions = "pt=" + pt_date table_writer = tablename.open_writer(partition=partitions, create_partition=True) # 读取csv文件数据写入table with open(self.folder + filename, "r", encoding="utf-8-sig") as csvfile: # 读取csv文件,返回的是迭代类型 print("Open file to read ...", self.folder + filename) reader = csv.reader(csvfile) columns = [] for r in reader: columns.append(r) print("---增加记录数量:---", len(columns)) csvfile.close() for column in columns: column_new = [column[0]] for col in column[2:]: if (col == 'None' or col == '' or col == 'nan'): column_new.append(float("nan")) else: column_new.append(float(col)) table_writer.write(column_new) table_writer.close()