def create_save_db_row_ids_info(self, ijson):
    """Map request row ids to DB row ids and persist the super-key update.

    Looks up the super-key / db-row-id mapping for ``ijson['row_ids']`` in
    the project's databuilder SQLite DB, rewrites the payload in place
    (removes 'row_ids', adds 'db_row_ids') and delegates to
    ``PYAPI.update_super_key_to_db_row``.

    :param ijson: request dict; must contain 'company_id', 'project_id'
                  and 'row_ids'.  Mutated by this call.
    :returns: result of ``update_super_key_to_db_row`` (project-defined).
    """
    company_id = ijson['company_id']
    project_id = ijson['project_id']
    row_ids = ijson['row_ids']
    db_path = config.Config.databuilder_path.format(company_id, project_id)
    conn, cur = conn_obj.sqlite_connection(db_path)
    try:
        # Only the super-key dict is consumed; the other two mappings
        # returned by the helper are unused here.
        tt_dbrid_sk_dct, tt_dbrid_dct, tt_dbrid_ch_dct = \
            self.return_row_tt_db_row_ids(conn, cur, row_ids)
    finally:
        # FIX: the original leaked the connection when the lookup raised.
        conn.close()
    del ijson['row_ids']
    ijson['db_row_ids'] = tt_dbrid_sk_dct
    import pyapi as pyf
    p_Obj = pyf.PYAPI()
    res = p_Obj.update_super_key_to_db_row(ijson)
    return res
def create_db_gv_txt(self, company_id, ijson, sec_cik, client_id, fye,
                     project_name, project_display_name, new_cid, pid,
                     rev_doc_map_d, doc_as_dct):
    """Generate the tab-separated KBRA ``Data.txt`` export for one project.

    Walks the ``Norm_Scale`` text dumps for the company/model, resolves
    table-type display names from two SQLite databases, rescales and cleans
    every reported value, and writes one tab-separated line per
    (KPI row, period column) into
    ``/var/www/html/KBRA_output/<pid>/<new_cid>/Data.txt``.

    Returns ``(fp_path, done_docs)`` -- the output file path and a dict
    whose keys are the ``<new_cid>-<doc_id>`` document names emitted.

    NOTE(review): legacy Python 2 code (print statements, ``iteritems``,
    in-place ``.sort()`` on ``dict.keys()``).  Indentation was reconstructed
    from a whitespace-mangled source -- verify nesting against version
    control.  The bare ``xxxx...`` names below are deliberate hard-crash
    markers: reaching one raises NameError and aborts the run.
    """
    print 'rev_doc_map_d', rev_doc_map_d
    # Display name with all whitespace removed; used in the input dir path.
    cdn = ''.join([es for es in project_display_name.split()])
    company_name = ijson['company_name']
    model_number = ijson['model_number']
    deal_id = ijson['deal_id']
    project_id = ijson['project_id']
    ijson['template_id'] = 10
    # NOTE(review): the company_id parameter is overwritten here, so the
    # caller-supplied value is never used.
    company_id = "%s_%s" % (project_id, deal_id)
    # table_type -> description lookup ("compnay" typo is in the real path).
    db_file = "/mnt/eMB_db/company_info/compnay_info.db"
    conn, cur = conn_obj.sqlite_connection(db_file)
    sql = "select table_type, description from model_id_map"
    cur.execute(sql)
    res = cur.fetchall()
    conn.close()
    ttype_map_d = {}
    for r in res:
        tt, description = r
        ttype_map_d[tt] = description
    # Per-model overrides: group display names win over generic descriptions.
    # NOTE(review): this second connection is never closed.
    db_file = '/mnt/eMB_db/%s/%s/mt_data_builder.db' % (company_name,
                                                        model_number)
    conn, cur = conn_obj.sqlite_connection(db_file)
    read_qry = 'SELECT table_type, group_id, display_name FROM tt_group_display_name_info;'
    cur.execute(read_qry)
    table_data = cur.fetchall()
    for row in table_data:
        ttable_type, tgroup_id, display_name = row[:]
        if display_name:
            ttype_map_d[ttable_type] = display_name
    final_ar, taxo_exists, found_open, pc_d, disp_name, empty_line_display = self.read_kpi_data(
        ijson)
    scale_dct = {'mn': 'Million', 'th': 'Thousands', 'bn': 'Billion'}
    vt_dct = {'MNUM': 'Monetary Number', 'BNUM': 'Cardinal Number'}
    txt_dir_path = '/var/www/html/DB_Model/{0}/{1}/Norm_Scale/'.format(
        company_name, cdn)
    list_txt_files = os.listdir(txt_dir_path)
    #fye = self.read_company_meta_info(company_name, model_number)
    res_data = self.cp_read_norm_data_mgmt(company_id)
    # Project ids 20 and 1 route to the shared DB (self.inc_db_name);
    # everything else uses a per-company DataBuilder_<new_cid> database.
    if company_id.split('_')[0] not in ['20', '1']:
        db_data_lst = [
            '172.16.20.52', 'root', 'tas123',
            'DataBuilder_%s' % (new_cid)
        ]
        m_conn, m_cur = self.mysql_connection(db_data_lst)
    else:
        db_data_lst = ['172.16.20.52', 'root', 'tas123', self.inc_db_name]
        m_conn, m_cur = self.mysql_connection(db_data_lst)
    header_lst = [
        'KBRACompanyID', 'Filing ID', 'PeriodEnding', 'PeriodDuration',
        'PeriodType', 'Data_Point_Name', 'Mnemonic', 'Source Type',
        'Source Section', 'Value: Full Form', 'Value: As Reported',
        'Value: Unit As Reported', 'Value: Type', 'Value: Currency',
        'Value: Duration', 'Value: Calculated'
    ]
    header_str = '\t'.join(header_lst)
    fp_path = '/var/www/html/KBRA_output/{0}/{1}/Data.txt'.format(
        pid, new_cid)
    f1 = open(fp_path, 'w')
    f1.write(header_str + '\n')
    taxo_template_map = self.get_mneumonic_info(ijson)
    gen_id_map_tt = self.read_table_type()
    done_docs = {}
    for txt_file in list_txt_files:
        txt_file = txt_file.strip()
        if '-P' not in txt_file:
            continue
        # NOTE(review): hard-coded single-file filter -- looks like a debug
        # leftover; every file except 414-P.txt is skipped.
        if '414-P.txt' != txt_file:
            continue
        gr_id = txt_file.split('-')
        gen_id = gr_id[0]
        if len(gr_id) == 3:
            gr_id = gr_id[1:-1]
            gr_id = ''.join(gr_id)
        else:
            gr_id = ''
        txt_path = os.path.join(txt_dir_path, txt_file)
        f = open(txt_path)
        txt_data = f.read()
        f.close()
        # SECURITY(review): eval() of an on-disk dump executes arbitrary
        # code -- safe only if these files are trusted/internally generated.
        txt_data = eval(txt_data)
        import pyapi as pyf
        p_Obj = pyf.PYAPI()
        for table_type, tt_data in txt_data.iteritems():
            #ijson['table_type'] = table_type
            #ijson['grpid'] = gr_id
            data = tt_data['data']
            key_map = tt_data['key_map']
            rc_keys = data.keys()
            rc_keys.sort()
            # Column 0 appears to hold row labels; other columns are period
            # headers (key 10) -- TODO confirm against the dump format.
            mneumonic_txt_d = {}
            map_d = {}
            ph_cols = {}
            for rc_tup in rc_keys:
                dt_dct = data[rc_tup]
                if rc_tup[1] == 0:
                    mneumonic_txt_d[dt_dct[21]] = dt_dct[1]
                    map_d[dt_dct[21]] = rc_tup[0]
                    #map_d[('REV', rc_tup[0])] = dt_dct[21]
                else:
                    ph_cols[rc_tup[1]] = dt_dct[10]
                #map_d.setdefault(map_d[('REV', rc_tup[0])], {}).append(rc_tup)
            phs = ph_cols.keys()
            phs.sort()
            for row in final_ar:
                mneumonic_txt = row[2]
                mneumonic_id = row[9]
                rowid = map_d.get(row[1], -1)
                for colid in phs:
                    rc_tup = (rowid, colid)
                    g_dt_dct = data.get(rc_tup, {})
                    # Key 15: formula; only the first formula row is read.
                    formula = g_dt_dct.get(15, [])
                    op_ttype = {}
                    taxo_d = []
                    for f_r in formula[:1]:
                        for r in f_r:
                            if r['op'] == '=' or r['type'] != 't':
                                continue
                            op_ttype[r['ty']] = 1
                            taxo_d.append(r['ty'])
                    # Multi-taxonomy (calculated) points ignore the
                    # restatement history stored under key 31.
                    if len(taxo_d) > 1:
                        re_stated_all = []
                    else:
                        re_stated_all = g_dt_dct.get(31, [])
                    year_wise_d = {}
                    idx_d = {}
                    for r in re_stated_all:
                        #print '\t', r
                        if (r.get(2)):
                            if r[2] not in idx_d:
                                idx_d[r[2]] = len(idx_d.keys()) + 1
                            year_wise_d.setdefault(r.get(2), []).append(r)
                    if not year_wise_d:
                        if re_stated_all:
                            continue
                            # NOTE(review): unreachable after `continue`.
                            print 'Error ', (rc_tup, re_stated_all)
                            sys.exit()
                        # No restatements: emit the current cell exactly once.
                        year_wise_d[1] = [g_dt_dct]
                        idx_d[1] = 1
                    values = year_wise_d.keys()
                    values.sort(key=lambda x: idx_d[x])
                    for v1 in values:
                        dt_dct = year_wise_d[v1][0]
                        formula = g_dt_dct.get(15, [])
                        op_ttype = {}
                        taxo_d = []
                        docids = {}
                        scale_d = {}
                        ttype_d = {}
                        for f_r in formula[:1]:
                            #print
                            for r in f_r:
                                #print r
                                if r['op'] == '=' or r['type'] != 't':
                                    continue
                                if r['label'] == 'Not Exists':
                                    continue
                                if r['doc_id']:
                                    if str(r['doc_id']
                                           ) not in rev_doc_map_d:
                                        print 'DOC NOT Matching ', [
                                            mneumonic_txt,
                                            g_dt_dct.get(10), r['doc_id'],
                                            doc_as_dct.get(
                                                ('D', str(r['doc_id'])))
                                        ]
                                        # Deliberate crash marker (NameError).
                                        xxxxxxxxxxxxxxxxxxxxxxxxx
                                    docids[rev_doc_map_d[str(
                                        r['doc_id'])]] = 1
                                if r.get('v'):
                                    scale_d[str(r['phcsv']['s'])] = 1
                                ttype_d[ttype_map_d[r['tt']]] = 1
                                op_ttype[ttype_map_d[r['tt']]] = 1
                                taxo_d.append(r['ty'])
                        # As-reported text: key 2 for calculated points,
                        # key 38 otherwise.
                        if len(taxo_d) > 1:
                            gv_txt = dt_dct.get(2, '')
                        else:
                            gv_txt = dt_dct.get(38, '')
                        tmpgv_txt = numbercleanup_obj.get_value_cleanup(
                            gv_txt)
                        # Dash / n-a placeholders survive cleanup as '-'.
                        if (gv_txt == '-' and not tmpgv_txt) or (
                                gv_txt == '--' and not tmpgv_txt) or (
                                    gv_txt == 'n/a' and not tmpgv_txt):
                            tmpgv_txt = '-'
                        if gv_txt and not tmpgv_txt:
                            print 'Error Clean Value', [gv_txt, tmpgv_txt]
                            sys.exit()
                        gv_txt = tmpgv_txt
                        #print
                        #print (row, colid)
                        # Sample cell dict (documents the integer key schema):
                        #{1: '8,792.03', 2: '8792.03', 3: '13681', 4: '170', 5: [[383, 215, 43, 7]], 6: '2013FY', 7: u'Mn', 8: 'MNUM', 9: 'USD', 10: 'FY2013', 39: {'p': '2013', 's': 'TH', 'vt': 'MNUM', 'c': 'USD', 'pt': 'FY'}, 34: '', 14: {'d': '13681', 'bbox': [[46, 215, 27, 7]], 'v': 'Amount', 'x': 'x28_170@0_6', 'txt': u'Tier1capital - Amount', 't': '219'}, 40: '', 24: '219', 25: 'x29_170@0_11', 26: 'PeriodicFinancialStatement-FY2013', 38: '$ 8,792,035'}
                        #if len()
                        clean_value = dt_dct.get(2, '')
                        cln_val = copy.deepcopy(clean_value)
                        currency = dt_dct.get(9, '')
                        # Scale: key 7 directly for calculated points, else
                        # from the key-39 header dict with key 7 as fallback.
                        if len(taxo_d) > 1:
                            scale = dt_dct.get(7, '')
                        else:
                            scale = dt_dct.get(39, {}).get('s', '')
                            if not scale:
                                scale = dt_dct.get(7, '')
                        scale1 = dt_dct.get(7, '')
                        tmp_ttype = table_type
                        calc_value = dt_dct.get(41, '')
                        if op_ttype:  #len(op_ttype.keys()) == 1:
                            tmp_ttype = op_ttype.keys()[0]
                        value_type = dt_dct.get(8, '')
                        restated_lst = dt_dct.get(40, [])
                        rep_rst_flg = 'Original'
                        if restated_lst == 'Y':
                            rep_rst_flg = 'Restated'
                        # Later entries in the restatement order are flagged
                        # Restated as well.
                        if len(values) > 1 and idx_d[v1] > 1:
                            rep_rst_flg = 'Restated'
                        ph_info = ph_cols[colid]
                        pdate, period_type, period = '', '', ''
                        if ph_info:
                            print [fye, ph_info, dt_dct.get(3, '')]
                            pdate = self.read_period_ending(fye, ph_info)
                            #print pdate
                            # e.g. 'FY2013' -> type 'FY', period '2013'.
                            period_type = ph_info[:-4]
                            period = ph_info[-4:]
                            #print [ph_info, pdate]
                        doc_id = dt_dct.get(3, '')
                        doc_data = dt_dct.get(27, [])
                        if doc_id:
                            if str(doc_id) not in rev_doc_map_d:
                                print 'DOC NOT Matching ', [
                                    doc_id,
                                    doc_as_dct.get(('D', str(doc_id)))
                                ]
                                # Deliberate crash marker (NameError).
                                xxxxxxxxxxxxxxxxxxxxxxxxx
                            doc_id = rev_doc_map_d[str(doc_id)]
                        #if doc_data:pass
                        #doc_data = doc_data[0][0]
                        #doc_id = doc_data['doc_id']
                        # Calculated (multi-taxonomy) points: blank every
                        # attribute that is ambiguous across the inputs.
                        if len(taxo_d) > 1:  # or rc_tup not in data:
                            calc_value = 'Y'
                            if len(ttype_d.keys()) > 1:
                                tmp_ttype = ''
                            if len(scale_d.keys()) > 1:
                                scale = ''
                                scale1 = ''
                                gv_txt = ''
                            if len(docids.keys()) > 1:
                                doc_id = ''
                            if rc_tup not in data:
                                tmp_ttype = ''
                                scale = ''
                                scale1 = ''
                                gv_txt = ''
                                doc_id = ''
                        # Normalise the value to units of 1 when scaled.
                        if str(scale1) not in ('1', ''):
                            tv, factor = sconvert_obj.convert_frm_to_1(
                                scale.lower(), '1',
                                clean_value if not gv_txt else
                                numbercleanup_obj.get_value_cleanup(
                                    gv_txt))
                            #sys.exit()
                            if factor:
                                clean_value = float(tv.replace(',', ''))
                                clean_value = str(clean_value)
                                clean_value = p_Obj.convert_floating_point(
                                    clean_value)
                        clean_value = clean_value.replace(',', '')
                        if not clean_value:
                            rep_rst_flg = ''
                        if len(taxo_d) > 1 and len(scale_d.keys(
                        )) > 1:  # or rc_tup not in data:
                            scale = ''
                            scale1 = ''
                        table_id = dt_dct.get(24, '')
                        info_ref = ''
                        if table_id and doc_id:
                            dpg = res_data.get(int(table_id), '')
                            print [dpg, table_id]
                            info_ref = self.read_from_info(
                                m_cur, m_conn, dpg, new_cid)
                        #mneumonic_txt = mneumonic_txt.decode('utf-8')
                        #print [mneumonic_txt], mneumonic_txt
                        try:
                            mneumonic_txt = mneumonic_txt.encode('utf-8')
                        except:
                            mneumonic_txt = str(mneumonic_txt)
                        #if value_type != 'Percentage':value_type = 'Absolute'
                        # Source-type column: anything but 'From Sentence'
                        # becomes 'Table'; 'From Sentence' becomes 'Text'.
                        if info_ref:
                            if info_ref != 'From Sentence':
                                info_ref = 'Table'
                            if info_ref == 'From Sentence':
                                info_ref = 'Text'
                        #if len(taxo_d) > 1:# or rc_tup not in data:
                        #    scale = ''
                        vaur = scale_dct.get(scale.lower(), scale)
                        vt_c = vt_dct.get(value_type, value_type)
                        print [
                            'SSSSSSSSSS', vt_c, value_type, mneumonic_txt
                        ], '\n'
                        tmpcalc_value = 'false'
                        if calc_value == 'Y':
                            tmpcalc_value = 'true'
                            vaur = ''
                            gv_txt = ''
                        dcname = ''
                        if doc_id:
                            dcname = '%s-%s' % (new_cid, doc_id)
                        # Disabled consistency check (if 0).
                        if 0:  #len(docids.keys()) > 1:
                            print 'Error More than One docs in formula ', [
                                mneumonic_txt, mneumonic_id, pdate,
                                str(period_type), docids
                            ]
                            xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
                        if dcname == '' and gv_txt:  # and len(docids.keys()) == 1:
                            print 'Error Document Mapping not found ', [
                                doc_id
                            ]
                            # Deliberate crash marker (NameError).
                            xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
                        done_docs[dcname] = 1
                        dt_lst = [
                            client_id, dcname, pdate,
                            str(period_type), rep_rst_flg, mneumonic_txt,
                            mneumonic_id, info_ref,
                            str(tmp_ttype),
                            str(clean_value),
                            str(gv_txt),
                            str(vaur), vt_c, currency,
                            str(period_type), tmpcalc_value
                        ]
                        if clean_value and (not vaur.strip(
                        )) and tmpcalc_value == 'false':
                            print 'Error empty scale for reported value ', dt_lst, scale_d, taxo_d
                            # Deliberate crash marker (NameError).
                            xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
                        print 'MMMMMMMMMMM', dt_lst, scale_d
                        print [
                            taxo_d, dt_dct.get(2, ''), dt_dct.get(38, '')
                        ], '\n'
                        info_str = '\t'.join(dt_lst)
                        f1.write(info_str + '\n')
    f1.close()
    return fp_path, done_docs
def insert_model(self, ijson):
    """Create or overwrite a spreadsheet template and persist its sheets.

    Registers the template header row and -- when a workbook ``path`` is
    supplied -- extracts every sheet and cell via ``get_sheets_new`` and
    bulk-inserts them into the template storage DB.  When a taxonomy sheet
    is present and ``type`` is 'fixed', the taxonomy is additionally pushed
    to the project DB via PYAPI.

    Expected ``ijson`` keys: 'path' (required by the first line), optional
    'template_name', 'template_id', 'industry', 'project', 'user', 'type'.
    Returns ``[{'message': 'done'}]``.

    NOTE(review): Python 2 code; indentation reconstructed from a
    whitespace-mangled source.
    """
    path = ijson['path']
    industry, project, user, ttype = ijson.get('industry', ''), ijson.get(
        'project', ''), ijson.get('user', ''), ijson.get('type', '')
    import sqlite_api
    db_path = config_obj.Template_storage
    sql_obj = sqlite_api.sqlite_api(db_path)
    # Template name: explicit, else derived from the file name (no extension).
    if ijson.get('template_name', ''):
        template_name = ijson['template_name']
    else:
        template_name = self.clean_name(path.split('/')[-1].split('.')[0])
    only_temp = 'N'
    if not ijson.get('path', ''):
        data = {}
    else:
        only_temp = 'Y'
        data = self.get_sheets_new(ijson)
    temp_extra_info = {}
    temp_extra_info['activesheet'] = data.get('activesheet')
    # Re-inserting an existing template wipes its previous rows first;
    # otherwise a fresh id is allocated.
    if ijson.get('template_id', ''):
        template_id = int(ijson['template_id'])
        sql_obj.deleted_temp_info(template_id)
    else:
        template_id = sql_obj.max_template_id()
    sql_obj.insert_template(template_id, template_name, industry, project,
                            ttype, user, json.dumps(temp_extra_info))
    if only_temp == 'N':
        # No workbook to parse: the template header row is all we store.
        return [{'message': 'done'}]
    sheets = []
    rows = []
    sheet_id = 1
    taxo_creation = 'N'
    for sheet_info in data['sheets']:
        print 'sheete', sheet_info['name']
        sheet = self.clean_name(sheet_info['name'])
        if sheet == config_obj.taxonomy_sheet:
            taxo_creation = 'Y'
        # Sheet metadata minus the bulky per-row payload.
        extra_info = copy.deepcopy(sheet_info)
        del extra_info['rows']
        if sheet_info['name'] == data['activeSheet']:
            extra_info['activesheet'] = 1
        sheets.append((template_id, template_name, sheet_id, sheet, user,
                       json.dumps(extra_info)))
        for row in sheet_info['rows']:
            for colinfo in row['cells']:
                col = colinfo['index']
                fromular_str = colinfo.get(
                    'formula', '')  #self.get_formula_str(col, colinfo)
                cell_alph = self.find_alp(col)
                level_id = colinfo.get('level', '')
                rows.append((template_id, sheet_id, row['index'], col,
                             str(colinfo), 'taxonomy', str(fromular_str),
                             cell_alph, level_id))
        sheet_id = sheet_id + 1
    sql_obj = sqlite_api.sqlite_api(db_path)
    sql_obj.insert_sheets(sheets)
    sql_obj.insert_sheet_data(rows)
    if taxo_creation == 'Y' and ttype == 'fixed':
        import pyapi
        py_obj = pyapi.PYAPI()
        print 'insertion code', template_id, project
        py_obj.insert_taxo_to_db(project, template_id)
    return [{'message': 'done'}]
import os, sys, json, copy, sqlite3
import datetime
import utils.convert as scale_convert
sconvert_obj = scale_convert.Convert()
import utils.numbercleanup as numbercleanup
numbercleanup_obj = numbercleanup.numbercleanup()
import compute_period_and_date
c_ph_date_obj = compute_period_and_date.PH()
import db.get_conn as get_conn
conn_obj = get_conn.DB()
import pyapi as pyf
p_Obj = pyf.PYAPI()


class Generate_Project_Txt(object):
    """Exports project/model data as text files (legacy Python 2 module)."""

    def __init__(self):
        # 1-based month-name -> month-number lookup used by date handling.
        names = ('January', 'February', 'March', 'April', 'May', 'June',
                 'July', 'August', 'September', 'October', 'November',
                 'December')
        self.month_map = dict(
            (name, num) for num, name in enumerate(names, 1))
# Stand-alone driver: kicks off the applicator for one hard-wired project.
# NOTE(review): this runs at import time as well -- there is no
# ``if __name__ == '__main__':`` guard (adding one would change behavior).
import pyapi

obj = pyapi.PYAPI()
ijson = {
    "company_id": "1053729",
    "db_name": "DataBuilder_1053729",
    "table_id": "4#3#11",
    "doc_id": 4,
    "project_id": 5,
    "template_id": 4,
    "RUN": "Y",
}
obj.auto_run_applicator(ijson)