def parse(self, pdf_path, data_date=None):
    """Parse every page of the PDF stored at ``pdf_path``.

    The document is opened with ``fitz.Document`` and kept on ``self.doc``;
    the data date is resolved through ``self.find_data_date(data_date)``.
    Each page is handed to ``self.process_page`` together with its index.

    Returns:
        A ``ParsingResult`` with unification info (and overlapping info when
        it holds values), a ``ParsingResult`` with warnings, or ``None``.

    Raises:
        ParseError: when no page yielded any data.
        UniqueError: when overlapping values were recorded and there is no
            unification info to report.
    """
    self.pdf_path = pdf_path
    self.doc = fitz.Document(pdf_path)
    self.date = self.find_data_date(data_date)

    # Keep only the pages for which process_page produced something.
    page_frames = []
    for page_num, page in enumerate(self.doc.pages()):
        frame = self.process_page(page, page_num)
        if frame is not None:
            page_frames.append(frame)

    if not page_frames:
        raise ParseError(
            self.pdf_path, 'No data found. Probably problems with encoding.')

    if self.unification_info:
        # Unification takes precedence; overlaps are attached, not raised.
        result_kwargs = {'unification_info': self.unification_info}
        if self.overlapping_info and self.overlapping_info['values']:
            result_kwargs['overlapping_info'] = self.overlapping_info
        return ParsingResult(**result_kwargs)

    if self.overlapping_info and self.overlapping_info['values']:
        raise UniqueError(self.overlapping_info)

    return ParsingResult(warnings=self.warnings) if self.warnings else None
def __render_response_for_notoria(request, overlapping):
    """Render the Notoria import page for a list of overlapping-data entries.

    Every entry of ``overlapping`` is enriched in place with an ``'exists'``
    key (rows returned by the lookup function registered for the entry's
    ``'table_name'``, each converted to a list) and sorted into one of three
    groups: balance sheet, financial ratios or DuPont indicators.

    A ``UniqueError`` is built for each non-empty group; empty groups get
    ``None``. All three groups are also passed to the template as JSON.
    """
    balance_sheet_entries = []
    ratio_entries = []
    dupont_entries = []

    # table name -> (lookup of existing rows, group that collects the entry)
    routing = {
        'Assets': (get_existing_data_balance_sheet, balance_sheet_entries),
        'EquityLiabilities': (get_existing_data_balance_sheet,
                              balance_sheet_entries),
        'AssetsCategories': (get_existing_data_balance_sheet,
                             balance_sheet_entries),
        'EquityLiabilitiesCategories': (get_existing_data_balance_sheet,
                                        balance_sheet_entries),
        'FinancialRatios': (get_existing_financial_ratios_for_parsed_file,
                            ratio_entries),
        'DuPontIndicators': (get_existing_dupont_indicators_for_parsed_file,
                             dupont_entries)
    }

    for entry in overlapping:
        fetch_existing, group = routing[entry['table_name']]
        entry['exists'] = [list(row) for row in fetch_existing(entry)]
        group.append(entry)

    balance_sheet_error = (UniqueError(*balance_sheet_entries)
                           if balance_sheet_entries else None)
    ratio_error = UniqueError(*ratio_entries) if ratio_entries else None
    dupont_error = UniqueError(*dupont_entries) if dupont_entries else None

    context = {
        'form': NotoriaImportForm(),
        'error_bs': balance_sheet_error,
        'error_fr': ratio_error,
        'error_dp': dupont_error,
        'overlap_bs': json.dumps(balance_sheet_entries),
        'overlap_fr': json.dumps(ratio_entries),
        'overlap_dp': json.dumps(dupont_entries)
    }
    return render(request, 'import/notoria.html', context)
def __render_response_for_stooq(request, overlapping):
    """Render the Stooq import page for a list of overlapping-data entries.

    Each entry of ``overlapping`` gets an ``'exists'`` key holding the rows
    returned by ``get_existing_data_stock_quotes`` for it (each row turned
    into a list). The template receives a ``UniqueError`` built from the
    first entry and the whole list serialised as JSON.
    """
    for entry in overlapping:
        stored_rows = get_existing_data_stock_quotes(entry)
        entry['exists'] = [list(row) for row in stored_rows]

    context = {
        'form': StooqImportForm(),
        # Only the first entry is wrapped; the full list goes out as JSON.
        'error': UniqueError(overlapping[0]),
        'overlap': json.dumps(overlapping)
    }
    return render(request, 'import/stooq.html', context)
def parse(self, pdf_path, data_date=None):
    """Parse the yearbook PDF stored at ``pdf_path``.

    When ``data_date`` is given it becomes ``self.date`` and determines
    ``self.data_year`` and ``self.yearbook_year`` (the following year).
    Otherwise ``self.find_data_date()`` is called and ``self.date`` is set to
    31 December of ``self.data_year`` (``find_data_date`` is presumably what
    fills ``data_year`` and ``yearbook_year`` in that case; confirm there).

    ``self.skip`` and ``self.stop_parsing`` are rebuilt from those years
    before every page is passed to ``self.process_page``.

    Returns:
        A ``ParsingResult`` with unification info (and overlapping info when
        it holds values), a ``ParsingResult`` with warnings, or ``None``.

    Raises:
        ParseError: when no page yielded any data.
        UniqueError: when overlapping values were recorded and there is no
            unification info to report.
    """
    self.pdf_path = pdf_path
    self.doc = fitz.Document(pdf_path)

    if not data_date:
        self.find_data_date()
        self.date = date(int(self.data_year), month=12, day=31)
    else:
        self.date = data_date
        self.data_year = data_date.year
        self.yearbook_year = data_date.year + 1

    self.skip = [
        f'Spółki według wartości rynkowej (na koniec {self.data_year} r.)',
        f'Spółki według wartości rynkowej (na koniec {self.data_year} r.) (cd.)',
        f'Spółki o największej wartości rynkowej na koniec {self.data_year} r.'
    ]
    self.stop_parsing = [
        'Razem spółki zagraniczne', 'Spółki zagraniczne razem:',
        f'{self.yearbook_year} Rocznik Giełdowy'
    ]

    # Keep only the pages for which process_page produced something.
    page_frames = []
    for page_num, page in enumerate(self.doc.pages()):
        frame = self.process_page(page, page_num)
        if frame is not None:
            page_frames.append(frame)

    if not page_frames:
        raise ParseError(
            self.pdf_path, 'No data found. Probably problems with encoding.')

    if self.unification_info:
        # Unification takes precedence; overlaps are attached, not raised.
        result_kwargs = {'unification_info': self.unification_info}
        if self.overlapping_info and self.overlapping_info['values']:
            result_kwargs['overlapping_info'] = self.overlapping_info
        return ParsingResult(**result_kwargs)

    if self.overlapping_info and self.overlapping_info['values']:
        raise UniqueError(self.overlapping_info)

    return ParsingResult(warnings=self.warnings) if self.warnings else None
def parse(self, path, data_date=None):
    """Parse the workbook stored at ``path`` sheet by sheet.

    The workbook is opened with ``xlrd.open_workbook(..., on_demand=True)``
    and kept on ``self.workbook``. ``self.get_date_and_sheet_names`` supplies
    both ``self.date`` and the names of the sheets handed to
    ``self.parse_sheet``.

    Returns:
        A ``ParsingResult`` with unification info (and overlapping info when
        it holds values), or ``None``.

    Raises:
        ParseError: when no sheet yielded a truthy result.
        UniqueError: when overlapping values were recorded and there is no
            unification info to report.
    """
    self.path = path
    self.workbook = xlrd.open_workbook(self.path, on_demand=True)
    self.date, sheet_names = self.get_date_and_sheet_names(data_date)

    # Parse all sheets first, then drop the falsy results.
    sheet_results = [self.parse_sheet(name) for name in sheet_names]
    non_empty_results = list(filter(None, sheet_results))
    if not non_empty_results:
        raise ParseError(self.path, 'No data found.')

    if self.unification_info:
        # Unification takes precedence; overlaps are attached, not raised.
        result_kwargs = {'unification_info': self.unification_info}
        if self.overlapping_info and self.overlapping_info['values']:
            result_kwargs['overlapping_info'] = self.overlapping_info
        return ParsingResult(**result_kwargs)

    if self.overlapping_info and self.overlapping_info['values']:
        raise UniqueError(self.overlapping_info)
    return None
def merge_companies(self, valid_data):
    """Merge the company under ``'chosen_from'`` into the one under ``'chosen_to'``.

    Steps, in the order they are executed:

    1. call every ``merge_*`` helper with the two company ids;
    2. query the rows that still reference the source company and, per table,
       collect those that differ from the target company's rows;
    3. delete the source company's remaining rows and the company itself;
    4. update the target company with the source company's name, ticker,
       ISIN, Bloomberg code and EKD section/class.

    Args:
        valid_data: mapping exposing ``.get``; the values under
            ``'chosen_from'`` and ``'chosen_to'`` must provide ``id`` and
            ``name`` (the source one also ``ticker``, ``isin``,
            ``bloomberg``, ``ekd_section_id`` and ``ekd_class_id``).

    Returns:
        The rendered ``manage/home.html`` page when any differing rows were
        collected, otherwise a redirect to ``self.get_success_url()`` after
        queuing ``self.success_message``.
    """
    chosen_from_company = valid_data.get('chosen_from')
    chosen_to_company = valid_data.get('chosen_to')
    chosen_from_name = chosen_from_company.name
    chosen_from = chosen_from_company.id
    chosen_to_name = chosen_to_company.name
    chosen_to = chosen_to_company.id
    # NOTE(review): the merge helpers presumably move the non-conflicting
    # rows to the target company, leaving only conflicts on the source one;
    # confirm against their definitions.
    merge_assets(chosen_from, chosen_to)
    merge_assets_categories(chosen_from, chosen_to)
    merge_equity_liabilities(chosen_from, chosen_to)
    merge_equity_liabilities_categories(chosen_from, chosen_to)
    merge_financial_ratios(chosen_from, chosen_to)
    merge_dupont_indicators(chosen_from, chosen_to)
    merge_stock_quotes(chosen_from, chosen_to)
    merge_market_values(chosen_from, chosen_to)
    ekd_section = chosen_from_company.ekd_section_id
    ekd_class = chosen_from_company.ekd_class_id
    # Unwrapped only when both are set; if just one is None the other one is
    # passed on as it is.
    if ekd_section is not None and ekd_class is not None:
        ekd_section = ekd_section.value
        ekd_class = ekd_class.value
    # Rows still attached to the source company after the merge helpers ran.
    overlapping_assets = Assets.objects.filter(
        company_id=chosen_from).order_by('date')
    overlapping_assets_categories = AssetsCategories.objects.filter(
        company_id=chosen_from).order_by('date')
    overlapping_equity_liabilities = EquityLiabilities.objects.filter(
        company_id=chosen_from).order_by('date')
    overlapping_equity_liabilities_categories = EquityLiabilitiesCategories.objects.filter(
        company_id=chosen_from).order_by('date')
    overlapping_financial_ratios = FinancialRatios.objects.filter(
        company_id=chosen_from).order_by('period_start', 'period_end')
    overlapping_dupont_indicators = DuPontIndicators.objects.filter(
        company_id=chosen_from).order_by('period_start', 'period_end')
    overlapping_stock_quotes = StockQuotes.objects.filter(
        company_id=chosen_from).order_by('date', 'interval')
    overlapping_balance_data = []
    overlapping_financial_ratios_data = []
    overlapping_dupont_indicators_data = []
    overlapping_stock_quotes_data = []

    def add_overlapping_balance(model, overlapping_values, overlapping_data):
        """Append the differing balance rows of ``model`` to ``overlapping_data``.

        Source and target rows are paired positionally (both ordered by
        date); pairs that are equal from the second element on are dropped.
        """
        merge_to = model.objects.filter(
            company_id=chosen_to,
            date__in=overlapping_values.values("date")).order_by(
                'date').values_list(flat=True)
        # [1:] drops the first column of every row (presumably the ``id``
        # primary key, which "columns" below excludes as well).
        merge_to_values = list(
            map(lambda x: list(x.values())[1:], merge_to.values()))
        merge_from_values = list(
            map(lambda x: list(x.values())[1:],
                overlapping_values.values_list(flat=True).values()))
        index = 0
        # appendleft keeps the positions in descending order, so the
        # deletions below do not shift positions that are still pending.
        indexes = deque()
        for f, t in zip(merge_from_values, merge_to_values):
            if f[1:] == list(t)[1:]:
                indexes.appendleft(index)
            index += 1
        for i in indexes:
            del merge_from_values[i]
            del merge_to_values[i]
        if merge_from_values:
            result = {
                "table_name":
                model.objects.model._meta.db_table,
                "columns": [
                    f.get_attname_column()[1]
                    for f in model._meta.get_fields() if f.name != 'id'
                ],
                "values":
                merge_from_values,
                "exists":
                merge_to_values
            }
            overlapping_data.append(result)
        return overlapping_data

    def add_overlapping_ratios(model, overlapping_values, overlapping_data):
        """Append the differing ratio rows of ``model`` to ``overlapping_data``.

        Only ``FinancialRatios`` and ``DuPontIndicators`` are handled; for any
        other model ``merge_to_values`` is never assigned.
        """
        overlapping_dates = overlapping_values.values_list(
            "period_start", "period_end")
        if model is FinancialRatios:
            merge_to_values = get_existing_data_financial_ratios(
                chosen_to, overlapping_dates)
        elif model is DuPontIndicators:
            merge_to_values = get_existing_data_dupont_indicators(
                chosen_to, overlapping_dates)
        merge_to_values = list(merge_to_values)
        merge_from_values = list(
            map(lambda x: list(x.values())[1:],
                overlapping_values.values_list(flat=True).values()))
        index = 0
        indexes = deque()
        for f, t in zip(merge_from_values, merge_to_values):
            # Target rows carry positions 1 and 2 as '%Y-%m-%d' strings; they
            # are parsed before being compared with the source row's values.
            if f[1] == datetime.strptime(
                    t[1], '%Y-%m-%d').date() and f[2] == datetime.strptime(
                        t[2], '%Y-%m-%d').date():
                if f[3:] == list(t)[3:]:
                    indexes.appendleft(index)
            index += 1
        # Descending positions: deleting never shifts a pending position.
        for i in indexes:
            del merge_from_values[i]
            del merge_to_values[i]
        if merge_from_values:
            result = {
                "table_name":
                model.objects.model._meta.db_table,
                "columns": [
                    f.get_attname_column()[1]
                    for f in model._meta.get_fields() if f.name != 'id'
                ],
                "values":
                merge_from_values,
                "exists":
                merge_to_values
            }
            overlapping_data.append(result)
        return overlapping_data

    def add_overlapping_stock_quotes(model, overlapping_values,
                                     overlapping_data):
        """Append the differing stock-quote rows to ``overlapping_data``.

        Only ``StockQuotes`` is handled; for any other model
        ``merge_to_values`` is never assigned.
        """
        overlapping_dates_intervals = overlapping_values.values_list(
            "date", "interval")
        if model is StockQuotes:
            merge_to_values = get_existing_data_stock_quotes_merge(
                chosen_to, overlapping_dates_intervals)
        merge_to_values = list(merge_to_values)
        merge_from_values = list(
            map(lambda x: list(x.values())[1:],
                overlapping_values.values_list(flat=True).values()))
        index = 0
        indexes = deque()
        for f, t in zip(merge_from_values, merge_to_values):
            # Position 1 is matched as a date and position 9 as-is
            # (presumably the interval - confirm); positions 2..8 are the
            # values being compared.
            if f[1] == datetime.strptime(
                    t[1], '%Y-%m-%d').date() and f[9] == t[9]:
                if f[2:9] == list(t)[2:9]:
                    indexes.appendleft(index)
            index += 1
        # Descending positions: deleting never shifts a pending position.
        for i in indexes:
            del merge_from_values[i]
            del merge_to_values[i]
        if merge_from_values:
            result = {
                "table_name":
                model.objects.model._meta.db_table,
                "columns": [
                    f.get_attname_column()[1]
                    for f in model._meta.get_fields() if f.name != 'id'
                ],
                "values":
                merge_from_values,
                "exists":
                merge_to_values
            }
            overlapping_data.append(result)
        return overlapping_data

    # All four balance-sheet tables feed the same result list.
    if overlapping_assets:
        overlapping_balance_data = add_overlapping_balance(
            Assets, overlapping_assets, overlapping_balance_data)
    if overlapping_assets_categories:
        overlapping_balance_data = add_overlapping_balance(
            AssetsCategories, overlapping_assets_categories,
            overlapping_balance_data)
    if overlapping_equity_liabilities:
        overlapping_balance_data = add_overlapping_balance(
            EquityLiabilities, overlapping_equity_liabilities,
            overlapping_balance_data)
    if overlapping_equity_liabilities_categories:
        overlapping_balance_data = add_overlapping_balance(
            EquityLiabilitiesCategories,
            overlapping_equity_liabilities_categories,
            overlapping_balance_data)
    if overlapping_financial_ratios:
        overlapping_financial_ratios_data = add_overlapping_ratios(
            FinancialRatios, overlapping_financial_ratios,
            overlapping_financial_ratios_data)
    if overlapping_dupont_indicators:
        overlapping_dupont_indicators_data = add_overlapping_ratios(
            DuPontIndicators, overlapping_dupont_indicators,
            overlapping_dupont_indicators_data)
    if overlapping_stock_quotes:
        overlapping_stock_quotes_data = add_overlapping_stock_quotes(
            StockQuotes, overlapping_stock_quotes,
            overlapping_stock_quotes_data)
    overlapping_market_values_data = self.add_overlapping_market_values(
        chosen_from, chosen_to)
    # The overlap data above has been materialised into lists, so the source
    # company's rows can now be removed.
    delete_from_assets(chosen_from)
    delete_from_assets_categories(chosen_from)
    delete_from_equity_liabilities(chosen_from)
    delete_from_equity_liabilities_categories(chosen_from)
    delete_from_stock_quotes(chosen_from)
    delete_from_financial_ratios(chosen_from)
    delete_from_dupont_indicators(chosen_from)
    delete_from_market_values(chosen_from)
    delete_company(chosen_from)
    # The target company takes over the source company's identifying data.
    update_company(
        chosen_to,
        company_unification.Company(
            name=chosen_from_company.name,
            ticker=chosen_from_company.ticker,
            isin=chosen_from_company.isin,
            bloomberg=chosen_from_company.bloomberg,
            ekd_section=ekd_section,
            ekd_class=ekd_class))
    if overlapping_balance_data or overlapping_financial_ratios_data or overlapping_dupont_indicators_data \
            or overlapping_stock_quotes_data or overlapping_market_values_data:
        # Groups without differing rows reach the template as empty lists.
        error_bs = []
        error_fr = []
        error_dp = []
        error_stock = []
        error_market_values = []
        overlap_bs = []
        overlap_fr = []
        overlap_dp = []
        overlap_stock = []
        overlap_market_values = []
        if overlapping_balance_data:
            error_bs = UniqueError(*overlapping_balance_data)
            overlap_bs = json.dumps(error_bs.overlapping_data, default=str)
        if overlapping_financial_ratios_data:
            error_fr = UniqueError(*overlapping_financial_ratios_data)
            overlap_fr = json.dumps(error_fr.overlapping_data, default=str)
        if overlapping_dupont_indicators_data:
            error_dp = UniqueError(*overlapping_dupont_indicators_data)
            overlap_dp = json.dumps(error_dp.overlapping_data, default=str)
        if overlapping_stock_quotes_data:
            error_stock = UniqueError(*overlapping_stock_quotes_data)
            overlap_stock = json.dumps(error_stock.overlapping_data,
                                       default=str)
        if overlapping_market_values_data:
            error_market_values = UniqueError(
                *overlapping_market_values_data)
            # NOTE(review): unlike the groups above, this serialises the raw
            # list rather than ``error_market_values.overlapping_data``.
            overlap_market_values = json.dumps(
                overlapping_market_values_data, default=str)
        return render(
            self.request, 'manage/home.html', {
                "chosen_from": chosen_from_name,
                "chosen_to": chosen_to_name,
                "error_bs": error_bs,
                "overlap_bs": overlap_bs,
                "error_fr": error_fr,
                "overlap_fr": overlap_fr,
                "error_dp": error_dp,
                "overlap_dp": overlap_dp,
                "error_stock": error_stock,
                "overlap_stock": overlap_stock,
                'error_mv': error_market_values,
                'overlap_mv': overlap_market_values
            })
    else:
        messages.success(self.request, self.success_message)
        return HttpResponseRedirect(self.get_success_url())
def parse(self, path, end_date=None):
    """Parse the ``'kap'`` sheet of the workbook at ``path`` and store its rows.

    Two layouts are recognised from the header row (index 4):

    * "isin" in column 1 and "nazwa" in column 2: ISIN, name and
      capitalization are read from columns 1, 2 and 4;
    * "nazwa" in column 1: name and capitalization are read from columns 1
      and 3 and the ISIN is passed as ``None``.

    Every row from index 8 on is passed to ``save_value_to_database`` with the
    capitalization multiplied by ``1e6`` and the date from ``self.get_date``.

    Returns:
        A ``ParsingResult`` with unification info (and overlapping info when
        it holds values), or ``None``.

    Raises:
        ParseError: when the header row matches neither layout.
        UniqueError: when overlapping values were recorded and there is no
            unification info to report.
    """
    self.path = path
    self.workbook = xlrd.open_workbook(path, on_demand=True)
    excel_sheet = self.workbook.sheet_by_name('kap')

    first_data_row = 8
    header_row = 4
    million = 1e6
    unification_info = []
    overlapping_info = {}
    end_date = self.get_date(end_date)

    def header_text(column):
        # Lower-cased header cell, read lazily so that column 2 is touched
        # only after column 1 matched.
        return excel_sheet.cell(header_row, column).value.lower()

    if "isin" in header_text(1) and "nazwa" in header_text(2):
        isin_column, name_column, capitalization_column = 1, 2, 4
    elif "nazwa" in header_text(1):
        # case where name is in place of isin
        isin_column, name_column, capitalization_column = None, 1, 3
    else:
        raise ParseError(
            path,
            '1: "ISIN" should be in B5 cell and "Nazwa" should be in C5 cell or 2: "Nazwa" '
            'should be in B5 cell')

    for row in range(first_data_row, excel_sheet.nrows):
        if isin_column is None:
            isin = None
        else:
            isin = excel_sheet.cell(row, isin_column).value
        name = excel_sheet.cell(row, name_column).value
        value = excel_sheet.cell(row, capitalization_column).value * million
        save_value_to_database(name, isin, value, end_date, overlapping_info,
                               unification_info, self.save, self.override)

    if unification_info:
        # Unification takes precedence; overlaps are attached, not raised.
        result_kwargs = {'unification_info': unification_info}
        if overlapping_info and overlapping_info['values']:
            result_kwargs['overlapping_info'] = overlapping_info
        return ParsingResult(**result_kwargs)

    if overlapping_info and overlapping_info['values']:
        raise UniqueError(overlapping_info)
    return None
def parse_ratios(self,
                 path,
                 sheet_name,
                 ratio_name,
                 table_name,
                 override=False,
                 save=False):
    """Parse one ratios section of a sheet and store it column by column.

    The sheet is scanned downwards in column 2 for a cell equal to
    ``ratio_name``. The row below it holds the period labels; the rows after
    that hold one attribute each until the first empty attribute cell.

    Args:
        path: workbook path, passed to ``get_sheet`` and used in errors.
        sheet_name: must be one of ``self.available_sheets``.
        ratio_name: section title to look for.
        table_name: destination table; also the key into the
            "exactly same" comparison functions ('FinancialRatios' or
            'DuPontIndicators').
        override: replace existing rows instead of inserting.
        save: insert through ``insert_values``.

    Returns:
        ``ParsingResult([unification_info])`` when unification info exists
        and holds data, otherwise ``None``.

    Raises:
        ParseError: when ``sheet_name`` is not available.
        UniqueError: when differing rows already exist and neither
            ``override`` nor ``save`` was requested.
    """
    function_mapping = {
        'FinancialRatios': exactly_same_financial_ratios,
        'DuPontIndicators': exactly_same_dupont_indicators
    }
    if sheet_name not in self.available_sheets:
        raise ParseError(path, "Available sheet names: QS, YS")
    excel_sheet = get_sheet(path, sheet_name)
    is_directory_import = override or save
    company_id, unification_info = self.get_company_id_balance_sheet(
        path, is_directory_import)
    # Hard-coded rows where the scan starts (presumably just above the
    # respective section in the expected file layout - confirm).
    curr_row = 200
    if ratio_name == 'DuPont indicators':
        curr_row = 225
    curr_column = 2
    ratios = [company_id]
    overlapping_ratios = {}
    while curr_row < excel_sheet.nrows:
        if excel_sheet.cell(curr_row, curr_column).value == ratio_name:
            attributes_column = curr_column
            curr_column += 1
            dates_row = curr_row + 1
            curr_row += 2
            attributes = ['CompanyID', 'Period start', 'Period end']
            # One pass per period column.
            while curr_column < excel_sheet.ncols:
                date_value = excel_sheet.cell(dates_row, curr_column).value
                # Columns without a period label are skipped.
                if not date_value:
                    curr_column += 1
                    continue
                period_start, period_end = get_start_end_date(date_value)
                ratios += [period_start, period_end]
                # Walk down the attribute names until the first empty cell.
                while excel_sheet.cell(curr_row,
                                       attributes_column).value != '':
                    attribute = excel_sheet.cell(curr_row,
                                                 attributes_column).value
                    curr_value = excel_sheet.cell(curr_row,
                                                  curr_column).value
                    attributes.append(attribute)
                    insert_float_value(ratios, curr_value)
                    curr_row += 1
                if unification_info is not None:
                    # Nothing is written; the data is queued on the
                    # unification info instead.
                    unification_info.add_data(table_name=table_name,
                                              columns=attributes,
                                              data=ratios)
                else:
                    if override:
                        common.DAL.db_queries_insert.replace_values(
                            table_name=table_name,
                            columns=attributes,
                            values=ratios)
                    elif save:
                        common.DAL.db_queries_insert.insert_values(
                            table_name=table_name,
                            columns=attributes,
                            values=ratios)
                    else:
                        try:
                            common.DAL.db_queries_insert.insert_values_without_ignore(
                                table_name=table_name,
                                columns=attributes,
                                values=ratios)
                        except IntegrityError:
                            # A conflicting row exists; it is reported only
                            # when the comparison function returns false.
                            if not function_mapping[table_name](attributes,
                                                                ratios):
                                if not overlapping_ratios:
                                    init_overlapping_info(
                                        overlapping_ratios, table_name,
                                        attributes)
                                overlapping_ratios["values"].append(ratios)
                # Reset the accumulators and rewind to the first attribute
                # row for the next period column.
                attributes = ['CompanyID', 'Period start', 'Period end']
                ratios = [company_id]
                curr_column += 1
                curr_row = dates_row + 1
            # The section has been processed; stop scanning.
            break
        curr_row += 1
    if unification_info is not None and unification_info.data:
        return ParsingResult([unification_info])
    if overlapping_ratios and not is_directory_import:
        raise UniqueError(overlapping_ratios)
    return None
def parse_balance_sheet(self, path, sheet_name, override=False, save=False):
    """Parse the 'Balance sheet' section of a sheet and store it per period.

    The sheet is scanned downwards in column 2 for the cell
    ``'Balance sheet'``. The row below it holds the dates and the row after
    that is used to decide whether a period column has data. Each period
    column yields four rows: assets, assets categories, equity/liabilities
    and equity/liabilities categories.

    Args:
        path: workbook path, passed to ``get_sheet`` and used in errors.
        sheet_name: must be one of ``self.available_sheets``.
        override: replace existing rows instead of inserting.
        save: insert through ``insert_values``.

    Returns:
        ``ParsingResult([unification_info])`` when unification info exists
        and holds data, otherwise ``None``.

    Raises:
        ParseError: when ``sheet_name`` is not available.
        UniqueError: when differing rows already exist and neither
            ``override`` nor ``save`` was requested.
    """
    if sheet_name not in self.available_sheets:
        raise ParseError(path, "Available sheet names: QS, YS")
    excel_sheet = get_sheet(path, sheet_name)
    is_directory_import = override or save
    company_id, unification_info = self.get_company_id_balance_sheet(
        path, is_directory_import)
    curr_row = 0
    curr_column = 2
    # Value rows for the current period column; each starts with the company
    # id and is reset after the column has been stored.
    assets = [company_id]
    assets_categories = [company_id]
    equity_liabilities = [company_id]
    equity_liabilities_categories = [company_id]
    # Filled by init_overlapping_info on the first differing row per table.
    overlapping_assets = {}
    overlapping_assets_categories = {}
    overlapping_equity_liabilities = {}
    overlapping_equity_liabilities_categories = {}
    while curr_row < excel_sheet.nrows:
        if excel_sheet.cell(curr_row, curr_column).value == 'Balance sheet':
            attributes_column = curr_column
            curr_column += 1
            dates_row = curr_row + 1
            sum_row = dates_row + 1
            curr_row += 3
            assets_attributes = ['CompanyID', 'Date']
            equity_liabilities_categories_attributes = [
                'CompanyID', 'Date'
            ]
            assets_categories_attributes = ['CompanyID', 'Date']
            equity_liabilities_attributes = ['CompanyID', 'Date']
            # NOTE(review): set once here and never reset per column, unlike
            # ``different_eq_exist`` below - confirm this is intended.
            different_assets_exist = False
            while curr_column < excel_sheet.ncols:
                # Skip period columns whose cell in the sum row is empty.
                if not excel_sheet.cell(sum_row, curr_column).value:
                    curr_column += 1
                    continue
                # Every value row carries the period date right after the
                # company id.
                date_value = excel_sheet.cell(dates_row, curr_column).value
                assets.append(date_value)
                assets_categories.append(date_value)
                equity_liabilities.append(date_value)
                equity_liabilities_categories.append(date_value)
                # Walk the asset attributes until the first empty cell.
                while excel_sheet.cell(curr_row,
                                       attributes_column).value != '':
                    attribute = excel_sheet.cell(curr_row,
                                                 attributes_column).value
                    curr_value = excel_sheet.cell(curr_row,
                                                  curr_column).value
                    if attribute in self.assets_categories:
                        assets_categories_attributes.append(attribute)
                        insert_float_value(assets_categories, curr_value)
                    elif attribute in self.detailed_assets:
                        assets_attributes.append(attribute)
                        insert_float_value(assets, curr_value)
                    else:
                        # Unknown attribute: its value is not stored.
                        different_assets_exist = True
                    curr_row += 1
                # When an unknown attribute was seen, every known attribute
                # missing from the sheet is added with an empty value.
                if different_assets_exist:
                    for a in self.detailed_assets:
                        if a not in assets_attributes:
                            assets_attributes.append(a)
                            insert_float_value(assets, '')
                    for ac in self.assets_categories:
                        if ac not in assets_categories_attributes:
                            assets_categories_attributes.append(ac)
                            insert_float_value(assets_categories, '')
                # Skip the header rows between the two sections.
                curr_row += 2
                # Walk the equity and liabilities attributes until the
                # 'Date of publication' row.
                different_eq_exist = False
                while excel_sheet.cell(
                        curr_row,
                        attributes_column).value != 'Date of publication':
                    attribute = excel_sheet.cell(curr_row,
                                                 attributes_column).value
                    curr_value = excel_sheet.cell(curr_row,
                                                  curr_column).value
                    if attribute in self.equity_liabilities_categories:
                        equity_liabilities_categories_attributes.append(
                            attribute)
                        insert_float_value(equity_liabilities_categories,
                                           curr_value)
                    elif attribute in self.detailed_equity_liabilities:
                        equity_liabilities_attributes.append(attribute)
                        insert_float_value(equity_liabilities, curr_value)
                    else:
                        # Unknown attribute: its value is not stored.
                        different_eq_exist = True
                    curr_row += 1
                if different_eq_exist:
                    for e in self.detailed_equity_liabilities:
                        if e not in equity_liabilities_attributes:
                            equity_liabilities_attributes.append(e)
                            insert_float_value(equity_liabilities, '')
                    for eqc in self.equity_liabilities_categories:
                        if eqc not in equity_liabilities_categories_attributes:
                            equity_liabilities_categories_attributes.append(
                                eqc)
                            insert_float_value(
                                equity_liabilities_categories, '')
                if unification_info is not None:
                    # Nothing is written; the four rows are queued on the
                    # unification info instead.
                    data_to_insert = [
                        ("Assets", assets_attributes, assets),
                        ("EquityLiabilities", equity_liabilities_attributes,
                         equity_liabilities),
                        ("AssetsCategories", assets_categories_attributes,
                         assets_categories),
                        ("EquityLiabilitiesCategories",
                         equity_liabilities_categories_attributes,
                         equity_liabilities_categories),
                    ]
                    for data in data_to_insert:
                        unification_info.add_data(table_name=data[0],
                                                  columns=data[1],
                                                  data=data[2])
                else:
                    if override:
                        common.DAL.db_queries_insert.replace_values(
                            table_name="Assets",
                            columns=assets_attributes,
                            values=assets)
                        common.DAL.db_queries_insert.replace_values(
                            table_name="EquityLiabilities",
                            columns=equity_liabilities_attributes,
                            values=equity_liabilities)
                        common.DAL.db_queries_insert.replace_values(
                            table_name="AssetsCategories",
                            columns=assets_categories_attributes,
                            values=assets_categories)
                        common.DAL.db_queries_insert.replace_values(
                            table_name="EquityLiabilitiesCategories",
                            columns=
                            equity_liabilities_categories_attributes,
                            values=equity_liabilities_categories)
                    elif save:
                        common.DAL.db_queries_insert.insert_values(
                            table_name="Assets",
                            columns=assets_attributes,
                            values=assets)
                        common.DAL.db_queries_insert.insert_values(
                            table_name="EquityLiabilities",
                            columns=equity_liabilities_attributes,
                            values=equity_liabilities)
                        common.DAL.db_queries_insert.insert_values(
                            table_name="AssetsCategories",
                            columns=assets_categories_attributes,
                            values=assets_categories)
                        common.DAL.db_queries_insert.insert_values(
                            table_name="EquityLiabilitiesCategories",
                            columns=
                            equity_liabilities_categories_attributes,
                            values=equity_liabilities_categories)
                    else:
                        # Plain import: each table is tried on its own, and a
                        # conflicting row is reported only when the matching
                        # exactly_same_* check returns false.
                        try:
                            common.DAL.db_queries_insert.insert_values_without_ignore(
                                table_name="Assets",
                                columns=assets_attributes,
                                values=assets)
                        except IntegrityError:
                            if not exactly_same_assets(
                                    assets_attributes, assets):
                                if not overlapping_assets:
                                    init_overlapping_info(
                                        overlapping_assets, "Assets",
                                        assets_attributes)
                                overlapping_assets["values"].append(assets)
                        try:
                            common.DAL.db_queries_insert.insert_values_without_ignore(
                                table_name="EquityLiabilities",
                                columns=equity_liabilities_attributes,
                                values=equity_liabilities)
                        except IntegrityError:
                            if not exactly_same_equity_liabilities(
                                    equity_liabilities_attributes,
                                    equity_liabilities):
                                if not overlapping_equity_liabilities:
                                    init_overlapping_info(
                                        overlapping_equity_liabilities,
                                        "EquityLiabilities",
                                        equity_liabilities_attributes)
                                overlapping_equity_liabilities[
                                    "values"].append(equity_liabilities)
                        try:
                            common.DAL.db_queries_insert.insert_values_without_ignore(
                                table_name="AssetsCategories",
                                columns=assets_categories_attributes,
                                values=assets_categories)
                        except IntegrityError:
                            if not exactly_same_assets_categories(
                                    assets_categories_attributes,
                                    assets_categories):
                                if not overlapping_assets_categories:
                                    init_overlapping_info(
                                        overlapping_assets_categories,
                                        "AssetsCategories",
                                        assets_categories_attributes)
                                overlapping_assets_categories[
                                    "values"].append(assets_categories)
                        try:
                            common.DAL.db_queries_insert.insert_values_without_ignore(
                                table_name="EquityLiabilitiesCategories",
                                columns=
                                equity_liabilities_categories_attributes,
                                values=equity_liabilities_categories)
                        except IntegrityError:
                            if not exactly_same_equity_liabilities_categories(
                                    equity_liabilities_categories_attributes,
                                    equity_liabilities_categories):
                                if not overlapping_equity_liabilities_categories:
                                    init_overlapping_info(
                                        overlapping_equity_liabilities_categories,
                                        "EquityLiabilitiesCategories",
                                        equity_liabilities_categories_attributes
                                    )
                                overlapping_equity_liabilities_categories[
                                    "values"].append(
                                        equity_liabilities_categories)
                # Reset the accumulators and rewind to the first attribute
                # row for the next period column.
                assets_attributes = ['CompanyID', 'Date']
                assets_categories_attributes = ['CompanyID', 'Date']
                equity_liabilities_attributes = ['CompanyID', 'Date']
                equity_liabilities_categories_attributes = [
                    'CompanyID', 'Date'
                ]
                assets = [company_id]
                equity_liabilities = [company_id]
                assets_categories = [company_id]
                equity_liabilities_categories = [company_id]
                curr_column += 1
                curr_row = sum_row + 1
            # The section has been processed; stop scanning.
            break
        curr_row += 1
    # Only the tables that actually reported differing rows are included.
    overlapping_data = []
    if overlapping_assets:
        overlapping_data.append(overlapping_assets)
    if overlapping_assets_categories:
        overlapping_data.append(overlapping_assets_categories)
    if overlapping_equity_liabilities:
        overlapping_data.append(overlapping_equity_liabilities)
    if overlapping_equity_liabilities_categories:
        overlapping_data.append(overlapping_equity_liabilities_categories)
    if unification_info is not None and unification_info.data:
        return ParsingResult([unification_info])
    if overlapping_data and not is_directory_import:
        raise UniqueError(*overlapping_data)
    return None