def get_company_data(self, parser, ticker, type, t):
    """Scrape all report pages for ``ticker`` and append them to ``reports``.

    Pages are fetched with ``self.scrape_company_data(ticker, type, t, counter)``
    and fed to ``parser``. Scraping stops at the first page for which fewer
    than 50 items end up in the module-level ``htmlData`` list.

    For every page, one ``FinancialReport`` is created per header cell
    (at most four) and appended to the module-level ``reports`` list; the
    value cells are then read row by row and stored on those reports.

    Args:
        parser: Object with a ``feed`` method. Presumably an HTML parser that
            appends tag names ('th', 'td') and cell text to ``htmlData`` --
            TODO confirm against the parser implementation.
        ticker: Passed through to ``scrape_company_data`` and printed.
        type: Passed through to ``scrape_company_data`` and printed.
            (The name shadows the builtin; kept for interface compatibility.)
        t: Passed through to ``scrape_company_data``.

    Returns:
        None. Results are delivered through the shared ``reports`` list.

    Raises:
        ValueError: If an expected 'th' or 'td' marker is missing from
            ``htmlData`` (raised by ``list.index``).
    """
    global htmlData

    # Attribute names in the order their rows are consumed from the page.
    report_fields = (
        'revenue_from_sale_of_merchandise_and_raw_materials',
        'profit_from_operating_activities',
        'gross_profit',
        'net_profit',
        'net_flow',
        'net_operating_flow',
        'net_investments_flow',
        'net_financial_flow',
        'assets',
        'liabilities_and_provision_for_liabilities',
        'non_current_liabilities',
        'current_liabilities',
        'owners_equity',
        'share_capital',
        'number_of_shares',
        'book_worth_per_share',
        'profit_per_share',
        'diluted_number_of_shares',
        'diluted_book_worth_per_share',
        'diluted_profit_per_share',
        'dividend_per_share',
    )
    max_reports_per_page = 4

    counter = 0
    print("Started scraping for ", ticker, " ", type)
    while True:
        htmlData = []
        # parser should be a composition in this class, doesn't work though
        parser.feed(self.scrape_company_data(ticker, type, t, counter))
        if len(htmlData) < 50:
            break

        # Find report dates and create one report object per header cell.
        # Keeping direct references (instead of indexing reports[counter + i])
        # guarantees the values below land on the reports created for this
        # page, whatever the shared list already contained.
        page_reports = []
        for _ in range(max_reports_per_page):
            if 'th' not in htmlData:
                break
            index = htmlData.index('th')
            htmlData.pop(index)
            report = FinancialReport()
            reports.append(report)
            page_reports.append(report)
            # After popping the marker, the header text sits at the same index.
            report.date = htmlData[index]
            index = htmlData.index('th')
            htmlData.pop(index)

        # One row per field, one cell per report found on this page.
        # Iterating over page_reports uses the real number of reports; the
        # previous "i + 1 if i == 3 else i" reported 4 for a 3-report page.
        for field in report_fields:
            for report in page_reports:
                index = htmlData.index('td')
                htmlData.pop(index)
                cell = htmlData[index]
                if Helper.isReal(cell):
                    # Drop all whitespace, e.g. thousands separators.
                    setattr(report, field, ''.join(cell.strip().split()))
                # The closing marker is consumed whether or not the value was
                # numeric, so a skipped cell does not shift later cells.
                index = htmlData.index('td')
                htmlData.pop(index)

        counter += max_reports_per_page
    print("Finished scraping successfully for ", ticker, " ", type)