def start_requests(self):
    """Yield one request per tracked company found in the newest NSE list.

    The CSV file name is built from ``self.get_newest_company_file("NSE")``.
    The header row is skipped.  Every following row whose first column
    (decoded as GBK and re-encoded as UTF-8) appears in ``self.code_list``
    produces an ``IndiaItem`` and a ``scrapy.Request`` for
    ``self.url1 + symbol + self.url2`` with ``self.parse`` as callback.
    The item and ``self.link + symbol`` are passed in ``meta`` under the
    keys ``"item"`` and ``"link"``.

    Blank CSV rows are skipped instead of raising ``IndexError``.
    """
    newest_name_num = self.get_newest_company_file("NSE")
    # Raw literal: the backslashes in this path must not be read as escape
    # sequences.  The resulting string is unchanged.
    excel_name = (
        r"D:\item\OPDCMS\listed company update\india\data\companyList/NSE_"
        + newest_name_num + ".csv"
    )

    def to_utf8(cell):
        # Decode the cell as GBK and re-encode it as UTF-8.
        return cell.decode("gbk").encode("utf-8")

    # ``with`` closes the file once the generator is exhausted or closed;
    # without it the handle is never released.
    with open(excel_name, "r") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                # csv.reader returns [] for a blank line.
                continue
            symbol = to_utf8(row[0])
            if symbol not in self.code_list:
                continue

            item = IndiaItem()
            item["security_code"] = None
            item["info_disclosure_id"] = symbol
            # company_id_list is read at the position symbol has in code_list.
            item["company_id"] = self.company_id_list[self.code_list.index(symbol)]
            item["name_origin"] = to_utf8(row[1])
            item["name_en"] = item["name_origin"]
            item["First_Listing_Date"] = to_utf8(row[2])
            item["Face_Value"] = to_utf8(row[3])
            item["Paid_Up_Value"] = to_utf8(row[4])
            item["Market_Lot"] = to_utf8(row[5])
            item["ISIN"] = to_utf8(row[6])
            item["country_code_listed"] = "IND"
            # NOTE(review): this reads the NSE list but tags the item "BSE";
            # looks like a copy-paste leftover - confirm before changing.
            item["exchange_market_code"] = "BSE"
            item["currency_code"] = "INR"
            # strftime without a time tuple formats the current local time.
            item["gmt_create"] = time.strftime('%Y-%m-%d %H:%M:%S')
            item["user_create"] = "root"
            item["website_url"] = None
            item["status"] = None

            url = self.url1 + str(symbol) + self.url2
            link = self.link + str(symbol)
            yield scrapy.Request(url, callback=self.parse,
                                 meta={"item": item, "link": link})
def start_requests(self):
    """Yield one request per tracked company found in the newest BSE list.

    The CSV file name is built from ``self.get_newest_company_file("BSE")``.
    The header row is skipped.  Every following row whose first column
    appears in ``self.code_list`` produces an ``IndiaItem`` filled from
    columns 0-8 and a ``scrapy.Request`` for ``self.url + code + "/"`` with
    ``self.parse`` as callback.  The item is passed in ``meta["item"]``.

    Blank CSV rows are skipped instead of raising ``IndexError``.
    """
    newest_name_num = self.get_newest_company_file("BSE")
    excel_name = ("/data/OPDCMS/india/listed_company_update/company_list/BSE_"
                  + newest_name_num + ".csv")

    # ``with`` closes the file once the generator is exhausted or closed;
    # without it the handle is never released.
    with open(excel_name, "r") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                # csv.reader returns [] for a blank line.
                continue
            code = row[0]
            if code not in self.code_list:
                continue

            item = IndiaItem()
            item["security_code"] = code
            # company_id_list is read at the position code has in code_list.
            item["company_id"] = self.company_id_list[self.code_list.index(code)]
            item["Symbol"] = row[1]
            item["name_origin"] = row[2]
            item["name_en"] = item["name_origin"]
            item["status"] = row[3]
            item["Group_Num"] = row[4]
            item["Face_Value"] = row[5]
            item["ISIN"] = row[6]
            item["Industry"] = row[7]
            item["Instrument"] = row[8]
            item["country_code_listed"] = "IND"
            item["exchange_market_code"] = "BSE"
            item["currency_code"] = "INR"
            # strftime without a time tuple formats the current local time.
            item["gmt_create"] = time.strftime('%Y-%m-%d %H:%M:%S')
            item["user_create"] = "zx"
            item["info_disclosure_id"] = None

            url = self.url + str(code) + "/"
            yield scrapy.Request(url, callback=self.parse, meta={"item": item})
def parse(self, response):
    """Build one ``IndiaItem`` for every table row after the first.

    Rows are selected with
    ``//div[@class="content"]//table[@cellspacing="1"]//tr``.  The text of
    the first cell becomes ``fiscal_year`` and the link in the second cell
    becomes ``doc_source_url``.  For each row the company's entry in
    ``self.report_num_dict`` is incremented, passed through
    ``self.go_heavy_num``, stored back and appended to the report id.

    Yields:
        IndiaItem: one item per remaining row, typed "FY".

    Raises:
        IndexError: if a row lacks the first-cell text or second-cell link.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    rows = response.xpath(
        '//div[@class="content"]//table[@cellspacing="1"]//tr')

    # The first <tr> is never turned into an item.
    for row in rows[1:]:
        report = IndiaItem()

        year = row.xpath('./td[1]/text()').extract()[0]
        report["fiscal_year"] = year
        report["doc_source_url"] = row.xpath('./td[2]/a/@href').extract()[0]

        company = response.meta["company_id"]
        report["company_code"] = company
        report["country_code"] = "IND"
        report["exchange_market_code"] = "BSE"
        report["financial_reporting_standard_code"] = "IFRS/IND AS"
        report["doc_type"] = "pdf"
        report["is_doc_url_direct"] = 1
        report["is_downloaded"] = 1
        report["currency_code"] = "INR"
        report["doc_downloaded_timestamp"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        report["language_written_code"] = "en"

        # Advance the per-company counter and remember the new value.
        serial = self.go_heavy_num(int(self.report_num_dict[company]) + 1)
        self.report_num_dict[company] = serial

        report_id = company + year + "00" + "06" + "01" + serial
        report["report_id"] = report_id
        report["doc_local_path"] = (
            "/volum1/homes/India/" + year + "/" + report_id + ".pdf")
        report["gmt_create"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        report["user_create"] = "root"
        report["financial_statement_season_type_code"] = "FY"
        report["file_name"] = report_id
        report["jud"] = 0
        yield report
def parse(self, response):
    """Yield annual-report items newer than ``meta["max_fiscal_year"]``.

    Rows are selected with ``//table[@class="ng-scope"]/tbody/tr``.  A row
    is kept only when the integer value of its first cell is greater than
    the integer value of ``response.meta["max_fiscal_year"]``.  The report
    id is the company id followed by ``self.uniqueIDMaker()``.

    Yields:
        IndiaItem: one item per kept row, typed "FY".

    Raises:
        IndexError: if a row lacks the first-cell text or, for a kept row,
            the second-cell link.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    max_fiscal_year = response.meta["max_fiscal_year"]

    for row in response.xpath('//table[@class="ng-scope"]/tbody/tr'):
        report = IndiaItem()
        year = row.xpath('./td[1]/text()').extract()[0]
        report["fiscal_year"] = year

        # Skip years that are not newer than the recorded maximum.
        if int(year) <= int(max_fiscal_year):
            continue

        href = row.xpath('./td[2]/a/@href').extract()[0]
        report["doc_source_url"] = "https://www.bseindia.com" + href

        company = response.meta["company_id"]
        report["company_code"] = company
        report["country_code"] = "IND"
        report["exchange_market_code"] = "BSE"
        report["financial_reporting_standard_code"] = "IFRS/IND AS"
        report["doc_type"] = "pdf"
        report["is_doc_url_direct"] = 1
        report["is_downloaded"] = 1
        report["currency_code"] = "INR"
        report["doc_downloaded_timestamp"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        report["language_written_code"] = "en"

        report_id = company + self.uniqueIDMaker()
        report["report_id"] = report_id
        report["doc_local_path"] = (
            "/volume3/homes3/India/" + year + "/" + report_id + ".pdf")
        report["gmt_create"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        report["user_create"] = "zx"
        report["financial_statement_season_type_code"] = "FY"
        report["file_name"] = report_id
        report["jud"] = 0
        yield report
def download(self, response):
    """Yield a CSV statement item when its period ends after ``newstdate``.

    The period start and end are read from rows 2 and 3 of the table with
    id ``ContentPlaceHolder1_tbl_typeID``.  Each is split on "-" and
    rebuilt as ``"20" + last + "-" + month + "-" + first + " 00:00:00"``,
    with English month abbreviations replaced by two-digit numbers.  If
    the end timestamp compares greater (as a string) than
    ``str(response.meta["newstdate"])``, the table rows after the first
    four are handed to ``self.saveExcel`` and the item is yielded.

    Any ``IndexError`` raised along the way is swallowed and nothing is
    yielded.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    table = '//table[@id="ContentPlaceHolder1_tbl_typeID"]'
    month_numbers = (
        ("Jan", "01"), ("Feb", "02"), ("Mar", "03"), ("Apr", "04"),
        ("May", "05"), ("Jun", "06"), ("Jul", "07"), ("Aug", "08"),
        ("Sep", "09"), ("Oct", "10"), ("Nov", "11"), ("Dec", "12"),
    )

    def as_timestamp(parts):
        # Same substring replacements, in the same order, as a chained
        # .replace("Jan", "01")...replace("Dec", "12").
        month = parts[1]
        for name, number in month_numbers:
            month = month.replace(name, number)
        return "20" + parts[-1] + "-" + month + "-" + parts[0] + " 00:00:00"

    newstdate = response.meta["newstdate"]
    record = IndiaItem()
    try:
        start_text = response.xpath(
            table + '//tr[2]/td[2]/text()').extract()[0]
        end_text = response.xpath(
            table + '//tr[3]/td[2]/text()').extract()[0]
        rows = response.xpath(table + '//tr')

        record["start_date"] = as_timestamp(str(start_text).split("-"))
        end_date = as_timestamp(str(end_text).split("-"))
        record["end_date"] = end_date

        # String comparison against the newest known date.
        if end_date <= str(newstdate):
            return

        company = response.meta["company_id"]
        record["company_code"] = company
        year = str(end_date).split("-")[0]
        record["fiscal_year"] = year
        record["country_code"] = "IND"
        record["exchange_market_code"] = "BSE"
        record["financial_reporting_standard_code"] = "IFRS/IND AS"
        record["doc_type"] = "csv"
        record["source_url"] = response.url
        record["doc_source_url"] = None
        record["is_doc_url_direct"] = 1
        record["is_downloaded"] = 1
        record["currency_code"] = "INR"
        record["doc_downloaded_timestamp"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        record["language_written_code"] = "en"

        report_id = company + self.uniqueIDMaker()
        record["report_id"] = report_id
        record["doc_local_path"] = (
            "/volume3/homes3/India/" + year + "/" + report_id + ".csv")
        record["gmt_create"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        record["user_create"] = "zx"
        record["financial_statement_type_code"] = "IS"
        record["financial_statement_season_type_code"] = response.meta["type"]

        # The first four table rows are not passed on to saveExcel.
        self.saveExcel(rows[4:], report_id)
        yield record
    except IndexError:
        # Best effort: a page without the expected cells yields nothing.
        return
def getzip(self, response):
    """Yield an NSE report item for the first download link on the page.

    Nothing is yielded when the page has no link under
    ``//table[@class="viewTable"]//td[@class="t0"]/a``.  Otherwise the
    period comes from ``meta["FromDate"]`` / ``meta["ToDate"]``, each split
    on "-" and rebuilt as ``last-month-first 00:00:00`` with English month
    abbreviations replaced by two-digit numbers.  The company's entry in
    ``self.report_num_dict`` is advanced through ``self.go_heavy_num``
    before the duplicate check.  The item is yielded only if the part of
    the report id matched by ``self.pattern2`` is absent from
    ``meta["report_id_list"]``.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    month_numbers = (
        ("Jan", "01"), ("Feb", "02"), ("Mar", "03"), ("Apr", "04"),
        ("May", "05"), ("Jun", "06"), ("Jul", "07"), ("Aug", "08"),
        ("Sep", "09"), ("Oct", "10"), ("Nov", "11"), ("Dec", "12"),
    )

    def as_timestamp(parts):
        # Same substring replacements, in the same order, as a chained
        # .replace("Jan", "01")...replace("Dec", "12").
        month = parts[1]
        for name, number in month_numbers:
            month = month.replace(name, number)
        return parts[-1] + "-" + month + "-" + parts[0] + " 00:00:00"

    known_ids = response.meta["report_id_list"]
    record = IndiaItem()
    links = response.xpath(
        '//table[@class="viewTable"]//td[@class="t0"]/a/@href').extract()
    if not links:
        return

    record["doc_source_url"] = "https://www.nseindia.com" + links[0]
    season_num = response.meta["season_num"]
    season_type_code = response.meta["season_type_code"]
    company = response.meta["company_id"]
    record["company_code"] = company
    from_parts = str(response.meta["FromDate"]).split("-")
    to_parts = str(response.meta["ToDate"]).split("-")
    record["start_date"] = as_timestamp(from_parts)
    end_date = as_timestamp(to_parts)
    record["end_date"] = end_date
    year = str(end_date).split("-")[0]
    record["fiscal_year"] = year
    record["country_code"] = "IND"
    record["exchange_market_code"] = "NSE"
    record["financial_reporting_standard_code"] = "IFRS/IND AS"
    record["doc_type"] = "pdf"
    record["is_doc_url_direct"] = 1
    record["is_downloaded"] = 1
    record["currency_code"] = "INR"
    record["doc_downloaded_timestamp"] = time.strftime(
        stamp_format, time.localtime(time.time()))
    record["language_written_code"] = "en"

    # The counter advances even when the item turns out to be a duplicate.
    serial = self.go_heavy_num(int(self.report_num_dict[company]) + 1)
    self.report_num_dict[company] = serial
    report_id = company + year + "00" + season_num + "01" + serial
    record["report_id"] = report_id

    report_key = self.pattern2.search(str(report_id)).group()
    if report_key in known_ids:
        return

    record["doc_local_path"] = (
        "/volum1/homes/India/" + year + "/" + report_id + ".pdf")
    record["gmt_create"] = time.strftime(
        stamp_format, time.localtime(time.time()))
    record["user_create"] = "root"
    record["financial_statement_season_type_code"] = season_type_code
    record["file_name"] = report_id
    record["jud"] = 1
    yield record
def getzip(self, response):
    """Yield an NSE CSV item, then dump the page's table to a CSV file.

    The period comes from ``meta["FromDate"]`` / ``meta["ToDate"]``, each
    split on "-" and rebuilt as ``last-month-first 00:00:00`` with English
    month abbreviations replaced by two-digit numbers.  The company's entry
    in ``self.report_num_dict`` is advanced through ``self.go_heavy_num``
    and appended to the report id.

    After the item has been yielded, every row of
    ``//td[@valign="top"]//table//tr`` that has a non-blank first cell is
    appended to ``<report_id>.csv`` as ``title,value``.  Newlines are
    removed from both cells and commas are replaced by ``|#|``.  Rows with
    a missing or blank title are skipped.

    Yields:
        IndiaItem: exactly one item.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    month_numbers = (
        ("Jan", "01"), ("Feb", "02"), ("Mar", "03"), ("Apr", "04"),
        ("May", "05"), ("Jun", "06"), ("Jul", "07"), ("Aug", "08"),
        ("Sep", "09"), ("Oct", "10"), ("Nov", "11"), ("Dec", "12"),
    )

    def as_timestamp(parts):
        # Replace month abbreviations in a fixed order, Jan through Dec.
        month = parts[1]
        for name, number in month_numbers:
            month = month.replace(name, number)
        return parts[-1] + "-" + month + "-" + parts[0] + " 00:00:00"

    def clean(cell_text):
        # Keep each cell on one line and free of the CSV delimiter.
        return str(cell_text).replace("\n", "").replace(",", "|#|")

    item = IndiaItem()
    season_num = response.meta["season_num"]
    season_type_code = response.meta["season_type_code"]
    company = response.meta["company_id"]
    item["company_code"] = company
    start = str(response.meta["FromDate"]).split("-")
    end = str(response.meta["ToDate"]).split("-")
    item["start_date"] = as_timestamp(start)
    item["end_date"] = as_timestamp(end)
    item["fiscal_year"] = str(item["end_date"]).split("-")[0]
    item["country_code"] = "IND"
    item["exchange_market_code"] = "NSE"
    item["financial_reporting_standard_code"] = "IFRS/IND AS"
    item["doc_type"] = "csv"
    item["source_url"] = response.url
    item["doc_source_url"] = None
    item["is_doc_url_direct"] = 1
    item["is_downloaded"] = 1
    item["currency_code"] = "INR"
    item["doc_downloaded_timestamp"] = time.strftime(stamp_format)
    item["language_written_code"] = "en"

    num = self.go_heavy_num(int(self.report_num_dict[company]) + 1)
    self.report_num_dict[company] = num
    item["report_id"] = (company + item["fiscal_year"] + "00" + season_num
                         + "01" + num)
    item["doc_local_path"] = ("/volum1/homes/India/" + item["fiscal_year"]
                              + "/" + item["report_id"] + ".csv")
    item["gmt_create"] = time.strftime(stamp_format)
    item["user_create"] = "root"
    item["financial_statement_season_type_code"] = season_type_code
    item["file_name"] = item["report_id"]
    # NOTE(review): the CSV below is written only after the consumer
    # resumes this generator, i.e. after the item has been handed over.
    yield item

    lines = []
    for row in response.xpath('//td[@valign="top"]//table//tr'):
        title_cells = row.xpath('./td[1]//text()').extract()
        if not title_cells:
            # A None title used to reach the string concatenation below
            # and raise TypeError.
            continue
        title = clean(title_cells[0])
        # The old test joined its "!=" checks with "or", so it was always
        # true; skipping blank titles is what it was meant to do.
        if not title.strip():
            continue
        value_cells = row.xpath('./td[2]/text()').extract()
        value = clean(value_cells[0]) if value_cells else ""
        lines.append(title + "," + value + "\n")

    if lines:
        # Raw literal: the backslashes in this path must not be read as
        # escape sequences.  The resulting string is unchanged.
        csv_path = (r"D:\item\OPDCMS\listed company update\india\data\csv/"
                    + item["report_id"] + ".csv")
        # Open once for the whole table instead of once per row.
        with open(csv_path, "a") as f:
            f.writelines(lines)
def download(self, response):
    """Yield a BSE CSV statement item unless its id is already known.

    The period start and end are read from rows 2 and 3 of the table with
    id ``ctl00_ContentPlaceHolder1_tbl_typeID``.  Each is split on "-" and
    rebuilt as ``"20" + last + "-" + month + "-" + first + " 00:00:00"``,
    with English month abbreviations replaced by two-digit numbers.  The
    company's entry in ``self.report_num_dict`` is advanced through
    ``self.go_heavy_num`` before the duplicate check.  The report id uses
    the digits "06" when ``meta["type"]`` is "FY" and "05" otherwise.

    The item is yielded only if the part of the report id matched by
    ``self.pattern2`` is absent from ``meta["report_id_list"]``.  In that
    case the table rows after the first four go to ``self.saveExcel``.

    Raises:
        IndexError: if the expected table cells are missing.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    table = '//table[@id="ctl00_ContentPlaceHolder1_tbl_typeID"]'
    month_numbers = (
        ("Jan", "01"), ("Feb", "02"), ("Mar", "03"), ("Apr", "04"),
        ("May", "05"), ("Jun", "06"), ("Jul", "07"), ("Aug", "08"),
        ("Sep", "09"), ("Oct", "10"), ("Nov", "11"), ("Dec", "12"),
    )

    def as_timestamp(parts):
        # Same substring replacements, in the same order, as a chained
        # .replace("Jan", "01")...replace("Dec", "12").
        month = parts[1]
        for name, number in month_numbers:
            month = month.replace(name, number)
        return "20" + parts[-1] + "-" + month + "-" + parts[0] + " 00:00:00"

    known_ids = response.meta["report_id_list"]
    response.meta["code"]  # value unused; the key is still required
    season_type = response.meta["type"]
    record = IndiaItem()

    start_text = response.xpath(table + '//tr[2]/td[2]/text()').extract()[0]
    end_text = response.xpath(table + '//tr[3]/td[2]/text()').extract()[0]
    rows = response.xpath(table + '//tr')

    record["start_date"] = as_timestamp(str(start_text).split("-"))
    end_date = as_timestamp(str(end_text).split("-"))
    record["end_date"] = end_date
    company = response.meta["company_id"]
    record["company_code"] = company
    year = str(end_date).split("-")[0]
    record["fiscal_year"] = year
    record["country_code"] = "IND"
    record["exchange_market_code"] = "BSE"
    record["financial_reporting_standard_code"] = "IFRS/IND AS"
    record["doc_type"] = "csv"
    record["source_url"] = response.url
    record["doc_source_url"] = None
    record["is_doc_url_direct"] = 1
    record["is_downloaded"] = 1
    record["currency_code"] = "INR"
    record["doc_downloaded_timestamp"] = time.strftime(
        stamp_format, time.localtime(time.time()))
    record["language_written_code"] = "en"

    # The counter advances even when the item turns out to be a duplicate.
    serial = self.go_heavy_num(int(self.report_num_dict[company]) + 1)
    self.report_num_dict[company] = serial

    season_digits = "06" if season_type == "FY" else "05"
    report_id = company + year + "03" + season_digits + "01" + serial
    record["report_id"] = report_id

    report_key = self.pattern2.search(str(report_id)).group()
    if report_key in known_ids:
        return

    record["doc_local_path"] = (
        "/volum1/homes/India/" + year + "/" + report_id + ".csv")
    record["gmt_create"] = time.strftime(
        stamp_format, time.localtime(time.time()))
    record["user_create"] = "root"
    record["financial_statement_type_code"] = "IS"
    # The first four table rows are not passed on to saveExcel.
    self.saveExcel(rows[4:], report_id)
    yield record
def parse(self, response):
    """Yield report items from a BSE results listing and follow "next".

    Each ``<tr>`` with the inline style
    ``background-color:white;height:32px;`` is an anchor.  The PDF link
    and title are read from the row before it, and the release date from
    two rows before it.  A row is kept only if it has a PDF link and its
    cleaned title contains a keyword from one of ``self.Q1_list``,
    ``Q2_list``, ``Q3_list``, ``Q4_list``, ``Q_list``, ``FY_list`` or
    ``Financial_list``, checked in that order.

    For a kept row, ``self.get_pdf_time(title)`` supplies
    ``(end_date, fiscal_year)``; a missing fiscal year is written as
    "0000" in the report id and local path.  The company's entry in
    ``self.report_num_dict`` is advanced through ``self.go_heavy_num``.

    Rows without a usable date of their own inherit the date of the
    previous kept row.  If there is none yet, ``disclosure_date`` is
    ``None``.  Rows whose fallback title is missing are skipped.

    Yields:
        IndiaItem for every kept row, then a ``scrapy.Request`` for the
        next page when the page has a "next" link.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    company_id = response.meta["company_id"]
    code = response.meta["code"]

    # (keywords, season type code, season digits), checked in this order.
    season_rules = (
        (self.Q1_list, "Q1", "01"),
        (self.Q2_list, "Q2", "02"),
        (self.Q3_list, "Q3", "03"),
        (self.Q4_list, "Q4", "04"),
        (self.Q_list, "Q", "05"),
        (self.FY_list, "FY", "06"),
        (self.Financial_list, None, "00"),
    )
    month_numbers = (
        ("Jan", "01"), ("Feb", "02"), ("Mar", "03"), ("Apr", "04"),
        ("May", "05"), ("Jun", "06"), ("Jul", "07"), ("Aug", "08"),
        ("Sep", "09"), ("Oct", "10"), ("Nov", "11"), ("Dec", "12"),
    )

    def match_season(text):
        # Return (season code, season digits) of the first matching rule.
        for keywords, season_code, season_digits in season_rules:
            if any(word in text for word in keywords):
                return season_code, season_digits
        return None

    def clean_title(text):
        return (str(text).replace("(", "").replace(")", "")
                .replace(",", "").replace("&", ""))

    last_date = None  # release date of the previous kept row
    data_list = response.xpath(
        '//table[@cellspacing="1"]'
        '//tr[@style="background-color:white;height:32px;"]')
    for temp in data_list:
        item = IndiaItem()
        item["company_code"] = company_id
        pdf_url = temp.xpath(
            './preceding-sibling::tr[1]/td[3]/a/@href').extract()
        title = temp.xpath(
            './preceding-sibling::tr[1]/td[1]/text()').extract()
        title_backup = temp.xpath(
            './preceding-sibling::tr[1]/td[1]/a/text()').extract()
        data = temp.xpath('./preceding-sibling::tr[2]/td/text()').extract()
        data_time = self.pattern.findall(str(data))
        data_title = self.pattern.findall(str(title))

        if not pdf_url:
            continue
        item["doc_source_url"] = pdf_url[0]

        if data_title or not title:
            if not title_backup:
                # No usable title at all; indexing the empty fallback
                # list would raise IndexError.
                continue
            title = clean_title(title_backup[0])
        else:
            title = clean_title(title[0])

        season = match_season(title)
        if season is None:
            continue
        season_code, season_digits = season

        # One call instead of two; assumes get_pdf_time depends only on
        # the title - TODO confirm.
        pdf_time = self.get_pdf_time(title)
        item["end_date"] = pdf_time[0]
        fiscal_year = pdf_time[1]
        item["fiscal_year"] = fiscal_year
        if fiscal_year is None:
            fiscal_year = "0000"

        num = self.go_heavy_num(int(self.report_num_dict[company_id]) + 1)
        self.report_num_dict[company_id] = num
        item["financial_statement_season_type_code"] = season_code
        item["report_id"] = (company_id + fiscal_year + "00" + season_digits
                             + "01" + num)

        if data_time or not data:
            # Reuse the previous date; None when this is the first kept
            # row, where indexing an empty history would raise IndexError.
            data = last_date
        else:
            data = data[0]
        last_date = data

        release_time = str(data)
        for name, number in month_numbers:
            release_time = release_time.replace(name, number)
        # NOTE(review): both replacements show a plain space here; one may
        # originally have targeted a non-breaking space - confirm.
        release_time = release_time.replace(" ", "").replace(" ", "")

        # Raw literal so "\d" is not treated as a string escape.
        release_time_list = re.findall(r"(\d{2})(\d{2})(\d{4})", release_time)
        if release_time_list:
            day, month, year = release_time_list[0]
            item["disclosure_date"] = (year + "-" + month + "-" + day
                                       + " 00:00:00")
        else:
            item["disclosure_date"] = None

        item["country_code"] = "IND"
        item["exchange_market_code"] = "BSE"
        item["financial_reporting_standard_code"] = "IFRS/IND AS"
        item["doc_type"] = "pdf"
        item["is_doc_url_direct"] = 1
        item["is_downloaded"] = 1
        item["currency_code"] = "INR"
        item["doc_downloaded_timestamp"] = time.strftime(stamp_format)
        item["language_written_code"] = "en"
        item["doc_local_path"] = ("/volum1/homes/India/" + fiscal_year + "/"
                                  + item["report_id"] + ".pdf")
        item["gmt_create"] = time.strftime(stamp_format)
        item["user_create"] = "root"
        item["file_name"] = item["report_id"]
        item["jud"] = 0
        yield item

    page_link = response.xpath(
        '//span[@id="ctl00_ContentPlaceHolder1_lblNext"]/a/@href').extract()
    if page_link:
        url = "http://www.bseindia.com/corporates/" + page_link[0]
        yield scrapy.Request(url, callback=self.parse,
                             meta={"code": code, "company_id": company_id})
def parse(self, response):
    """Yield one item per BSE announcement that carries an attachment.

    ``response.text`` is scanned with a regular expression for
    ``NEWSSUB`` / ``ATTACHMENTNAME`` / ``DissemDT`` triples.  Entries whose
    attachment name is three characters or shorter are skipped.  The
    subject is checked against ``self.Q1_list``, ``Q2_list``, ``Q3_list``,
    ``Q4_list``, ``Q_list``, ``FY_list`` and ``Financial_list``, in that
    order.  The first match sets the season type code and
    ``announcement = 1``.  Without a match no season type code is set and
    ``announcement`` is 0.

    Yields:
        IndiaItem: one item per kept announcement.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    # (attribute holding the keywords, season type code), in check order.
    # Attributes are looked up only when their rule is reached.
    season_rules = (
        ("Q1_list", "Q1"),
        ("Q2_list", "Q2"),
        ("Q3_list", "Q3"),
        ("Q4_list", "Q4"),
        ("Q_list", "Q"),
        ("FY_list", "FY"),
        ("Financial_list", None),
    )

    entries = re.findall(
        '"NEWSSUB":"(.*?)",.*?"ATTACHMENTNAME":"(.*?)",.*?"DissemDT":"(.*?)",',
        response.text)
    for subject, attachment, disseminated in entries:
        record = IndiaItem()
        company = response.meta["company_id"]
        record["company_code"] = company

        if len(attachment) <= 3:
            continue

        record["doc_source_url"] = (
            "https://www.bseindia.com/xml-data/corpfiling/AttachLive/"
            + str(attachment).replace('"', ""))
        title = subject
        disclosure = str(disseminated).split(".")[0].replace("T", " ")
        record["disclosure_date"] = disclosure
        year = disclosure.split("-")[0]
        record["fiscal_year"] = year

        is_financial = False
        for list_name, season_code in season_rules:
            if any(word in title for word in getattr(self, list_name)):
                record["financial_statement_season_type_code"] = season_code
                is_financial = True
                break

        report_id = company + self.uniqueIDMaker()
        record["report_id"] = report_id
        record["announcement"] = 1 if is_financial else 0

        record["country_code"] = "IND"
        record["exchange_market_code"] = "BSE"
        record["financial_reporting_standard_code"] = "IFRS/IND AS"
        record["doc_type"] = "pdf"
        record["is_doc_url_direct"] = 1
        record["is_downloaded"] = 1
        record["currency_code"] = "INR"
        record["doc_downloaded_timestamp"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        record["language_written_code"] = "en"
        record["doc_local_path"] = (
            "/volume3/homes3/India/" + year + "/" + report_id + ".pdf")
        record["gmt_create"] = time.strftime(
            stamp_format, time.localtime(time.time()))
        record["user_create"] = "zx"
        record["file_name"] = report_id
        record["jud"] = 0
        record["pdf_name"] = title
        yield record