def request_pdf(url, jud_pdf_name, court_name, case_id, page_no):
    """Download a PDF and store it in the local ``pdf_files`` directory.

    The response is only saved when the server answers with status 200 and a
    ``Content-Type`` header equal to ``application/pdf``. On success the
    tracker's ``no_pdf`` counter for *court_name* is incremented. On any
    failure (bad response or exception) an ``alerts`` row is inserted and the
    tracker's ``no_alerts`` counter is incremented.

    Args:
        url: Address to fetch with a GET request (through ``proxy_dict``).
        jud_pdf_name: Name that is slugified to build the local file name.
        court_name: Court identifier used in the file name and tracker rows.
        case_id: Case identifier written to the alert on failure.
        page_no: Page number written to the alert on failure.

    Returns:
        The path of the written file, or ``None`` if the download failed.
    """
    alert_sql = "INSERT INTO alerts (court_name, case_id, page_no, error_message) VALUES (%s, %s, %s, %s)"
    alert_args = (court_name, case_id, page_no, 'Failed to download PDF File.')
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s"

    try:
        response = requests.request("GET", url, proxies=proxy_dict)
        if response.status_code == 200 and response.headers['Content-Type'] == 'application/pdf':
            # The doubled underscore is kept on purpose so that generated
            # file names stay identical to the ones produced before.
            file_path = (module_directory + "/../data_files/pdf_files/" + court_name + "_" + "_"
                         + slugify(jud_pdf_name) + '.pdf')
            # Context manager guarantees the data is flushed and the handle
            # released before the path is handed back to the caller.
            with open(file_path, "wb") as fw:
                fw.write(response.content)
            update_query("UPDATE tracker SET no_pdf=no_pdf+1 WHERE court_name=%s", court_name)
            return file_path

        logging.error("Failed to get pdf file for: %s", jud_pdf_name)
        insert_query(alert_sql, alert_args)
        update_query(bump_alerts_sql, court_name)
        return None
    except Exception as e:
        # Lazy %-style arguments: a '%' inside the name can no longer break
        # the log format string.
        logging.error("Failed to get pdf file for: %s. Error: %s", jud_pdf_name, e)
        insert_query(alert_sql, alert_args)
        update_query(bump_alerts_sql, court_name)
        return None
def request_pdf(url, headers, pdf_name, court_name, case_id):
    """Download a captcha-protected PDF, retrying up to ``NO_TRIES`` times.

    For every attempt the captcha image is fetched from
    ``base_url + '/include/captcha.php'``, passed to ``image_to_text_api`` and
    the lower-cased result is posted to *url* as ``vercode``. The response is
    saved only when it has status 200 and a ``Content-Type`` header equal to
    ``application/pdf``.

    On success the tracker's ``no_pdf`` counter is incremented. When *url* is
    ``None``, when all attempts fail, or when an exception is raised, an
    ``alerts`` row is inserted and the tracker's ``no_alerts`` counter is
    incremented.

    Args:
        url: Address the captcha answer is posted to; may be ``None``.
        headers: HTTP headers sent with both requests.
        pdf_name: File name appended to the court name for the local file.
        court_name: Court identifier used in the file name and tracker rows.
        case_id: Case identifier written to the alert on failure.

    Returns:
        The path of the written file, or ``None`` if nothing was downloaded.
    """
    alert_sql = "INSERT INTO alerts (court_name, case_id, error_message) VALUES (%s, %s, %s)"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s"

    try:
        if url is None:
            logging.error("Failed to get pdf file for: %s", pdf_name)
            insert_query(alert_sql, (court_name, case_id, 'Failed to download PDF File. No url.'))
            update_query(bump_alerts_sql, court_name)
            return None

        for _ in range(NO_TRIES):
            captcha = requests.get(base_url + '/include/captcha.php', headers=headers, proxies=proxy_dict)
            payload = 'vercode=' + image_to_text_api(captcha.content, court_name).lower() + '&submit=Submit'
            response = requests.request("POST", url, data=payload, headers=headers, verify=False,
                                        proxies=proxy_dict)
            if response.status_code == 200 and response.headers['Content-Type'] == 'application/pdf':
                file_path = module_directory + "/../data_files/pdf_files/" + court_name + "_" + pdf_name
                # Close the file deterministically so the PDF is complete on
                # disk before the caller reads or uploads it.
                with open(file_path, "wb") as fw:
                    fw.write(response.content)
                update_query("UPDATE tracker SET no_pdf=no_pdf+1 WHERE court_name=%s", court_name)
                return file_path

        # Every attempt was rejected (wrong captcha or non-PDF answer).
        logging.error("Failed to get pdf file for: %s", pdf_name)
        insert_query(alert_sql, (court_name, case_id, 'Failed to download PDF File.'))
        update_query(bump_alerts_sql, court_name)
        return None
    except Exception as e:
        logging.error("Failed to get pdf file for: %s. Error: %s", pdf_name, e)
        insert_query(alert_sql, (court_name, case_id, 'Failed to download PDF File.'))
        update_query(bump_alerts_sql, court_name)
        return None
def parser(court_name, page_no, response):
    """Parse one listing page and store every new ruling it contains.

    The fifth ``<table>`` of *response* is treated as the listing; its first
    row is skipped as a header. For every remaining row the cell values are
    read, and when ``select_count_query`` reports the row as new, the PDF is
    downloaded through ``request_pdf``, converted with ``pdf_to_text_api``,
    written to the ``aar_rulings`` table and both files are passed to
    ``transfer_to_bucket``. Progress and failures are recorded in the
    ``tracker`` and ``alerts`` tables. Processing stops early when the
    tracker's ``emergency_exit`` flag equals 1.

    Args:
        court_name: Court identifier used for tracker/alert rows and files.
        page_no: Listing page number, recorded in alerts.
        response: HTML text of the listing page.

    Returns:
        ``False`` when an exception was caught, otherwise ``None``.
    """
    alert_sql = "INSERT INTO alerts (court_name, case_id, page_no,error_message) VALUES (%s, %s, %s, %s)"
    bucket_alert_sql = "INSERT INTO alerts (court_name, case_id, error_message) VALUES (%s, %s, %s)"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s"
    # The statement needs one placeholder per supplied parameter; with the
    # bare "and page_no" the two-element parameter tuple could not be bound.
    bump_alerts_page_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s and page_no=%s"
    # Cell index -> field name for the cells whose value sits in a <strong> tag.
    text_columns = ('s_no', 'case_id', 'ruling_date', 'applicant', 'country', 'itr_taxman_ctr')

    try:
        tables = BeautifulSoup(response, "html.parser").find_all("table")
        table_data = tables[4] if len(tables) >= 5 else None
        table_rows = BeautifulSoup(str(table_data), "html.parser").find_all("tr")

        # Reset the counters once per page. Doing this inside the row loop
        # would zero inserted_cases again before every row, so the caller's
        # total_cases == inserted_cases check could not reflect the page.
        if table_rows:
            update_query(
                "UPDATE tracker SET total_cases=%s, inserted_cases=0, no_pdf=0, no_text=0, transferred_pdf=0,"
                "transferred_text=0 WHERE court_name=%s", ((len(table_rows) - 1), court_name))

        for table_row in table_rows[1:]:  # first row is the header
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM tracker WHERE court_name=%s", court_name)
            if emergency_exit is not None and emergency_exit['emergency_exit'] == 1:
                break

            cells = dict.fromkeys(text_columns)
            pdf_url = None
            total_td = BeautifulSoup(str(table_row), "html.parser").find_all("td")
            for j, td in enumerate(total_td):
                td_soup = BeautifulSoup(str(td), "html.parser")
                if j < len(text_columns):
                    strong_text = td_soup.find('strong')
                    if strong_text is not None:
                        cells[text_columns[j]] = escape_string(str(strong_text.decode_contents()))
                elif j == 6 and td_soup.a is not None:
                    # The file name is the text between the first and the
                    # last single quote of the href value.
                    href = str(td_soup.a['href'])
                    pdf_url = href[href.index("'") + 1:href.rindex("'")]

            s_no = cells['s_no']
            case_id = cells['case_id']
            ruling_date = cells['ruling_date']
            applicant = cells['applicant']
            country = cells['country']
            itr_taxman_ctr = cells['itr_taxman_ctr']

            if not select_count_query(str(court_name), str(escape_string(case_id)), 'date', ruling_date):
                # Row already handled: count it as done in every column.
                update_query(
                    "UPDATE tracker SET inserted_cases=inserted_cases+1, no_pdf=no_pdf+1, no_text=no_text+1, "
                    "transferred_pdf=transferred_pdf+1, transferred_text=transferred_text+1 WHERE "
                    "court_name=%s", court_name)
                continue

            pdf_filepath = None
            text_filename = None
            pdf_final_url = None
            pdf_filename = None
            if pdf_url is not None:
                pdf_filename = slugify('aar-rulings' + str(escape_string(case_id)) + str(ruling_date)) + '.pdf'
                text_filename = slugify('aar-rulings-' + str(escape_string(case_id)) + str(ruling_date)) + '.txt'
                pdf_final_url = 'http://aarrulings.in/it-rulings/uploads/pdf/' + pdf_url
                pdf_filepath = request_pdf(pdf_final_url, pdf_filename, court_name, case_id, page_no)

            if pdf_filepath is not None:
                pdf_text_data = escape_string(str(pdf_to_text_api(pdf_filepath)))
                text_filepath = module_directory + "/../data_files/text_files/" + court_name + '_' + text_filename
                # Close the file before it is transferred below; an open
                # handle may still hold unflushed data.
                with open(text_filepath, "w") as fw:
                    fw.write(pdf_text_data)
            else:
                text_filepath = None
                pdf_text_data = None
                pdf_filename = None
                text_filename = None

            if insert_query(
                    "INSERT INTO aar_rulings (sl_no, case_id, date, country, "
                    "itr_taxman_ctr, pdf_url, pdf_filename, text_filename) "
                    "VALUES (%s,%s,%s,%s,%s,%s,%s,%s)",
                    (s_no, str(escape_string(case_id)), ruling_date, country, itr_taxman_ctr,
                     pdf_final_url, pdf_filename, text_filename)):
                update_query(
                    "UPDATE tracker SET inserted_cases=inserted_cases+1 WHERE court_name=%s", court_name)
            else:
                update_query(bump_alerts_page_sql, (court_name, page_no))
                insert_query(alert_sql, (court_name, case_id, page_no, 'Failed to insert court data in table'))

            if update_query(
                    "UPDATE aar_rulings SET name_of_applicant=%s WHERE case_id=%s", (applicant, case_id)):
                update_query("UPDATE tracker SET no_text=no_text+1 WHERE court_name=%s", court_name)
            else:
                insert_query(alert_sql,
                             (court_name, case_id, page_no, 'Failed to insert applicant name in table'))
                update_query(bump_alerts_page_sql, (court_name, page_no))

            if update_query(
                    "UPDATE aar_rulings SET text_data=%s WHERE case_id=%s", (pdf_text_data, case_id)):
                update_query("UPDATE tracker SET no_text=no_text+1 WHERE court_name=%s", court_name)
            else:
                insert_query(alert_sql, (court_name, case_id, page_no, 'Failed to insert text data in table'))
                update_query(bump_alerts_page_sql, (court_name, page_no))

            if transfer_to_bucket('PDF_Files', pdf_filepath):
                update_query(
                    "UPDATE tracker SET transferred_pdf=transferred_pdf+1 WHERE court_name=%s", court_name)
                os.remove(pdf_filepath)
            else:
                insert_query(bucket_alert_sql, (court_name, case_id, 'Failed to transfer pdf to bucket.'))
                update_query(bump_alerts_sql, court_name)

            if transfer_to_bucket('Text_Files', text_filepath):
                update_query(
                    "UPDATE tracker SET transferred_text=transferred_text+1 WHERE court_name=%s", court_name)
                os.remove(text_filepath)
            else:
                insert_query(bucket_alert_sql, (court_name, case_id, 'Failed to transfer text to bucket.'))
                update_query(bump_alerts_sql, court_name)
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to request: %s", e)
        insert_query(
            "INSERT INTO alerts (court_name, page_no, error_message) VALUES (%s, %s, %s)",
            (court_name, page_no, str(e)))
        update_query(bump_alerts_sql, court_name)
        return False
def request_data(base_url, court_name):
    """Walk the paginated listing for *court_name* and parse every page.

    The start page is read from the tracker table. That page is fetched and
    parsed once, the number of the last page is taken from the "Last" link,
    and then every page from the start page to the last one (step 20) is
    fetched and handed to ``parser`` until the tracker reports
    ``total_cases == inserted_cases`` or ``NO_TRIES`` attempts were made.

    Args:
        base_url: URL prefix; the page number is appended to it.
        court_name: Court identifier used for all tracker/alert rows.

    Returns:
        ``True`` when the run finished (or the emergency exit flag was set,
        or no "Last" link exists); ``False`` when the site answered with
        "access denied" or an exception was caught.
    """
    page_no = 0
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s"
    page_alert_sql = "INSERT INTO alerts (court_name, page_no, error_message) VALUES (%s, %s, %s)"
    try:
        exit_row = select_one_query(
            "SELECT emergency_exit FROM tracker WHERE court_name=%s ", court_name)
        if exit_row['emergency_exit'] == 1:
            update_history_tracker(court_name)
            return True

        page_row = select_one_query("SELECT page_no FROM tracker WHERE court_name=%s", court_name)
        page_no = page_row['page_no']

        # The first request is sent without the proxy settings.
        html = requests.request("GET", base_url + str(page_no)).text
        if "access denied" in html.lower():
            print('Problem')
            insert_query(page_alert_sql, (court_name, page_no, 'IP has been Blacklisted'))
            update_query(bump_alerts_sql, court_name)
            return False

        parser(court_name, page_no, html)

        last_anchor = BeautifulSoup(html, "html.parser").find("a", string="Last ›")
        if last_anchor is None:
            return True
        # The last page number is whatever follows the final "/" of the link.
        last_href = str(last_anchor['href'])
        last_page_no = int(last_href[int(last_href.rindex("/") + 1):])

        while page_no <= last_page_no:
            update_query("UPDATE tracker SET page_no=%s WHERE court_name=%s", (page_no, court_name))
            update_query("UPDATE tracker SET no_tries=0, no_alerts=0 WHERE court_name=%s", court_name)
            tries_row = select_one_query("SELECT no_tries FROM tracker WHERE court_name=%s", court_name)
            no_tries = tries_row['no_tries']

            page_html = requests.request("GET", base_url + str(page_no), proxies=proxy_dict).text

            while no_tries < NO_TRIES:
                update_query(
                    "UPDATE tracker SET total_cases=0, inserted_cases=0, no_pdf=0, no_text=0, "
                    "transferred_pdf=0, transferred_text=0 WHERE court_name=%s", court_name)
                parser(court_name, page_no, page_html)
                counts = select_one_query(
                    "SELECT total_cases, inserted_cases FROM tracker WHERE court_name=%s", court_name)
                if counts['total_cases'] == counts['inserted_cases']:
                    break
                no_tries += 1
                update_query("UPDATE tracker SET no_tries=%s WHERE court_name=%s", (no_tries, court_name))

            if no_tries == NO_TRIES:
                insert_query(page_alert_sql, (court_name, page_no, 'Tries Exceeded'))
                update_query(bump_alerts_sql, court_name)

            create_transfer_json(court_name)
            update_history_tracker(court_name)
            page_no += 20

        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to request: %s", e)
        insert_query(page_alert_sql, (court_name, page_no, str(e)))
        update_query(bump_alerts_sql, court_name)
        return False
def parser(base_url, court_name, bench_id, response):
    """Store every new case from a list of case dictionaries.

    The tracker counters for (*court_name*, *bench_id*) are reset with
    ``total_cases`` set to ``len(response)``. For each case that
    ``select_count_query`` reports as new, the PDF is downloaded via
    ``request_pdf``, converted with ``pdf_to_text_api``, stored in the
    ``kolkata`` table and both files are passed to ``transfer_to_bucket``.
    Failures are written to the ``alerts`` table. The loop stops when the
    tracker's ``emergency_exit`` flag equals 1.

    Args:
        base_url: Site prefix; PDFs are fetched from ``base_url + 'viewpdf/'``.
        court_name: Court identifier used for tracker/alert rows and files.
        bench_id: Bench identifier used together with *court_name*.
        response: Iterable of mappings with the keys ``CaseType``,
            ``CaseNo``, ``CaseYr``, ``Jud_Dt`` and ``Jud_Pdf_Name``.
    """
    pdf_base_path = base_url + 'viewpdf/'
    tracker_key = (court_name, bench_id)
    alert_sql = "INSERT INTO alerts (court_name, bench, case_id, error_message) VALUES (%s, %s, %s, %s)"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s and bench=%s"

    update_query("UPDATE tracker SET total_cases=%s, inserted_cases=0, no_pdf=0, no_text=0, transferred_pdf=0,"
                 "transferred_text=0 WHERE court_name=%s and bench=%s",
                 (str(len(response)), court_name, bench_id))

    for case in response:
        emergency_exit = select_one_query(
            "SELECT emergency_exit FROM tracker WHERE court_name=%s and bench=%s", tracker_key)
        if emergency_exit is not None and emergency_exit['emergency_exit'] == 1:
            break

        case_type = case['CaseType']
        case_no = case['CaseNo']
        case_yr = case['CaseYr']
        jud_dt = case['Jud_Dt']
        jud_pdf_name = case['Jud_Pdf_Name']
        case_id = case_type + ' ' + case_no + ' OF ' + case_yr

        if not select_count_query(str(court_name), str(case_id), 'judgment_date', jud_dt):
            # Case already handled: count it as done in every column.
            update_query("UPDATE tracker SET inserted_cases=inserted_cases+1, no_pdf=no_pdf+1, no_text=no_text+1,"
                         "transferred_pdf=transferred_pdf+1, transferred_text=transferred_text+1 "
                         "WHERE court_name=%s and bench=%s", tracker_key)
            continue

        pdf_url = pdf_base_path + jud_pdf_name
        pdf_filename = str(jud_pdf_name).replace('.pdf', '')
        pdf_filepath = request_pdf(pdf_url, pdf_filename, court_name, bench_id, case_id)

        if pdf_filepath is not None:
            pdf_text_data = escape_string(str(pdf_to_text_api(pdf_filepath)))
            text_filepath = (module_directory + "/../data_files/text_files/" + court_name + "_"
                             + slugify(pdf_filename) + '.txt')
            # Close the file before it is transferred below; an open handle
            # may still hold unflushed data.
            with open(text_filepath, "w") as fw:
                fw.write(pdf_text_data)
            text_filename = jud_pdf_name.replace('.pdf', '.txt')
        else:
            text_filepath = None
            pdf_text_data = None
            text_filename = None
            jud_pdf_name = None

        if insert_query(
                "INSERT INTO kolkata (case_id, judgment_date, pdf_url, pdf_filename, text_filename, case_type, "
                "case_no, case_year, bench) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (case_id, jud_dt, pdf_url, jud_pdf_name, text_filename, case_type, case_no, case_yr, bench_id)):
            update_query("UPDATE tracker SET inserted_cases=inserted_cases+1 WHERE court_name=%s and bench=%s",
                         tracker_key)
        else:
            update_query(bump_alerts_sql, tracker_key)
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to insert court data in table'))

        if update_query("UPDATE kolkata SET text_data=%s WHERE case_id=%s", (pdf_text_data, case_id)):
            update_query("UPDATE tracker SET no_text=no_text+1 WHERE court_name=%s and bench=%s", tracker_key)
        else:
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to insert text data.'))
            update_query(bump_alerts_sql, tracker_key)

        if transfer_to_bucket('PDF_Files', pdf_filepath):
            update_query("UPDATE tracker SET transferred_pdf=transferred_pdf+1 WHERE court_name=%s and bench=%s",
                         tracker_key)
            os.remove(pdf_filepath)
        else:
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to transfer PDF to bucket.'))
            update_query(bump_alerts_sql, tracker_key)

        if transfer_to_bucket('Text_Files', text_filepath):
            update_query("UPDATE tracker SET transferred_text=transferred_text+1 WHERE court_name=%s and "
                         "bench=%s", tracker_key)
            os.remove(text_filepath)
        else:
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to transfer text to bucket.'))
            update_query(bump_alerts_sql, tracker_key)
def request_data(base_url, court_name, bench_id):
    """Query ``base_url + "findtext"`` date by date and parse each answer.

    Starting at the tracker's ``end_date`` and advancing by ``DAYS`` days
    until today, each date is posted as both ``dtfrom`` and ``dtto``. A JSON
    answer with ``error == 0`` and no ``data_errors`` key is handed to
    ``parser``; the date is retried until the tracker reports
    ``total_cases == inserted_cases`` or ``NO_TRIES`` attempts were made.

    Args:
        base_url: Site prefix.
        court_name: Court identifier used for tracker/alert rows.
        bench_id: Bench identifier used together with *court_name*.

    Returns:
        ``True`` when the date range was processed or the emergency exit
        flag was set; ``False`` when an exception was caught.
    """
    start_date = None
    tracker_key = (court_name, bench_id)
    date_format = "%Y-%m-%d"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s and bench=%s"
    date_alert_sql = "INSERT INTO alerts (court_name, bench, start_date, error_message) VALUES (%s, %s, %s, %s)"
    try:
        response = None
        url = base_url + "findtext"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }

        end_date_row = select_one_query(
            "SELECT end_date FROM tracker WHERE court_name=%s and bench=%s", tracker_key)
        start_date = end_date_row['end_date']
        today = datetime.datetime.now()

        while datetime.datetime.strptime(str(start_date), date_format) <= today:
            exit_row = select_one_query(
                "SELECT emergency_exit FROM tracker WHERE court_name=%s and bench=%s", tracker_key)
            if exit_row['emergency_exit'] == 1:
                update_history_tracker_bench(court_name, bench_id)
                return True

            update_query("UPDATE tracker SET no_tries=0, no_alerts=0 WHERE court_name=%s and bench=%s",
                         tracker_key)
            tries_row = select_one_query(
                "SELECT no_tries FROM tracker WHERE court_name=%s and bench=%s", tracker_key)
            no_tries = tries_row['no_tries']

            while no_tries < NO_TRIES:
                update_query("UPDATE tracker SET total_cases=0, inserted_cases=0, no_pdf=0, no_text=0, "
                             "transferred_pdf=0, transferred_text=0 WHERE court_name=%s and bench=%s",
                             tracker_key)

                payload = "dtfrom=" + start_date + "&dtto=" + start_date
                raw = requests.request("POST", url, data=payload, headers=headers, proxies=proxy_dict)
                response = json.loads(str(raw.text))

                if response['error'] == 0 and "data_errors" not in response:
                    parser(base_url, court_name, bench_id, response['find_data'])
                    counts = select_one_query("SELECT total_cases, inserted_cases FROM tracker "
                                              "WHERE court_name=%s AND bench=%s", tracker_key)
                    if counts['total_cases'] == counts['inserted_cases']:
                        break

                # NOTE(review): no_tries is always below NO_TRIES inside this
                # loop, so the last operand is never true here and no_nodata
                # is never incremented - confirm the intended condition.
                if response['error'] == 1 and response['data_errors'] == 0 and no_tries == NO_TRIES:
                    update_query("UPDATE tracker SET no_nodata=no_nodata+1 WHERE court_name=%s and bench=%s",
                                 tracker_key)

                no_tries += 1
                update_query("UPDATE tracker SET no_tries=%s WHERE court_name=%s and bench=%s",
                             (no_tries, court_name, bench_id))

            if no_tries == NO_TRIES:
                insert_query(date_alert_sql, (court_name, bench_id, start_date, str(response)))
                update_query(bump_alerts_sql, tracker_key)

            update_query("UPDATE tracker SET end_date=%s WHERE court_name=%s and bench=%s",
                         (start_date, court_name, bench_id))
            create_transfer_json_bench(court_name, bench_id)
            update_history_tracker_bench(court_name, bench_id)

            next_day = datetime.datetime.strptime(str(start_date), date_format) + datetime.timedelta(days=DAYS)
            start_date = next_day.strftime(date_format)

        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to request: %s", e)
        insert_query(date_alert_sql, (court_name, bench_id, start_date, str(e)))
        update_query(bump_alerts_sql, tracker_key)
        return False
def parser(court_name, bench_id, response):
    """Store every new order found in the first ``<tbody>`` of *response*.

    The tracker counters for (*court_name*, *bench_id*) are reset with
    ``total_cases`` set to the number of rows. For each row the first six
    cells are read as text and the seventh as the PDF link. Rows that
    ``select_count_query`` reports as new are downloaded via ``request_pdf``,
    converted with ``pdf_to_text_api``, stored in the
    ``national_company_law_appellate_tribunal`` table and both files are
    passed to ``transfer_to_bucket``. Failures are written to ``alerts``.
    The loop stops when the tracker's ``emergency_exit`` flag equals 1.

    Args:
        court_name: Court identifier used for tracker/alert rows and files.
        bench_id: Bench identifier used together with *court_name*.
        response: HTML text containing the result table.

    Raises:
        IndexError: If *response* contains no ``<tbody>`` element.
    """
    tracker_key = (court_name, bench_id)
    alert_sql = "INSERT INTO alerts (court_name, bench, case_id, error_message) VALUES (%s, %s, %s, %s)"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s AND bench=%s"
    # Cell position (1-based) -> field name for the plain-text cells.
    text_columns = ('case_id', 'judgment_date', 'party', 'section', 'court_name_', 'order_passed_by')

    tbody = BeautifulSoup(str(response), "html.parser").find_all('tbody')[0]
    tr_list = BeautifulSoup(str(tbody), "html.parser").find_all('tr')
    update_query(
        "UPDATE tracker SET total_cases=%s, inserted_cases=0, no_pdf=0, no_text=0, transferred_pdf=0,"
        "transferred_text=0 WHERE court_name=%s AND bench=%s", (str(len(tr_list)), court_name, bench_id))

    for tr in tr_list:
        emergency_exit = select_one_query(
            "SELECT emergency_exit FROM tracker WHERE court_name=%s AND bench=%s", tracker_key)
        if emergency_exit is not None and emergency_exit['emergency_exit'] == 1:
            break

        cells = dict.fromkeys(text_columns)
        pdf_url = None
        td_list = BeautifulSoup(str(tr), "html.parser").find_all('td')
        for i, td in enumerate(td_list, start=1):
            if i <= len(text_columns):
                cells[text_columns[i - 1]] = escape_string(str(td.decode_contents()))
            elif i == 7:
                a_tag = BeautifulSoup(str(td), "html.parser").a
                if a_tag:
                    pdf_url = escape_string(str(a_tag.get('href')))
                else:
                    pdf_url = None

        case_id = cells['case_id']
        judgment_date = cells['judgment_date']
        party = cells['party']
        section = cells['section']
        court_name_ = cells['court_name_']
        order_passed_by = cells['order_passed_by']

        if not select_count_query(str(court_name), str(case_id), 'judgment_date', judgment_date):
            # Row already handled: count it as done in every column.
            update_query(
                "UPDATE tracker SET inserted_cases=inserted_cases+1, no_pdf=no_pdf+1, no_text=no_text+1,"
                "transferred_pdf=transferred_pdf+1, transferred_text=transferred_text+1 WHERE court_name=%s "
                "AND bench=%s", tracker_key)
            continue

        pdf_filename = slugify(court_name + '-' + case_id + '-' + judgment_date) + '.pdf'
        text_filename = slugify(court_name + '-' + case_id + '-' + judgment_date) + '.txt'
        pdf_filepath = request_pdf(pdf_url, pdf_filename, court_name, bench_id, case_id)

        if pdf_filepath is not None:
            pdf_text_data = escape_string(str(pdf_to_text_api(pdf_filepath)))
            if pdf_text_data is not None:
                text_filepath = module_directory + "/../data_files/text_files/" + court_name + '_' + text_filename
                # Close the file before it is transferred below; an open
                # handle may still hold unflushed data.
                with open(text_filepath, "w") as fw:
                    fw.write(pdf_text_data)
            else:
                text_filepath = None
                text_filename = None
        else:
            text_filepath = None
            pdf_text_data = None
            pdf_filename = None
            # No text file exists when the PDF download failed, so do not
            # record a text file name for it either.
            text_filename = None

        if insert_query(
                "INSERT INTO national_company_law_appellate_tribunal (case_id, judgment_date, party, section, "
                "court_name, order_passed_by, pdf_url, pdf_filename, text_filename, bench) VALUES (%s, %s, %s, %s, "
                "%s, %s, %s, %s, %s, %s)",
                (case_id, judgment_date, party, section, court_name_, order_passed_by, pdf_url, pdf_filename,
                 text_filename, bench_id)):
            update_query(
                "UPDATE tracker SET inserted_cases=inserted_cases+1 WHERE court_name=%s AND bench=%s",
                tracker_key)
        else:
            update_query(bump_alerts_sql, tracker_key)
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to insert court data in table'))

        if update_query(
                "UPDATE national_company_law_appellate_tribunal SET text_data=%s WHERE case_id=%s",
                (pdf_text_data, case_id)):
            update_query("UPDATE tracker SET no_text=no_text+1 WHERE court_name=%s AND bench=%s", tracker_key)
        else:
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to insert text data.'))
            update_query(bump_alerts_sql, tracker_key)

        if transfer_to_bucket('PDF_Files', pdf_filepath):
            update_query(
                "UPDATE tracker SET transferred_pdf=transferred_pdf+1 WHERE court_name=%s AND bench=%s",
                tracker_key)
            os.remove(pdf_filepath)
        else:
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to transfer pdf to bucket.'))
            update_query(bump_alerts_sql, tracker_key)

        if transfer_to_bucket('Text_Files', text_filepath):
            update_query(
                "UPDATE tracker SET transferred_text=transferred_text+1 WHERE court_name=%s AND bench=%s",
                tracker_key)
            os.remove(text_filepath)
        else:
            insert_query(alert_sql, (court_name, bench_id, case_id, 'Failed to transfer text to bucket.'))
            update_query(bump_alerts_sql, tracker_key)
def request_data(base_url, headers, court_name, bench_id):
    """Post a per-day search to *base_url* and parse every answer.

    Starting at the tracker's ``end_date`` and advancing by ``DAYS`` days
    until today, each date is posted as ``from_date`` and ``to_date``
    together with the bench as ``court``. An answer containing
    "no record found" increments the tracker's ``no_nodata`` counter; any
    other answer is handed to ``parser``. A date is retried until the tracker
    reports ``total_cases == inserted_cases`` or ``NO_TRIES`` attempts were
    made.

    Args:
        base_url: Address the search form is posted to.
        headers: HTTP headers sent with the request.
        court_name: Court identifier used for tracker/alert rows.
        bench_id: Bench identifier used together with *court_name*.

    Returns:
        ``True`` when the date range was processed or the emergency exit
        flag was set; ``False`` when an exception was caught.
    """
    start_date = None
    tracker_key = (court_name, bench_id)
    date_format = "%m/%d/%Y"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s and bench=%s"
    date_alert_sql = "INSERT INTO alerts (court_name, bench, start_date, error_message) VALUES (%s, %s, %s, %s)"
    try:
        response = None
        querystring = {"page_id": "225"}

        end_date_row = select_one_query(
            "SELECT end_date FROM tracker WHERE court_name=%s and bench=%s", tracker_key)
        start_date = end_date_row['end_date']
        today = datetime.datetime.now()

        while datetime.datetime.strptime(str(start_date), date_format) <= today:
            exit_row = select_one_query(
                "SELECT emergency_exit FROM tracker WHERE court_name=%s and bench=%s", tracker_key)
            if exit_row['emergency_exit'] == 1:
                update_history_tracker_bench(court_name, bench_id)
                return True

            update_query(
                "UPDATE tracker SET no_tries=0, no_alerts=0 WHERE court_name=%s and bench=%s", tracker_key)
            tries_row = select_one_query(
                "SELECT no_tries FROM tracker WHERE court_name=%s and bench=%s", tracker_key)
            no_tries = tries_row['no_tries']

            while no_tries < NO_TRIES:
                update_query(
                    "UPDATE tracker SET total_cases=0, inserted_cases=0, no_pdf=0, no_text=0, "
                    "transferred_pdf=0, transferred_text=0 WHERE court_name=%s and bench=%s", tracker_key)

                payload = "from_date=" + start_date + "&to_date=" + start_date + "&court=" + str(bench_id) \
                          + "&fifth=fifth"
                reply = requests.request("POST", base_url, data=payload, params=querystring,
                                         headers=headers, proxies=proxy_dict)
                response = reply.text

                if "no record found" not in response.lower():
                    parser(court_name, bench_id, response)
                    counts = select_one_query(
                        "SELECT total_cases, inserted_cases FROM tracker "
                        "WHERE court_name=%s AND bench=%s", tracker_key)
                    if counts['total_cases'] == counts['inserted_cases']:
                        break
                else:
                    update_query(
                        "UPDATE tracker SET no_nodata=no_nodata+1 WHERE court_name=%s and bench=%s",
                        tracker_key)

                no_tries += 1
                update_query(
                    "UPDATE tracker SET no_tries=%s WHERE court_name=%s and bench=%s",
                    (no_tries, court_name, bench_id))

            if no_tries == NO_TRIES:
                insert_query(date_alert_sql, (court_name, bench_id, start_date, str(response)))
                update_query(bump_alerts_sql, tracker_key)

            update_query(
                "UPDATE tracker SET end_date=%s WHERE court_name=%s and bench=%s",
                (start_date, court_name, bench_id))
            create_transfer_json_bench(court_name, bench_id)
            update_history_tracker_bench(court_name, bench_id)

            next_day = datetime.datetime.strptime(str(start_date), date_format) + datetime.timedelta(days=DAYS)
            start_date = next_day.strftime(date_format)

        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to request: %s", e)
        insert_query(date_alert_sql, (court_name, bench_id, start_date, str(e)))
        update_query(bump_alerts_sql, tracker_key)
        return False
def parser(base_url, court_name, response):
    """Store every new appellate order found in the listing table.

    The first table with class ``custum-tbl table table-bordered`` is read
    row by row. Cells 2-4 are taken as text, cell 5 is split into order
    number and date, cell 6 holds the appeal PDF link and cell 7 (unless it
    is ``-``) the linked AAR order. Rows that ``select_count_query`` reports
    as new are downloaded via ``request_pdf``, converted with
    ``pdf_to_text_api``, stored in the ``gst_appellate`` table and all files
    are passed to ``transfer_to_bucket``. Failures are written to ``alerts``.
    The loop stops when the tracker's ``emergency_exit`` flag equals 1.

    Args:
        base_url: Site prefix prepended to the appeal PDF link.
        court_name: Court identifier used for tracker/alert rows and files.
        response: HTML text of the listing page.

    Raises:
        IndexError: If the expected table or its ``<tbody>`` is missing.
    """
    alert_sql = "INSERT INTO alerts (court_name, case_id, error_message) VALUES (%s, %s, %s)"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s"
    bump_text_sql = "UPDATE tracker SET no_text=no_text+1 WHERE court_name=%s"
    bump_pdf_transfer_sql = "UPDATE tracker SET transferred_pdf=transferred_pdf+1 WHERE court_name=%s"
    bump_text_transfer_sql = "UPDATE tracker SET transferred_text=transferred_text+1 WHERE court_name=%s"
    text_dir = module_directory + "/../data_files/text_files/"

    table = BeautifulSoup(response, "html.parser").find_all(
        'table', {'class': 'custum-tbl table table-bordered'})[0]
    tbody = BeautifulSoup(str(table), "html.parser").find_all('tbody')[0]
    tr_list = BeautifulSoup(str(tbody), "html.parser").find_all('tr')
    update_query(
        "UPDATE tracker SET total_cases=%s, inserted_cases=0, no_pdf=0, no_text=0, transferred_pdf=0,"
        "transferred_text=0 WHERE court_name=%s", (str(len(tr_list)), court_name))

    for tr in tr_list:
        emergency_exit = select_one_query(
            "SELECT emergency_exit FROM tracker WHERE court_name=%s", court_name)
        if emergency_exit is not None and emergency_exit['emergency_exit'] == 1:
            break

        state = None
        name_of_appellant = None
        brief_of_order_in_appeal = None
        appeal_order_no = None
        appeal_order_date = None
        pdf_url = None
        aar_order_no = None
        aar_order_date = None
        aar_pdf_url = None

        td_list = BeautifulSoup(str(tr), "html.parser").find_all('td')
        for i, td in enumerate(td_list, start=1):
            if i == 2:
                state = escape_string(str(td.decode_contents()))
            elif i == 3:
                name_of_appellant = escape_string(str(td.decode_contents()))
            elif i == 4:
                brief_of_order_in_appeal = escape_string(str(td.decode_contents()))
            elif i == 5:
                appeal_order = str(td.decode_contents()).lower()
                if 'dated' in appeal_order:
                    appeal_order = appeal_order.split('dated')
                elif 'dt.' in appeal_order:
                    appeal_order = appeal_order.split('dt.')
                # NOTE(review): when neither separator is present the value
                # stays a string and the two lines below pick its first two
                # characters - confirm whether that fallback is intended.
                appeal_order_no = escape_string(appeal_order[0])
                appeal_order_date = escape_string(appeal_order[1])
            elif i == 6:
                a_tag = BeautifulSoup(str(td), "html.parser").a
                pdf_url = escape_string(str(base_url + a_tag.get('href')))
            elif i == 7:
                if str(td.decode_contents()) != '-':
                    a_tag = BeautifulSoup(str(td), "html.parser").a
                    aar_pdf_url = escape_string(str(a_tag.get('href')))
                    aar_order = str(a_tag.decode_contents()).lower()
                    if 'dated' in aar_order:
                        aar_order = aar_order.split('dated')
                    elif 'dt.' in aar_order:
                        aar_order = aar_order.split('dt.')
                    elif 'dtd.' in aar_order:
                        aar_order = aar_order.split('dtd.')
                    aar_order_no = escape_string(aar_order[0])
                    aar_order_date = escape_string(aar_order[1])

        if not select_count_query(str(court_name), str(appeal_order_no), 'appeal_order_date',
                                  appeal_order_date):
            # Row already handled: count it as done in every column.
            update_query(
                "UPDATE tracker SET inserted_cases=inserted_cases+1, no_pdf=no_pdf+1, no_text=no_text+1,"
                "transferred_pdf=transferred_pdf+1, transferred_text=transferred_text+1 WHERE court_name=%s",
                court_name)
            continue

        pdf_filename = slugify('appeal-' + appeal_order_no + appeal_order_date) + '.pdf'
        text_filename = slugify('appeal-' + appeal_order_no + appeal_order_date) + '.txt'
        pdf_filepath = request_pdf(pdf_url, pdf_filename, court_name, appeal_order_no)
        if pdf_filepath is not None:
            pdf_text_data = escape_string(str(pdf_to_text_api(pdf_filepath)))
            text_filepath = text_dir + court_name + '_' + text_filename
            # Close the file before it is transferred below; an open handle
            # may still hold unflushed data.
            with open(text_filepath, "w") as fw:
                fw.write(pdf_text_data)
        else:
            text_filepath = None
            pdf_text_data = None
            pdf_filename = None
            text_filename = None

        if aar_order_no is not None:
            aar_pdf_filename = slugify('aar-' + aar_order_no + aar_order_date) + '.pdf'
            aar_text_filename = slugify('aar-' + aar_order_no + aar_order_date) + '.txt'
            aar_pdf_filepath = request_pdf(aar_pdf_url, aar_pdf_filename, court_name, aar_order_no)
            if aar_pdf_filepath is not None:
                aar_text_data = escape_string(pdf_to_text_api(aar_pdf_filepath))
                aar_text_filepath = text_dir + court_name + '_' + aar_text_filename
                with open(aar_text_filepath, "w") as fw:
                    fw.write(aar_text_data)
            else:
                aar_text_filepath = None
                aar_text_data = None
        else:
            aar_pdf_filename = None
            aar_text_filename = None
            aar_text_data = None
            aar_pdf_filepath = None
            aar_text_filepath = None

        if insert_query(
                "INSERT INTO gst_appellate (case_id, appeal_order_no, appeal_order_date, name_of_appellant, "
                "brief_of_order_in_appeal, state, aar_order_no, aar_order_date, pdf_url, pdf_filename, "
                "text_filename, aar_pdf_url, arr_pdf_filename, aar_text_filename) VALUES "
                "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (appeal_order_no, appeal_order_no, appeal_order_date, name_of_appellant,
                 brief_of_order_in_appeal, state, aar_order_no, aar_order_date, pdf_url, pdf_filename,
                 text_filename, aar_pdf_url, aar_pdf_filename, aar_text_filename)):
            update_query("UPDATE tracker SET inserted_cases=inserted_cases+1 WHERE court_name=%s", court_name)
        else:
            update_query(bump_alerts_sql, court_name)
            insert_query(alert_sql, (court_name, appeal_order_no, 'Failed to insert court data in table'))

        if update_query("UPDATE gst_appellate SET text_data=%s WHERE case_id=%s",
                        (pdf_text_data, appeal_order_no)):
            update_query(bump_text_sql, court_name)
        else:
            insert_query(alert_sql, (court_name, appeal_order_no, 'Failed to insert text data.'))
            update_query(bump_alerts_sql, court_name)

        if update_query("UPDATE gst_appellate SET aar_text_data=%s WHERE case_id=%s",
                        (aar_text_data, appeal_order_no)):
            update_query(bump_text_sql, court_name)
        else:
            insert_query(alert_sql, (court_name, appeal_order_no, 'Failed to insert aar text data.'))
            update_query(bump_alerts_sql, court_name)

        if transfer_to_bucket('PDF_Files', pdf_filepath):
            update_query(bump_pdf_transfer_sql, court_name)
            os.remove(pdf_filepath)
        else:
            insert_query(alert_sql, (court_name, appeal_order_no, 'Failed to transfer pdf to bucket.'))
            update_query(bump_alerts_sql, court_name)

        if transfer_to_bucket('Text_Files', text_filepath):
            update_query(bump_text_transfer_sql, court_name)
            os.remove(text_filepath)
        else:
            insert_query(alert_sql, (court_name, appeal_order_no, 'Failed to transfer text to bucket.'))
            update_query(bump_alerts_sql, court_name)

        if aar_order_no is not None:
            if transfer_to_bucket('PDF_Files', aar_pdf_filepath):
                update_query(bump_pdf_transfer_sql, court_name)
                os.remove(aar_pdf_filepath)
            else:
                insert_query(alert_sql, (court_name, appeal_order_no, 'Failed to transfer aar pdf to bucket.'))
                update_query(bump_alerts_sql, court_name)

            if transfer_to_bucket('Text_Files', aar_text_filepath):
                update_query(bump_text_transfer_sql, court_name)
                os.remove(aar_text_filepath)
            else:
                insert_query(alert_sql, (court_name, appeal_order_no, 'Failed to transfer aar text to bucket.'))
                update_query(bump_alerts_sql, court_name)
def request_data(base_url, court_name):
    """Fetch the appellate-authority orders page and hand it to ``parser``.

    The page at ``base_url + "/orders-appellate-authority-advance-ruling"`` is
    requested up to ``NO_TRIES`` times. An attempt ends the retry loop when the
    response status is 200 and, after parsing, the tracker row reports
    ``total_cases == inserted_cases``.

    :param base_url: Site root; the listing path is appended to it.
    :param court_name: ``tracker.court_name`` value selecting the tracker row
        whose counters are read and updated.
    :return: ``True`` when the run completed or was stopped by the
        ``emergency_exit`` flag; ``False`` when an exception was caught (it is
        logged and written to ``alerts``).
    """
    try:
        url = base_url + "/orders-appellate-authority-advance-ruling"

        emergency_exit = select_one_query(
            "SELECT emergency_exit FROM tracker WHERE court_name=%s", (court_name))
        if emergency_exit['emergency_exit'] == 1:
            update_history_tracker(court_name)
            return True

        update_query(
            "UPDATE tracker SET no_tries=0, no_alerts=0 WHERE court_name=%s", (court_name))
        no_tries = select_one_query(
            "SELECT no_tries FROM tracker WHERE court_name=%s", (court_name))['no_tries']

        while no_tries < NO_TRIES:
            # Every attempt starts from clean per-run counters.
            update_query(
                "UPDATE tracker SET total_cases=0, inserted_cases=0, no_pdf=0, no_text=0, transferred_pdf=0, "
                "transferred_text=0 WHERE court_name=%s", (court_name))

            response = requests.request("GET", url, proxies=proxy_dict)
            if response.status_code == 200:
                parser(base_url, court_name, str(response.text))
                check_cases = select_one_query(
                    "SELECT total_cases, inserted_cases FROM tracker WHERE court_name=%s", (court_name))
                if check_cases['total_cases'] == check_cases['inserted_cases']:
                    break
            elif no_tries == NO_TRIES - 1:
                # Non-200 response on the final attempt. The previous test
                # (`no_tries == NO_TRIES`) could never hold inside this loop,
                # so no_nodata was never incremented.
                update_query(
                    "UPDATE tracker SET no_nodata=no_nodata+1 WHERE court_name=%s", (court_name))

            no_tries += 1
            update_query("UPDATE tracker SET no_tries=%s WHERE court_name=%s", (no_tries, court_name))
            if no_tries == NO_TRIES:
                # Attempts exhausted without a fully inserted page.
                insert_query(
                    "INSERT INTO alerts (court_name, error_message) VALUES (%s, %s)",
                    (court_name, 'Failed to get HTML.'))
                update_query(
                    "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))

        create_transfer_json(court_name)
        update_history_tracker(court_name)
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to request: %s", e)
        insert_query(
            "INSERT INTO alerts (court_name, error_message) VALUES (%s, %s)", (court_name, str(e)))
        update_query(
            "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))
        return False
def request_data(base_url, headers, court_name, bench_id):
    """Search one bench day by day and parse every result page.

    Starting at ``tracker.end_date`` for ``(court_name, bench_id)`` and
    advancing ``DAYS`` days per step while the date is not after the current
    time, a GET search restricted to that single date is sent to
    ``base_url + bench_id``. Each date is attempted up to ``NO_TRIES`` times.
    Dates are handled as ``"%m/%d/%Y"`` strings.

    :param base_url: URL prefix; ``bench_id`` is appended to form the request
        URL.
    :param headers: HTTP headers sent with every search request.
    :param court_name: ``tracker.court_name`` value of the tracker row.
    :param bench_id: ``tracker.bench`` value of the tracker row; also the URL
        suffix.
    :return: ``True`` when the date range was exhausted or the
        ``emergency_exit`` flag stopped the run; ``False`` when an exception
        was caught (it is logged and written to ``alerts``).
    """
    start_date = None
    try:
        response = None
        start_date = select_one_query(
            "SELECT end_date FROM tracker WHERE court_name=%s and bench=%s",
            (court_name, bench_id))['end_date']
        today = datetime.datetime.now()

        while datetime.datetime.strptime(str(start_date), "%m/%d/%Y") <= today:
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM tracker WHERE court_name=%s and bench=%s",
                (court_name, bench_id))
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker_bench(court_name, bench_id)
                return True

            update_query(
                "UPDATE tracker SET no_tries=0, no_alerts=0 WHERE court_name=%s and bench=%s",
                (court_name, bench_id))
            no_tries = select_one_query(
                "SELECT no_tries FROM tracker WHERE court_name=%s and bench=%s",
                (court_name, bench_id))['no_tries']

            while no_tries < NO_TRIES:
                # Every attempt starts from clean per-date counters.
                update_query(
                    "UPDATE tracker SET total_cases=0, inserted_cases=0, no_pdf=0, no_text=0, "
                    "transferred_pdf=0, transferred_text=0 WHERE court_name=%s and bench=%s",
                    (court_name, bench_id))

                querystring = {
                    "field_date_of_order_value[min][date]": str(start_date),
                    "field_date_of_order_value[max][date]": str(start_date),
                    "entity_type": "field_content_description_title",
                    "search_key_word": "",
                    "op": "Search"
                }
                response = requests.request("GET", base_url + bench_id, params=querystring,
                                            headers=headers, proxies=proxy_dict).text

                if "no result found" in response.lower():
                    update_query(
                        "UPDATE tracker SET no_nodata=no_nodata+1 WHERE court_name=%s and bench=%s",
                        (court_name, bench_id))
                    # An empty day is a final answer, not a failure: stop
                    # retrying so it is counted once and raises no alert.
                    break

                parser(court_name, bench_id, response)
                check_cases = select_one_query(
                    "SELECT total_cases, inserted_cases FROM tracker "
                    "WHERE court_name=%s AND bench=%s", (court_name, bench_id))
                if check_cases['total_cases'] == check_cases['inserted_cases']:
                    break

                no_tries += 1
                update_query(
                    "UPDATE tracker SET no_tries=%s WHERE court_name=%s and bench=%s",
                    (no_tries, court_name, bench_id))
                if no_tries == NO_TRIES:
                    # Attempts exhausted; keep the last response body for
                    # diagnosis.
                    insert_query(
                        "INSERT INTO alerts (court_name, bench, start_date, error_message) VALUES "
                        "(%s, %s, %s, %s)", (court_name, bench_id, start_date, str(response)))
                    update_query(
                        "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s and bench=%s",
                        (court_name, bench_id))

            # Persist progress so a later run resumes from this date.
            update_query(
                "UPDATE tracker SET end_date=%s WHERE court_name=%s and bench=%s",
                (start_date, court_name, bench_id))
            create_transfer_json_bench(court_name, bench_id)
            update_history_tracker_bench(court_name, bench_id)

            start_date = (
                datetime.datetime.strptime(str(start_date), "%m/%d/%Y") +
                datetime.timedelta(days=DAYS)).strftime("%m/%d/%Y")

        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to request: %s", e)
        insert_query(
            "INSERT INTO alerts (court_name, bench, start_date, error_message) VALUES (%s, %s, %s, %s)",
            (court_name, bench_id, start_date, str(e)))
        update_query(
            "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s and bench=%s",
            (court_name, bench_id))
        return False
def parser(html_str, court_name, headers):
    """Parse a search-result page and store every case row it lists.

    Rows are taken from the ``<table id="tables11">`` element(s). The first
    three rows are skipped, and when more than 50 rows remain after those
    three, the last two rows are dropped as well (presumably header and
    pagination rows -- confirm against the live page). For each remaining row
    the case id, parties, judgment date and PDF link are read from cells 2-5.

    When ``select_count_query`` is truthy for the row and a case id was found,
    the PDF is downloaded through ``request_pdf``, converted to text, inserted
    into ``punjab_haryana`` and both files are handed to
    ``transfer_to_bucket``. Otherwise all per-row tracker counters are bumped
    without doing any work.

    The tracker's ``emergency_exit`` flag is checked before every row and
    stops the loop when it equals 1.

    :param html_str: HTML of the result page.
    :param court_name: ``tracker.court_name`` value; also used as a prefix in
        the text file name.
    :param headers: HTTP headers forwarded to ``request_pdf``.
    :return: ``None``.

    Note: PDF links are built from the module-level ``base_url``; it is not a
    parameter of this function.
    """
    soup = BeautifulSoup(html_str, "html.parser")
    table_list = soup.find_all('table', {'id': 'tables11'})
    table_soup = BeautifulSoup(str(table_list), "html.parser")
    tr_list = table_soup.find_all('tr')
    if int(len(tr_list) - 3) > 50:
        tr_list = tr_list[:-2]

    update_query("UPDATE tracker SET total_cases=%s, inserted_cases=0, no_pdf=0, no_text=0, transferred_pdf=0,"
                 "transferred_text=0 WHERE court_name=%s", (str(len(tr_list) - 3), court_name))

    tr_count = 0
    for tr in tr_list:
        emergency_exit = select_one_query("SELECT emergency_exit FROM tracker WHERE court_name=%s", (court_name))
        if emergency_exit is not None:
            if emergency_exit['emergency_exit'] == 1:
                break

        tr_count += 1
        if tr_count <= 3:
            continue

        case_id = None
        petitioner = None
        respondent = None
        judgment_date = None
        pdf_url = None

        table_soup = BeautifulSoup(str(tr), "html.parser")
        td_list = table_soup.find_all('td')

        # Cells are addressed by 1-based position; cell 1 is ignored.
        i = 0
        for td in td_list:
            i += 1
            if i == 1:
                continue
            elif i == 2:
                a_tag = BeautifulSoup(str(td), "html.parser").a
                case_id = escape_string(str(a_tag.text))
            elif i == 3:
                party = str(td.decode_contents()).split("Vs")
                petitioner = escape_string(str(party[0]))
                respondent = escape_string(str(party[1]))
            elif i == 4:
                judgment_date = escape_string(str(td.decode_contents()))
            elif i == 5:
                if str(td.decode_contents()).lower() != 'file not available':
                    # The link lives in an onclick="window.open('...')" handler.
                    a_link = BeautifulSoup(str(td), "html.parser").a.get('onclick')
                    a_formatted = str(str(a_link).replace("window.open('", "")).replace("')", "")
                    pdf_url = escape_string(base_url + "/" + a_formatted)

        if select_count_query(str(court_name), str(case_id), 'judgment_date', judgment_date) and case_id is not None:
            pdf_filename = escape_string(slugify(case_id + '-' + judgment_date)) + '.pdf'
            text_filename = escape_string(slugify(case_id + '-' + judgment_date)) + '.txt'
            pdf_filepath = request_pdf(pdf_url, headers, pdf_filename, court_name, case_id)

            if pdf_filepath is not None:
                pdf_text_data = escape_string(str(pdf_to_text_api(pdf_filepath)))
                text_filepath = module_directory + "/../data_files/text_files/" + court_name + "_" + text_filename
                # Close (and therefore flush) the file before it is uploaded
                # and removed below; the handle used to be left open.
                with open(text_filepath, "w") as fw:
                    fw.write(pdf_text_data)
            else:
                text_filepath = None
                pdf_text_data = None
                pdf_filename = None
                text_filename = None

            if insert_query(
                    "INSERT INTO punjab_haryana (case_id, judgment_date, petitioner, respondent, pdf_url, "
                    "pdf_filename, text_filename) VALUES (%s, %s, %s, %s, %s, %s, %s)",
                    (case_id, judgment_date, petitioner, respondent, pdf_url, pdf_filename, text_filename)):
                update_query("UPDATE tracker SET inserted_cases=inserted_cases+1 WHERE court_name=%s", (court_name))
            else:
                update_query("UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))
                insert_query("INSERT INTO alerts (court_name, case_id, error_message) VALUES (%s, %s, %s)",
                             (court_name, case_id, 'Failed to insert court data in table'))

            if update_query("UPDATE punjab_haryana SET text_data=%s WHERE case_id=%s", (pdf_text_data, case_id)):
                update_query("UPDATE tracker SET no_text=no_text+1 WHERE court_name=%s", (court_name))
            else:
                insert_query("INSERT INTO alerts (court_name, case_id, error_message) VALUES (%s, %s, %s)",
                             (court_name, case_id, 'Failed to insert text data.'))
                update_query("UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))

            # Local copies are deleted only after a successful upload.
            if transfer_to_bucket('PDF_Files', pdf_filepath):
                update_query("UPDATE tracker SET transferred_pdf=transferred_pdf+1 WHERE court_name=%s",
                             (court_name))
                os.remove(pdf_filepath)
            else:
                insert_query("INSERT INTO alerts (court_name, case_id, error_message) VALUES (%s, %s, %s)",
                             (court_name, case_id, 'Failed to transfer PDF to bucket.'))
                update_query("UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))

            if transfer_to_bucket('Text_Files', text_filepath):
                update_query("UPDATE tracker SET transferred_text=transferred_text+1 WHERE court_name=%s",
                             (court_name))
                os.remove(text_filepath)
            else:
                insert_query("INSERT INTO alerts (court_name, case_id, error_message) VALUES (%s, %s, %s)",
                             (court_name, case_id, 'Failed to transfer text to bucket.'))
                update_query("UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))
        else:
            # Row not processed: advance every counter so the caller's
            # total_cases == inserted_cases check can still succeed.
            update_query("UPDATE tracker SET inserted_cases=inserted_cases+1, no_pdf=no_pdf+1, no_text=no_text+1,"
                         "transferred_pdf=transferred_pdf+1, transferred_text=transferred_text+1 WHERE court_name=%s",
                         (court_name))
def request_data(court_name, headers):
    """Search the court site one date at a time and parse each result page.

    Beginning at ``tracker.end_date`` for ``court_name`` and moving forward
    ``DAYS`` days per step while the date is not after the current time, a
    search form limited to that single date is POSTed to
    ``base_url + "/home.php"``. Each date is attempted up to ``NO_TRIES``
    times. Dates are handled as ``"%d/%m/%Y"`` strings.

    :param court_name: ``tracker.court_name`` value of the tracker row.
    :param headers: HTTP headers sent with the search and passed on to
        ``parser``.
    :return: ``True`` when the date range was exhausted or the
        ``emergency_exit`` flag stopped the run; ``False`` when an exception
        was caught (it is logged and written to ``alerts``).
    """
    date_format = "%d/%m/%Y"
    bump_alerts_sql = "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s"
    record_alert_sql = "INSERT INTO alerts (court_name, start_date, error_message) VALUES (%s, %s, %s)"

    start_date = None
    response = None
    try:
        url = base_url + "/home.php"
        start_date = select_one_query("SELECT end_date FROM tracker WHERE court_name=%s",
                                      (court_name))['end_date']
        now = datetime.datetime.now()

        while datetime.datetime.strptime(str(start_date), date_format) <= now:
            stop_flag = select_one_query("SELECT emergency_exit FROM tracker WHERE court_name=%s", (court_name))
            if stop_flag['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            update_query("UPDATE tracker SET no_tries=0, no_alerts=0 WHERE court_name=%s", (court_name))
            attempts = select_one_query("SELECT no_tries FROM tracker WHERE court_name=%s",
                                        (court_name))['no_tries']

            while attempts < NO_TRIES:
                # Fresh per-date counters for every attempt.
                update_query("UPDATE tracker SET total_cases=0, inserted_cases=0, no_pdf=0, no_text=0, "
                             "transferred_pdf=0, transferred_text=0 WHERE court_name=%s", (court_name))

                search_day = str(start_date)
                querystring = {"search_param": "free_text_search_judgment"}
                payload = ("t_case_type=&t_case_year=&submit=Search%20Case"
                           "&from_date=" + search_day +
                           "&to_date=" + search_day +
                           "&pet_name=&res_name=&free_text=JUSTICE")
                response = requests.request("POST", url, data=payload, headers=headers,
                                            params=querystring, proxies=proxy_dict).text

                if "no case found" in response.lower():
                    # Nothing listed for this date; count it and move on.
                    update_query("UPDATE tracker SET no_nodata=no_nodata+1 WHERE court_name=%s", (court_name))
                    break

                parser(response, court_name, headers)
                counts = select_one_query(
                    "SELECT total_cases, inserted_cases FROM tracker WHERE court_name=%s", (court_name))
                if counts['total_cases'] == counts['inserted_cases']:
                    break

                attempts += 1
                update_query("UPDATE tracker SET no_tries=%s WHERE court_name=%s", (attempts, court_name))
                if attempts == NO_TRIES:
                    # Out of attempts; keep the last response body for
                    # diagnosis.
                    insert_query(record_alert_sql, (court_name, start_date, str(response)))
                    update_query(bump_alerts_sql, (court_name))

            # Persist progress so a later run resumes from this date.
            update_query("UPDATE tracker SET end_date=%s WHERE court_name=%s", (start_date, court_name))
            create_transfer_json(court_name)
            update_history_tracker(court_name)

            next_day = datetime.datetime.strptime(str(start_date), date_format) + datetime.timedelta(days=DAYS)
            start_date = next_day.strftime(date_format)

        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to request: %s", e)
        insert_query(record_alert_sql, (court_name, start_date, str(e)))
        update_query(bump_alerts_sql, (court_name))
        return False
def create_transfer_json(court_name):
    """Export rows not yet flagged ``is_json`` to JSON files and upload them.

    Rows of the table named ``court_name`` with ``is_json=0`` are read in
    pages of 1000. Each non-empty page is written to
    ``data_files/json_files/new-<court_name>-<n>.json`` and passed to
    ``transfer_to_bucket``. On success every row of the page is flagged
    ``is_json=1``, the tracker's JSON counters are bumped and the local file
    is removed; on failure an alert is recorded and the file is left in place.

    :param court_name: Name of the data table; also the
        ``tracker.court_name`` value of the tracker row.
    :return: ``True`` when all pages were processed, ``False`` when an
        exception was caught (it is logged and written to ``alerts``).
    """
    db = db_connect()
    try:
        cursor = db.cursor()
        # The table name cannot be a bound parameter, hence the concatenation.
        cursor.execute("select count(id) as num_rows from " + str(court_name) + " WHERE is_json=0")
        result = cursor.fetchall()
        cursor.close()

        no_rows = result[0]['num_rows']
        no_of_data_per_iteration = 1000
        no_of_iteration = floor(int(no_rows) / no_of_data_per_iteration) + 1
        # Continue file numbering after the files produced by earlier runs.
        j_count = select_one_query(
            "SELECT no_json FROM tracker WHERE court_name=%s", (court_name))['no_json']

        # NOTE(review): rows are flagged is_json=1 while paging with OFFSET
        # over "is_json=0". Whether later pages skip rows depends on whether
        # this connection sees those updates (transaction isolation) -- confirm.
        for i in range(0, no_of_iteration):
            cursor = db.cursor()
            cursor.execute("SELECT * FROM " + str(court_name) + " WHERE is_json=0 LIMIT " +
                           str(no_of_data_per_iteration) + " OFFSET " + str(i * no_of_data_per_iteration))
            result = cursor.fetchall()
            cursor.close()

            if result:
                file_path = module_directory + "/../data_files/json_files/new-" + str(
                    court_name) + "-" + str(i + 1 + j_count) + ".json"
                # Close (and therefore flush) the file before it is uploaded
                # and removed; the handle used to be left open.
                with open(file_path, "w") as fw:
                    fw.write(json.dumps(result))

                if transfer_to_bucket('JSON_Files', file_path):
                    for record in result:
                        update_query("UPDATE " + court_name + " SET is_json=1 WHERE id='" +
                                     str(record['id']) + "'")
                    update_query(
                        "UPDATE tracker SET no_json=no_json+1, transferred_json=transferred_json+1 "
                        "WHERE court_name=%s", (court_name))
                    os.remove(file_path)
                else:
                    insert_query(
                        "INSERT INTO alerts (court_name, error_message) VALUES (%s, %s)",
                        (court_name, 'JSON Failed to transfer to bucket.'))
                    update_query(
                        "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))

        return True
    except Exception as e:
        insert_query(
            "INSERT INTO alerts (court_name, error_message) VALUES (%s, %s)", (court_name, 'JSON Error'))
        update_query(
            "UPDATE tracker SET no_alerts=no_alerts+1 WHERE court_name=%s", (court_name))
        traceback.print_exc()
        logging.error("Failed select query: %s", e)
        return False
    finally:
        # Single close for both the success and the failure path.
        db.close()