Code example #1
0
def parse_html(html_str, court_name, m_sideflg):
    """Parse the judgment search-result page and store each case row.

    Walks the first ``<table width="100%">`` inside the first ``<form>``,
    skipping header/filler rows, extracts per-case fields, downloads the
    judgment PDF, and inserts one row per case into the ``court_name``
    table.

    :param html_str: raw HTML of the result page.
    :param court_name: court identifier; doubles as the target DB table
        name and the Tracker key.
    :param m_sideflg: side flag stored verbatim with every inserted row.
    :return: True on success or emergency exit; False if parsing failed
        (Tracker.No_Error is incremented).
    """
    try:
        soup = BeautifulSoup(html_str, "html.parser")
        table_soup = BeautifulSoup(str(soup.find_all('form')[0]),
                                   "html.parser")
        table_soup = BeautifulSoup(
            str(table_soup.find_all('table', {"width": "100%"})[0]),
            "html.parser")
        tr_list = table_soup.find_all('tr')

        tr_count = 0
        for tr in tr_list:
            # Re-read the kill switch on every row so a long parse can be
            # stopped externally via the Tracker table.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit is not None:
                if emergency_exit['emergency_exit'] == 1:
                    break

            tr_count += 1
            # First four rows are header/filler; even rows are separators.
            if tr_count <= 4 or tr_count % 2 == 0:
                continue

            # The literal string "NULL" is the missing-value sentinel (it
            # ends up inside quoted SQL, not as SQL NULL).
            case_no = "NULL"
            petitioner = "NULL"
            respondent = "NULL"
            judgment_date = "NULL"
            coram = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"
            # insert_check = False

            tr_soup = BeautifulSoup(str(tr), "html.parser")
            td_list = tr_soup.find_all('td')

            i = 0
            for td in td_list:
                i += 1

                # Skip the serial-number column, the trailing column, and
                # the "LBR : Larger Benches Referred Matter" legend cell.
                if i == 1 or i == 6 or str(td.decode_contents()).replace("\n", "").strip() == \
                        '<font color="blue">LBR  : Larger Benches Referred Matter</font>':
                    continue

                if i == 2:
                    coram = escape_string(str(td.decode_contents()))

                if i == 3:
                    # Cell layout: petitioner <b>...</b> respondent.
                    data1 = escape_string(str(td.decode_contents()))
                    data1_list = data1.split("<b>")
                    petitioner = data1_list[0]
                    respondent = str(data1_list[1]).split("</b>")[1]

                if i == 4:
                    data2 = escape_string(str(td.decode_contents()))
                    data2_list = data2.split("<br/>")
                    judgment_date = data2_list[0]

                if i == 5:
                    # The anchor holds both the case number (text) and the
                    # relative PDF link (href).
                    a_tag = BeautifulSoup(str(td), "html.parser").a
                    pdf_file = base_url + a_tag.get('href')
                    case_no = str(a_tag.text).replace("\n", "")
                    pdf_data = escape_string(
                        request_pdf(pdf_file, case_no, court_name))

                # if select_count_query(str(court_name), str(case_no), 'judgment_date', judgment_date):
                #     insert_check = True

            # if case_no != "NULL" and insert_check:
            if case_no != "NULL":
                # NOTE(review): SQL is built by string concatenation;
                # values pass through escape_string but consider
                # parameterized queries to rule out injection.
                sql_query = "INSERT INTO " + str(court_name) + " (m_sideflg, case_no, petitioner, respondent, " \
                                                               "judgment_date, coram, pdf_file, pdf_filename) VALUE " \
                                                               "('" + m_sideflg +\
                            "', '" + case_no + "', '" + petitioner + "', '" + respondent + "', '" + judgment_date + \
                            "', '" + coram + "', '" + pdf_file + "', '" + court_name + "_" + slugify(case_no) + ".pdf')"
                insert_query(sql_query)

                update_query("UPDATE " + court_name + " SET pdf_data = '" +
                             str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query(
                    "UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '"
                    + str(court_name) + "'")

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to parse the html: %s", e)
        update_query(
            "UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" +
            str(court_name) + "'")
        return False
Code example #2
0
def parse_html(html_str, court_name, bench, child_url):
    """Parse the orders list inside ``<div id="text">`` and store each case.

    :param html_str: raw HTML of the listing page.
    :param court_name: court identifier; DB table name and Tracker key.
    :param bench: bench code stored with every inserted row.
    :param child_url: URL path segment joined between ``base_url`` and
        each PDF link found in the table.
    :return: True on success or emergency exit; False on failure
        (Tracker.No_Error is incremented).
    """
    try:
        soup = BeautifulSoup(html_str, "html.parser")
        div_soup = BeautifulSoup(str(soup.find_all('div', {'id': 'text'})[0]),
                                 'html.parser')
        tr_list = div_soup.find_all('tr')

        tr_count = 0
        for tr in tr_list:

            # Kill switch re-checked per row via the Tracker table.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit is not None:
                if emergency_exit['emergency_exit'] == 1:
                    break

            tr_count += 1
            if tr_count == 1:
                continue  # header row

            # The literal string "NULL" is the missing-value sentinel.
            case_no = "NULL"
            date_of_order = "NULL"
            description = "NULL"
            section = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"
            # insert_check = False

            tr_soup = BeautifulSoup(str(tr), "html.parser")
            td_list = tr_soup.find_all('td')

            i = 0
            for td in td_list:
                i += 1
                if i == 1:
                    case_no = escape_string(
                        str(td.text).strip().replace("\n", ""))

                if i == 2:
                    date_of_order = escape_string(
                        str(td.text).strip().replace("\n", ""))

                # if select_count_query(str(court_name), str(case_no), 'date_of_order', date_of_order):
                #     insert_check = True

                if i == 3:
                    # Description cell also carries the PDF link inside
                    # <font><a href=...>.
                    description = escape_string(str(td.text).strip())
                    a_tag = BeautifulSoup(str(td), "html.parser").font.a
                    pdf_url = base_url + child_url + a_tag.get('href')
                    pdf_file = escape_string(pdf_url)
                    pdf_data = escape_string(
                        request_pdf(pdf_url, case_no, court_name))

                if i == 4:
                    section = str(td.text)

            # if case_no != "NULL" and insert_check:
            if case_no != "NULL":
                # NOTE(review): SQL built by string concatenation;
                # consider parameterized queries.
                sql_query = "INSERT INTO " + str(court_name) + " (case_no, date_of_order, description, section, " \
                                                               "pdf_file, bench_code, pdf_filename) VALUE ('" + \
                            case_no + "', '" + date_of_order + "', '" + description + "', '" + section + "', '" + \
                            pdf_file + "', '" + str(bench) + "', '" + court_name + "_" + slugify(case_no) + ".pdf')"
                insert_query(sql_query)

                update_query("UPDATE " + court_name + " SET pdf_data = '" +
                             str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query(
                    "UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '"
                    + str(court_name) + "'")

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to parse the html: %s", e)
        update_query(
            "UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" +
            str(court_name) + "'")
        return False
def parse_html(html_str, court_name):
    """Parse the appeals result table and store each appeal row.

    Extracts appeal number, parties and filer from the listing table,
    follows the per-appeal details link (``details_parse``) for order
    date / PDF / order type, downloads the PDF, and writes one row per
    appeal to the ``court_name`` table.

    :param html_str: raw HTML of the listing page.
    :param court_name: court identifier; DB table name and Tracker key.
    :return: True on success or emergency exit; False on failure
        (Tracker.No_Error is incremented).
    """
    try:
        soup = BeautifulSoup(html_str, "html.parser")
        table_list = soup.find_all('table', {'style': 'width:100%; margin-top: 10px; font-size: 12px;'})
        table_soup = BeautifulSoup(str(table_list), "html.parser")
        tr_list = table_soup.find_all('tr')

        tr_count = 0
        for tr in tr_list:

            # Kill switch re-checked per row via the Tracker table.
            emergency_exit = select_one_query("SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit is not None:
                if emergency_exit['emergency_exit'] == 1:
                    break

            tr_count += 1
            if tr_count == 1:
                continue  # header row

            # The literal string "NULL" is the missing-value sentinel.
            appeal_no = "NULL"
            appellant = "NULL"
            respondent = "NULL"
            date_of_order = "NULL"
            filed_by = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"
            order_type = "NULL"
            # insert_check = False

            tr_soup = BeautifulSoup(str(tr), "html.parser")
            td_list = tr_soup.find_all('td')

            i = 0
            for td in td_list:
                i += 1
                if i == 1:
                    appeal_no = escape_string(str(td.text).strip().replace("\n", ""))

                if i == 2:
                    filed_by = escape_string(str(td.text).strip().replace('\n', ''))

                if i == 3:
                    appellant = escape_string(str(td.text).strip().replace('\n', ''))

                if i == 4:
                    respondent = escape_string(str(td.text).strip().replace('\n', ''))

                if i == 5:
                    # Details page carries order date, PDF link and type.
                    a_tag = BeautifulSoup(str(td), "html.parser").a
                    details_url = a_tag.get('href')
                    date_of_order, pdf_file, order_type = details_parse(details_url, appeal_no, court_name)

                    # if select_count_query_other(str(court_name), 'appeal_no', str(appeal_no), 'date_of_order',
                    #                             date_of_order):
                    #     insert_check = True

                    pdf_data = escape_string(str(request_pdf(pdf_file, appeal_no, court_name)).replace("'", ""))

            # if appeal_no != "NULL" and insert_check:
            if appeal_no != "NULL":
                # BUG FIX: the original INSERT listed six columns
                # (..., bench_code, pdf_filename) but supplied only five
                # values, so the statement could never succeed. This
                # parser has no bench information, so bench_code is
                # dropped to make columns and values match.
                # NOTE(review): SQL built by concatenation; consider
                # parameterized queries.
                sql_query = "INSERT INTO " + str(court_name) + " (appeal_no, appellant, respondent, filed_by, " \
                                                               "pdf_filename) VALUE ('" + appeal_no + \
                            "', '" + appellant + "', '" + respondent + "', '" + filed_by + "', '" + court_name + \
                            "_" + slugify(appeal_no) + ".pdf')"
                insert_query(sql_query)

                update_query("UPDATE " + court_name + " SET pdf_data = '" + str(pdf_data) + "', date_of_order ='" +
                             date_of_order + "', pdf_file = '" + pdf_file + "', order_type = '" + order_type +
                             "' WHERE appeal_no = '" + str(appeal_no) + "'")
                update_query("UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '" + str(court_name) + "'")

        return True

    except Exception as e:
        logging.error("Failed to parse the html: %s", e)
        update_query("UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" + str(court_name) + "'")
        return False
Code example #4
0
def request_data(court_name, m_sideflg, start_date, end_date_):
    """Crawl judgment listings in 180-day windows starting at start_date.

    Repeatedly POSTs the date-range search form, advancing start_date by
    180 days per iteration, until the window passes end_date_ (with one
    extra 180-day grace window). Each response is delegated to
    parse_html for extraction and storage.

    :param court_name: court identifier; Tracker key.
    :param m_sideflg: side flag forwarded to the search form and parser.
    :param start_date: first window start, "DD-MM-YYYY".
    :param end_date_: crawl end bound, "DD-MM-YYYY".
    :return: True when the crawl finished or the emergency-exit flag was
        set; False when the request loop raised.
    """
    try:
        url = base_url + "ordqryrepact_action.php"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }

        i = 0
        while True:
            i += 1

            # Kill switch checked each window via the Tracker table.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            # Window end = start + 180 days.
            end_date = (
                datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                datetime.timedelta(days=180)).strftime("%d-%m-%Y")

            # Stop once the window end overshoots end_date_ by more than
            # one full 180-day window.
            if datetime.datetime.strptime(end_date_, "%d-%m-%Y") + datetime.timedelta(days=180) < \
                    datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(start_date) + "', End_Date = '" + str(end_date) +
                         "' WHERE Name = '" + str(court_name) + "'")

            payload = "pageno=1" \
                      "&frmaction=" \
                      "&m_sideflg=" + str(m_sideflg) + \
                      "&actcode=0" \
                      "&frmdate=" + str(start_date) + \
                      "&todate=" + str(end_date)

            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers)
            res = response.text

            if "invalid inputs given" in res.lower():
                logging.error("NO data Found for start date: " +
                              str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")

                start_date = end_date
                continue

            if not parse_html(res, court_name, m_sideflg):
                logging.error("Failed to parse data from date: " +
                              str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Code example #5
0
def request_data(court_name, start_date, end_date_):
    """Crawl monthly judgment listings from an ASP.NET WebForms page.

    Iterates month-by-month between start_date and end_date_, scraping
    __VIEWSTATE/__EVENTVALIDATION for each postback, and delegates every
    response to parse_html.

    :param court_name: court identifier; Tracker key.
    :param start_date: crawl start date string ending in a 4-digit year.
    :param end_date_: crawl end date string ending in a 4-digit year.
    :return: True when finished or emergency-exited; False on error.
    """
    try:
        # The site has no data before 2010; flag the tracker and, when
        # the whole requested range is pre-2010, stop immediately.
        if int(start_date[-4:]) < 2010:
            update_query(
                "UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '"
                + str(court_name) + "'")
            if int(end_date_[-4:]) < 2010:
                update_history_tracker(court_name)
                return True

        # month_list_ apparently yields strings shaped "MMYYYY" (month
        # prefix, 4-digit year suffix) — inferred from the slicing below;
        # confirm against its definition.
        for month_year in month_list_([str(start_date), str(end_date_)]):
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            data = {
                'ctl00$CPHBody$DropDownListYear':
                str(month_year[-4:]),
                'ctl00$CPHBody$DropDownListMonth':
                str(month_year[:-4]).lstrip("0"),
                'ctl00$CPHBody$TextBox1':
                '',
                'ctl00$CPHBody$SM1':
                'ctl00$CPHBody$SM1|ctl00$CPHBody$DropDownListMonth'
            }

            with requests.Session() as s:
                # GET once per month to harvest the WebForms hidden
                # fields required for a valid postback.
                page = s.get(base_url + 'judgement.aspx')
                soup = BeautifulSoup(page.content, "html.parser")

                data["__VIEWSTATE"] = soup.select_one("#__VIEWSTATE")["value"]
                data["__VIEWSTATEGENERATOR"] = soup.select_one(
                    "#__VIEWSTATEGENERATOR")["value"]
                data["__EVENTVALIDATION"] = soup.select_one(
                    "#__EVENTVALIDATION")["value"]

                update_query("UPDATE Tracker SET Start_Date = '" +
                             str(month_year) + "' WHERE Name = '" +
                             str(court_name) + "'")

                response = s.post(base_url + 'judgement.aspx', data=data)
                res = response.text

                if "no records were found." in res.lower(
                ) or "application error" in res.lower():
                    logging.error("NO data Found for start date: " +
                                  str(month_year))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                        + str(court_name) + "'")
                    continue

                if not parse_html(res, court_name):
                    logging.error("Failed to parse data")

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Code example #6
0
def parse_html(html_str, court_name, dc):
    """Parse a judgments table and store each case row.

    Column layout: 1 = serial number (skipped), 2 = case-number link to
    the PDF, 3 = judgment date, 4 = parties (span 1 petitioner, span 3
    respondent), 5 = corrigendum.

    :param html_str: raw HTML containing the result rows.
    :param court_name: court identifier; DB table name and Tracker key.
    :param dc: bench code, inserted unquoted (numeric column) with each
        row.
    :return: True on success or emergency exit; False on failure
        (Tracker.No_Error is incremented).
    """
    try:
        soup = BeautifulSoup(html_str, "html.parser")
        tr_list = soup.find_all('tr')

        tr_count = 0
        for tr in tr_list:

            # Kill switch re-checked per row via the Tracker table.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit is not None:
                if emergency_exit['emergency_exit'] == 1:
                    break

            tr_count += 1
            if tr_count == 1:
                continue  # header row

            # The literal string "NULL" is the missing-value sentinel.
            case_no = "NULL"
            petitioner = "NULL"
            respondent = "NULL"
            judgment_date = "NULL"
            corrigendum = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"
            # insert_check = False

            tr_soup = BeautifulSoup(str(tr), "html.parser")
            td_list = tr_soup.find_all('td')

            i = 0
            for td in td_list:
                i += 1
                if i == 1:
                    continue  # serial-number column

                if i == 2:
                    # The anchor holds both the case number (text) and
                    # the relative PDF link (href).
                    a_tag = BeautifulSoup(str(td), "html.parser").a
                    pdf_file = base_url + a_tag.get('href')
                    case_no = str(a_tag.text).replace("\n", "")
                    pdf_data = escape_string(
                        request_pdf(pdf_file, case_no, court_name))

                if i == 3:
                    span_tag = BeautifulSoup(str(td), "html.parser").span
                    judgment_date = escape_string(
                        str(span_tag.decode_contents()))

                # if select_count_query(str(court_name), str(case_no), 'judgment_date', judgment_date):
                #     insert_check = True

                if i == 5:
                    span_tag = BeautifulSoup(str(td), "html.parser").span
                    corrigendum = escape_string(str(
                        span_tag.decode_contents()))

                if i == 4:
                    # Parties cell: spans alternate petitioner / "Vs" /
                    # respondent, so spans 1 and 3 are taken.
                    td_soup = BeautifulSoup(str(td), "html.parser")
                    span_list = td_soup.find_all('span')

                    j = 0
                    for span in span_list:
                        j += 1

                        if j == 1:
                            petitioner = escape_string(
                                str(span.decode_contents()))
                        if j == 3:
                            respondent = escape_string(
                                str(span.decode_contents()))

            # if case_no != "NULL" and insert_check:
            if case_no != "NULL":
                # NOTE(review): SQL built via string concatenation; dc is
                # interpolated unquoted — consider parameterized queries.
                sql_query = "INSERT INTO " + str(court_name) + " (case_no, petitioner, respondent, judgment_date, " \
                                                               "corrigendum, pdf_file, bench_code, pdf_filename) VALUE"\
                                                               " ('" + case_no + "', '" + petitioner + "', '" + \
                            respondent + "', '" + judgment_date + "', '" + corrigendum + "', '" + pdf_file + "', " + \
                            str(dc) + ", '" + court_name + "_" + slugify(case_no) + ".pdf')"
                insert_query(sql_query)

                update_query("UPDATE " + court_name + " SET pdf_data = '" +
                             str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query(
                    "UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '"
                    + str(court_name) + "'")

        return True

    except Exception as e:
        logging.error("Failed to parse the html: %s", e)
        update_query(
            "UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" +
            str(court_name) + "'")
        return False
Code example #7
0
def parse_html(html_str, court_name, bench):
    """Parse a case-details listing where each record spans nine <tr> rows.

    The page lays each case out vertically: row 1 carries the case number
    and judgment date (plus optional PDF link), rows 2-6 carry parties,
    advocates and judge, row 7 carries the disposal date and triggers the
    INSERT, and rows 8-9 are separators after which per-record state is
    reset for the next case.

    :param html_str: raw HTML; bold/break markup is stripped up front so
        the cells split cleanly.
    :param court_name: court identifier; DB table name and Tracker key.
    :param bench: bench label stored with every inserted row.
    :return: True on success or emergency exit; False on failure
        (Tracker.No_Error is incremented).
    """
    try:
        soup = BeautifulSoup(
            html_str.replace("<b>", "").replace("</b>", "").replace(
                "<br>",
                "").replace("</br>",
                            "").replace("<b",
                                        "").replace("<br< p="
                                                    "></br<>", ""),
            "html.parser")
        tr_list = soup.find_all('tr')
        del tr_list[0:7]  # drop the page header rows

        # The literal string "NULL" is the missing-value sentinel.
        case_no = "NULL"
        petitioner = "NULL"
        respondent = "NULL"
        petitioner_advocate = "NULL"
        respondent_advocate = "NULL"
        judgment_date = "NULL"
        disposal_date = "NULL"
        judge_name = "NULL"
        pdf_data = "NULL"
        pdf_file = "NULL"

        tr_count = 0
        for tr in tr_list:
            tr_count += 1

            # Kill switch re-checked per row via the Tracker table.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit is not None:
                if emergency_exit['emergency_exit'] == 1:
                    break

            # insert_check = False
            if tr_count == 1:
                # Row 1: case number (col 3), judgment date / PDF (col 4).
                td_count = 0
                tr_soup = BeautifulSoup(str(tr), "html.parser")
                td_list = tr_soup.find_all('td')
                for td in td_list:
                    td_count += 1
                    if td_count == 3:
                        case_no = escape_string(str(td.text).strip())

                    if td_count == 4:
                        td_text = str(td.text)
                        if td_text.find("NA") == -1:
                            a_tag = BeautifulSoup(str(td), "html.parser").a
                            if a_tag:
                                a_link = a_tag.get('href')
                                pdf_data = escape_string(
                                    request_pdf(base_url + a_link, case_no,
                                                court_name))
                                pdf_file = base_url + a_link

                        judgment_date = escape_string(
                            td_text.replace("Judgement", "").replace(
                                "Orders", "").replace("r", "").replace(
                                    "(AFR)", "").replace("NA", "").strip())

            # if select_count_query_bench(str(court_name), str(case_no), bench, 'judgment_date', judgment_date):
            #     insert_check = True

            if tr_count == 2:
                # Row 2: petitioner (col 3).
                td_count = 0
                tr_soup = BeautifulSoup(str(tr), "html.parser")
                td_list = tr_soup.find_all('td')
                for td in td_list:
                    td_count += 1
                    if td_count == 3:
                        petitioner = escape_string(str(td.text).strip())

            if tr_count == 3:
                # Row 3: respondent (col 3).
                td_count = 0
                tr_soup = BeautifulSoup(str(tr), "html.parser")
                td_list = tr_soup.find_all('td')
                for td in td_list:
                    td_count += 1
                    if td_count == 3:
                        respondent = escape_string(str(td.text).strip())

            if tr_count == 4:
                # Row 4: petitioner's advocate (col 3).
                td_count = 0
                tr_soup = BeautifulSoup(str(tr), "html.parser")
                td_list = tr_soup.find_all('td')
                for td in td_list:
                    td_count += 1
                    if td_count == 3:
                        petitioner_advocate = escape_string(
                            str(td.text).strip())

            if tr_count == 5:
                # Row 5: respondent's advocate (col 3).
                td_count = 0
                tr_soup = BeautifulSoup(str(tr), "html.parser")
                td_list = tr_soup.find_all('td')
                for td in td_list:
                    td_count += 1
                    if td_count == 3:
                        respondent_advocate = escape_string(
                            str(td.text).strip())

            if tr_count == 6:
                # Row 6: judge name (col 3).
                td_count = 0
                tr_soup = BeautifulSoup(str(tr), "html.parser")
                td_list = tr_soup.find_all('td')
                for td in td_list:
                    td_count += 1
                    if td_count == 3:
                        judge_name = escape_string(str(td.text).strip())

            if tr_count == 7:
                # Row 7: disposal date (col 3); record is complete here.
                td_count = 0
                tr_soup = BeautifulSoup(str(tr), "html.parser")
                td_list = tr_soup.find_all('td')
                for td in td_list:
                    td_count += 1
                    if td_count == 3:
                        disposal_date = escape_string(str(td.text).strip())

                # if case_no != "NULL" and insert_check:
                if case_no != "NULL":
                    # NOTE(review): SQL built by string concatenation;
                    # consider parameterized queries.
                    sql_query = "INSERT INTO " + str(court_name) + \
                                " (case_no, petitioner, respondent, petitioner_advocate, respondent_advocate, " \
                                "judgment_date, disposal_date, bench, judge_name, pdf_file, pdf_filename)" \
                                " VALUE ('" + case_no + "', '" + petitioner + "', '" + respondent + "', '" + \
                                petitioner_advocate + "', '" + respondent_advocate + "', '" + judgment_date + "', '" + \
                                disposal_date + "', '" + bench + "', '" + judge_name + "', '" + pdf_file + "', '" + \
                                court_name + "_" + slugify(case_no) + ".pdf')"
                    insert_query(sql_query)

                    update_query("UPDATE " + court_name + " SET pdf_data = '" +
                                 str(pdf_data) + "' WHERE case_no = '" +
                                 str(case_no) + "'")
                    update_query(
                        "UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '"
                        + str(court_name) + "'")

            if tr_count == 9:
                # End of one nine-row record: reset counter and fields.
                tr_count = 0
                case_no = "NULL"
                petitioner = "NULL"
                respondent = "NULL"
                petitioner_advocate = "NULL"
                respondent_advocate = "NULL"
                judgment_date = "NULL"
                # BUG FIX: disposal_date was not reset in the original,
                # so a record lacking its own disposal date inherited the
                # previous record's value.
                disposal_date = "NULL"
                judge_name = "NULL"
                pdf_data = "NULL"
                pdf_file = "NULL"

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to parse the html: %s", e)
        update_query(
            "UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" +
            str(court_name) + "'")
        return False
Code example #8
0
def request_data(headers, start_date, end_date_):
    """Crawl "ByDate" listings in 180-day windows.

    Repeatedly POSTs the date-range form, advancing start_date by 180
    days per iteration, and hands each response to offset_link for
    pagination/parsing.

    NOTE(review): court_name, base_url and proxy_dict are free variables
    here — presumably module-level globals; unlike the sibling variants,
    court_name is not a parameter. Confirm it is bound before calling.

    :param headers: HTTP headers for the POST request.
    :param start_date: first window start, "DD-MM-YYYY".
    :param end_date_: crawl end bound, "DD-MM-YYYY".
    :return: True when finished or emergency-exited; False on error.
    """
    try:
        url = base_url + "/ByDate.php"

        i = 0
        while True:
            i += 1

            # Kill switch checked each window via the Tracker table.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            # Window end = start + 180 days.
            end_date = (
                datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                datetime.timedelta(days=180)).strftime("%d-%m-%Y")

            if datetime.datetime.strptime(str(end_date_), "%d-%m-%Y") + datetime.timedelta(days=180) < \
                    datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(start_date) + "', End_Date = '" + str(end_date) +
                         "' WHERE Name = '" + str(court_name) + "'")

            # BUG FIX: the original used .replace("0", ""), which removes
            # EVERY zero — turning day "10" into "1", "20" into "2", and
            # month "10" into "1". Only the leading zero must be dropped
            # (the sibling crawler uses lstrip for the same purpose), so
            # use lstrip("0") instead.
            payload = "date_day=" + str(start_date[0:2]).lstrip("0") + \
                      "&date_month=" + str(start_date[3:5]).lstrip("0") + \
                      "&date_year=" + str(start_date[6:]) + \
                      "&date_day1=" + str(end_date[0:2]).lstrip("0") + \
                      "&date_month1=" + str(end_date[3:5]).lstrip("0") + \
                      "&date_year1=" + str(end_date[6:]) + \
                      "&submit=Submit"

            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers,
                                        proxies=proxy_dict)
            res = response.text

            if "invalid inputs given" in res.lower():
                logging.error("NO data Found for start date: " +
                              str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")

                start_date = end_date
                continue

            if not offset_link(res, headers):
                logging.error("Failed to parse data from date: " +
                              str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Code example #9
0
def parse_html(html_str, court_name):
    """Parse the ``<select id="txtlist">`` judgment list and store each case.

    Each <option> carries the PDF identifier in its value attribute and
    the case metadata inside an overlib() tooltip in onmouseover, which
    is unpacked by stripping the overlib wrapper and splitting on <br/>.

    :param html_str: raw HTML of the result page.
    :param court_name: court identifier; DB table name and Tracker key.
    :return: True on success or emergency exit; False on failure
        (Tracker.No_Error is incremented).
    """
    try:
        soup = BeautifulSoup(html_str, "html.parser")
        select_soup = BeautifulSoup(
            str(soup.find_all('select', {'id': 'txtlist'})[0]), "html.parser")
        tr_list = select_soup.find_all('option')

        for tr in tr_list:
            # Kill switch re-checked per option via the Tracker table.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit is not None:
                if emergency_exit['emergency_exit'] == 1:
                    break

            # insert_check = False
            pdf_value = tr['value']

            # Strip the overlib('...') wrapper around the tooltip HTML.
            res = BeautifulSoup(
                str(tr['onmouseover']).replace("return overlib('",
                                               "").replace("')", ""),
                "html.parser")
            # Remove <font> decorations, then split the remaining lines:
            # [petitioner, respondent, judge, judgment date, mixed tail].
            [s.extract() for s in res('font')]
            res = str(res).replace('\n', '').strip().split('<br/>')

            petitioner = escape_string(res[0])
            respondent = escape_string(res[1])
            judge = escape_string(res[2])
            judgment_date = escape_string(res[3])
            mix_data = str(res[4]).replace("', CAPTION, '", '')

            # Tail layout: 2-char reportable flag, separator, case number.
            reportable = mix_data[0:2]
            case_no = escape_string(mix_data[3:])

            if reportable == 'No':
                continue  # non-reportable judgments are skipped

            # if select_count_query(str(court_name), str(case_no), 'judgment_date', judgment_date):
            #     insert_check = True

            # if case_no != "NULL" and insert_check:
            if case_no != "NULL":
                # NOTE(review): argument order differs from the other
                # request_pdf call sites — presumably this variant takes
                # (case_no, court_name, pdf_value); confirm.
                pdf_data = escape_string(
                    request_pdf(case_no, court_name, pdf_value))

                sql_query = "INSERT INTO " + str(court_name) + " (case_no, petitioner, respondent, judgment_date, " \
                                                               "judge, pdf_file, pdf_filename, reportable) VALUE ('" + \
                            case_no + "', '" + petitioner + "', '" + respondent + "', '" + judgment_date + "', '" + \
                            judge + "', '" + pdf_value + "', '" + court_name + "_" + slugify(case_no) + ".pdf', '" + \
                            reportable + "')"
                insert_query(sql_query)

                update_query("UPDATE " + court_name + " SET pdf_data = '" +
                             str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query(
                    "UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '"
                    + str(court_name) + "'")

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to parse the html: %s", e)
        update_query(
            "UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" +
            str(court_name) + "'")
        return False
Code example #10
0
def request_data(court_name, headers, start_date, end_date_):
    """Crawl judgments day-by-day from the judgement_pro_all.php endpoint.

    For every single-day window between start_date and end_date_ (both
    "dd-mm-YYYY" strings) the search form is POSTed once per bench code
    (IND, JBP, GWL) and each HTML response is handed to parse_html().

    Returns True on normal completion or external abort, False if an
    unexpected exception is raised.
    """
    try:
        url = base_url + '/php/hc/judgement/judgement_pro_all.php'

        i = 0
        while True:
            i += 1

            # Query one day at a time: [start_date, start_date + 1 day].
            end_date = (
                datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                datetime.timedelta(days=1)).strftime("%d-%m-%Y")

            if datetime.datetime.strptime(str(end_date_), "%d-%m-%Y") + datetime.timedelta(days=1) < \
                    datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("END date Exceed.")
                break

            benches = ['IND', 'JBP', 'GWL']
            for bench in benches:
                # Honor an externally requested abort between bench queries.
                emergency_exit = select_one_query(
                    "SELECT emergency_exit FROM Tracker WHERE Name='" +
                    court_name + "'")
                if emergency_exit['emergency_exit'] == 1:
                    update_history_tracker(court_name)
                    return True

                update_query("UPDATE Tracker SET Start_Date = '" +
                             str(start_date) + "', End_Date = '" +
                             str(end_date) + "' WHERE Name = '" +
                             str(court_name) + "'")

                payload = "lst_judge=0" \
                          "&lst_pet=" \
                          "&txtparty=" \
                          "&lst_counsel=" \
                          "&txtcounsel=" \
                          "&date1=" + str(start_date) + \
                          "&date2=" + str(end_date) + \
                          "&court=" + str(bench) + \
                          "&lst_judge1=0" \
                          "&lst_judge2=0" \
                          "&btn_search=is" \
                          "&bench=" \
                          "&sort=jo" \
                          "&ad=DESC" \
                          "&code="

                # Years up to 2014 use onlyafr=N, later years onlyafr=Y —
                # presumably an "approved for reporting" filter; TODO confirm.
                if int(end_date[-4:]) <= 2014 and int(start_date[-4:]) <= 2014:
                    payload += "&onlyafr=N"
                else:
                    payload += "&onlyafr=Y"

                response = requests.request("POST",
                                            url,
                                            data=payload,
                                            headers=headers,
                                            proxies=proxy_dict)
                res = response.text

                # "jugdement" typo is in the site's response text — keep as-is.
                if "no jugdement or order found that you want to search" in res.lower(
                ):
                    logging.error("NO data Found for start date: " +
                                  str(start_date))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                        + str(court_name) + "'")
                    sleep(2)

                    # BUG FIX: start_date was previously advanced here, inside
                    # the bench loop, so a "no data" result for one bench
                    # collapsed the date window (date1 == date2) for the
                    # remaining benches of the same day. Only skip this bench;
                    # the outer loop advances the date exactly once per day.
                    continue

                if not parse_html(res, court_name, bench):
                    logging.error("Failed to parse data from date: " +
                                  str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
コード例 #11
0
def request_data(court_name, start_date, end_date_):
    """Fetch judgment listings from date_JQ.asp one day at a time.

    start_date / end_date_ are "dd-mm-YYYY" strings; each day's HTML
    response is passed to parse_html() until end_date_ is exceeded or an
    external abort is flagged via the Tracker table.
    """
    try:
        url = base_url + "date_JQ.asp"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }

        date_fmt = "%d-%m-%Y"
        one_day = datetime.timedelta(days=1)

        while True:
            # Stop immediately if an abort was requested for this court.
            flag_row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            end_date = (datetime.datetime.strptime(str(start_date), date_fmt)
                        + one_day).strftime(date_fmt)

            past_limit = (
                datetime.datetime.strptime(end_date_, date_fmt) + one_day <
                datetime.datetime.strptime(str(end_date), date_fmt))
            if past_limit:
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(start_date) + "', End_Date = '" + str(end_date) +
                         "' WHERE Name = '" + str(court_name) + "'")

            # The form wants day/month without leading zeros.
            day = str(start_date[0:2]).lstrip('0')
            month = str(start_date[3:5]).lstrip('0')
            year = str(start_date[-4:])
            payload = "txtday=" + day + "&txtmonth=" + month + "&txtyear=" + year

            res = requests.request("POST",
                                   url,
                                   data=payload,
                                   headers=headers,
                                   proxies=proxy_dict).text

            if "no judgement found for your search" in res.lower():
                logging.error("NO data Found for start date: " +
                              str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")
                start_date = end_date
                continue

            if not parse_html(res, court_name):
                logging.error("Failed to parse data from date: " +
                              str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
コード例 #12
0
def cancel_json(court_name):
    """Request an abort of the running JSON job for *court_name*.

    Marks the court's Tracker_JSON row as aborting and raises the
    emergency_exit flag that the crawl loops poll; the UPDATE result is
    wrapped in a JSON response.
    """
    abort_sql = ("UPDATE Tracker_JSON SET status='IN_ABORT', "
                 "emergency_exit=true WHERE Name='" + court_name + "'")
    return jsonify(update_query(abort_sql))
コード例 #13
0
def cancel_pdf():
    """Request an abort of every running PDF job.

    Sets the aborting status and emergency_exit flag on all Tracker_pdf
    rows ("WHERE 1" matches everything) and returns the UPDATE result as
    a JSON response.
    """
    abort_sql = ("UPDATE Tracker_pdf SET status='IN_ABORT', "
                 "emergency_exit=true WHERE 1")
    return jsonify(update_query(abort_sql))
コード例 #14
0
def request_data(court_name, start_date, end_date_):
    """Crawl monthly judgment listings via the site's Drupal ajax search.

    Iterates the month/year tokens produced by month_list_() over the
    [start_date, end_date_] range, POSTs the search form once per month
    and hands the HTML fragment found in the JSON reply to parse_html().
    Returns True on completion or external abort, False on error.
    """
    try:
        url = base_url + "/hcs/hcourt/hg_judgement_search"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Accept': "application/json",
            'Cache-Control': "no-cache"
        }

        # NOTE(review): the last two characters of the dates look like a
        # site-specific 2-digit year code (see the "- 10" offset below);
        # codes below 11 are flagged as having no data — TODO confirm.
        if int(start_date[-2:]) < 11:
            update_query(
                "UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '"
                + str(court_name) + "'")
            if int(end_date_[-2:]) < 11:
                update_history_tracker(court_name)
                return True

        for month_year in month_list_([str(start_date), str(end_date_)]):
            # The form's "orderyear" value is the 2-digit code minus 10.
            year = int(month_year[-2:]) - 10

            # Stop if an abort has been flagged for this court.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(month_year) + "', End_Date = '" + str(end_date_) +
                         "' WHERE Name = '" + str(court_name) + "'")

            querystring = {"ajax_form": "1", "_wrapper_format": "drupal_ajax"}

            # NOTE(review): form_build_id is hard-coded; Drupal form build
            # IDs can expire, so this may need refreshing if requests fail.
            payload = "form_build_id=form-BS37MKVfuGmv9fgHWUqr3U9nFCjolonq-Nnenj3Ks24" \
                      "&form_id=ajax_example_form" \
                      "&ordermonth=" + str(month_year[:-2]).lstrip("0") + \
                      "&orderyear=" + str(year) + \
                      "&_triggering_element_name=op" \
                      "&_triggering_element_value=Search" \
                      "&_drupal_ajax=1" \
                      "&ajax_page_state%5Btheme%5D=mytheme" \
                      "&ajax_page_state%5Btheme_token%5D=%20" \
                      "&ajax_page_state%5Blibraries%5D=asset_injector%2Fcss%2Fanimation_accordin%2Casset_injector" \
                      "%2Fcss%2Fside_bar%2Casset_injector%2Fcss%2Ftable%2Casset_injector%2Fjs%2Fseperate_tab_%2C" \
                      "core%2Fdrupal.ajax%2Ccore%2Fhtml5shiv%2Ccore%2Fjquery.form%2Cmytheme%2Fmylibrarynew%2C" \
                      "system%2Fbase%2Cviews%2Fviews.module"

            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers,
                                        params=querystring,
                                        proxies=proxy_dict)

            # The reply is a JSON list; the search results are the HTML in
            # the first entry that carries a "data" key.
            json_res = json.loads(response.text)
            res = None
            for json_r in json_res:
                if "data" in json_r:
                    res = BeautifulSoup(str(json_r['data']), "html.parser")
                    break

            if res is None:
                logging.error("NO data Found for start date: " +
                              str(month_year))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")
                continue

            if not parse_html(res, court_name):
                logging.error("Failed to parse data from date: " +
                              str(month_year))

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
コード例 #15
0
def parse_html(html_str, court_name):
    """Parse a 'miscTable' judgment listing and persist each row.

    Every data row yields one INSERT into the court's table plus tracker
    updates; the judgment PDF is located by following the case link to
    its detail page.  Returns True when the table was processed, False
    on any error (the Tracker error counter is bumped).
    """
    try:
        listing = BeautifulSoup(html_str, "html.parser")
        misc_table = listing.find_all('table', {'class': 'miscTable'})[0]
        rows = BeautifulSoup(str(misc_table), "html.parser").find_all('tr')

        for row_no, row in enumerate(rows, start=1):
            # An externally raised emergency_exit flag stops the parse.
            flag_row = select_one_query("SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if flag_row is not None and flag_row['emergency_exit'] == 1:
                break

            if row_no == 1:  # first row is skipped (header)
                continue

            # Defaults written to the DB when a column is missing.
            case_no = "NULL"
            judgment_date = "NULL"
            judge_name = "NULL"
            petitioner = "NULL"
            respondent = "NULL"
            bench = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"

            cells = BeautifulSoup(str(row), "html.parser").find_all('td')

            for col, cell in enumerate(cells, start=1):
                if col == 1:
                    judgment_date = escape_string(str(cell.decode_contents()))
                elif col == 2:
                    # The case-number cell links to a detail page that in
                    # turn links to the judgment PDF.
                    link = BeautifulSoup(str(cell), "html.parser").a
                    case_no = escape_string(str(link.text).replace("\n", ""))

                    detail_url = base_url + link.get('href')
                    detail_res = requests.request('GET', detail_url, headers=headers, proxies=proxy_dict)

                    detail_soup = BeautifulSoup(str(detail_res.text), "html.parser")
                    pdf_cell = detail_soup.find_all('td', {'headers': 't1'})[0]
                    pdf_href = BeautifulSoup(str(pdf_cell), "html.parser").a.get('href')

                    pdf_file = escape_string(base_url + pdf_href)
                    pdf_data = escape_string(request_pdf(base_url + pdf_href, case_no, court_name))
                elif col == 3:
                    judge_name = escape_string(str(cell.text))
                elif col == 4:
                    petitioner = escape_string(str(cell.text))
                elif col == 5:
                    respondent = escape_string(str(cell.text))
                elif col == 6:
                    bench = escape_string(str(cell.text))

            if case_no != "NULL":
                insert_query(
                    "INSERT INTO " + str(court_name) +
                    "(case_no, judgment_date, judge_name, petitioner, "
                    "respondent, bench, pdf_file, pdf_filename) VALUE ('" +
                    case_no + "', '" + judgment_date + "', '" + judge_name +
                    "', '" + petitioner + "', '" + respondent + "', '" +
                    bench + "', '" + pdf_file + "', '" + court_name + "_" +
                    slugify(case_no) + ".pdf')")

                update_query("UPDATE " + court_name + " SET pdf_data = '" + str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query("UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '" + str(court_name) + "'")

        return True

    except Exception as e:
        logging.error("Failed to parse the html: %s", e)
        update_query("UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" + str(court_name) + "'")
        return False
コード例 #16
0
def parse_html(html_str, court_name):
    """Parse a judgment listing table and store one row per judgment.

    Ampersands are blanked out before parsing (kept from the original
    handling of this site's markup); each data row is split into date /
    judge / case number / parties / PDF link and inserted into the
    court's table.  Returns True on success, False on error.
    """
    try:
        page = BeautifulSoup(str(html_str).replace('&', ' '), "html.parser")
        rows = page.find_all('tr')

        for row_no, row in enumerate(rows, start=1):
            # Bail out if an abort has been flagged for this court.
            flag_row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if flag_row is not None and flag_row['emergency_exit'] == 1:
                break

            if row_no == 1:  # first row is skipped (header)
                continue

            # Defaults written to the DB when a column is missing.
            case_no = "NULL"
            petitioner = "NULL"
            respondent = "NULL"
            judgment_date = "NULL"
            judge_name = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"

            cells = BeautifulSoup(str(row), "html.parser").find_all('td')

            for col, cell in enumerate(cells, start=1):
                if col == 1:
                    judgment_date = escape_string(str(cell.decode_contents()))
                elif col == 2:
                    judge_name = escape_string(str(cell.decode_contents()))
                elif col == 3:
                    case_no = escape_string(str(cell.text))
                elif col == 4:
                    # Parties arrive as one cell: "<petitioner> v/s <respondent>".
                    sides = str(cell.decode_contents()).split("v/s")
                    petitioner = escape_string(str(sides[0]))
                    respondent = escape_string(str(sides[1]))
                elif col == 5:
                    link = BeautifulSoup(str(cell), "html.parser").a
                    pdf_file = escape_string(str(base_url + link.get('href')))
                    pdf_data = escape_string(
                        request_pdf(base_url + link.get('href'), case_no,
                                    court_name))

            if case_no != "NULL":
                insert_query(
                    "INSERT INTO " + str(court_name) +
                    " (case_no, petitioner, respondent, judgment_date, "
                    "judge_name, pdf_file, pdf_filename) VALUE ('" +
                    case_no + "', '" + petitioner + "', '" + respondent +
                    "', '" + judgment_date + "', '" + judge_name + "', '" +
                    pdf_file + "', '" + court_name + "_" + slugify(case_no) +
                    ".pdf')")

                update_query("UPDATE " + court_name + " SET pdf_data = '" +
                             str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query(
                    "UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '"
                    + str(court_name) + "'")

        return True

    except Exception as e:
        logging.error("Failed to parse the html: %s", e)
        update_query(
            "UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" +
            str(court_name) + "'")
        return False
コード例 #17
0
def parse_html(html_str, court_name):
    """Parse the page's second table and persist rows 3 through 17.

    Columns: case no, coram, judgment date, PDF link, type, status.
    Each qualifying row is inserted into the court's table together with
    the downloaded PDF content.  Returns True on success, False on error.
    """
    try:
        page = BeautifulSoup(html_str, "html.parser")
        listing = page.find_all('table')[1]
        rows = BeautifulSoup(str(listing), "html.parser").find_all('tr')

        for row_no, row in enumerate(rows, start=1):
            # Respect an externally requested abort.
            flag_row = select_one_query("SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if flag_row is not None and flag_row['emergency_exit'] == 1:
                break

            # Only rows 3..17 are treated as data rows on this page.
            if row_no <= 2 or row_no > 17:
                continue

            # Defaults written to the DB when a column is missing.
            case_no = "NULL"
            judgment_date = "NULL"
            coram = "NULL"
            type_ = "NULL"
            status = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"

            cells = BeautifulSoup(str(row), "html.parser").find_all('td')

            for col, cell in enumerate(cells, start=1):
                if col == 1:
                    case_no = escape_string(str(cell.decode_contents()))
                elif col == 2:
                    coram = escape_string(str(cell.decode_contents()))
                elif col == 3:
                    judgment_date = escape_string(str(cell.decode_contents()))
                elif col == 4:
                    link = BeautifulSoup(str(cell), "html.parser").a
                    pdf_file = escape_string(base_url + link.get('href'))
                    pdf_data = escape_string(request_pdf(base_url + link.get('href'), case_no, court_name))
                elif col == 5:
                    type_ = escape_string(str(cell.decode_contents()))
                elif col == 6:
                    status = escape_string(str(cell.decode_contents()))

            # Skip the disclaimer pseudo-row that shares this table.
            if case_no != "NULL" and "DISCLAIMER" not in case_no:

                insert_query(
                    "INSERT INTO " + str(court_name) +
                    " (case_no, judgment_date, coram, type, status, "
                    "pdf_file, pdf_filename) VALUE ('" + case_no + "', '" +
                    judgment_date + "', '" + coram + "', '" + type_ + "', '" +
                    status + "', '" + pdf_file + "', '" + court_name + "_" +
                    slugify(case_no) + ".pdf')")

                update_query("UPDATE " + court_name + " SET pdf_data = '" + str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query("UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '" + str(court_name) + "'")

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to parse the html: %s", e)
        update_query("UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" + str(court_name) + "'")
        return False
コード例 #18
0
def parse_html(html_str):
    """Parse a width-100% judgment table and store one row per judgment.

    NOTE(review): ``court_name`` is a free variable here — unlike the
    sibling parsers it is not a parameter, so this function depends on a
    module-level ``court_name`` being defined; otherwise the NameError is
    caught below and counted as a parse error.  Verify against callers.
    """
    try:
        soup = BeautifulSoup(str(html_str), "html.parser")

        table_soup = BeautifulSoup(
            str(soup.find_all('table', {"width": "100%"})[0]), "html.parser")
        tr_list = table_soup.select('tr')

        tr_count = 0
        for tr in tr_list:

            # Stop parsing if an abort has been flagged for this court.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit is not None:
                if emergency_exit['emergency_exit'] == 1:
                    break

            tr_count += 1
            # First two rows are skipped (header rows, presumably).
            if tr_count <= 2:
                continue

            # Defaults written to the DB when a column is missing.
            case_no = "NULL"
            petitioner = "NULL"
            respondent = "NULL"
            judgment_date = "NULL"
            judge_name = "NULL"
            pdf_data = "NULL"
            pdf_file = "NULL"
            # insert_check = False

            tr_soup = BeautifulSoup(str(tr), "html.parser")
            td_list = tr_soup.select('td')

            i = 0
            for td in td_list:
                i += 1

                if i == 1:
                    continue

                # Data cells carry no 'align' attribute; aligned cells are
                # the judge (left) and PDF link (center), handled below.
                if i == 2 and td.get('align') is None:
                    font_tag = BeautifulSoup(str(td), "html.parser").font
                    case_no = escape_string(str(font_tag.text))

                # if select_count_query(str(court_name), str(case_no), 'judgment_date', judgment_date):
                #     insert_check = True

                if i == 3 and td.get('align') is None:
                    font_tag = BeautifulSoup(str(td), "html.parser").font
                    respondent = escape_string(str(font_tag.text))

                if i == 4 and td.get('align') is None:
                    font_tag = BeautifulSoup(str(td), "html.parser").font
                    petitioner = escape_string(str(font_tag.text))

                if i == 5 and td.get('align') is None:
                    font_tag = BeautifulSoup(str(td), "html.parser").font
                    judgment_date = escape_string(str(font_tag.text))

                if td.get('align') == 'left':
                    td_soup1 = BeautifulSoup(str(td), "html.parser")
                    judge_name = escape_string(str(td_soup1.text))

                if td.get('align') == 'center':
                    font_tag = BeautifulSoup(str(td), "html.parser").font
                    a_tag = BeautifulSoup(str(font_tag), "html.parser").a
                    pdf_file = escape_string(base_url + "/" +
                                             a_tag.get('href'))
                    # UTF-8 encode/decode round-trip — presumably meant to
                    # drop undecodable characters from the PDF text; TODO
                    # confirm it is still needed.
                    pdf_data = escape_string(
                        bytes(
                            str(
                                request_pdf(base_url + "/" + a_tag.get('href'),
                                            case_no)),
                            'utf-8').decode("utf-8", 'ignore'))

            # if case_no != "NULL" and insert_check:
            if case_no != "NULL":
                sql_query = "INSERT INTO " + str(court_name) + " (case_no, petitioner, respondent, judgment_date, " \
                                                               "judge_name, pdf_file, pdf_filename) VALUE ('" + \
                            case_no + "', '" + petitioner + "', '" + respondent + "', '" + judgment_date + "', '" + \
                            judge_name + "', '" + pdf_file + "', '" + court_name + "_" + slugify(case_no) + ".pdf')"
                insert_query(sql_query)

                update_query("UPDATE " + court_name + " SET pdf_data = '" +
                             str(pdf_data) + "' WHERE case_no = '" +
                             str(case_no) + "'")
                update_query(
                    "UPDATE Tracker SET No_Cases = No_Cases + 1 WHERE Name = '"
                    + str(court_name) + "'")

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to parse the html: %s", e)
        update_query(
            "UPDATE Tracker SET No_Error = No_Error + 1 WHERE Name = '" +
            str(court_name) + "'")
        return False
def full_details_parse(res, appeal_no, court_name):
    """Extract appeal metadata from a detail page and update the DB row.

    Reads the summary table (filed-on / assessment year / bench / status)
    and the hearing-dates panel, then writes every field back to the
    court's table for *appeal_no*.  Errors are logged and swallowed.
    """
    try:
        def cell_text(td):
            # Normalise a <td>: strip outer whitespace and embedded
            # newlines, then escape for use in an SQL string literal.
            return escape_string(str(td.text).strip().replace("\n", ""))

        filed_on = 'NULL'
        assessment_year = 'NULL'
        bench_allotted = 'NULL'
        case_status = 'NULL'
        date_of_first_hearing = 'NULL'
        date_of_last_hearing = 'NULL'
        date_of_next_hearing = 'NULL'
        date_of_final_hearing = 'NULL'
        date_of_tribunal_order = 'NULL'
        date_of_pronouncement = 'NULL'
        order_result = 'NULL'

        summary_soup = BeautifulSoup(res, "html.parser")
        summary_tables = summary_soup.find_all('table', {'class': 'table table-striped table-bordered manage-efects'})
        summary_rows = BeautifulSoup(str(summary_tables), "html.parser").find_all('tr')

        # Row 5 of the summary table carries the four case attributes.
        for row_no, tr in enumerate(summary_rows, start=1):
            if row_no != 5:
                continue
            for col, td in enumerate(BeautifulSoup(str(tr), "html.parser").find_all('td'), start=1):
                if col == 2:
                    filed_on = cell_text(td)
                elif col == 3:
                    assessment_year = cell_text(td)
                elif col == 4:
                    bench_allotted = cell_text(td)
                elif col == 5:
                    case_status = cell_text(td)

        panel_soup = BeautifulSoup(res, "html.parser")
        panel = panel_soup.find_all('section', {'id': 'panel2-3'})
        panel_rows = BeautifulSoup(str(panel), "html.parser").find_all('tr')

        # Rows 2-5 of the hearings panel hold (left, right) value pairs.
        for row_no, tr in enumerate(panel_rows, start=1):
            if row_no == 1:
                continue
            for col, td in enumerate(BeautifulSoup(str(tr), "html.parser").find_all('td'), start=1):
                if row_no == 2:
                    if col == 1:
                        date_of_first_hearing = cell_text(td)
                    elif col == 2:
                        date_of_tribunal_order = cell_text(td)
                elif row_no == 3:
                    if col == 1:
                        date_of_last_hearing = cell_text(td)
                    elif col == 2:
                        date_of_pronouncement = cell_text(td)
                elif row_no == 4:
                    if col == 1:
                        date_of_next_hearing = cell_text(td)
                    elif col == 2:
                        order_result = cell_text(td)
                elif row_no == 5:
                    if col == 1:
                        date_of_final_hearing = cell_text(td)

        update_query("UPDATE " + court_name + " SET filed_on = '" + str(filed_on) + "', assessment_year = '" +
                     str(assessment_year) + "', bench_allotted = '" + str(bench_allotted) + "', case_status = '" +
                     str(case_status) + "', date_of_first_hearing = '" +
                     str(date_of_first_hearing) + "', date_of_last_hearing = '" + str(date_of_last_hearing) +
                     "', date_of_next_hearing = '" + str(date_of_next_hearing) + "', date_of_final_hearing = '" +
                     str(date_of_final_hearing) + "', date_of_tribunal_order = '" + str(date_of_tribunal_order) +
                     "', date_of_pronouncement = '" + str(date_of_pronouncement) + "', order_result = '" +
                     str(order_result) + "' WHERE appeal_no = '" + str(appeal_no) + "'")

    except Exception as e:
        logging.error("Failed to parse the details html: %s", e)
コード例 #20
0
def request_data(court_name, start_date, end_date_):
    """Fetch judgment listings from php/getJBJ.php in 30-day windows.

    Walks from start_date toward end_date_ ("dd-mm-YYYY") thirty days at
    a time, POSTing the JBJ search form and handing each response to
    parse_html().  Stops early if an abort is flagged in the Tracker.
    """
    try:
        url = base_url + 'php/getJBJ.php'
        headers = {
            'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
            'Cache-Control': "no-cache"
        }

        date_fmt = "%d-%m-%Y"
        window = datetime.timedelta(days=30)

        while True:
            # Stop immediately if an abort was requested for this court.
            flag_row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            end_date = (datetime.datetime.strptime(str(start_date), date_fmt)
                        + window).strftime(date_fmt)

            if (datetime.datetime.strptime(end_date_, date_fmt) + window <
                    datetime.datetime.strptime(str(end_date), date_fmt)):
                logging.error("END date Exceed.")
                break

            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(start_date) + "', End_Date = '" + str(end_date) +
                         "' WHERE Name = '" + str(court_name) + "'")

            payload = ("jorrop=J"
                       "&JBJfrom_date=" + str(start_date) +
                       "&JBJto_date=" + str(end_date))

            # verify=False kept from the original: the site's TLS cert does
            # not validate through this client.
            res = requests.request("POST",
                                   url,
                                   data=payload,
                                   headers=headers,
                                   verify=False,
                                   proxies=proxy_dict).text

            if "no data found" in res.lower():
                logging.error("NO data Found for start date: " +
                              str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")
                start_date = end_date
                continue

            if not parse_html(res, court_name):
                logging.error("Failed to parse data from date: " +
                              str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)

        return False
コード例 #21
0
def request_data(court_name, dc, headers, start_date, end_date_):
    """Crawl judgment listings for *court_name* one day at a time.

    Starting at *start_date* (``dd/mm/yyyy``), repeatedly POSTs the day's
    query to ``juddt1.php`` and hands each response to ``offset_link`` for
    parsing, advancing one day per iteration until *end_date_* is passed
    or the Tracker table's emergency-exit flag is set.

    Args:
        court_name: Tracker row name identifying the court being crawled.
        dc: Site-specific ``dc`` query-string parameter.
        headers: HTTP headers forwarded with every request.
        start_date: First date to fetch, formatted ``dd/mm/yyyy``.
        end_date_: Inclusive upper bound, formatted ``dd/mm/yyyy``.

    Returns:
        True when the crawl completes (or is stopped via emergency exit);
        False if an unexpected exception occurs.
    """
    try:
        url = base_url + "/juddt1.php"

        while True:
            # NOTE(review): SQL here is built by string concatenation with
            # caller-supplied court_name — prefer parameterized queries if
            # select_one_query/update_query support them.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            # Guard against a missing Tracker row before subscripting
            # (sibling crawlers in this file check for None first;
            # previously a missing row raised TypeError here).
            if emergency_exit is not None and \
                    emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            # Each iteration covers a single day: [start_date, end_date).
            end_date = (
                datetime.datetime.strptime(str(start_date), "%d/%m/%Y") +
                datetime.timedelta(days=1)).strftime("%d/%m/%Y")

            # Stop once the window moves past the requested end date.
            if datetime.datetime.strptime(str(end_date_), "%d/%m/%Y") + datetime.timedelta(days=1) < \
                    datetime.datetime.strptime(str(end_date), "%d/%m/%Y"):
                logging.error("DONE")
                break

            # Persist progress so an interrupted crawl can resume.
            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(start_date) + "', End_Date = '" + str(end_date) +
                         "' WHERE Name = '" + str(court_name) + "'")

            querystring = {"dc": str(dc), "fflag": "1"}
            payload = "juddt=" + str(start_date) + "&Submit=Submit"

            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers,
                                        params=querystring,
                                        proxies=proxy_dict)
            res = response.text

            if "NO ROWS" in res.upper():
                logging.error("NO data Found for start date: " +
                              str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")

                start_date = end_date
                continue

            # Parse failure is logged but not fatal — move to the next day.
            if not offset_link(res, headers, court_name, dc):
                logging.error("Failed to parse data from date: " +
                              str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False