def offset_link(html_str, url, querystring, court_name):
    """Parse the first result page, then request and parse the following pages.

    ``html_str`` is handed to ``parse_html`` first.  The total record count is
    taken from the last integer found in the text of the first
    ``div.browse_range`` element, and further pages are requested from ``url``
    with the ``offset`` query parameter advancing in steps of 200.  The
    tracker's ``emergency_exit`` flag is polled before every request.

    The requests use the module-level ``headers`` and ``proxy_dict``.

    Args:
        html_str: HTML of the already fetched first page.
        url: Endpoint the follow-up GET requests are sent to.
        querystring: Dict of query parameters; it is modified in place
            (``sort_by``, ``etal`` and ``offset`` are overwritten).
        court_name: Tracker row name; also forwarded to ``parse_html``.

    Returns:
        True when every page was parsed or ``emergency_exit`` read 1;
        False when a page failed to parse or any exception was raised.
    """
    try:
        if not parse_html(html_str, court_name):
            return False

        querystring['sort_by'] = "1"
        querystring['etal'] = "-1"

        soup = BeautifulSoup(html_str, "html.parser")
        div_tag = soup.find_all('div', {'class': 'browse_range'})[0]

        # Raw string: in a plain literal '\d' is an invalid escape sequence,
        # which newer Python versions warn about when compiling the module.
        total_records = int(re.findall(r'\d+', str(div_tag.text))[-1])
        total_calls = ceil(total_records / 200)

        # NOTE(review): the first page is already parsed above, yet offsets
        # 200 .. total_calls * 200 are all requested, so the last request may
        # point past the final record -- confirm against the site's paging.
        for next_num in range(200, 200 * total_calls + 1, 200):
            emergency_exit = select_one_query("SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            querystring['offset'] = str(next_num)
            response = requests.request("GET", url, headers=headers, params=querystring, proxies=proxy_dict)
            res = response.text

            if not parse_html(res, court_name):
                logging.error("Failed for url: " + str(next_num))
                return False

        return True
    except Exception as e:
        logging.error("Error in offset_link. %s", e)
        return False
Example #2
0
def request_data(court_name, bench, start_date, end_date_):
    """Scrape tribunal orders for one bench, one order date at a time.

    Starting at ``start_date`` and advancing one day per iteration until the
    day after ``end_date_`` (both ``DD/MM/YYYY``), the tracker row is updated,
    a search form is POSTed to ``/tribunalorders`` and the response is passed
    to ``parse_html``.  The tracker's ``emergency_exit`` flag is polled at the
    start of every iteration.

    Args:
        court_name: Tracker row name.
        bench: Bench value sent in the form and forwarded to ``parse_html``.
        start_date: First order date, ``DD/MM/YYYY``.
        end_date_: Last order date, ``DD/MM/YYYY``.

    Returns:
        True when the range is exhausted or ``emergency_exit`` read 1;
        False when any exception was raised.
    """
    date_format = "%d/%m/%Y"
    try:
        url = base_url + "/tribunalorders"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        one_day = datetime.timedelta(days=1)

        while True:
            emergency_exit = select_one_query("SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            end_date = (datetime.datetime.strptime(str(start_date), date_format) + one_day).strftime(date_format)

            # Stop once the current day lies after the requested end date.
            if datetime.datetime.strptime(str(end_date_), date_format) + one_day < \
                    datetime.datetime.strptime(str(end_date), date_format):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) + "', End_Date = '" +
                         str(end_date) + "' WHERE Name = '" + str(court_name) + "'")

            payload = "&".join([
                "bench=" + str(bench),
                "appeal_type=",
                "hearingdate=",
                "pronouncementdate=",
                "orderdate=" + str(start_date),
                "member=",
                "assesseename=",
            ])

            response = requests.request("POST", url, data=payload, headers=headers, verify=False, proxies=proxy_dict)
            res = response.text

            # response.text is always a str, so the former ``is None`` check
            # could never fire; treat an empty body as "no data" instead.
            if not res:
                logging.error("NO data Found.")
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                             str(court_name) + "'")
            elif not parse_html(res, court_name, bench):
                logging.error("Failed to parse data from bench: " + str(bench))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from bench: " + str(bench))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, headers, start_date, end_date_):
    """Walk the date range one day at a time and scrape reported judgments.

    For every day from ``start_date`` through ``end_date_`` (both
    ``DD-MM-YYYY``) the tracker row is updated, a search form is POSTed to
    ``coram-reported-judgment.php`` and the response is handed to
    ``offset_link``.  A response containing "NO ROWS" (case-insensitive) only
    increments the tracker's ``No_Year_NoData`` counter.

    Returns True when the range is exhausted or the tracker's
    ``emergency_exit`` flag reads 1, and False if any exception is raised.
    """
    day_format = "%d-%m-%Y"
    try:
        url = base_url + "coram-reported-judgment.php"
        one_day = datetime.timedelta(days=1)

        while True:
            flag_row = select_one_query(
                "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            current_day = datetime.datetime.strptime(str(start_date), day_format)
            end_date = (current_day + one_day).strftime(day_format)

            # Leave the loop once the current day is past the requested end.
            cutoff = datetime.datetime.strptime(str(end_date_), day_format) + one_day
            if cutoff < datetime.datetime.strptime(str(end_date), day_format):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                         % (start_date, end_date, court_name))

            payload = "&".join([
                "coram=0",
                "ojtype=1",
                "bench_type=0",
                "reported=Y",
                "startdate=" + str(start_date),
                "enddate=" + str(end_date),
                "coramqueryreported=0",
            ])

            res = requests.request("POST", url, data=payload, headers=headers, proxies=proxy_dict).text

            if "NO ROWS" in res.upper():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                             % (court_name,))
            elif not offset_link(res, payload, court_name, headers):
                logging.error("Failed to parse data from date: " + str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Example #4
0
def request_data(court_name, start_date, end_date_):
    """Download and parse the monthly ``JDMT<month-year>.html`` pages.

    ``month_list_`` expands ``[start_date, end_date_]`` into month entries,
    each of which is normalised with ``date_fix`` and used to build the page
    URL.  The tracker's ``emergency_exit`` flag is polled before every
    download.

    Args:
        court_name: Tracker row name; forwarded to ``parse_html``.
        start_date: Range start; its last two characters are read as a
            two-digit year.
        end_date_: Range end, same format as ``start_date``.

    Returns:
        True when all months were processed, ``emergency_exit`` read 1, or the
        whole range ends before year "10"; False when any exception was raised.
    """
    try:
        headers = {
            'Cache-Control': "no-cache",
        }

        if int(start_date[-2:]) < 10:
            # NOTE(review): this sets emergency_exit=true, which the loop below
            # treats as a stop signal if it reads the same value back --
            # confirm that ranges straddling year "10" are meant to stop early.
            update_query("UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '" +
                         str(court_name) + "'")
            if int(end_date_[-2:]) < 10:
                update_history_tracker(court_name)
                return True

        for month_year in month_list_([str(start_date), str(end_date_)]):
            month_year = date_fix(month_year)

            emergency_exit = select_one_query("SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            url = base_url + "JDMT" + str(month_year) + ".html"

            update_query("UPDATE Tracker SET Start_Date = '" + str(month_year) + "', End_Date = '" +
                         str(end_date_) + "' WHERE Name = '" + str(court_name) + "'")

            response = requests.request("GET", url, headers=headers, proxies=proxy_dict)
            res = response.text

            # The needle is lowercase, so the page text must be lowercased too;
            # comparing against res.upper() could never match.
            if "file or directory not found" in res.lower():
                logging.error("NO data Found for start date: " + str(month_year))

                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                             str(court_name) + "'")

                continue

            # Pages for year "10" and for Jan11 are parsed with the third
            # parse_html argument set to True, all others with False.
            is_2010_or_jan11 = str(month_year[-2:]) == '10' or str(month_year) == 'Jan11'
            if not parse_html(res, court_name, is_2010_or_jan11):
                logging.error("Failed to parse data from date: " + str(month_year))

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)

        return False
Example #5
0
def request_data(court_name, bench, start_date, end_date_):
    """Fetch and parse the per-year, per-section index pages of one bench.

    For each year in ``start_date .. end_date_`` (inclusive integers) and each
    of the three section types, ``<bench>/<year>/<section>/index.html`` is
    downloaded below ``base_url`` and passed to ``parse_html``.  Years outside
    2010-2016 are skipped and counted in the tracker's ``No_Year_NoData``.

    Args:
        court_name: Tracker row name; forwarded to ``parse_html``.
        bench: Bench path segment; forwarded to ``parse_html``.
        start_date: First year (int).
        end_date_: Last year (int).

    Returns:
        True when the range is exhausted or the tracker's ``emergency_exit``
        flag read 1; False when any exception was raised.
    """
    try:
        section_types = ('111_111_A', '397_398', 'Others')

        for year in range(start_date, end_date_ + 1):
            if not 2010 <= year <= 2016:
                # Log the year actually being skipped, not the range start.
                logging.error("NO data Found for start date: " + str(year))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")
                continue

            for section_type in section_types:
                child_url = str(bench) + '/' + str(year) + '/' + str(
                    section_type) + '/'
                url = base_url + child_url + 'index.html'

                emergency_exit = select_one_query(
                    "SELECT emergency_exit FROM Tracker WHERE Name='" +
                    court_name + "'")
                if emergency_exit['emergency_exit'] == 1:
                    update_history_tracker(court_name)
                    return True

                update_query("UPDATE Tracker SET Start_Date = '" + str(year) +
                             "', End_Date = '" + str(year) +
                             "' WHERE Name = '" + str(court_name) + "'")

                response = requests.request("GET", url, proxies=proxy_dict)
                res = response.text

                # response.text is always a str, so the former ``is None``
                # check was dead code; treat an empty body as "no data".
                if not res:
                    logging.error("NO data Found for year: " + str(year))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                        + str(court_name) + "'")
                    continue

                if not parse_html(res, court_name, bench, child_url):
                    logging.error("Failed to parse data for year: " +
                                  str(year))

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """Download and parse the yearly ``DecisionsHeadline<year>.html`` pages.

    Years ``start_date .. end_date_`` (inclusive, convertible with ``int``)
    are visited in order; for 2018 the year suffix is left empty, so the URL
    becomes ``DecisionsHeadline.html``.  A page containing "file or directory
    not found" (case-insensitive) only bumps the tracker's ``No_Year_NoData``.

    Returns True when the range is exhausted, the tracker's
    ``emergency_exit`` flag reads 1, or both bounds are before 2012; returns
    False if any exception is raised.
    """
    try:
        first_year = int(start_date)
        if first_year < 2012:
            # NOTE(review): emergency_exit=true is written here and the loop
            # below stops when it reads 1 -- confirm the intent for ranges
            # that start before 2012 but end later.
            update_query("UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '%s'"
                         % (court_name,))
            if int(end_date_) < 2012:
                update_history_tracker(court_name)
                return True

        for year in range(int(start_date), int(end_date_) + 1):
            flag_row = select_one_query(
                "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            # 2018 is addressed without a year suffix; the empty value is also
            # what ends up in the tracker row and in the log messages.
            year_ = '' if year == 2018 else year

            url = "".join([base_url, "DecisionsHeadline", str(year_), ".html"])

            update_query("UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                         % (year_, end_date_, court_name))

            res = requests.request("GET", url, proxies=proxy_dict).text

            if "file or directory not found" in res.lower():
                logging.error("NO data Found for start date: " + str(year_))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                             % (court_name,))
            elif not parse_html(res, court_name):
                logging.error("Failed to parse data from date: " + str(year_))

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Example #7
0
def start_scrap():
    """Run one scraping job described by the submitted form and report it.

    Reads ``court_name``, ``bench``, ``start_date`` and ``end_date`` from
    ``request.form``, marks any running tracker rows as cancelled, marks this
    court as running, calls ``court_controller``, moves the court's PDF and
    text files to the bucket (deleting each local file whose transfer
    succeeded), stores the final status and returns the controller result as
    JSON.

    NOTE(review): the form values are concatenated straight into SQL text --
    confirm whether ``update_query`` can take bound parameters instead.
    """
    form = request.form
    court_name = form['court_name']
    bench = form['bench']
    start_date = form['start_date']
    end_date = form['end_date']

    # Cancel whatever is currently running, then claim the tracker row.
    update_query(
        "UPDATE Tracker SET status='IN_CANCELLED', emergency_exit=true WHERE status='IN_RUNNING'"
    )
    update_query("".join([
        "UPDATE Tracker SET status='IN_RUNNING', emergency_exit=false, No_Cases=0, No_Year_NoData=0, ",
        "No_Year_Error=0, No_Error=0, Start_Date='", start_date,
        "', End_Date='", end_date, "', bench='", str(bench),
        "' WHERE Name='", court_name, "'",
    ]))

    res = court_controller(court_name, bench, start_date, end_date)
    update_query("UPDATE Tracker SET status = 'IN_BUCKET_TRANSFER' WHERE Name = '%s'" % (court_name,))

    data_root = "/home/karaa_krypt/CourtScrappingWebApp/Data_Files/"
    for bucket_dir, extension in (('PDF_Files', ".pdf"), ('Text_Files', ".txt")):
        pattern = data_root + bucket_dir + "/" + str(court_name) + "*" + extension
        for filename in glob(pattern):
            # Only delete the local copy after a successful transfer.
            if transfer_to_bucket(bucket_dir, filename):
                os.remove(filename)

    if res:
        final_sql = "UPDATE Tracker SET status = 'IN_SUCCESS', emergency_exit=true WHERE Name = '%s'"
    else:
        final_sql = ("UPDATE Tracker SET No_Year_Error = No_Year_Error + 1, status = 'IN_FAILED', "
                     "emergency_exit=true WHERE Name = '%s'")
    update_query(final_sql % (court_name,))

    update_history_tracker(court_name)
    return jsonify(res)
def request_data_old(court_name, start_date, end_date):
    """Fetch the single "Reportable" browse page and parse it.

    The tracker row's date range is recorded, one GET is sent to
    ``/judgments/browse`` (using the module-level ``headers`` and
    ``proxy_dict``) and the response is passed to ``parse_html``.  A response
    containing "NO ROWS" bumps ``No_Year_NoData`` but is still parsed.  The
    tracker is then marked ``IN_SUCCESS``; on any exception it is marked
    ``IN_FAILED`` instead.

    Returns True on the normal path and False when an exception was raised.
    """
    try:
        url = base_url + "/judgments/browse"

        update_query("".join([
            "UPDATE Tracker SET Start_Date = '", start_date,
            "', End_Date = '", end_date,
            "' WHERE Name = '", str(court_name), "'",
        ]))

        querystring = dict(type="reported", value="Reportable", sort_by="1", order="ASC", rpp="357",
                           etal="0", submit_browse="Update")

        res = requests.request("GET", url, headers=headers, params=querystring, proxies=proxy_dict).text

        if "NO ROWS" in res.upper():
            update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                         % (court_name,))

        parsed_ok = parse_html(res, court_name)
        if not parsed_ok:
            logging.error("Failed to parse data old")

        update_query("UPDATE Tracker SET status = 'IN_SUCCESS', emergency_exit=true WHERE Name = '%s'"
                     % (court_name,))
        update_history_tracker(court_name)

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)

        update_query("UPDATE Tracker SET No_Year_Error = No_Year_Error + 1, status = 'IN_FAILED' WHERE Name = '%s'"
                     % (court_name,))
        update_history_tracker(court_name)

        return False
Example #9
0
def request_data(court_name, dc, headers, start_date, end_date_):
    """Scrape ``/juddt1.php`` one judgment date at a time.

    For every day from ``start_date`` through ``end_date_`` (both
    ``DD/MM/YYYY``) the tracker row is updated, the date is POSTed with the
    query parameters ``dc`` and ``fflag=1``, and the response is handed to
    ``offset_link``.  A response containing "NO ROWS" (case-insensitive) only
    increments the tracker's ``No_Year_NoData`` counter.

    Returns True when the range is exhausted or the tracker's
    ``emergency_exit`` flag reads 1, and False if any exception is raised.
    """
    day_format = "%d/%m/%Y"
    try:
        url = base_url + "/juddt1.php"
        one_day = datetime.timedelta(days=1)

        while True:
            flag_row = select_one_query(
                "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            current_day = datetime.datetime.strptime(str(start_date), day_format)
            end_date = (current_day + one_day).strftime(day_format)

            # Leave the loop once the current day is past the requested end.
            cutoff = datetime.datetime.strptime(str(end_date_), day_format) + one_day
            if cutoff < datetime.datetime.strptime(str(end_date), day_format):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                         % (start_date, end_date, court_name))

            querystring = {"dc": str(dc), "fflag": "1"}
            payload = "&".join(["juddt=" + str(start_date), "Submit=Submit"])

            res = requests.request(
                "POST", url, data=payload, headers=headers, params=querystring, proxies=proxy_dict
            ).text

            if "NO ROWS" in res.upper():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                             % (court_name,))
            elif not offset_link(res, headers, court_name, dc):
                logging.error("Failed to parse data from date: " + str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, court_id, start_date, end_date_):
    """Scrape ``dtquery_new_v1.asp`` in 180-day windows.

    Beginning at ``start_date`` (``DD/MM/YYYY``) each iteration covers the
    window ``start_date .. start_date + 180 days``; the loop ends once the
    window start lies after ``end_date_``.  Every window is POSTed with the
    given ``court_id`` and the response is passed to ``parse_html``.  A
    response containing "no data found" (case-insensitive) only increments
    the tracker's ``No_Year_NoData`` counter.

    Returns True when the range is exhausted or the tracker's
    ``emergency_exit`` flag reads 1, and False if any exception is raised.
    """
    day_format = "%d/%m/%Y"
    try:
        url = base_url + "dtquery_new_v1.asp"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        window = datetime.timedelta(days=180)

        while True:
            flag_row = select_one_query(
                "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            window_start = datetime.datetime.strptime(str(start_date), day_format)
            end_date = (window_start + window).strftime(day_format)

            # end_date_ is parsed as given (no str() conversion), as before.
            cutoff = datetime.datetime.strptime(end_date_, day_format) + window
            if cutoff < datetime.datetime.strptime(str(end_date), day_format):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                         % (start_date, end_date, court_name))

            payload = "&".join([
                "action=validate_login",
                "Court_Id=" + str(court_id),
                "party=jus",
                "FromDt=" + str(start_date),
                "ToDt=" + str(end_date),
            ])

            res = requests.request("POST", url, data=payload, headers=headers, proxies=proxy_dict).text

            if "no data found" in res.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                             % (court_name,))
            elif not parse_html(res, court_name, court_id):
                logging.error("Failed to parse data from date: " + str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, bench, headers, start_date, end_date_):
    """Scrape ``<bench>/services/judgement_status.php`` in 30-day windows.

    Beginning at ``start_date`` (``YYYY-MM-DD``) each iteration covers the
    window ``start_date .. start_date + 30 days``; the loop ends once the
    window start lies after ``end_date_``.  Every window is POSTed as a
    search form and the response is passed to ``parse_html``.  The tracker's
    ``emergency_exit`` flag is polled at the start of every iteration.

    Args:
        court_name: Tracker row name; forwarded to ``parse_html``.
        bench: URL path segment of the bench; forwarded to ``parse_html``.
        headers: HTTP headers for the POST request.
        start_date: First day, ``YYYY-MM-DD``.
        end_date_: Last day, ``YYYY-MM-DD``.

    Returns:
        True when the range is exhausted or ``emergency_exit`` read 1;
        False when any exception was raised.
    """
    date_format = "%Y-%m-%d"
    try:
        url = base_url + '/' + str(bench) + "/services/judgement_status.php"
        window = datetime.timedelta(days=30)

        while True:
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            end_date = (
                datetime.datetime.strptime(str(start_date), date_format) +
                window).strftime(date_format)

            # Stop once the window start lies after the requested end date.
            if datetime.datetime.strptime(str(end_date_), date_format) + window < \
                    datetime.datetime.strptime(str(end_date), date_format):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(start_date) + "', End_Date = '" + str(end_date) +
                         "' WHERE Name = '" + str(court_name) + "'")

            payload = "&".join([
                "case_no=",
                "case_type=0",
                "case_year=",
                "filing_no=",
                "from_date=",
                "from_date1=" + str(start_date),
                "judge_detail=0",
                "search_type=3",
                "to_date=",
                "to_date1=" + str(end_date),
                "txtState=",
                "txtSubject=",
            ])

            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers,
                                        proxies=proxy_dict)
            res = response.text

            # response.text is always a str, so the former ``is None`` check
            # could never fire; treat an empty body as "no data" instead.
            if not res:
                logging.error("NO data Found for start date: " +
                              str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")
            elif not parse_html(res, court_name, bench, start_date):
                logging.error("Failed to parse data from date: " +
                              str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, headers, start_date, end_date_):
    """Scrape the free-text judgment search on ``/home.php`` day by day.

    For every day from ``start_date`` through ``end_date_`` (both
    ``DD/MM/YYYY``) the tracker row is updated, a search for "Justice" is
    POSTed (certificate verification disabled) and the response is handed to
    ``offset_link``.  A response containing "no data found"
    (case-insensitive) increments the tracker's ``No_Year_NoData`` counter
    and is followed by a two-second pause.

    Returns True when the range is exhausted or the tracker's
    ``emergency_exit`` flag reads 1, and False if any exception is raised.
    """
    day_format = "%d/%m/%Y"
    try:
        url = base_url + "/home.php"
        one_day = datetime.timedelta(days=1)

        while True:
            flag_row = select_one_query(
                "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            current_day = datetime.datetime.strptime(str(start_date), day_format)
            end_date = (current_day + one_day).strftime(day_format)

            # end_date_ is parsed as given (no str() conversion), as before.
            cutoff = datetime.datetime.strptime(end_date_, day_format) + one_day
            if cutoff < datetime.datetime.strptime(str(end_date), day_format):
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                         % (start_date, end_date, court_name))

            querystring = {"search_param": "free_text_search_judgment"}

            payload = "&".join([
                "t_case_type=",
                "t_case_year=",
                "submit=Search%20Case",
                "from_date=" + str(start_date),
                "to_date=" + str(end_date),
                "pet_name=",
                "res_name=",
                "free_text=Justice",
            ])

            res = requests.request(
                "POST", url, data=payload, headers=headers, params=querystring,
                verify=False, proxies=proxy_dict
            ).text

            if "no data found" in res.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                             % (court_name,))
                sleep(2)
            elif not offset_link(res, headers, court_name):
                logging.error("Failed to parse data from date: " + str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Example #13
0
def request_data(court_name, start_date, end_date_):
    """Scrape ``/judgementsdetails.asp`` per month and appeal type.

    ``month_list_`` expands ``[start_date, end_date_]`` into month entries
    whose last four characters are the year and whose leading part is the
    month.  For each entry and each of the five appeal types a form is POSTed
    and the response passed to ``parse_html``.  A response containing "there
    are no records at present" (case-insensitive) only increments the
    tracker's ``No_Year_NoData`` counter.

    Returns True when everything was processed, the tracker's
    ``emergency_exit`` flag reads 1, or both bounds are before 2013; returns
    False if any exception is raised.
    """
    try:
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache",
        }
        url = base_url + '/judgementsdetails.asp'

        appeal_types = [
            'NDPS/FPA/ND', 'PMLA/FPA-PMLA', 'SAFEMA/FPA-1', 'FPA/BP',
            'FEMA/FERA/FPA-FE'
        ]

        first_year = int(start_date[-4:])
        if first_year < 2013:
            # NOTE(review): emergency_exit=true is written here and the loop
            # below stops when it reads 1 -- confirm the intent for ranges
            # that start before 2013 but end later.
            update_query("UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '%s'"
                         % (court_name,))
            if int(end_date_[-4:]) < 2013:
                update_history_tracker(court_name)
                return True

        for month_year in month_list_([str(start_date), str(end_date_)]):
            for appeal_type in appeal_types:
                flag_row = select_one_query(
                    "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
                if flag_row['emergency_exit'] == 1:
                    update_history_tracker(court_name)
                    return True

                update_query("UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                             % (month_year, month_year, court_name))

                payload = "&".join([
                    "ACTAPPEALTYPE=" + appeal_type,
                    "DDMONTH=" + str(month_year[:-4]),
                    "DDYEAR=" + str(month_year[-4:]),
                ])

                res = requests.request("POST", url, data=payload, headers=headers, proxies=proxy_dict).text

                if 'there are no records at present' in res.lower():
                    logging.error("NO data Found for year: " + str(month_year))
                    update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                                 % (court_name,))
                elif not parse_html(res, court_name, appeal_type):
                    logging.error("Failed to parse data for year: " + str(month_year))

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Example #14
0
def request_data(court_name, start_date, end_date_):
    """Scrape ``php/getJBJ.php`` in 30-day windows.

    Beginning at ``start_date`` (``DD-MM-YYYY``) each iteration covers the
    window ``start_date .. start_date + 30 days``; the loop ends once the
    window start lies after ``end_date_``.  Every window is POSTed
    (certificate verification disabled) and the response passed to
    ``parse_html``.  A response containing "no data found" (case-insensitive)
    only increments the tracker's ``No_Year_NoData`` counter.

    Returns True when the range is exhausted or the tracker's
    ``emergency_exit`` flag reads 1, and False if any exception is raised.
    """
    day_format = "%d-%m-%Y"
    try:
        url = base_url + 'php/getJBJ.php'
        headers = {
            'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
            'Cache-Control': "no-cache"
        }
        window = datetime.timedelta(days=30)

        while True:
            flag_row = select_one_query(
                "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            window_start = datetime.datetime.strptime(str(start_date), day_format)
            end_date = (window_start + window).strftime(day_format)

            # end_date_ is parsed as given (no str() conversion), as before.
            cutoff = datetime.datetime.strptime(end_date_, day_format) + window
            if cutoff < datetime.datetime.strptime(str(end_date), day_format):
                logging.error("END date Exceed.")
                break

            update_query("UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                         % (start_date, end_date, court_name))

            payload = "&".join([
                "jorrop=J",
                "JBJfrom_date=" + str(start_date),
                "JBJto_date=" + str(end_date),
            ])

            res = requests.request(
                "POST", url, data=payload, headers=headers, verify=False, proxies=proxy_dict
            ).text

            if "no data found" in res.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                             % (court_name,))
            elif not parse_html(res, court_name):
                logging.error("Failed to parse data from date: " + str(start_date))

            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)

        return False
Example #15
0
def request_data(court_name, start_date, end_date_):
    """Scrape the ASP.NET ``judgement.aspx`` page month by month.

    ``month_list_`` expands ``[start_date, end_date_]`` into month entries
    whose last four characters are the year and whose leading part is the
    month.  For each entry a fresh ``requests.Session`` loads the page, copies
    the hidden ``__VIEWSTATE``, ``__VIEWSTATEGENERATOR`` and
    ``__EVENTVALIDATION`` values into the form and posts it back; the
    response is passed to ``parse_html``.  These session requests are sent
    without the ``proxies`` argument.

    Returns True when everything was processed, the tracker's
    ``emergency_exit`` flag reads 1, or both bounds are before 2010; returns
    False if any exception is raised.
    """
    try:
        first_year = int(start_date[-4:])
        if first_year < 2010:
            # NOTE(review): emergency_exit=true is written here and the loop
            # below stops when it reads 1 -- confirm the intent for ranges
            # that start before 2010 but end later.
            update_query("UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '%s'"
                         % (court_name,))
            if int(end_date_[-4:]) < 2010:
                update_history_tracker(court_name)
                return True

        for month_year in month_list_([str(start_date), str(end_date_)]):
            flag_row = select_one_query(
                "".join(["SELECT emergency_exit FROM Tracker WHERE Name='", court_name, "'"]))
            if flag_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            form_page = base_url + 'judgement.aspx'
            data = {
                'ctl00$CPHBody$DropDownListYear': str(month_year[-4:]),
                # The month value is sent without leading zeros.
                'ctl00$CPHBody$DropDownListMonth': str(month_year[:-4]).lstrip("0"),
                'ctl00$CPHBody$TextBox1': '',
                'ctl00$CPHBody$SM1': 'ctl00$CPHBody$SM1|ctl00$CPHBody$DropDownListMonth',
            }

            with requests.Session() as s:
                page = s.get(form_page)
                soup = BeautifulSoup(page.content, "html.parser")

                # Copy the hidden form-state fields from the freshly loaded page.
                for hidden_field in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
                    data[hidden_field] = soup.select_one("#" + hidden_field)["value"]

                update_query("UPDATE Tracker SET Start_Date = '%s' WHERE Name = '%s'"
                             % (month_year, court_name))

                res = s.post(base_url + 'judgement.aspx', data=data).text
                lowered = res.lower()

                if "no records were found." in lowered or "application error" in lowered:
                    logging.error("NO data Found for start date: " + str(month_year))
                    update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                                 % (court_name,))
                elif not parse_html(res, court_name):
                    logging.error("Failed to parse data")

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(headers, start_date, end_date_):
    """Query ``ByDate.php`` in consecutive 180-day windows.

    Each pass posts the window ``start_date`` .. ``start_date`` + 180 days,
    hands the response to ``offset_link`` and then continues from the
    window's end. The walk stops once ``start_date`` lies after
    ``end_date_``, or as soon as the Tracker row reports
    ``emergency_exit == 1``.

    ``court_name``, ``base_url`` and ``proxy_dict`` are not parameters; they
    are resolved from the enclosing module scope.

    Args:
        headers: HTTP headers for the POST, also forwarded to
            ``offset_link``.
        start_date: First window start as a ``DD-MM-YYYY`` string.
        end_date_: Upper bound of the walk as a ``DD-MM-YYYY`` string.

    Returns:
        True when the walk finished or an emergency exit was requested,
        False when any exception was raised.
    """
    try:
        url = base_url + "/ByDate.php"
        date_format = "%d-%m-%Y"
        window = datetime.timedelta(days=180)

        while True:
            # The Tracker row doubles as a kill switch checked on every pass.
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" +
                court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            window_end = datetime.datetime.strptime(
                str(start_date), date_format) + window
            end_date = window_end.strftime(date_format)

            # Both sides carry the same 180-day offset, so this is true
            # exactly when start_date is later than end_date_.
            if datetime.datetime.strptime(
                    str(end_date_), date_format) + window < window_end:
                logging.error("DONE")
                break

            update_query("UPDATE Tracker SET Start_Date = '" +
                         str(start_date) + "', End_Date = '" + str(end_date) +
                         "' WHERE Name = '" + str(court_name) + "'")

            # Only leading zeros may be dropped. Removing every "0" would
            # turn day 10/20/30 into 1/2/3 and month 10 into 1, which asks
            # the site for the wrong dates.
            payload = "date_day=" + str(start_date[0:2]).lstrip("0") + \
                      "&date_month=" + str(start_date[3:5]).lstrip("0") + \
                      "&date_year=" + str(start_date[6:]) + \
                      "&date_day1=" + str(end_date[0:2]).lstrip("0") + \
                      "&date_month1=" + str(end_date[3:5]).lstrip("0") + \
                      "&date_year1=" + str(end_date[6:]) + \
                      "&submit=Submit"

            response = requests.request("POST",
                                        url,
                                        data=payload,
                                        headers=headers,
                                        proxies=proxy_dict)
            res = response.text

            if "invalid inputs given" in res.lower():
                logging.error("NO data Found for start date: " +
                              str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                    + str(court_name) + "'")
            elif not offset_link(res, headers):
                # A failed window is logged but does not stop the walk.
                logging.error("Failed to parse data from date: " +
                              str(start_date))

            # The next window starts where this one ended.
            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
Example #17
0
def request_data(court_name, start_date, end_date_):
    """Post the judgement search once per month and parse each result.

    The months to visit come from ``month_list_`` for the pair
    ``start_date`` / ``end_date_``. If the last two characters of
    ``start_date`` are below 11, the Tracker row is set to
    ``IN_NO_DATA_FOUND`` with ``emergency_exit`` raised; if the same holds
    for ``end_date_`` the function returns immediately.

    For every month the JSON reply is scanned for the first entry that
    contains a ``data`` key; its value is wrapped in ``BeautifulSoup`` and
    passed to ``parse_html``. A reply without such an entry only bumps the
    ``No_Year_NoData`` counter.

    Returns:
        True when all months were visited or an emergency exit was seen,
        False when any exception was raised.
    """
    try:
        endpoint = base_url + "/hcs/hcourt/hg_judgement_search"
        request_headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Accept': "application/json",
            'Cache-Control': "no-cache"
        }

        # Fixed parts of the form body; only month and year vary per request.
        payload_head = "form_build_id=form-BS37MKVfuGmv9fgHWUqr3U9nFCjolonq-Nnenj3Ks24" \
                       "&form_id=ajax_example_form" \
                       "&ordermonth="
        payload_tail = "&_triggering_element_name=op" \
                       "&_triggering_element_value=Search" \
                       "&_drupal_ajax=1" \
                       "&ajax_page_state%5Btheme%5D=mytheme" \
                       "&ajax_page_state%5Btheme_token%5D=%20" \
                       "&ajax_page_state%5Blibraries%5D=asset_injector%2Fcss%2Fanimation_accordin%2Casset_injector" \
                       "%2Fcss%2Fside_bar%2Casset_injector%2Fcss%2Ftable%2Casset_injector%2Fjs%2Fseperate_tab_%2C" \
                       "core%2Fdrupal.ajax%2Ccore%2Fhtml5shiv%2Ccore%2Fjquery.form%2Cmytheme%2Fmylibrarynew%2C" \
                       "system%2Fbase%2Cviews%2Fviews.module"

        if int(start_date[-2:]) < 11:
            update_query(
                "UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '%s'"
                % (court_name,))
            if int(end_date_[-2:]) < 11:
                update_history_tracker(court_name)
                return True

        for month_year in month_list_([str(start_date), str(end_date_)]):
            # The form's year field is the two-digit year shifted down by 10.
            order_year = int(month_year[-2:]) - 10

            exit_sql = "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'"
            tracker_row = select_one_query(exit_sql)
            if tracker_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            update_query(
                "UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                % (month_year, end_date_, court_name))

            form_body = "".join((
                payload_head,
                str(month_year[:-2]).lstrip("0"),
                "&orderyear=",
                str(order_year),
                payload_tail,
            ))

            reply = requests.request(
                "POST",
                endpoint,
                data=form_body,
                headers=request_headers,
                params={"ajax_form": "1", "_wrapper_format": "drupal_ajax"},
                proxies=proxy_dict)

            # First entry carrying a "data" key wins; later ones are ignored.
            json_res = json.loads(reply.text)
            res = next(
                (BeautifulSoup(str(entry['data']), "html.parser")
                 for entry in json_res if "data" in entry),
                None)

            if res is None:
                logging.error(
                    "NO data Found for start date: %s" % (month_year,))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                    % (court_name,))
            elif not parse_html(res, court_name):
                logging.error(
                    "Failed to parse data from date: %s" % (month_year,))

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: %s" % (start_date,))
        logging.error("Failed to request: %s", e)
        return False
Example #18
0
def request_data(court_name, m_sideflg, start_date, end_date_):
    """Post ``ordqryrepact_action.php`` in consecutive 180-day windows.

    Every pass records the current window in the Tracker row, posts it with
    the given ``m_sideflg`` and feeds the response to ``parse_html``. The
    end of one window is the start of the next. The loop ends once
    ``start_date`` lies after ``end_date_`` or the Tracker row reports
    ``emergency_exit == 1``.

    Args:
        court_name: Tracker row name, also passed to ``parse_html``.
        m_sideflg: Value sent as the ``m_sideflg`` form field.
        start_date: First window start as a ``DD-MM-YYYY`` string.
        end_date_: Upper bound of the walk as a ``DD-MM-YYYY`` string.

    Returns:
        True when the walk finished or an emergency exit was seen, False
        when any exception was raised.
    """
    try:
        endpoint = base_url + "ordqryrepact_action.php"
        request_headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        date_format = "%d-%m-%Y"
        window = datetime.timedelta(days=180)

        while True:
            exit_sql = "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'"
            tracker_row = select_one_query(exit_sql)
            if tracker_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            window_start = datetime.datetime.strptime(str(start_date), date_format)
            end_date = (window_start + window).strftime(date_format)

            # True exactly when start_date has moved past end_date_.
            last_allowed_end = datetime.datetime.strptime(end_date_, date_format) + window
            if last_allowed_end < datetime.datetime.strptime(end_date, date_format):
                logging.error("DONE")
                break

            update_query(
                "UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                % (start_date, end_date, court_name))

            form_body = "pageno=1&frmaction=&m_sideflg=%s&actcode=0&frmdate=%s&todate=%s" % (
                m_sideflg, start_date, end_date)

            reply = requests.request("POST",
                                     endpoint,
                                     data=form_body,
                                     headers=request_headers)
            page = reply.text

            if "invalid inputs given" in page.lower():
                logging.error(
                    "NO data Found for start date: %s" % (start_date,))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                    % (court_name,))
            elif not parse_html(page, court_name, m_sideflg):
                logging.error(
                    "Failed to parse data from date: %s" % (start_date,))

            # Whatever happened, the next window begins where this one ended.
            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: %s" % (start_date,))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """Post ``date_JQ.asp`` once per day from ``start_date`` on.

    Each pass records the current day and the following day in the Tracker
    row, posts the day, month and year of ``start_date`` (day and month
    without leading zeros) and feeds the response to ``parse_html``. The
    loop ends once ``start_date`` lies after ``end_date_`` or the Tracker
    row reports ``emergency_exit == 1``.

    Args:
        court_name: Tracker row name, also passed to ``parse_html``.
        start_date: First day to request as a ``DD-MM-YYYY`` string.
        end_date_: Last day to request as a ``DD-MM-YYYY`` string.

    Returns:
        True when the walk finished or an emergency exit was seen, False
        when any exception was raised.
    """
    try:
        endpoint = base_url + "date_JQ.asp"
        request_headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        date_format = "%d-%m-%Y"
        one_day = datetime.timedelta(days=1)

        while True:
            exit_sql = "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'"
            tracker_row = select_one_query(exit_sql)
            if tracker_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True

            current_day = datetime.datetime.strptime(str(start_date), date_format)
            end_date = (current_day + one_day).strftime(date_format)

            # True exactly when start_date has moved past end_date_.
            last_allowed_end = datetime.datetime.strptime(end_date_, date_format) + one_day
            if last_allowed_end < datetime.datetime.strptime(end_date, date_format):
                logging.error("DONE")
                break

            update_query(
                "UPDATE Tracker SET Start_Date = '%s', End_Date = '%s' WHERE Name = '%s'"
                % (start_date, end_date, court_name))

            day_field = str(start_date[0:2]).lstrip('0')
            month_field = str(start_date[3:5]).lstrip('0')
            year_field = str(start_date[-4:])
            form_body = "txtday=%s&txtmonth=%s&txtyear=%s" % (
                day_field, month_field, year_field)

            reply = requests.request("POST",
                                     endpoint,
                                     data=form_body,
                                     headers=request_headers,
                                     proxies=proxy_dict)
            page = reply.text

            if "no judgement found for your search" in page.lower():
                logging.error(
                    "NO data Found for start date: %s" % (start_date,))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '%s'"
                    % (court_name,))
            elif not parse_html(page, court_name):
                logging.error(
                    "Failed to parse data from date: %s" % (start_date,))

            # Move on to the following day in either case.
            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: %s" % (start_date,))
        logging.error("Failed to request: %s", e)
        return False
Example #20
0
def request_data(court_name, headers, start_date, end_date_):
    """Search judgements day by day for the benches IND, JBP and GWL.

    For every day from ``start_date`` on, one POST per bench is sent to
    ``judgement_pro_all.php`` with ``date1`` set to the day and ``date2`` to
    the following day, and the response is passed to ``parse_html``. The
    walk stops once ``start_date`` lies after ``end_date_``, or as soon as
    the Tracker row reports ``emergency_exit == 1`` (checked before every
    bench request).

    The date only advances after all three benches were queried. Moving it
    forward inside the bench loop would make the remaining benches skip the
    current day whenever an earlier bench had no results.

    Args:
        court_name: Tracker row name, also passed to ``parse_html``.
        headers: HTTP headers sent with every POST.
        start_date: First day to request as a ``DD-MM-YYYY`` string.
        end_date_: Last day to request as a ``DD-MM-YYYY`` string.

    Returns:
        True when the walk finished or an emergency exit was seen, False
        when any exception was raised.
    """
    try:
        url = base_url + '/php/hc/judgement/judgement_pro_all.php'
        date_format = "%d-%m-%Y"
        one_day = datetime.timedelta(days=1)
        benches = ('IND', 'JBP', 'GWL')

        while True:
            day_end = datetime.datetime.strptime(
                str(start_date), date_format) + one_day
            end_date = day_end.strftime(date_format)

            # Both sides carry the same one-day offset, so this is true
            # exactly when start_date is later than end_date_.
            if datetime.datetime.strptime(
                    str(end_date_), date_format) + one_day < day_end:
                logging.error("END date Exceed.")
                break

            # Identical for every bench of the day: "N" only when both ends
            # of the window fall in 2014 or earlier.
            if int(end_date[-4:]) <= 2014 and int(start_date[-4:]) <= 2014:
                only_afr = "N"
            else:
                only_afr = "Y"

            for bench in benches:
                emergency_exit = select_one_query(
                    "SELECT emergency_exit FROM Tracker WHERE Name='" +
                    court_name + "'")
                if emergency_exit['emergency_exit'] == 1:
                    update_history_tracker(court_name)
                    return True

                update_query("UPDATE Tracker SET Start_Date = '" +
                             str(start_date) + "', End_Date = '" +
                             str(end_date) + "' WHERE Name = '" +
                             str(court_name) + "'")

                payload = "lst_judge=0" \
                          "&lst_pet=" \
                          "&txtparty=" \
                          "&lst_counsel=" \
                          "&txtcounsel=" \
                          "&date1=" + str(start_date) + \
                          "&date2=" + str(end_date) + \
                          "&court=" + str(bench) + \
                          "&lst_judge1=0" \
                          "&lst_judge2=0" \
                          "&btn_search=is" \
                          "&bench=" \
                          "&sort=jo" \
                          "&ad=DESC" \
                          "&code=" \
                          "&onlyafr=" + only_afr

                response = requests.request("POST",
                                            url,
                                            data=payload,
                                            headers=headers,
                                            proxies=proxy_dict)
                res = response.text

                # The misspelling matches the text the site actually returns.
                if "no jugdement or order found that you want to search" in res.lower():
                    logging.error("NO data Found for start date: " +
                                  str(start_date))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '"
                        + str(court_name) + "'")
                    sleep(2)
                    # Stay on the same day; the other benches still need it.
                    continue

                if not parse_html(res, court_name, bench):
                    logging.error("Failed to parse data from date: " +
                                  str(start_date))

            # All benches done for this day; move on to the next one.
            start_date = end_date

        return True

    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False