def save_registration_data(start_idx: int) -> list:
    """
    Fetch one page of product registration records and store each one in the DB.

    Writes ``start_idx`` into the shared ``registration_param`` mapping, POSTs
    it to ``REGISTER_URL`` and inserts every record of the JSON response via
    ``insert_in_db`` using the module-level ``CONN`` / ``CURSOR`` / ``SQL``.

    :param start_idx: page number of the registration listing to request
    :return: one row per record, each holding the values of ``no``,
        ``prdlst_report_ledg_no``, ``prdlst_report_no``, ``prdlst_nm``,
        ``bssh_nm`` and ``prms_dt`` in that order; ``None`` when a
        ``pymysql.Error`` was raised (the error is printed, not re-raised)
    """
    # Response fields copied into each row, in column order.
    field_keys = (
        "no",
        "prdlst_report_ledg_no",
        "prdlst_report_no",
        "prdlst_nm",
        "bssh_nm",
        "prms_dt",
    )
    try:
        registration_param["start_idx"] = start_idx
        response = requests.post(REGISTER_URL, data=registration_param)
        response.encoding = "utf-8"

        rows = []
        for record in response.json():
            row = [record[key] for key in field_keys]
            rows.append(row)
            insert_in_db(data=row, conn=CONN, cursor=CURSOR, sql=SQL)
    except pymysql.Error as e:
        print(e)
    else:
        return rows
def save_all_data(sql: str, conn, cursor) -> list:
    """
    Walk the paginated listing and store every row of every page in the DB.

    Pages are read one at a time with ``get_current_page_data()`` until a page
    whose first row starts with ``"1"`` has been stored. The connection is
    closed when the function finishes, whether it succeeded or failed.

    :param sql: SQL statement handed to ``insert_in_db`` for each row
    :param conn: DB connection; closed by this function
    :param cursor: DB cursor handed to ``insert_in_db``
    :return: list of pages (each page is the list of rows that was returned by
        ``get_current_page_data``), or ``None`` if an error occurred (the
        error is printed, not re-raised)
    """
    try:
        try:
            all_data = []
            while True:
                page_data = get_current_page_data()
                print("==========Get Current Page data==============\n")
                if page_data is None:
                    # Nothing usable this round; ask for the page again.
                    continue
                all_data.append(page_data)
                for data in page_data:
                    insert_in_db(data=data, conn=conn, cursor=cursor, sql=sql)
                # A first cell of "1" marks the last page to process.
                if page_data[0][0] == "1":
                    break
            return all_data
        finally:
            # Release the connection on failure too, not only on success.
            conn.close()
    # The specific handler must precede the generic one to be reachable.
    except pymysql.Error as e:
        print(e)
    except Exception as e:
        print(e)
def _save_single_report(num, conn, cursor, sql: str):
    """Scrape one report number in its own browser and insert the result."""
    browser = set_chrome_browser(PATH)
    try:
        browser.get(SITE_URL)
        page = get_product_page(num, browser)
        product_info = get_product_info(num, page)
        print(product_info)
        insert_in_db(data=product_info, conn=conn, cursor=cursor, sql=sql)
    finally:
        # Always release the browser, even when scraping or the insert fails.
        browser.close()


def save_in_db(conn, cursor, sql: str):
    """
    Scrape the product info for every entry of ``REPORT_NUMS`` and store it.

    A fresh browser is opened for each report number and closed again once
    that report has been handled. The connection is closed when the function
    finishes, whether it succeeded or failed. Errors are printed, not
    re-raised, and stop the remaining report numbers from being processed.

    :param conn: DB connection; closed by this function
    :param cursor: DB cursor handed to ``insert_in_db``
    :param sql: SQL statement handed to ``insert_in_db`` for each product
    :return: ``None``
    """
    try:
        try:
            for num in REPORT_NUMS:
                _save_single_report(num, conn, cursor, sql)
        finally:
            conn.close()
    except pymysql.Error as e:
        print(e)
    except Exception as e:
        print(e)
def save_material_data(start_idx: int) -> list:
    """
    Fetch one page of the material board, scrape each post and store it.

    Writes ``start_idx`` into the shared ``material_param`` mapping and POSTs
    it to ``REGISTER_URL``. For every entry under the ``"list"`` key of the
    JSON response, the post's detail page is requested, the text of the
    ``<p id="bdt_pre">`` element is split on ``"○"`` (dropping the part before
    the first marker), passed through ``preprocess_data`` and inserted via
    ``insert_in_db`` using the module-level ``CONN`` / ``CURSOR`` / ``SQL``.

    :param start_idx: page number of the listing to request
    :return: the preprocessed data of every post on the page, in listing
        order; ``None`` when a ``pymysql.Error`` was raised (the error is
        printed, not re-raised)
    """
    print(f"Start Idx = {start_idx}\n")
    material_url = "https://www.foodsafetykorea.go.kr/portal/board/boardDetail.do"
    try:
        material_param["start_idx"] = start_idx
        req = requests.post(REGISTER_URL, data=material_param)
        # Parse the listing once and reuse it for both the log and the loop.
        listing = req.json()
        print(listing)
        total_data = []
        for js in listing["list"]:
            material_no = js["ntctxt_no"]
            print(material_no)
            material_params = {
                "menu_no": 2660,
                "menu_grp": "MENU_NEW01",
                "copyUrl": "https://www.foodsafetykorea.go.kr:443/portal/board/board.do?menu_no=2660&menu_grp=MENU_NEW01",
                "bbs_no": "bbs987",
                "ntctxt_no": material_no,
            }
            material_req = requests.post(material_url, data=material_params)
            # Force UTF-8 on the detail response whose text is parsed below;
            # the override used to be applied to the listing response instead.
            material_req.encoding = "utf8"
            soup = BeautifulSoup(material_req.text, "html.parser")
            material_info = soup.find("p", {"id": "bdt_pre"}).text
            material_info = material_info.split("○")[1:]
            print(f"Before Preprocess Data = {material_info}\n")
            material_info = preprocess_data(material_info)
            print(f"material_info len = {len(material_info)}")
            pprint(material_info)
            insert_in_db(data=material_info, conn=CONN, cursor=CURSOR, sql=SQL)
            total_data.append(material_info)
        return total_data
    except pymysql.Error as e:
        print(e)
def insert_data(conn, data_file):
    """Load rows from *data_file* and insert those not yet in ``auctions``.

    Each row obtained from ``get_data(data_file)`` is looked up by its first
    two fields (``auct_num``, ``date_in``); rows for which ``get_from_db``
    finds nothing are inserted. Pending changes are committed and a short
    summary is emitted through ``show_report``. Nothing happens when
    ``get_data`` yields no rows.

    :param conn: open DB connection exposing ``total_changes`` and
        ``commit()`` (presumably ``sqlite3.Connection`` -- confirm at caller)
    :param data_file: source handed to ``get_data``
    :return: ``None``
    """
    check_sql = "SELECT * FROM auctions WHERE auct_num = ? AND date_in = ?;"
    # Implicit concatenation keeps source indentation out of the query text.
    insert_sql = (
        "INSERT INTO auctions (auct_num, date_in, date_out, money, "
        "percent, val_code, stock_code) "
        "VALUES (?, ?, ?, ?, ?, ?, ?);"
    )

    data = get_data(data_file)
    if not data:
        return

    # The first two columns identify a row.
    primary_key = slice(0, 2)
    for row in data:
        if not get_from_db(conn, check_sql, row[primary_key]):
            insert_in_db(conn, insert_sql, row)
    if conn.total_changes:
        conn.commit()

    # Report the result.
    show_report()
    show_report("Rows in data: {}".format(len(data)))
    show_report("Total changes: {}".format(conn.total_changes))
    show_report()
def save_product_detail_data(detail_keywords: tuple):
    """
    Fetch a product's detail page, extract its fields and store them in the DB.

    POSTs the search number to ``DETAIL_URL``, keeps the text of the first
    (at most) 12 ``<td>`` cells of the response, appends the result of
    ``get_materials_data(report_no)`` and inserts the row via ``insert_in_db``
    using the module-level ``CONN`` / ``CURSOR`` / ``SQL``.

    :param detail_keywords: ``(search_no, report_no)`` pair
    :return: the stored row as a list; ``None`` when a ``pymysql.Error`` was
        raised (the error is printed, not re-raised)
    """
    try:
        ledger_no, report_no = detail_keywords
        form_data = {
            "prdlstReportLedgNo": ledger_no,
            "menu_grp": "MENU_NEW01",
            "menu_no": 2823,
            "search_code": "01",
            "start_idx": "1",
        }
        response = requests.post(DETAIL_URL, form_data)
        cells = BeautifulSoup(response.text, "html.parser").find_all("td")

        row = [cell.text for cell in cells[:12]]
        # The materials come last, after the table cells.
        row.append(get_materials_data(report_no))

        insert_in_db(data=row, conn=CONN, cursor=CURSOR, sql=SQL)
        return row
    except pymysql.Error as e:
        print(e)