def concat_xlsx_files_from_folder(xls_folder):
    """
    Load every ``.xlsx`` workbook found in a project sub-folder and stack them
    into a single dataframe.

    Each loaded frame gets a ``file_name`` column holding the source file name
    without its extension, so rows can be traced back to their workbook after
    concatenation.

    :param xls_folder: folder name, resolved relative to ``project_folder``
    :return: concatenation of all loaded frames, re-indexed from 0
    :raises ValueError: if the folder contains no ``.xlsx`` workbooks
    """
    folder_path = os.path.join(project_folder, xls_folder)
    df_xlsx_list = []
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order of the result reproducible between runs and machines.
    for file in sorted(os.listdir(folder_path)):
        # Test the real extension: a substring check would also accept names
        # such as "report.xlsx.bak".
        if not file.endswith(".xlsx"):
            continue
        # "~$name.xlsx" is the lock file Excel creates next to an open
        # workbook; it is not a readable workbook.
        if file.startswith("~$"):
            continue
        df_xlsx = load_some_xlsx(file, folder_path)
        df_xlsx["file_name"] = os.path.splitext(file)[0]
        print(df_xlsx)
        df_xlsx_list.append(df_xlsx)
    if not df_xlsx_list:
        # pd.concat([]) raises ValueError as well, but with a message that
        # does not say which folder was empty.
        raise ValueError("No .xlsx files found in {!r}".format(folder_path))
    return pd.concat(df_xlsx_list, ignore_index=True)
def clean_tenders_names(dataframe):
    """
    Clean tender's and lot's names.

    Applies every Find/Replace pair from ``replace_names_patterns.xlsx``
    (loaded from the ``actual_funcs`` folder) to the
    "Название тендера и лота" column, ignoring case, and then capitalizes the
    resulting names.

    :param dataframe: frame with a "Название тендера и лота" string column;
        the column is overwritten in place
    :return: the same ``dataframe`` object with the cleaned column
    """
    name_col = "Название тендера и лота"
    replace_patterns = load_some_xlsx("replace_names_patterns.xlsx", folder="actual_funcs")
    # Blank cells become empty strings, so an empty "Replace" cell means
    # "delete the match".
    replace_patterns.fillna('', inplace=True)
    for f, r in zip(replace_patterns["Find"], replace_patterns["Replace"]):
        # A blank "Find" cell would be an empty pattern, which matches between
        # every character and would inject the replacement throughout each name.
        if f == "":
            continue
        # NOTE(review): `regex` is left at the pandas default, which differs
        # between pandas versions (regex before 2.0, literal from 2.0) --
        # confirm whether "Find" holds regular expressions and pin
        # `regex=` explicitly.
        dataframe[name_col] = dataframe[name_col].str.replace(pat=f, repl=r, case=False)
    # Matching above is case-insensitive, so a single capitalization pass
    # after all replacements is enough.
    dataframe[name_col] = dataframe[name_col].str.capitalize()
    return dataframe
from non_actual_funcs.concat_xlsx_files import concat_xlsx_files_from_folder
from libs.db_libs.write import write_some_xlsx
from libs.db_libs.load import load_some_xlsx
from actual_funcs.set_hyperlinks import set_hyperlinlks_in_excel_col

# Disabled step; the two imports above that look unused belong to it:
# cnct = concat_xlsx_files_from_folder("test_xlsxs")
# write_some_xlsx(cnct, "plans_new.xlsx")

# Read the tender-number column and turn every value into a string so it can
# be appended to the URL prefix passed below.
plans_df = load_some_xlsx("планы.xlsx", folder="")
tender_numbers = plans_df["Номер тендера в системе бикотендер"].tolist()
hyperlinks_adresses = list(map(str, tender_numbers))

# No display texts are supplied (hyperlinks_texts=None).
set_hyperlinlks_in_excel_col(
    hyperlinks_adresses,
    hyperlinks_texts=None,
    xl_col=1,
    tip="",
    preffix="http://www.bicotender.ru/tc/tender/show/tender_id/",
    xls_file="hyperlinks.xlsx",
    xls_worksheet="hyperlinks",
    folder="",
)
import pandas as pd

from libs.db_libs.load import load_some_xlsx
from actual_funcs.set_hyperlinks import set_hyperlinlks_in_excel_col

# Source workbook: one column supplies the link targets, the other the text
# shown for each link.
xls = load_some_xlsx("с регионами.xlsx", "")
links_adresses, links_texts = (
    xls[column].tolist() for column in ("Ссылка на поставщика", "Победитель")
)

# Echo every target address to stdout before the workbook is written.
for address in links_adresses:
    print(address)

# `preffix` is deliberately not passed; the value that used to be supplied is
# kept here for reference:
# preffix="https://www.bicotender.ru/tc/tender/show/tender_id/"
set_hyperlinlks_in_excel_col(
    hyperlinks_adresses=links_adresses,
    hyperlinks_texts=links_texts,
    xl_col=1,
    xls_file="winner_hyper_links.xlsx",
)
import os

from actual_funcs.parse_search_results import (
    download_html,
    parse_html_tables_from_folder,
    beatify_parsed_tables,
)
from libs.db_libs.write import write_some_xlsx
from actual_funcs.clean_tenders_and_lots_names import clean_tenders_names
from config.config import project_folder
from actual_funcs.mark_duplicates import mark_duplicates
from libs.db_libs.load import load_some_xlsx

# Analytics search URL handed to the downloader. The surrounding whitespace
# is part of the literal.
my_link = """ https://www.bicotender.ru/crm/analytics/list/?atype=field_1&filter_id=302437&competitorRowNumber[to]=1&showConf[asLot]=2&showConf[competitorFilterMode]=2&submit=1 """

download_html(my_link, results_qty=4000)

# Parse the tables from the "html_downloads" folder, then pass the frame
# through each transformation stage in order.
tenders_df = parse_html_tables_from_folder("html_downloads")
for stage in (beatify_parsed_tables, clean_tenders_names, mark_duplicates):
    tenders_df = stage(tenders_df)

# Write the result into the project folder, then load it back and print it.
output_path = os.path.join(project_folder, "marked_duplicates.xlsx")
write_some_xlsx(tenders_df, output_path, index=True)
print(load_some_xlsx("marked_duplicates.xlsx", ""))