def create_condiciones():
    """Pivot the raw 'condiciones' table to one row per claim and save it
    as a feather file in the temporary folder."""
    table_name = "condiciones"
    source = read_path / (TABLES[table_name] + READ_FILES_FORMAT)
    raw = read_csv(source, usecols=TABLE_COLS[table_name])

    # One row per claim (NUM_SECU_EXPED), one column per condition code;
    # when a condition repeats for the same claim, the last record wins.
    pivot = raw.pivot_table(index="NUM_SECU_EXPED",
                            columns="CONDICION",
                            values="VALOR_CONDICION",
                            aggfunc="last")
    pivot = pivot.add_prefix("cond_")

    # Derived features over all condition columns.
    pivot["total_condicion"] = pivot.sum(axis=1)
    pivot["es_gte_5"] = pivot["total_condicion"] >= 5

    out_dir = Path(TMP_FOLDER)
    pivot.reset_index().to_feather(out_dir / (TABLES[table_name] + ".feather"))
def create_vigabt_polizas():
    """Clean the raw 'vigabt' policy table, derive policy-level features
    and save the result as a feather file."""
    table_name = "vigabt"
    source = read_path / (TABLES[table_name] + READ_FILES_FORMAT)
    df = read_csv(source, usecols=TABLE_COLS[table_name])

    # Normalise the key column name and keep the last record per policy.
    df.rename(columns={"ID": "CIF_ID"}, inplace=True)
    df.drop_duplicates(subset=["NUM_SECU_POL"], keep="last", inplace=True)

    # Parse date columns (dd/mm/yyyy); unparseable values become NaT.
    date_cols = [
        'FECHA_PROCESO',
        'FECHA_VENC_POL',
        'FECHA_VIG_ORIG_POL',
        'FECHA_VIG_POL',
    ]
    for name in date_cols:
        df[name] = pd.to_datetime(df[name], format="%d/%m/%Y", errors="coerce")

    # Did the collection method change? Only meaningful when a previous
    # method is actually recorded.
    df["cambio_cobro"] = ((df["COD_COBRO"] != df["COD_COBRO_ANTERIOR"])
                          & df["COD_COBRO_ANTERIOR"].notna())
    # Policy age in days at the current validity date.
    df["ANTIG_calc"] = (df["FECHA_VIG_POL"] - df["FECHA_VIG_ORIG_POL"]).dt.days
    # Cast to str — presumably for a uniform dtype in the feather export;
    # confirm against the export requirements.
    df["CONV_COMISIONARIO"] = df["CONV_COMISIONARIO"].astype(str)

    out_dir = Path(TMP_FOLDER)
    df.reset_index(drop=True).to_feather(out_dir / (TABLES[table_name] + ".feather"))
def search_minutes(self):
    """Prompt for a minutes value and page through matching log entries.

    Loads the log via helpers.read_csv(); for every stored row that
    contains the entered value, prints the entry and lets the user step
    to the next match ([N]) or return to the menu ([R]).
    """
    if helpers.read_csv() != "empty":
        minutes = input(
            "Type in Number of minutes spent on task to search for it ")
        counter = 1
        for key in helpers.my_dict.keys():
            # NOTE: membership matches the value against any field of the
            # row, not only the minutes column.
            if minutes in helpers.my_dict[key]:
                entry = helpers.my_dict[key]
                print("Title : {}".format(entry[0]))
                print("Date : {}".format(entry[1]))
                # typo fixed: "Minuest Spent" -> "Minutes Spent"
                print("Minutes Spent : {}".format(entry[2]))
                print("Notes :{} ".format(entry[3]))
                print("{} out of {}".format(counter,
                                            helpers.find_counts(minutes)))
                ask = input("[N]ext [R]eturn menu ")
                clear()
                if ask.lower() == "n":
                    counter += 1
                elif ask.lower() == "r":
                    break
        else:
            # for-else: runs only when the loop was not broken out of.
            # typo fixed: "entriies" -> "entries"
            input("No more entries press enter to go back to menu")
            clear()
    else:
        input(" You have no entries press return to go back to menu")
        clear()
def range_dates(self):
    """Prompt for two dates and page through log entries between them
    (inclusive), letting the user step with [N] or return with [R]."""
    if helpers.read_csv() != "empty":
        counter = 1
        first_date = helpers.add_date()[1]
        print("Now second Date")
        second_date = helpers.add_date()[1]
        for key in helpers.my_dict.keys():
            # Stored dates use the m/d/Y format.
            entry_date = datetime.strptime(helpers.my_dict[key][1],
                                           "%m/%d/%Y")
            if first_date <= entry_date <= second_date:
                self.print_statement(key)
                print("{} out of {}".format(
                    counter, helpers.find_range(first_date, second_date)))
                ask = input("[N]ext [E]dit [R]eturn menu ")
                clear()
                if ask.lower() == "n":
                    counter += 1
                elif ask.lower() == "r":
                    break
        else:
            # for-else: runs only when the loop was not broken out of.
            # typo fixed: "entriies" -> "entries"
            input("No more entries press enter to go back to menu")
            clear()
    else:
        input(" You have no entries press return to go back to menu")
        clear()
def create_siniestros():
    """Clean the raw 'siniestros' (claims) table, derive date-distance
    features and save the result as a feather file.

    Fixes vs. original: removes a stray no-op ``to_drop`` expression
    statement, stops reusing one name for two unrelated lists, narrows the
    ``except`` around ``int(v)`` to the exceptions it can raise, and drops
    commented-out dead code.
    """
    table_name = "siniestros"
    file = read_path / (TABLES[table_name] + READ_FILES_FORMAT)

    # Make sure the date / key columns are loaded even when they are
    # missing from TABLE_COLS for this table.
    usecols = set(TABLE_COLS[table_name])
    usecols.update([
        'FECHA_SINI', 'FEC_DENU_SINI', 'FECHA_NAC_ASEG', 'FECHA_NAC_TERC',
        "FECHA_FORMAL", "NUM_SECU_EXPED", "NUM_SECU_POL", "TIPO_EXPED",
    ])
    df = read_csv(file, usecols=usecols)

    # Keep the latest record per claim.
    df.drop_duplicates(subset=["NUM_SECU_EXPED"], keep="last", inplace=True)

    # Parse date columns (dd/mm/yyyy); unparseable values become NaT.
    to_date = ['FECHA_SINI', 'FEC_DENU_SINI', 'FECHA_NAC_ASEG',
               'FECHA_NAC_TERC', "FECHA_FORMAL"]
    for col in to_date:
        df[col] = pd.to_datetime(df[col], format="%d/%m/%Y", errors="coerce")

    # Cast to str — presumably for a uniform dtype in the feather export.
    df["TIPO_EXPED"] = df["TIPO_EXPED"].astype(str)

    # Derived variables.
    if "MCA_COASEG" in df.columns:
        df["MCA_COASEG"] = df["MCA_COASEG"] == "SI"
    df["dist_fformal_fsini"] = (df["FECHA_FORMAL"] - df["FECHA_SINI"]).dt.days
    df["dist_fformal_fdenu"] = (df["FECHA_FORMAL"] - df["FEC_DENU_SINI"]).dt.days
    df["dias_entre_denu_y_sini"] = (df["FEC_DENU_SINI"] - df["FECHA_SINI"]).dt.days
    df["edad_aseg"] = df["FECHA_SINI"].dt.year - df['FECHA_NAC_ASEG'].dt.year
    df["edad_terc"] = df["FECHA_SINI"].dt.year - df['FECHA_NAC_TERC'].dt.year
    df["existe_FECHA_FORMAL"] = df["FECHA_FORMAL"].notna()

    # Source date columns are no longer needed once the features exist.
    df.drop(columns=['FEC_DENU_SINI', 'FECHA_NAC_ASEG', 'FECHA_NAC_TERC',
                     "FECHA_FORMAL"], inplace=True)

    # Drop rows whose NUM_SECU_POL is not an integer (they broke the
    # export in the original pipeline).
    bad_values = []
    for v in df["NUM_SECU_POL"].unique():
        try:
            int(v)
        except (TypeError, ValueError) as e:  # int() raises only these
            print(f"{e}: {v}")
            bad_values.append(v)
    df = df[~df["NUM_SECU_POL"].isin(bad_values)]

    # tmp save file
    tmp_file = Path(TMP_FOLDER)
    df.reset_index(drop=True).to_feather(tmp_file / (TABLES[table_name] + ".feather"))
def create_tb_cif():
    """Load the raw 'cif' table, rename its ID column to CIF_ID and save
    it as a feather file in the temporary folder."""
    table_name = "cif"
    source = read_path / (TABLES[table_name] + READ_FILES_FORMAT)
    data = read_csv(source, usecols=TABLE_COLS[table_name])

    # Align the key column name with the other tables.
    data.rename(columns={"ID": "CIF_ID"}, inplace=True)

    out_dir = Path(TMP_FOLDER)
    data.reset_index(drop=True).to_feather(out_dir / (TABLES[table_name] + ".feather"))
def control_file_structure():
    """Check if all files have the ID column and all other required columns."""
    print("Controlling files structure ", end="")

    # Load a small sample of every configured table.
    samples = {}
    for name, base in TABLES.items():
        path = Path(READ_FOLDER) / (base + READ_FILES_FORMAT)
        samples[name] = read_csv(path, nrows=SAMPLE_SIZE)

    # Run the structural checks, printing a dot per completed check.
    control_id_column(samples)
    print(".", end="")
    # control_merging_compatibility(samples)  # disabled check, kept as-is
    print(".", end="")
    print(" OK")
def create_asegurados():
    """Clean the raw 'asegurados' table and save it as a feather file.

    Fix vs. original: ``df["TIPO_ACTIVIDAD"].replace(..., inplace=True)``
    operates on a column selection; inplace replacement through a
    selection is deprecated in modern pandas and may silently fail to
    modify the frame. Assign the result back instead.
    """
    table_name = "asegurados"
    file = read_path / (TABLES[table_name] + READ_FILES_FORMAT)
    df = read_csv(file, usecols=TABLE_COLS[table_name])

    # Keep the most recent record per person.
    df.drop_duplicates(subset=["CIF_ID"], keep="last", inplace=True)

    # Parse date columns (dd/mm/yyyy); unparseable values become NaT.
    for col in ["FECHA_DESDE", "FECHA_NACIMIENTO"]:
        df[col] = pd.to_datetime(df[col], format="%d/%m/%Y", errors="coerce")

    # "SinDato" is a missing-value placeholder; turn it into a real NaN.
    df["TIPO_ACTIVIDAD"] = df["TIPO_ACTIVIDAD"].replace("SinDato", np.nan)

    # tmp save file
    tmp_file = Path(TMP_FOLDER)
    df.reset_index(drop=True).to_feather(tmp_file / (TABLES[table_name] + ".feather"))
def search_pattern(self):
    """Prompt for a regular expression and page through matching entries.

    An entry matches when the pattern matches the start of its title or
    of its notes (``re.match`` semantics).

    Fixes vs. original: removes dead work (the whole log.csv was read
    into a string and ``re.findall`` run on it, with the result unused),
    drops a redundant ``continue``, fixes the "Minuest Spent" typo, and
    swaps the for-else / outer-else messages so they mean the same thing
    as in the sibling ``search_minutes`` ("no more entries" after the
    loop is exhausted, "you have no entries" when the log is empty).
    """
    if helpers.read_csv() != "empty":
        pattern_text = input("Type in your Regular Expressions ")
        pattern = re.compile(pattern_text)
        counter = 1
        for key in helpers.my_dict.keys():
            entry = helpers.my_dict[key]
            if pattern.match(entry[0]) or pattern.match(entry[3]):
                print("Title : {}".format(entry[0]))
                print("Date : {}".format(entry[1]))
                print("Minutes Spent : {}".format(entry[2]))
                print("Notes :{} ".format(entry[3]))
                print("{} out of {}".format(counter,
                                            helpers.find_counts(pattern)))
                ask = input("[N]ext [R]eturn menu ")
                clear()
                if ask.lower() == "n":
                    counter += 1
                elif ask.lower() == "r":
                    break
        else:
            # for-else: runs only when the loop was not broken out of.
            input("No more entries press enter to go back to menu")
            clear()
    else:
        input(" You have no entries press return to go back to menu")
        clear()
def create_preguntas():
    """Pivot the raw 'preguntas' table to one row per claim and save it
    as a feather file in the temporary folder."""
    table_name = "preguntas"
    source = read_path / (TABLES[table_name] + READ_FILES_FORMAT)
    raw = read_csv(source, usecols=TABLE_COLS[table_name])

    # One row per claim (NUM_SECU_EXPED), one column per question code;
    # when a question is answered more than once, the last record wins.
    pivot = raw.pivot_table(index="NUM_SECU_EXPED",
                            columns="COD_PREGUNTA",
                            values="VALOR_PREGUNTA",
                            aggfunc="last")
    pivot = pivot.add_prefix("preg_")

    out_dir = Path(TMP_FOLDER)
    pivot.reset_index().to_feather(out_dir / (TABLES[table_name] + ".feather"))
def sheet():
    """Render the task sheet populated with the rows read from the log."""
    tasks = read_csv()
    return render_template("sheet.html", tasks=tasks)
from helpers import get_config, parse_input_args, read_csv, write_row_csv, google_search, proxy_builder

# Verify every proxy listed in the configured CSV by issuing a test
# Google search through it, and write the result back into the file.
#
# Fixes vs. original: progress messages interpolated the `range` object
# instead of the proxy count; `if i % 10:` printed progress on 9 of every
# 10 iterations instead of every 10th; a useless `i += 1` inside the
# `for` loop was removed; "Proccessed" typo corrected.
args = parse_input_args()
config = get_config(args)

proxy_csv = config.get('PROXY_SCRAPPER', 'csv_proxies')
proxies = read_csv(proxy_csv)
total = len(proxies['ip'])

for i in range(total):
    is_valid = True
    try:
        response = google_search('test', proxy_builder(proxies, i))
        if i % 10 == 0:  # progress every 10th proxy
            print('Processed: %s/%s\n' % (i, total))
        if response.status_code != 200:
            raise Exception('Not valid proxy')
    except Exception:
        # Best effort: any failure (connection error, bad status) just
        # marks the proxy invalid.
        is_valid = False
    write_row_csv(proxy_csv, i, 'valid', str(is_valid))

print('Processed: %s/%s\n' % (total, total))
print('Verification finished\n')
from decimal import * from finance import Bank, Facility, Covenant, Loan from helpers import read_csv, write_csv # Read the csv file from banks.csv and generate bank dictionary banks_rows = read_csv('banks.csv') banks = {} for row in banks_rows: bank_id = int(row[0]) bank_name = row[1] bank = Bank(bank_id, bank_name) banks[bank.bank_id] = bank # Read the csv file from facilities.csv and assign facilities to banks facilities_rows = read_csv('facilities.csv') for row in facilities_rows: facility_id = int(row[2]) bank_id = int(row[3]) interest_rate = Decimal(row[1]) amount = Decimal(row[0]) facility = Facility(facility_id, bank_id, interest_rate, amount) banks[facility.bank_id].facilities[facility_id] = facility # Read the csv file from covenants.csv and assign covenants to banks or facilities covenants_rows = read_csv('covenants.csv') for row in covenants_rows: bank_id = int(row[2]) if row[0] is not None and len(row[0]) > 0: