def create_template_csv_from_serie(serie, list_name):
    """Build a CrowdTangle list-template table from the series' index and export it.

    The template has two columns: the URLs (taken from ``serie.index``) and
    the list name repeated on every row. It is written out via
    ``export_data`` under ``crowdtangle_list/<list_name>.csv`` and returned.
    """
    columns = ["Page or Account URL", "List"]
    template = pd.DataFrame(columns=columns)
    template[columns[0]] = serie.index
    template[columns[1]] = list_name
    export_data(template, 'crowdtangle_list', list_name + '.csv')
    return template
def _exec_crf_test(input_text):
    """Feed *input_text* (via utils.export_data) to crf_test and return its output.

    The exported text is staged in a temporary file that is cleaned up
    automatically when the ``with`` block exits.
    """
    with tempfile.NamedTemporaryFile(mode='w') as crf_input:
        crf_input.write(utils.export_data(input_text))
        crf_input.flush()  # make the buffered data visible to the child process
        command = [
            'crf_test',
            '--verbose=1',
            '--model', 'ingredient-parser.crfmodel',
            crf_input.name,
        ]
        return check_output(command)
def last_export():
    """Return the date token embedded in the newest export's filename.

    If no export exists yet, one is generated from the database first.
    Filenames are assumed to look like ``<prefix>_<date>.<ext>``.
    """
    filename = utils.get_latest_export()
    if filename is None:
        filename = utils.export_data(db)
    stem = filename.split(".")[0]
    return stem.split("_")[1]
def admin_index():
    """Render the admin page; with ``?export=1`` first re-export the DB to CSV.

    When the export is requested, an absolute URL to the generated file is
    passed to the template as ``exported_data``.
    """
    if not (len(request.args) > 0 and int(request.args.get("export")) == 1):
        return render_template("admin/index.html")
    # Re-export data from the database to a CSV file.
    exported_name = utils.export_data(db)
    relative_path = url_for("static", filename="data/" + exported_name)
    # url_root already ends with "/", so drop a single leading slash.
    if relative_path.startswith("/"):
        relative_path = relative_path[1:]
    absolute_path = request.url_root + relative_path
    return render_template("admin/index.html", exported_data=absolute_path)
def parse_ingredientForCRF(ingredients):
    """Run the CRF model over *ingredients* and group its labelled tokens.

    The exported ingredients are written to a temp file, ``crf_test`` is run
    on it with output redirected to a second temp file, and the tab-separated
    (token, label) output is parsed. Tokens are stripped of punctuation and
    lowercased; the label kept is the part after the last dash (e.g. the
    "NAME" of "B-NAME"). Blank lines close the current ingredient group.

    Returns a list of groups, each a list of (name, label) tuples.

    NOTE(review): this is Python 2 code (``xrange``, ``str.translate(None, …)``).
    """
    returnArr = []
    eachIngre = []
    _, tmpFile = tempfile.mkstemp()
    _, tmpFile2 = tempfile.mkstemp()
    with open(tmpFile, 'w') as outfile:
        # ``with`` closes the file; the old explicit close() was redundant.
        outfile.write(utils.export_data(ingredients))
    tmpFilePath = "./tmp/model_file"
    modelFilename = os.path.join(os.path.dirname(__file__), tmpFilePath)
    # mkstemp paths contain no shell metacharacters; the model path comes
    # from __file__, so building a shell command string is acceptable here.
    fullCommand = "crf_test -m %s %s  > %s  || exit 1 " % (modelFilename,
                                                           tmpFile, tmpFile2)
    os.system(fullCommand)
    # Remove the input file exactly once (it used to be `rm`-ed twice, the
    # second attempt printing an error), and without spawning a shell.
    os.remove(tmpFile)
    with open(tmpFile2) as f:
        parsedData = f.read()
    os.remove(tmpFile2)
    data = parsedData.split('\n')
    for sen in xrange(len(data)):
        words = data[sen].split('\t')
        name = words[0]
        name = name.translate(None, string.punctuation).lower()
        val = words[len(words) - 1]
        val2 = val.split('-')
        if val2[len(val2) - 1] != '' and name != '':
            obj = (name, val2[len(val2) - 1])
            eachIngre.append(obj)
        else:
            # Sentence boundary: flush the accumulated group, if any.
            if len(eachIngre) > 0:
                returnArr.append(eachIngre)
                eachIngre = []
    return returnArr
    # (Tail of a filtering helper whose `def` lies above this chunk.)
    url_df = url_df.dropna(subset=['scientific_topic'])
    return url_df


def keep_only_topic_data(url_df, TOPIC):
    """Restrict the table to one scientific topic; any other value keeps all rows."""
    if TOPIC in ["climate", "health", "covid"]:
        return url_df[url_df["scientific_topic"] == TOPIC]
    else:
        return url_df


if __name__ == "__main__":
    # Usage: <script> DATE [TOPIC] — builds the fake-news URL table for a
    # Science Feedback appearances export and writes it back as CSV.
    DATE = sys.argv[1]
    TOPIC = sys.argv[2] if len(sys.argv) >= 3 else ""
    url_df = import_data(folder="sciencefeedback",
                         file_name="Appearances-Grid view " + DATE + ".csv")
    url_df = keep_only_the_urls_considered_fake_by_facebook(url_df)
    url_df = clean_url_format(url_df)
    url_df = add_info_from_fact_check_table(url_df)
    url_df = keep_only_topic_data(url_df, TOPIC)
    # Keep only the columns used downstream.
    url_df = url_df[[
        'url', 'url_cleaned', 'domain_name', 'Item reviewed',
        'Date of publication', 'scientific_topic'
    ]]
    print("There are {} fake news urls.".format(len(url_df)))
    export_data(url_df, 'sciencefeedback',
                "appearances_" + DATE + "_" + TOPIC + ".csv")
#!/usr/bin/env python
"""Export the ingredients in FILENAME through the crf_test model (CLI script)."""
from __future__ import print_function
import os
import sys
import tempfile
import utils

if len(sys.argv) < 2:
    sys.stderr.write('Usage: parse-ingredients.py FILENAME')
    sys.exit(1)

FILENAME = str(sys.argv[1])
_, crf_input_path = tempfile.mkstemp()
with open(FILENAME) as source, open(crf_input_path, 'w') as staged:
    staged.write(utils.export_data(source.readlines()))

# NOTE(review): joining with an absolute second path makes os.path.join
# discard the script directory, so the model is always read from
# /tmp/model_file — confirm this is intended.
model_path = os.path.join(os.path.dirname(__file__), "/tmp/model_file")
os.system("crf_test -v 1 -m %s %s" % (model_path, crf_input_path))
os.system("rm %s" % crf_input_path)
    # (Tail of `clean_columns(df)` — its `def` lies above this chunk.)
    clean_df = pd.DataFrame(columns=[
        "account_name", "account_id", "date", "share", "comment", "reaction"
    ])
    clean_df['account_name'] = df['account_name'].astype(str)
    clean_df['account_id'] = df['account_id'].astype(int)
    clean_df['date'] = pd.to_datetime(df['date'])
    clean_df["share"] = df[["actual_share_count"]].astype(int)
    clean_df["comment"] = df[["actual_comment_count"]].astype(int)
    # Collapse every reaction type into a single per-row "reaction" count.
    clean_df["reaction"] = df[[
        "actual_like_count", "actual_favorite_count", "actual_love_count",
        "actual_wow_count", "actual_haha_count", "actual_sad_count",
        "actual_angry_count", "actual_thankful_count"
    ]].sum(axis=1).astype(int)
    return clean_df


if __name__ == "__main__":
    # Usage: <script> DATE SUFFIX — cleans the CrowdTangle group-posts dump
    # for DATE and re-exports it under the given SUFFIX.
    DATE = sys.argv[1]
    SUFFIX = sys.argv[2]
    df = import_data(folder="crowdtangle_group",
                     file_name='posts_group_' + DATE + '.csv')
    clean_df = clean_columns(df)
    export_data(clean_df, 'crowdtangle_group', 'posts_' + SUFFIX + '.csv')
def index():
    """Render the landing page with a link to the most recent data export."""
    export_name = utils.get_latest_export()
    if export_name is None:
        # Nothing exported yet: generate a fresh export from the database.
        export_name = utils.export_data(db)
    return render_template("index.html", raw_data="data/" + export_name)
def main():
    """Score every client profile per feature (Task 3) and persist the scores."""

    def age_category(age):
        """Bucket an age into a half-open interval label; NaN maps to 'None'."""
        if 18 <= age < 25:
            return '[18, 25)'
        elif 25 <= age < 35:
            return '[25, 35)'
        elif 35 <= age < 55:
            return '[35, 55)'
        elif age != age:
            # NaN is the only value not equal to itself.
            return 'None'
        else:
            return '[55, inf)'

    def income_category(income):
        """Bucket an income; empty strings are treated as missing (NaN)."""
        if isinstance(income, str) and not len(income):
            income = float('nan')
        else:
            income = float(income)
        if income < 40000:
            return '[0-40000)'
        elif 40000 <= income < 100000:
            return '[40000, 100000)'
        elif 100000 <= income < 200000:
            return '[100000, 200000)'
        elif 200000 <= income < 500000:
            return '[200000, 500000)'
        elif income != income:
            return 'None'
        else:
            return '[500000, inf)'

    def age_of_car_category(age_of_car):
        """Bucket a car age; empty strings are treated as missing (NaN)."""
        if isinstance(age_of_car, str) and not len(age_of_car):
            age_of_car = float('nan')
        else:
            age_of_car = float(age_of_car)
        if age_of_car < 1:
            return '[0, 1)'
        elif 1 <= age_of_car < 3:
            return '[1, 3)'
        elif age_of_car != age_of_car:
            return 'None'
        else:
            return '[3, inf)'

    def score_client(client):
        """Return a dict of per-feature scores for one client row."""

        def age_score(age):
            # Returns None for labels outside the known set (no final else).
            if age == '[18, 25)':
                return 10
            elif age == '[25, 35)':
                return 20
            elif age == '[35, 55)':
                return 25
            elif age == 'None':
                return 0
            elif age == '[55, inf)':
                return 15

        def family_score(family):
            if family == 1:
                return 10
            elif family == 2:
                return 15
            elif family == 3:
                return 25
            elif family == 4:
                return 20
            else:
                return 0

        def income_score(income):
            if income == '[0-40000)':
                return 5
            elif income == '[40000, 100000)':
                return 10
            elif income == '[100000, 200000)':
                return 15
            elif income == '[200000, 500000)':
                return 20
            elif income == 'None':
                return 0
            elif income == '[500000, inf)':
                return 25

        def house_ownership_score(house_ownership):
            if house_ownership:
                return 25
            else:
                return 0

        def age_of_car_score(age_of_car):
            if age_of_car == '[0, 1)':
                return 25
            elif age_of_car == '[1, 3)':
                return 15
            elif age_of_car == 'None':
                return 0
            elif age_of_car == '[3, inf)':
                return 5

        def employed_by_score(employed_by):
            # Unknown employer types fall through to the default score of 10.
            employed_by_score_dict = {
                "Business Entity Type 3": 25,
                "Business Entity Type 2": 25,
                "Government": 20,
                "Military": 5,
                "Security Ministries": 15,
                "Emergency": 5,
                "Security": 5,
                "Construction": 5,
                "Electricity": 5,
                "XNA": 0,
                "Other": 0,
            }
            return employed_by_score_dict.get(employed_by, 10)

        def education_score(education):
            if education == "Higher education":
                return 25
            elif education == "Secondary / secondary special":
                return 10
            elif education == "Incomplete higher":
                return 20
            else:
                return 0

        def marital_status_score(marital_status):
            if marital_status == "Married":
                return 25
            elif marital_status == "Single / not married":
                return 10
            elif marital_status == "Civil marriage":
                return 15
            elif marital_status == "Widow":
                return 5
            elif marital_status == "Separated":
                return 5
            else:
                return 0

        def position_score(position):
            if position == "Core staff":
                return 25
            elif position == "Accountants":
                return 25
            elif position == "Managers":
                return 25
            elif position == "Security staff":
                return 10
            elif position == "<undefined>":
                return 0
            else:
                return 15

        def income_type_score(income_type):
            if income_type == "State servant":
                return 15
            if income_type == "Working":
                return 10
            if income_type == "Commercial associate":
                return 25
            if income_type == "Pensioner":
                return 5
            else:
                return 0

        def housing_score(housing):
            if housing == "House / apartment":
                return 25
            elif housing == "Rented apartment":
                return 15
            elif housing == "With parents":
                return 10
            else:
                return 0

        score_dict = dict()
        score_dict['age'] = age_score(client.age)
        score_dict['family'] = family_score(client.family)
        score_dict['income'] = income_score(client.income)
        score_dict['house_ownership'] = house_ownership_score(
            client.house_ownership)
        score_dict['age_of_car'] = age_of_car_score(client.age_of_car)
        score_dict['employed_by'] = employed_by_score(client.employed_by)
        score_dict['education'] = education_score(client.education)
        score_dict['marital_status'] = marital_status_score(
            client.marital_status)
        score_dict['position'] = position_score(client.position)
        score_dict['income_type'] = income_type_score(client.income_type)
        score_dict['housing'] = housing_score(client.housing)
        return score_dict

    db_connection = DBhandler(db_path=settings.DATABASE_PATH)
    profile_data = pd.read_sql_query(
        "SELECT * FROM %s" % settings.PROFILE_TABLE, db_connection.connection)
    profile_data.birth = profile_data.birth.apply(parse_date)
    # Approximate age in whole years from the birth date.
    profile_data['age'] = profile_data.birth.apply(
        lambda x: (datetime.date.today() - x).days // 365)
    used_columns = [
        'employed_by', 'education', 'marital_status', 'position',
        'income_type', 'housing'
    ]
    # Decode the categorical codes back to their string labels.
    for column in used_columns:
        profile_data[column] = convert_data_from_categorical(
            profile_data[column].values, column)
    # Replace raw numeric values with their interval labels.
    profile_data['age'] = profile_data.age.apply(age_category)
    profile_data['income'] = profile_data.income.apply(income_category)
    profile_data['age_of_car'] = profile_data.age_of_car.apply(
        age_of_car_category)
    score_table = pd.DataFrame([], columns=[
        'id',
        'age',
        'family',
        'income',
        'house_ownership',
        'age_of_car',
        'employed_by',
        'education',
        'marital_status',
        'position',
        'income_type',
        'housing',
    ])
    # Build one score row per client, in profile order.
    curr_pos = 0
    for i in range(len(profile_data)):
        client = profile_data.iloc[i]
        score_dict = score_client(client)
        score_table.loc[curr_pos] = [
            client.id,
            score_dict['age'],
            score_dict['family'],
            score_dict['income'],
            score_dict['house_ownership'],
            score_dict['age_of_car'],
            score_dict['employed_by'],
            score_dict['education'],
            score_dict['marital_status'],
            score_dict['position'],
            score_dict['income_type'],
            score_dict['housing'],
        ]
        curr_pos = curr_pos + 1
    print(score_table)
    # Recreate the scores table from scratch, then export it as CSV.
    db_connection.execute_sql("DROP TABLE IF EXISTS %s" %
                              settings.CLIENT_SCORES_TABLE)
    score_table.set_index('id', drop=True).to_sql(settings.CLIENT_SCORES_TABLE,
                                                  db_connection.connection)
    export_data("scores table", "clients_scores.csv", score_table.to_csv)
    print("Task 3 done!")
def main():
    """Compute weighted total scores (Task 4), flag defaults on a user-chosen
    risk horizon, and plot the resulting ROC curve."""
    db_connection = DBhandler(db_path=settings.DATABASE_PATH)
    clients_scores = pd.read_sql_query("SELECT * FROM %s" % settings.CLIENT_SCORES_TABLE,
                                       db_connection.connection)
    # Per-feature weights applied to the stored scores.
    score_weights = {
        "age": 0.4919,
        "age_of_car": 0.3890,
        "education": 0.0389,
        "employed_by": 1.0320,
        "family": 0.3844,
        "house_ownership": 0.0023,
        "housing": 0.1372,
        "income": 0.4485,
        "income_type": 0.2288,
        "marital_status": 0.1281,
        "position": 0.7185,
    }
    # Total score = weighted sum of the per-feature scores.
    clients_scores['total_score'] = 0
    for ind in clients_scores.index:
        row = clients_scores.loc[ind]
        temp_scores = dict()
        for column_name in score_weights.keys():
            temp_scores[column_name] = row[column_name] * score_weights[column_name]
        clients_scores.loc[ind, 'total_score'] = sum(temp_scores.values())
    print(clients_scores[['id', 'total_score']])
    # Recreate the totals table and export it as CSV.
    db_connection.execute_sql("DROP TABLE IF EXISTS %s" % settings.TOTAL_SCORES_TABLE)
    clients_scores[['id', 'total_score']].set_index('id', drop=True).to_sql(settings.TOTAL_SCORES_TABLE,
                                                                            db_connection.connection)
    export_data("client scores table", "total_scores.csv",
                clients_scores[['id', 'total_score']].to_csv)
    # risk horizon
    payments_df = pd.read_sql_query("SELECT * FROM %s" % settings.PAYMENT_TABLE,
                                    db_connection.connection)
    payments_df.payment_date = payments_df.payment_date.apply(parse_date)
    # -1 marks "no overdue"; per contract, walk payments in date order and
    # accumulate overdue days (payments are assumed exactly one month apart).
    payments_df['overdue_days'] = -1
    for contract in payments_df.contract_id.unique():
        temp_df = payments_df[payments_df.contract_id == contract].sort_values(['payment_date'])
        index_arr = temp_df.index.tolist()
        for i in range(1, len(index_arr)):
            prev_ind = index_arr[i - 1]
            ind = index_arr[i]
            if payments_df.loc[ind, 'amount_paid'] < payments_df.loc[ind, 'amount_due']:
                if payments_df.loc[prev_ind, 'overdue_days'] == -1:
                    # Overdue starts at the current date, but is 0 for now.
                    payments_df.loc[ind, 'overdue_days'] = 0
                else:
                    new_ov_days = 30
                    payments_df.loc[ind, 'overdue_days'] = payments_df.loc[prev_ind,
                                                                           'overdue_days'] + new_ov_days
            elif payments_df.loc[ind, 'amount_paid'] > payments_df.loc[ind, 'amount_due']:
                # Overpayment covers this many extra months of overdue.
                months_covered = int(round(payments_df.loc[ind, 'amount_paid'] /
                                           payments_df.loc[ind, 'amount_due'], 0))
                months_covered = months_covered - 1
                payments_df.loc[ind, 'overdue_days'] = payments_df.loc[prev_ind,
                                                                      'overdue_days'] - months_covered * 30
                if payments_df.loc[ind, 'overdue_days'] < 0:
                    payments_df.loc[ind, 'overdue_days'] = -1  # If the whole debt was repaid there is no overdue
    # Latest date each contract hit exactly 90 days overdue = default date.
    default_dates = payments_df[payments_df.overdue_days == 90].groupby(['contract_id'],
                                                                        as_index=False).agg({"payment_date": "max"})
    contracts_df = pd.read_sql_query("SELECT * FROM %s" % settings.CONTRACT_TABLE,
                                     db_connection.connection,
                                     index_col='contract_id')
    contracts_df.contract_date = contracts_df.contract_date.apply(parse_date)
    # Attach the contract date and borrower id to every payment row.
    payments_df['contract_date'] = np.nan
    payments_df['id_number'] = 0
    for ind in payments_df.index:
        row = payments_df.loc[ind]
        payments_df.loc[ind, 'contract_date'] = contracts_df.loc[row.contract_id].contract_date
        payments_df.loc[ind, 'id_number'] = contracts_df.loc[row.contract_id].id
    # Contract "age" in whole months at each payment date.
    payments_df['age'] = 0
    for contract in payments_df.contract_id.unique():
        temp_df = payments_df[payments_df.contract_id == contract].sort_values(['payment_date'])
        index_arr = temp_df.index.tolist()
        contract_date = temp_df.contract_date.iloc[0]  # Take the first contract date from the table, since they are all identical
        for i in range(0, len(index_arr)):
            ind = index_arr[i]
            curr_date = payments_df.loc[ind, 'payment_date']
            curr_age = (curr_date.year - contract_date.year) * 12 + (curr_date.month - contract_date.month)
            payments_df.loc[ind, 'age'] = curr_age
    period_months = int(input("Please enter risk horizon (in months): "))
    out_df = payments_df[payments_df.age == period_months][['contract_id', 'payment_date',
                                                            'contract_date', 'id_number',
                                                            'age']].reset_index(drop=True)  # Keep only the needed fields and reset the index (start from 0)
    # A contract is in default on the horizon if its snapshot date is at or
    # after its default date.
    out_df['Default?'] = False
    for contract in out_df.contract_id.unique():
        if contract in default_dates.contract_id.values:
            default_date = default_dates[default_dates.contract_id == contract].payment_date.iloc[0]
            current_date = out_df.loc[out_df.contract_id == contract, "payment_date"].iloc[0]
            if current_date >= default_date:
                out_df.loc[(out_df.contract_id == contract), "Default?"] = True
    # One row per borrower; keep only borrowers that have a score.
    out_df_gb = out_df.groupby(['id_number'], as_index=False).agg({"age": "max",
                                                                   "Default?": "max"})
    out_df_gb = out_df_gb[out_df_gb.id_number.isin(clients_scores.id.unique())]
    out_df_gb.sort_values(['id_number'], inplace=True)
    out_df_gb['score'] = out_df_gb.id_number.apply(lambda x: clients_scores.set_index(['id']).loc[x].total_score)
    # Sweep the threshold over every observed score to trace the ROC curve
    # (prediction: default when score <= threshold).
    scores_arr = sorted(out_df_gb.score.unique())
    x_plot = []
    y_plot = []
    y_true = out_df_gb['Default?'].astype(int).values
    for threshold in ([0.0] + scores_arr + [100.0]):
        y_pred = (out_df_gb.score <= threshold).values.astype(int)
        conf_mx = confusion_matrix(y_true, y_pred)
        tpr = conf_mx[1, 1] / conf_mx[1].sum()
        fpr = conf_mx[0, 1] / conf_mx[0].sum()
        x_plot.append(fpr)
        y_plot.append(tpr)
    roc_curve = plt.figure()
    plt.plot(x_plot, y_plot, label='ROC curve (area = %0.2f)' % auc(x_plot, y_plot))
    plt.plot([0, 1], [0, 1], '--r')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve')
    plt.legend(loc="lower right")
    plt.ion()
    plt.show()
    plt.pause(0.01)
    input("Press [enter] to continue...")
    export_data("roc plot", "roc_plot.png", roc_curve.savefig)
    print("Task 4 done!")
def main():
    """Build overdue/default labels on a risk horizon and run a WOE/IV
    analysis for one user-chosen client attribute (Task 2)."""
    db_connection = DBhandler(db_path=settings.DATABASE_PATH)
    payments_df = pd.read_sql_query(
        "SELECT * FROM %s" % settings.PAYMENT_TABLE, db_connection.connection)
    payments_df.payment_date = payments_df.payment_date.apply(parse_date)
    # First payment date per contract, with its due/paid amounts.
    s_dates = payments_df.groupby(['contract_id'],
                                  as_index=False).agg({'payment_date': 'min'})
    s_dates['amount_due'] = np.nan
    s_dates['amount_paid'] = np.nan
    for ind in s_dates.index:
        row = s_dates.loc[ind]
        t_df = payments_df[(payments_df.contract_id == row.contract_id)
                           & (payments_df.payment_date == row.payment_date)]
        s_dates.loc[ind, 'amount_due'] = t_df.amount_due.iloc[0]
        s_dates.loc[ind, 'amount_paid'] = t_df.amount_paid.iloc[0]
    if np.any(s_dates.amount_paid - s_dates.amount_due < 0):
        mb_zero_default = s_dates[s_dates.amount_paid -
                                  s_dates.amount_due < 0].contract_id.unique()
        print("There may be overdues in the first period! Check contracts:",
              mb_zero_default)
    # Fill in the number of overdue days
    payments_df[
        'overdue_days'] = -1  # -1 where there was no overdue (removed later); 0 marks a payment that was due but did not happen
    for contract in payments_df.contract_id.unique():
        temp_df = payments_df[payments_df.contract_id == contract].sort_values(
            ['payment_date'])
        # Grab the index array, since slices of a pandas.DataFrame keep the original indexing
        index_arr = temp_df.index.tolist()
        for i in range(
                1, len(index_arr)
        ):  # Start from index 1: the first payment always has overdue 0
            prev_ind = index_arr[i - 1]
            ind = index_arr[i]
            if payments_df.loc[ind, 'amount_paid'] < payments_df.loc[
                    ind, 'amount_due']:  # Payment missing or smaller than due
                if payments_df.loc[prev_ind, 'overdue_days'] == -1:  # No overdue so far
                    payments_df.loc[
                        ind,
                        'overdue_days'] = 0  # Overdue starts at the current date, but is 0 for now
                else:
                    # For simplicity just add 30 days, since payments are exactly one month apart.
                    new_ov_days = 30
                    payments_df.loc[ind, 'overdue_days'] = payments_df.loc[
                        prev_ind, 'overdue_days'] + new_ov_days  # Add the previous overdue to the current one
            elif payments_df.loc[ind, 'amount_paid'] > payments_df.loc[
                    ind, 'amount_due']:
                months_covered = int(
                    round(
                        payments_df.loc[ind, 'amount_paid'] /
                        payments_df.loc[ind, 'amount_due'],
                        0))  # Count how many months this payment covers
                months_covered = months_covered - 1  # Subtract one month, since 30 days were not added for the current month
                # Subtract the number of "repaid" overdue payments from the overdue.
                # More complex accrual (interest, uneven payments) would need more elaborate logic.
                payments_df.loc[ind, 'overdue_days'] = payments_df.loc[
                    prev_ind, 'overdue_days'] - months_covered * 30
                if payments_df.loc[ind, 'overdue_days'] < 0:
                    payments_df.loc[
                        ind,
                        'overdue_days'] = -1  # If the whole debt was repaid there is no overdue
    # Take the max date of reaching 90+ overdue, since the client may have cured 90 and fallen back to 60.
    default_dates = payments_df[payments_df.overdue_days == 90].groupby(
        ['contract_id'], as_index=False).agg({"payment_date": "max"})
    print(
        default_dates.rename(
            {
                "contract_id": "Contract Number",
                "payment_date": "Default Date"
            }, axis=1))
    contracts_df = pd.read_sql_query("SELECT * FROM %s" % settings.CONTRACT_TABLE,
                                     db_connection.connection,
                                     index_col='contract_id')
    contracts_df.contract_date = contracts_df.contract_date.apply(parse_date)
    # Determine the contract date and the borrower for each payment row.
    payments_df['contract_date'] = np.nan
    payments_df['id_number'] = 0
    for ind in payments_df.index:
        row = payments_df.loc[ind]
        payments_df.loc[ind, 'contract_date'] = contracts_df.loc[
            row.contract_id].contract_date
        payments_df.loc[ind, 'id_number'] = contracts_df.loc[row.contract_id].id
    # Determine the contract "age" in months at each date.
    payments_df['age'] = 0
    for contract in payments_df.contract_id.unique():
        temp_df = payments_df[payments_df.contract_id == contract].sort_values(
            ['payment_date'])
        index_arr = temp_df.index.tolist()
        contract_date = temp_df.contract_date.iloc[
            0]  # Take the first contract date from the table, since they are all identical
        for i in range(0, len(index_arr)):
            ind = index_arr[i]
            curr_date = payments_df.loc[ind, 'payment_date']
            curr_age = (curr_date.year - contract_date.year) * 12 + (
                curr_date.month - contract_date.month)
            payments_df.loc[ind, 'age'] = curr_age
    period_months = int(input("Please enter risk horizon (in months): "))
    out_df = payments_df[payments_df.age == period_months][[
        'contract_id', 'payment_date', 'contract_date', 'id_number', 'age'
    ]].reset_index(
        drop=True
    )  # Keep only the needed fields and reset the index (start from 0)
    out_df['Default?'] = False
    for contract in out_df.contract_id.unique():
        if contract in default_dates.contract_id.values:
            default_date = default_dates[default_dates.contract_id ==
                                         contract].payment_date.iloc[0]
            current_date = out_df.loc[out_df.contract_id == contract,
                                      "payment_date"].iloc[0]
            if current_date >= default_date:
                out_df.loc[(out_df.contract_id == contract), "Default?"] = True
    out_df_gb = out_df.groupby(['id_number'], as_index=False).agg({
        "age": "max",
        "Default?": "max"
    }).rename({"age": "Age (months)"}, axis=1)
    print(out_df_gb)
    # Select the needed client data
    applications_df = pd.read_sql_query(""" SELECT * FROM %s """ % settings.PROFILE_TABLE,
                                        db_connection.connection,
                                        index_col='id')
    # Drop unneeded columns
    applications_df.drop(["issue_date"], axis=1, inplace=True)
    applications_df.birth = applications_df.birth.apply(parse_date)
    # Drop the client for which there is too little data
    if 100076 in applications_df.index:
        applications_df.drop([100076], inplace=True)
    # Fill empty fields
    applications_df.fillna(0.0, inplace=True)
    # Label each client 1/0 by whether any of their contracts defaulted.
    applications_df['default'] = 0
    applications_df.loc[applications_df.index.isin(out_df_gb[
        out_df_gb['Default?']].id_number.unique()), "default"] = 1
    applications_df['age'] = (
        datetime.date.today() -
        applications_df.birth).apply(lambda x: x.days // 365)
    applications_df.drop(['birth'], axis=1, inplace=True)
    # Quantile-bucket the continuous attributes.
    applications_df.income = pd.qcut(applications_df.income, 5)

    def get_age_of_car_category(age):
        """Coarse car-age bucket: no car / up to 3 years / older."""
        if age == 0:
            return '0'
        elif age <= 3:
            return '<=3'
        else:
            return '>3'

    applications_df.age_of_car = applications_df.age_of_car.apply(
        get_age_of_car_category)
    applications_df.age = pd.qcut(applications_df.age, 5)
    quantilized_columns = ['income', 'age', 'age_of_car']
    categorical_columns = [
        'gender',
        'employed_by',
        'education',
        'marital_status',
        'position',
        'income_type',
        'housing',
        'house_ownership',
        'children',
        'family',
    ]
    # Let the user pick which attribute to analyse.
    choices = dict(
        zip(range(len(categorical_columns + quantilized_columns)),
            sorted(categorical_columns + quantilized_columns)))
    for i in choices.keys():
        print("%d: %s" % (i, choices[i]))
    column_choice = choices.get(
        int(input("Please choose column (default=0): ")), 0)
    # Per category: default count plus event / non-event shares.
    temp_df = applications_df[[column_choice, 'default']].copy()
    temp_df['event'] = temp_df.default
    temp_df['non_event'] = 1 - temp_df.default
    df_gb = temp_df.groupby([column_choice]).agg({
        "default": "sum",
        "event": lambda x: x.sum() / temp_df.event.sum(),
        "non_event": lambda x: x.sum() / temp_df.non_event.sum()
    })
    # Weight of evidence (clipped to avoid log(0)) and information value.
    df_gb['woe'] = np.log(np.clip(df_gb.event / df_gb.non_event, 0.001, np.inf))
    df_gb['IV'] = (df_gb.event - df_gb.non_event) * df_gb.woe
    df_gb.index = df_gb.index.astype(str)
    woe_fig = plt.figure(figsize=(12, 7))
    plt.scatter(list(range(df_gb.shape[0])), df_gb.woe)
    plt.grid()
    plt.xticks(list(range(df_gb.shape[0])), df_gb.index)
    plt.ylabel("WOE", fontsize=14)
    plt.xlabel(column_choice, fontsize=14)
    plt.title("WOE plot", fontsize=16)
    plt.ion()
    plt.show()
    iv_fig = plt.figure(figsize=(12, 7))
    plt.scatter(list(range(df_gb.shape[0])), df_gb.IV)
    plt.grid()
    plt.xticks(list(range(df_gb.shape[0])), df_gb.index)
    plt.ylabel("IV", fontsize=14)
    plt.xlabel(column_choice, fontsize=14)
    plt.title("Information Value plot", fontsize=16)
    plt.ion()
    plt.show()
    df_gb.loc['SUM'] = df_gb.sum()
    print(df_gb)
    plt.pause(0.001)
    input("Press [enter] to continue...")
    export_data("woe plot", "woe_plot.png", woe_fig.savefig)
    export_data("iv plot", "iv_plot.png", iv_fig.savefig)
    export_data("woe iv table", "woe_iv_table.csv", df_gb.to_csv)
    export_data("defaults on horizon table", "defaults_horizon.csv",
                out_df_gb.to_csv)
    print("Task 2 done!")