def no_transaction_vs_harmonic():
    """Scatter-plot the transaction count against the harmonic amount.

    Reads the cleaned merchant matrix from ``MerchantSumAmountPerDay.txt``
    inside ``DATASET_DIR``, uses column 1 for the x-axis and column 0 for
    the y-axis, and passes both to ``plotlyvisualize.scatter`` with
    ``PLOT_OUT_DIR`` as the output location. Returns nothing.
    """
    source_file = os.path.join(DATASET_DIR, "MerchantSumAmountPerDay.txt")
    features = MerchantData(source_file).get_clean_data()

    # Columns as used by this file: 0 = harmonic amount ("recency"),
    # 1 = number of transactions ("frequency").
    y_values = features[:, 0]
    x_values = features[:, 1]

    plotlyvisualize.scatter(
        x_values,
        y_values,
        "Trans vs Harmonic",
        ("No Transaction", "Harmonic Sum"),
        out_path=PLOT_OUT_DIR,
    )
def no_transactions_vs_sum_amounts():
    """Scatter-plot log(transaction count) against the summed amounts.

    Reads the cleaned merchant matrix from ``MerchantSumAmountPerDay.txt``
    inside ``DATASET_DIR``, uses column 1 (after a natural log) for the
    x-axis and column 2 unchanged for the y-axis, and passes both to
    ``plotlyvisualize.scatter`` with ``PLOT_OUT_DIR`` as the output
    location. Returns nothing.
    """
    source_file = os.path.join(DATASET_DIR, "MerchantSumAmountPerDay.txt")
    features = MerchantData(source_file).get_clean_data()

    # Columns as used by this file: 1 = number of transactions
    # ("frequency"), 2 = summed amounts ("money").
    frequency = features[:, 1]
    money = features[:, 2]

    # Only the x-series is log-transformed; the amounts are plotted raw
    # (a base-1.5 log scaling of the amounts was tried and is disabled).
    # NOTE(review): the x-axis label below does not mention the log
    # transform -- confirm whether that is intended.
    log_frequency = np.log(frequency)

    plotlyvisualize.scatter(
        log_frequency,
        money,
        "No Transactions vs Sum Amounts",
        ("No Transaction", "Sum Amounts"),
        out_path=PLOT_OUT_DIR,
    )
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.neighbors import KernelDensity
import os
from readdata.merchantdata import MerchantData

# Script: fit 1-D kernel density estimates, one per kernel type, to the
# transaction-count column of the cleaned merchant data.

# Input and output directory names (relative paths).
DATASET_DIR = "dataset"
PLOT_OUT_DIR = "plotsout"

# Load the cleaned merchant matrix from the dataset file.
merchant_data_path = os.path.join(DATASET_DIR, "MerchantSumAmountPerDay.txt")
data_reader = MerchantData(merchant_data_path)
X = data_reader.get_clean_data()

# Data Selection
no_transaction = X[:, 1]  # Frequency
sum_amounts = X[:, 2]  # Money (not read again in the visible part of this script)

# Plot a 1D density example
# NOTE(review): this first N is overwritten two lines below without being
# read in between.
N = 100
# NOTE(review): no random draws are visible after this seed call in this
# part of the script -- confirm whether it is still needed.
np.random.seed(1)
N = no_transaction.shape[0]
# X is rebound here: from the full matrix to the transaction-count column
# with a trailing axis added. The previous synthetic input was:
# np.random.normal(0, 1, 0.3 * N)[:, np.newaxis]
X = no_transaction[:, np.newaxis]

# 1000 evenly spaced evaluation points from the data minimum to the data
# maximum, also with a trailing axis added.
X_plot = np.linspace(np.min(X), np.max(X), 1000)[:, np.newaxis]

fig, ax = plt.subplots()

# Fit one estimator per kernel with a fixed bandwidth of 0.5 and evaluate
# it on the grid.
for kernel in ['gaussian', 'tophat', 'epanechnikov']:
    kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X)
    # score_samples is documented by scikit-learn as returning the log of
    # the density at each query point. log_dens is not used within the
    # visible code -- presumably it is plotted on `ax` by code that follows.
    log_dens = kde.score_samples(X_plot)