def test_read_csv(self):
     # Smoke test: read_csv must return something with a positive length,
     # and numpy.asarray must produce a (non-None) result from it.
     # The input path comes from data_file_path, defined outside this method.
     from utils.file_utils import read_csv
     import numpy
     rows = read_csv(data_file_path)
     as_array = numpy.asarray(rows)
     self.assertGreater(len(rows), 0)
     self.assertIsNotNone(as_array)
 def test_normal_matrix(self):
     # Smoke test: norm_matrix applied to the ex1data2 data (converted with
     # numpy.asarray) must return a non-None result.
     from utils.file_utils import read_csv
     from ex1_linear_regression.normal import norm_matrix
     import numpy
     raw_matrix = numpy.asarray(read_csv("resource/ex1data2.txt"))
     result = norm_matrix(raw_matrix)
     self.assertIsNotNone(result)
 def test_normal_eq_2(self):
     # normal_eq_n on the ex1data2 data: the first three entries of the
     # result are compared, in order, with reference values (tolerance 1).
     from utils.file_utils import read_csv
     from ex1_linear_regression.normalEqn import normal_eq_n
     import numpy
     data = numpy.asarray(read_csv("resource/ex1data2.txt"))
     theta = normal_eq_n(data)
     for index, expected in enumerate((89597, 139, -8738)):
         self.assertAlmostEqual(theta[index], expected, delta=1)
 def test_normal_eq_1(self):
     # normal_eq_n on the ex1data1 data: the first two entries of the result
     # are compared, in order, with reference values (tolerance 0.1).
     from utils.file_utils import read_csv
     from ex1_linear_regression.normalEqn import normal_eq_n
     import numpy
     data = numpy.asarray(read_csv("resource/ex1data1.txt"))
     theta = normal_eq_n(data)
     tolerance = 0.1
     for index, expected in enumerate((-3.84, 1.18)):
         self.assertAlmostEqual(theta[index], expected, delta=tolerance)
 def test_gcd_multi_vars_1(self):
     # gcd_m on the ex1data1 data: the first two entries of the result are
     # compared with reference values. The same number (0.01) is passed to
     # gcd_m as its second argument and used as the assertion tolerance.
     # NOTE(review): the sibling tests import read_csv from utils.file_utils;
     # this one uses ex1_linear_regression.file_utils - confirm it is intended.
     from ex1_linear_regression.file_utils import read_csv
     import numpy
     data = numpy.asarray(read_csv("resource/ex1data1.txt"))
     self.assertIsNotNone(data)
     # Imported only after the data check above, as in the original ordering.
     from ex1_linear_regression.multi_vars import gcd_m
     alpha = 0.01
     theta = gcd_m(data, alpha)
     for index, expected in enumerate((-3.84, 1.18)):
         self.assertAlmostEqual(theta[index], expected, delta=alpha)
 def test_gcd_multi_vars_2(self):
     # gcd_m on the norm_matrix output for the ex1data2 data (second
     # argument 0.001): the first three entries of the result are compared,
     # in order, with reference values (tolerance 1).
     from utils.file_utils import read_csv
     from ex1_linear_regression.normal import norm_matrix
     import numpy
     raw_matrix = numpy.asarray(read_csv("resource/ex1data2.txt"))
     scaled = norm_matrix(raw_matrix)
     self.assertIsNotNone(scaled)
     # Imported only after the normalisation check, as in the original ordering.
     from ex1_linear_regression.multi_vars import gcd_m
     theta = gcd_m(scaled, 0.001)
     for index, expected in enumerate((89597, 139, -8738)):
         self.assertAlmostEqual(theta[index], expected, delta=1)
Ejemplo n.º 7
0
# "TPP_INSURED_INDUSTRY_missing",
# "TPP_INSURED_INDUSTRY_high",
# "TPP_INSURED_INDUSTRY_medium",
# "TPP_INSURED_INDUSTRY_low",
# "TPP_INSURED_INDUSTRY_others",
# "TPP_INSURED_EDU_(A)Illiterate",
# "TPP_INSURED_EDU_(D)Others",
# "TPP_INSURED_EDU_(C)Grad & above",
# "TPP_INSURED_EDU_(B)Schooling", "TPP_INSURED_INCOME"]

# Column names passed as ``ignore_cols`` when reading the LSTM training file.
ignore_col_list_lstm = ["POL_ID", "DATA_MONTH"]

print("Reading the data...")
# read_csv's result is unpacked into six values; for the FFN file only the
# first two (data and labels) are kept.
# NOTE(review): the four discarded values presumably hold the other splits
# selected by split_ratio - confirm against read_csv.
# ffn_train_path, split_ratio and ignore_col_list_ffn are defined outside
# this excerpt.
ffn_train_data, ffn_train_label, _, _, _, _ = read_csv(
    ffn_train_path,
    split_ratio=split_ratio,
    header=True,
    ignore_cols=ignore_col_list_ffn,
    output_label="Lapse_Flag")
# For the LSTM file only the first returned value (the data) is kept; the
# labels returned for this file are discarded.
lstm_train_data, _, _, _, _, _ = read_csv(lstm_train_path,
                                          split_ratio=split_ratio,
                                          header=True,
                                          ignore_cols=ignore_col_list_lstm,
                                          output_label="Lapse_Flag")

# Debug output: first FFN sample and its length, then the first FFN label
# and its length (each label entry must therefore support len()).
print("ffn data")
print(ffn_train_data[0])
print(len(ffn_train_data[0]))
print(ffn_train_label[0])
print(len(ffn_train_label[0]))

print("lstm data")
Ejemplo n.º 8
0
# Command-line arguments (all three are required):
#   argv[1] - path of the training CSV (later passed to read_csv)
#   argv[2] - path of the test CSV (later passed to read_csv),
#             e.g. "../data/trans_new_test.csv"
#   argv[3] - model name (its use is outside this excerpt)
# Validate up front so a missing argument produces a usage message rather
# than a bare IndexError traceback.
if len(sys.argv) < 4:
    sys.exit(
        "usage: {} <train_csv> <test_csv> <model_name>".format(sys.argv[0]))

trans_train_path = sys.argv[1]
trans_test_path = sys.argv[2]

model_name = sys.argv[3]

# Run configuration. Only split_ratio is consumed within this excerpt; the
# other values are used by code that is not visible here.
learning_rate = 0.001
epochs = 100
batch_size = 512
display_count = 1000
# Passed to read_csv as split_ratio.
# NOTE(review): presumably train/validation/test percentages, i.e. every
# row goes to the first split - confirm against read_csv.
split_ratio = [100, 0, 0]

print("Reading the data...")
# Train and test files are read with identical options. read_csv's result is
# unpacked into six values and only the first two (data, labels) are kept.
trans_train_data, trans_train_label, _, _, _, _ = read_csv(
    trans_train_path,
    split_ratio=split_ratio,
    header=True,
    ignore_cols=["POL_ID", "DATA_MONTH", "TB_POL_BILL_MODE_CD", "MI"],
    output_label="Lapse_Flag")
trans_test_data, trans_test_label, _, _, _, _ = read_csv(
    trans_test_path,
    split_ratio=split_ratio,
    header=True,
    ignore_cols=["POL_ID", "DATA_MONTH", "TB_POL_BILL_MODE_CD", "MI"],
    output_label="Lapse_Flag")

# Debug output: first training sample.
print(trans_train_data[0])

print("Train Data Size - ", len(trans_train_data))
print("Test Data Size - ", len(trans_test_data))

print("Splitting the data...")
Ejemplo n.º 9
0
import os
import pandas as pd

# ``sys`` is not among the imports visible in this excerpt; importing it
# here is a harmless no-op if the module was already imported earlier.
import sys

# Command-line arguments (all three are required):
#   argv[1] - path of the CSV to run inference on (later passed to read_csv),
#             e.g. "/Users/vivek/sample.csv"
#   argv[2] - model name (its use is outside this excerpt)
#   argv[3] - output file (its use is outside this excerpt)
# Validate up front so a missing argument produces a usage message rather
# than a bare IndexError traceback.
if len(sys.argv) < 4:
    sys.exit(
        "usage: {} <infer_csv> <model_name> <output_file>".format(sys.argv[0]))

infer_path = sys.argv[1]
model_name = sys.argv[2]
output_file = sys.argv[3]

# Run configuration. batch_size is the chunk size handed to divide_batches
# below; display_count is not used within this excerpt.
batch_size = 512
display_count = 1000
# Passed to read_csv as split_ratio.
# NOTE(review): presumably train/validation/test percentages, i.e. every
# row goes to the first split - confirm against read_csv.
split_ratio = [100, 0, 0]

print("Reading the data...")
# read_csv's result is unpacked into six values; only the first two
# (data, labels) are kept.
inference_data, inference_label, _, _, _, _ = read_csv(
    infer_path,
    split_ratio=split_ratio,
    header=True,
    ignore_cols=["POL_ID", "DATA_MONTH"],
    output_label="Lapse_Flag")

# Debug output: first inference sample.
print(inference_data[0])

print("Infer Data Size - ", len(inference_data))

print("Splitting the data...")
# divide_batches is defined outside this excerpt.
# NOTE(review): presumably splits the labels into chunks of batch_size - confirm.
infer_y = divide_batches(inference_label, batch_size)

# Despite its name, this is the number of items returned by divide_batches
# (len(infer_y)), not the per-batch size.
infer_batch_size = len(infer_y)

# Directory for saved models, relative to the current working directory.
saved_model_dir = "../maxlife_models/"
# Create the directory if it is missing. makedirs(exist_ok=True) replaces the
# isdir()/mkdir() pair, which could fail with FileExistsError if another
# process created the directory between the check and the mkdir call.
os.makedirs(saved_model_dir, exist_ok=True)
Ejemplo n.º 10
0
#                    "TPP_INSURED_GENDER_MALE",
#                    "TPP_INSURED_GENDER_null",
# "TPP_INSURED_INDUSTRY_missing",
# "TPP_INSURED_INDUSTRY_high",
# "TPP_INSURED_INDUSTRY_medium",
# "TPP_INSURED_INDUSTRY_low",
# "TPP_INSURED_INDUSTRY_others",
# "TPP_INSURED_EDU_(A)Illiterate",
# "TPP_INSURED_EDU_(D)Others",
# "TPP_INSURED_EDU_(C)Grad & above",
# "TPP_INSURED_EDU_(B)Schooling", "TPP_INSURED_INCOME"]

print("Reading the data...")
# Train and test files are read with identical options. read_csv's result is
# unpacked into six values and only the first two (data, labels) are kept.
# trans_train_path, trans_test_path, split_ratio and ignore_col_list are
# defined outside this excerpt.
trans_train_data, trans_train_label, _, _, _, _ = read_csv(
    trans_train_path,
    split_ratio=split_ratio,
    header=True,
    ignore_cols=ignore_col_list,
    output_label="Lapse_Flag")
trans_test_data, trans_test_label, _, _, _, _ = read_csv(
    trans_test_path,
    split_ratio=split_ratio,
    header=True,
    ignore_cols=ignore_col_list,
    output_label="Lapse_Flag")

# Debug output: first training sample and its label.
print(trans_train_data[0])
print(trans_train_label[0])

# Number of training labels divided by the sum of the labels.
# NOTE(review): assumes the labels are plain numbers (presumably 0/1, making
# this total/positives) - confirm. Raises ZeroDivisionError if the labels
# sum to zero.
pos_weight = len(trans_train_label) / sum(trans_train_label)

print("Train Data Size - ", len(trans_train_data))