def get_standard_data_for_cloumn(filename, header):
    """Load one column and shrink every value by its printed length.

    The column ``header`` is read from ``filename`` through ``FileProcess``,
    passed through ``deal_with_NA`` and converted to ``int``. If that
    conversion fails, the values are converted to ``float`` instead.

    The column is returned unscaled when it is empty, when all of its values
    are equal, or when it is the ``'Creditability'`` column. Otherwise every
    value ``v`` is replaced by ``round(v / 10 ** len(str(v)), 1)``.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list with one number per value in the column.

    Raises:
        ValueError: If a value cannot be parsed as a number at all
            (propagated from ``float``).
    """
    f = FileProcess()
    d = f.file_get_data(filename, header)
    try:
        d = deal_with_NA(d)
        d = list(map(int, d))
    except (ValueError, TypeError):
        # Only conversion failures trigger the float fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt and hide unrelated
        # bugs. ``d`` is whatever the try block last bound it to.
        d = list(map(float, d))

    # An empty column has no min/max; hand it back instead of crashing.
    if not d:
        return d
    if max(d) == min(d) or header == 'Creditability':
        return d

    # NOTE(review): len(str(v)) counts every printed character, so a minus
    # sign or a decimal point increases the divisor as well -- confirm that
    # this is intended for negative or float columns.
    return [round(v / math.pow(10, len(str(v))), 1) for v in d]
def data_get():
    """Split the rows of the module-level ``filename`` into two lists.

    Rows are read with ``FileProcess.file_get_data_row``. A row whose
    ``CLASS`` entry equals the string ``'0'`` goes into the first list and
    every other row into the second; the first element of each row is
    dropped before it is stored. The ratio of the two list lengths is
    printed.

    Returns:
        A tuple ``(k, data_0, data_1, headers)`` where ``k`` is
        ``len(data_0) / len(data_1)``.

    Raises:
        ZeroDivisionError: If no row ends up in the second list.
    """
    reader = FileProcess()
    headers, rows = reader.file_get_data_row(filename)

    zeros, others = [], []
    for row in rows:
        target = zeros if row[CLASS] == '0' else others
        target.append(row[1:])

    # Ratio of class-0 rows to all remaining rows.
    k = len(zeros) / len(others)
    print(k)
    return k, zeros, others, headers
def data_stard_write():
    """Standardise every column of ``filename`` and write three CSV splits.

    Each header returned by ``FileProcess.file_get_headers`` is run through
    ``get_standard_data_for_cloumn``. The collected columns are passed
    through ``data_transpose`` and ``data_sort``, split by
    ``get_train_val_test``, and written to ``data_train.csv``,
    ``data_val.csv`` and ``data_test.csv`` with the original headers.
    """
    writer = FileProcess()
    headers = writer.file_get_headers(filename)

    columns = [get_standard_data_for_cloumn(filename, name) for name in headers]
    # presumably turns the per-column lists into per-row lists -- confirm
    # against data_transpose
    rows = data_sort(data_transpose(columns))

    train_rows, val_rows, test_rows = get_train_val_test(rows)
    outputs = (
        ('data_train.csv', train_rows),
        ('data_val.csv', val_rows),
        ('data_test.csv', test_rows),
    )
    for path, subset in outputs:
        writer.write_csv(path, headers, subset)
def get_standard_data_for_cloumn(filename, header):
    """Load one integer column and rescale it onto the buckets 0..9.

    The column ``header`` is read from ``filename`` through ``FileProcess``
    and every value is converted with ``int``.

    The column is returned unscaled when it is empty, when all of its values
    are equal, or when it is the ``'class'`` column. Otherwise every value
    ``v`` becomes ``floor((v - min) * 9 / (max - min))``.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list of ``int`` with one entry per value in the column.

    Raises:
        ValueError: If a value cannot be converted with ``int``.
    """
    f = FileProcess()
    d = list(map(int, f.file_get_data(filename, header)))

    # An empty column has no min/max; hand it back instead of crashing.
    if not d:
        return d

    min_d = min(d)
    span = max(d) - min_d
    if span == 0 or header == 'class':
        return d

    # Multiply first and use integer floor division so the result is exact.
    # Scaling by the precomputed float factor 9 / span can land just below a
    # bucket boundary (e.g. span 441, offset 49: 49 * (9 / 441) evaluates to
    # 0.9999999999999999) and int() would then drop the value one bucket.
    return [(v - min_d) * 9 // span for v in d]
def get_standard_data_for_cloumn(filename, header):
    """Load one column and rescale it onto the buckets 0..99.

    The column ``header`` is read from ``filename`` through ``FileProcess``,
    passed through ``deal_with_NA`` and converted to ``int``. If that
    conversion fails, the values are converted to ``float`` instead.

    The column is returned unscaled when it is empty, when all of its values
    are equal, or when it is the ``'SeriousDlqin2yrs'`` column. Otherwise
    every value ``v`` becomes ``(v - min) * 99 / (max - min)`` truncated to
    an ``int``.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list with one number per value in the column.

    Raises:
        ValueError: If a value cannot be parsed as a number at all
            (propagated from ``float``).
    """
    f = FileProcess()
    d = f.file_get_data(filename, header)
    try:
        d = deal_with_NA(d)
        d = list(map(int, d))
    except (ValueError, TypeError):
        # Only conversion failures trigger the float fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt and hide unrelated
        # bugs. ``d`` is whatever the try block last bound it to.
        d = list(map(float, d))
        is_integral = False
    else:
        is_integral = True

    # An empty column has no min/max; hand it back instead of crashing.
    if not d:
        return d

    max_d = max(d)
    min_d = min(d)
    if max_d == min_d or header == 'SeriousDlqin2yrs':
        return d

    span = max_d - min_d
    if is_integral:
        # Exact integer arithmetic: scaling by the precomputed float factor
        # 99 / span can land just below a bucket boundary (e.g. span 4851,
        # offset 49 gives 0.9999999999999999) and int() would then drop the
        # value one bucket.
        return [(v - min_d) * 99 // span for v in d]

    # Float columns keep the factor-based formula; the factor is
    # loop-invariant, so it is computed once.
    factor = 99 / span
    return [int((v - min_d) * factor) for v in d]
def get_standard_data_for_cloumn(filename, header):
    """Load one column and rescale it onto the buckets 0..9.

    The column ``header`` is read from ``filename`` through ``FileProcess``,
    passed through ``deal_with_NA`` and converted to ``int``. If that
    conversion fails, the values are converted to ``float`` instead.

    The column is returned unscaled when it is empty, when all of its values
    are equal, or when it is the ``'acc_now_delinq'`` column. Otherwise
    every value ``v`` becomes ``(v - min) * 9 / (max - min)`` truncated to
    an ``int``.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list with one number per value in the column.

    Raises:
        ValueError: If a value cannot be parsed as a number at all
            (propagated from ``float``).
    """
    f = FileProcess()
    d = f.file_get_data(filename, header)
    try:
        d = deal_with_NA(d)
        d = list(map(int, d))
    except (ValueError, TypeError):
        # Only conversion failures trigger the float fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt and hide unrelated
        # bugs. ``d`` is whatever the try block last bound it to.
        d = list(map(float, d))
        is_integral = False
    else:
        is_integral = True

    # An empty column has no min/max; hand it back instead of crashing.
    if not d:
        return d

    max_d = max(d)
    min_d = min(d)  # computed once and reused for the comparison below
    if max_d == min_d or header == 'acc_now_delinq':
        return d

    span = max_d - min_d
    if is_integral:
        # Exact integer arithmetic: scaling by the precomputed float factor
        # 9 / span can land just below a bucket boundary (e.g. span 441,
        # offset 49 gives 0.9999999999999999) and int() would then drop the
        # value one bucket.
        return [(v - min_d) * 9 // span for v in d]

    # Float columns keep the factor-based formula; the factor is
    # loop-invariant, so it is computed once.
    factor = 9 / span
    return [int((v - min_d) * factor) for v in d]
print(data) s=Smote(data,N=100) s = (s.over_sampling()) s = s.tolist() smote = [] for line in s: l = [0] + line for i in range(1, len(line)+1): l[i] = int(line[i-1] + 0.5) print(l) smote.append(l) f = FileProcess() headers, d = f.file_get_data_row(filename) smote_data = smote + d f = FileProcess() f.write_csv('smote_data.csv', headers, smote_data) ''' a = [] for d in data_tmp: a.append(list(map(int,d)))