Beispiel #1
0
def get_standard_data_for_cloumn(filename, header):
    """Load one column and shrink every value to a one-decimal fraction.

    The column is read with ``FileProcess.file_get_data``, passed through
    ``deal_with_NA`` and converted to ``int``.  If any of that fails, the
    values are converted to ``float`` instead.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list of numbers.  An empty column, a column whose values are all
        equal, and the ``'Creditability'`` column are returned as converted,
        without scaling.  Otherwise each value ``v`` is replaced by
        ``round(v / 10 ** len(str(v)), 1)``.

    Raises:
        ValueError: If a value can be parsed neither as ``int`` nor as
            ``float`` (raised by the ``float`` fallback).
    """
    f = FileProcess()
    d = f.file_get_data(filename, header)
    try:
        d = deal_with_NA(d)
        d = list(map(int, d))
    except Exception:
        # Best-effort fallback kept from the original, but no longer a bare
        # ``except:`` so KeyboardInterrupt / SystemExit are not swallowed.
        # If deal_with_NA itself failed, ``d`` is still the raw column here.
        d = list(map(float, d))

    # max()/min() raise on an empty sequence; an empty column needs no scaling.
    if not d:
        return d
    if max(d) == min(d) or header == 'Creditability':
        return d

    # The exponent is the length of the value's printed form, so a sign or a
    # decimal point counts as a digit as well.
    return [round(value / math.pow(10, len(str(value))), 1) for value in d]
Beispiel #2
0
def data_get():
    """Split the rows of ``filename`` into two buckets by class label.

    Rows are read with ``FileProcess.file_get_data_row``.  A row whose
    ``CLASS`` field equals the string ``'0'`` goes into the first bucket,
    every other row into the second.  The first field of each row is dropped
    before the row is stored.

    Returns:
        A tuple ``(ratio, class_0_rows, other_rows, headers)`` where ``ratio``
        is ``len(class_0_rows) / len(other_rows)``.  The ratio is also printed.

    Raises:
        ZeroDivisionError: If no row lands in the second bucket.
    """
    reader = FileProcess()
    headers, rows = reader.file_get_data_row(filename)

    zeros, others = [], []
    for row in rows:
        bucket = zeros if row[CLASS] == '0' else others
        bucket.append(row[1:])

    # Ratio of class-0 rows to all remaining rows.
    ratio = len(zeros) / len(others)
    print(ratio)
    return ratio, zeros, others, headers
Beispiel #3
0
def data_stard_write():
    """Standardise every column of ``filename`` and write three CSV splits.

    Each header returned by ``FileProcess.file_get_headers`` is processed by
    ``get_standard_data_for_cloumn``.  The resulting per-column lists are
    passed through ``data_transpose`` and ``data_sort``, then divided by
    ``get_train_val_test``.  The three parts are written, with the original
    headers, to ``data_train.csv``, ``data_val.csv`` and ``data_test.csv``.
    """
    writer = FileProcess()
    headers = writer.file_get_headers(filename)

    columns = [get_standard_data_for_cloumn(filename, name) for name in headers]
    rows = data_sort(data_transpose(columns))
    train_rows, val_rows, test_rows = get_train_val_test(rows)

    outputs = (
        ('data_train.csv', train_rows),
        ('data_val.csv', val_rows),
        ('data_test.csv', test_rows),
    )
    for target, split in outputs:
        writer.write_csv(target, headers, split)
Beispiel #4
0
def get_standard_data_for_cloumn(filename, header):
    """Load one integer column and bin its values into the range 0..9.

    The column is read with ``FileProcess.file_get_data`` and every entry is
    converted with ``int``.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list of ints.  An empty column, a column whose values are all equal,
        and the ``'class'`` column are returned as converted.  Otherwise each
        value ``v`` becomes ``floor((v - min) * 9 / (max - min))``, so the
        minimum maps to 0 and the maximum to 9.

    Raises:
        ValueError: If an entry cannot be converted with ``int``.
    """
    f = FileProcess()
    values = [int(raw) for raw in f.file_get_data(filename, header)]

    # max()/min() raise on an empty sequence; an empty column needs no scaling.
    if not values:
        return values

    low, high = min(values), max(values)
    if low == high or header == 'class':
        return values

    span = high - low
    # Exact integer floor division.  Multiplying by the float ``9 / span`` and
    # truncating can land one bin too low when the product rounds to just
    # under a whole number (for example 49 * (9 / 441) is 0.9999999999999999).
    return [(v - low) * 9 // span for v in values]
Beispiel #5
0
def get_standard_data_for_cloumn(filename, header):
    """Load one column and bin its values into the range 0..99.

    The column is read with ``FileProcess.file_get_data``, passed through
    ``deal_with_NA`` and converted to ``int``.  If any of that fails, the
    values are converted to ``float`` instead.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list of numbers.  An empty column, a column whose values are all
        equal, and the ``'SeriousDlqin2yrs'`` column are returned as
        converted.  Otherwise each value ``v`` becomes
        ``int((v - min) * (99 / (max - min)))``.

    Raises:
        ValueError: If a value can be parsed neither as ``int`` nor as
            ``float`` (raised by the ``float`` fallback).
    """
    f = FileProcess()
    d = f.file_get_data(filename, header)
    try:
        d = deal_with_NA(d)
        d = list(map(int, d))
    except Exception:
        # Best-effort fallback kept from the original, but no longer a bare
        # ``except:`` so KeyboardInterrupt / SystemExit are not swallowed.
        # If deal_with_NA itself failed, ``d`` is still the raw column here.
        d = list(map(float, d))

    # max()/min() raise on an empty sequence; an empty column needs no scaling.
    if not d:
        return d

    min_d = min(d)
    max_d = max(d)
    if max_d == min_d or header == 'SeriousDlqin2yrs':
        return d

    # The factor does not depend on the element, so compute it once.
    scale = 99 / (max_d - min_d)
    return [int((value - min_d) * scale) for value in d]
Beispiel #6
0
def get_standard_data_for_cloumn(filename, header):
    """Load one column and bin its values into the range 0..9.

    The column is read with ``FileProcess.file_get_data``, passed through
    ``deal_with_NA`` and converted to ``int``.  If any of that fails, the
    values are converted to ``float`` instead.

    Args:
        filename: Forwarded unchanged to ``FileProcess.file_get_data``.
        header: Name of the column to load.

    Returns:
        A list of numbers.  An empty column, a column whose values are all
        equal, and the ``'acc_now_delinq'`` column are returned as converted.
        Otherwise each value ``v`` becomes
        ``int((v - min) * (9 / (max - min)))``.

    Raises:
        ValueError: If a value can be parsed neither as ``int`` nor as
            ``float`` (raised by the ``float`` fallback).
    """
    f = FileProcess()
    d = f.file_get_data(filename, header)
    try:
        d = deal_with_NA(d)
        d = list(map(int, d))  # preferred: whole numbers
    except Exception:
        # Best-effort fallback kept from the original, but no longer a bare
        # ``except:`` so KeyboardInterrupt / SystemExit are not swallowed.
        # If deal_with_NA itself failed, ``d`` is still the raw column here.
        d = list(map(float, d))  # fallback: floating point

    # max()/min() raise on an empty sequence; an empty column needs no scaling.
    if not d:
        return d

    min_d = min(d)
    max_d = max(d)
    if max_d == min_d or header == 'acc_now_delinq':
        return d

    # The factor does not depend on the element, so compute it once.
    scale = 9 / (max_d - min_d)
    return [int((value - min_d) * scale) for value in d]
Beispiel #7
0
    print(data)

    s=Smote(data,N=100)
    s = (s.over_sampling())
    s = s.tolist()
    
    smote = []
    for line in s:
        l = [0] +  line
        for i in range(1, len(line)+1):
            l[i] = int(line[i-1] + 0.5)
        print(l)
            
        smote.append(l)
    
    f = FileProcess()
    headers, d = f.file_get_data_row(filename)
    
    smote_data = smote + d
    
    f =  FileProcess()
    f.write_csv('smote_data.csv',  headers, smote_data)
    
        

'''
a = []
for d in data_tmp:
    a.append(list(map(int,d)))