# Runs smote.SMOTE on the records of one class ("level") at a time for the
# dataset stored in 13_NonZeroDataset_Aggolo.csv.
#
# Every print below is a single-argument print(...) call: that form emits the
# same text whether it is parsed as a Python 2 print statement or as the
# Python 3 print function, so the script runs on both.
import numpy as np
import smote_utility
import smote

fileName_ = "13_NonZeroDataset_Aggolo.csv"
the_data_set = smote_utility.readCSVAsArray(fileName_)

# Get the distribution of records per class.
counter_dict = smote_utility.getCountPerClass(the_data_set)
print(counter_dict)
# Distribution noted by the original author (presumably from an earlier run):
# {0.0: 393, 1.0: 106, 2.0: 2, 3.0: 3, 4.0: 6, 5.0: 3, 6.0: 7, 7.0: 6,
#  8.0: 6, 9.0: 7, 10.0: 5, 11.0: 4, 12.0: 1}

# ---- SMOTE for level 1 ----
print("smoting time for level 1 ")
# Get the records that belong to this class.
classVal = float(1)
records_per_class_1 = smote_utility.getRecordsPeClass(classVal, the_data_set)
array_shaped_record = np.array(records_per_class_1)
# The label keeps its trailing space and is followed by one more space: that
# is exactly the separator `print "original datatset ", shape` produced.
print("original datatset  %s" % (array_shaped_record.shape,))
count_extra_synthetic_samples = 300
nearest_nieghbors = 10  # expected: n_neighbors <= n_samples
# NOTE(review): the argument order (shape, samples, extra-sample count,
# neighbors) follows the project's smote.SMOTE, which is not visible here.
smoted_dataset_1 = smote.SMOTE(
    array_shaped_record.shape,
    array_shaped_record,
    count_extra_synthetic_samples,
    nearest_nieghbors,
)
print("smoted dataset shape: level-1:: %s" % (smoted_dataset_1.shape,))
print("-----")

# ---- SMOTE for level 2 ----
print("smoting time for level 2 ")
# Get the records that belong to this class.
classVal = float(2)
records_per_class_2 = smote_utility.getRecordsPeClass(classVal, the_data_set)
# Runs smote.SMOTE on the records of one class ("level") at a time for the
# dataset stored in 5_NonZeroDataset_Aggolo.csv.
# NOTE(review): `np`, `smote` and `smote_utility` are used below but their
# imports are outside this view; presumably they sit at the top of the file.
#
# Every print below is a single-argument print(...) call: that form emits the
# same text whether it is parsed as a Python 2 print statement or as the
# Python 3 print function, so the script runs on both.
fileName_ = "5_NonZeroDataset_Aggolo.csv"
the_data_set = smote_utility.readCSVAsArray(fileName_)

# Get the distribution of records per class.
counter_dict = smote_utility.getCountPerClass(the_data_set)
print(counter_dict)
# Distribution noted by the original author (presumably from an earlier run):
# {0.0: 10, 1.0: 417, 2.0: 7, 3.0: 9, 4.0: 106}

# Formula (as stated by the original author) to fix the number of samples:
#     no_of_samples_you_want = x * no. of neighbors / 100
# You have to provide x; x must be < 100 or a multiple of 100.

# ---- SMOTE for level 0 ----
print("smoting time for level 0 ")
# Get the records that belong to this class.
classVal = float(0)
records_per_class_0 = smote_utility.getRecordsPeClass(classVal, the_data_set)
array_shaped_record = np.array(records_per_class_0)
# The label keeps its trailing space and is followed by one more space: that
# is exactly the separator `print "original datatset ", shape` produced.
print("original datatset  %s" % (array_shaped_record.shape,))
# Fix the sample count based on the number of neighbors (see formula above).
count_extra_synthetic_samples = 3200
# Expected: n_neighbors <= n_samples; level 0 has 10 samples.
nearest_nieghbors = 10
# NOTE(review): the argument order (shape, samples, extra-sample count,
# neighbors) follows the project's smote.SMOTE, which is not visible here.
smoted_dataset_0 = smote.SMOTE(
    array_shaped_record.shape,
    array_shaped_record,
    count_extra_synthetic_samples,
    nearest_nieghbors,
)
print("smoted dataset shape: level-0:: %s" % (smoted_dataset_0.shape,))
print("-----")

# ---- SMOTE for level 1 ----
print("smoting time for level 1 ")
# Get the records that belong to this class.
classVal = float(1)
records_per_class_1 = smote_utility.getRecordsPeClass(classVal, the_data_set)
# Runs smote.SMOTE on the records of one class ("level") at a time for the
# dataset stored in 13_NonZeroDataset_Aggolo.csv.
#
# Every print below is a single-argument print(...) call: that form emits the
# same text whether it is parsed as a Python 2 print statement or as the
# Python 3 print function, so the script runs on both.
import numpy as np
import smote_utility
import smote

fileName_ = "13_NonZeroDataset_Aggolo.csv"
the_data_set = smote_utility.readCSVAsArray(fileName_)

# Get the distribution of records per class.
counter_dict = smote_utility.getCountPerClass(the_data_set)
print(counter_dict)
# Distribution noted by the original author (presumably from an earlier run):
# {0.0: 393, 1.0: 106, 2.0: 2, 3.0: 3, 4.0: 6, 5.0: 3, 6.0: 7, 7.0: 6,
#  8.0: 6, 9.0: 7, 10.0: 5, 11.0: 4, 12.0: 1}

# ---- SMOTE for level 1 ----
print("smoting time for level 1 ")
# Get the records that belong to this class.
classVal = float(1)
records_per_class_1 = smote_utility.getRecordsPeClass(classVal, the_data_set)
array_shaped_record = np.array(records_per_class_1)
# The label keeps its trailing space and is followed by one more space: that
# is exactly the separator `print "original datatset ", shape` produced.
print("original datatset  %s" % (array_shaped_record.shape,))
count_extra_synthetic_samples = 300
nearest_nieghbors = 10  # expected: n_neighbors <= n_samples
# NOTE(review): the argument order (shape, samples, extra-sample count,
# neighbors) follows the project's smote.SMOTE, which is not visible here.
smoted_dataset_1 = smote.SMOTE(
    array_shaped_record.shape,
    array_shaped_record,
    count_extra_synthetic_samples,
    nearest_nieghbors,
)
print("smoted dataset shape: level-1:: %s" % (smoted_dataset_1.shape,))
print("-----")

# ---- SMOTE for level 2 ----
print("smoting time for level 2 ")
# Get the records that belong to this class.
classVal = float(2)