# adasyn_test.py -- plots a synthetic two-class dataset before and after generating ADASYN samples.
from sklearn.datasets.samples_generator import make_blobs
import numpy as np
import matplotlib.pyplot as pl
import ADASYN
def createCluster(n_minority=50, n_majority=200, random_state=0):
    """Build an imbalanced two-class, 2-feature toy dataset from two blobs.

    Two single-centre blobs are drawn with ``make_blobs`` and stacked: the
    first blob keeps the labels ``make_blobs`` assigns to it (0 for a single
    centre), the second blob is relabelled 1.  Both blobs use the same seed
    and differ only in the box their centre is sampled from, which is meant
    to make the two clusters partially overlap.

    Args:
        n_minority: Number of points in the first blob (class 0).
        n_majority: Number of points in the second blob (class 1).
        random_state: Seed passed to both ``make_blobs`` calls.

    Returns:
        A tuple ``(X, y)`` of plain Python lists: ``X`` holds the data
        points as ``[f1, f2]`` pairs (first blob first), ``y`` holds the
        matching class labels, e.g. ``[0, 0, ..., 1, 1]``.
    """
    minority_X, minority_y = make_blobs(
        n_samples=n_minority, centers=1, n_features=2,
        random_state=random_state, center_box=(-5.0, 5.0))
    # Only the points of the second blob are needed; its labels are replaced.
    majority_X, _ = make_blobs(
        n_samples=n_majority, centers=1, n_features=2,
        random_state=random_state, center_box=(-4.0, 6.0))

    X = np.concatenate((minority_X, majority_X), axis=0)
    majority_y = np.ones(len(majority_X), dtype=minority_y.dtype)
    y = np.concatenate((minority_y, majority_y), axis=0)
    return X.tolist(), y.tolist()
def main():
    """Plot a synthetic two-class dataset before and after ADASYN sampling.

    Steps: build the toy dataset with ``createCluster``, show it, run the
    ADASYN helper functions to generate synthetic points, show the original
    points together with the synthetic ones (in red), and finally merge the
    synthetic points back into the dataset.  Both ``pl.show()`` calls are
    kept exactly where they were, so the script still displays two figures.
    """
    # Create 2 artificial clusters that partially overlap
    X, y = createCluster()

    # Plot the clusters; the class label indexes into the colour table
    colors = np.array(list('bgrcmykbgrcmykbgrcmykbgrcmyk'))
    colors = np.hstack([colors] * 20)
    points = np.array(X)
    pl.scatter(points[:, 0], points[:, 1], color=colors[y].tolist(), s=10)
    pl.show()

    # Get the minority and majority count
    ms, ml = ADASYN.getClassCount(X, y)
    # NOTE(review): d is not used below; the call is kept as in the original
    # in case ADASYN.getd has side effects -- confirm and drop if it is pure.
    d = ADASYN.getd(X, y, ms, ml)
    # NOTE(review): the trailing 1 is presumably ADASYN's beta (desired
    # balance level) -- confirm against ADASYN.getG.
    G = ADASYN.getG(X, y, ms, ml, 1)

    # Get the list of r values, which indicate how many samples will be made
    # per data point in the minority dataset.
    # NOTE(review): 0 and 5 are presumably the minority class label and the
    # number of neighbours K -- confirm against ADASYN.getRis.
    rlist = ADASYN.getRis(X, y, 0, 5)

    # Generate the synthetic data
    newX, newy = ADASYN.generateSamples(rlist, X, y, G, 0, 5)

    # Plot the dataset again, with the synthetic points overlaid in red.
    # X and y are converted afresh in case the ADASYN calls touched them.
    points = np.array(X)
    synthetic = np.array(newX)
    pl.scatter(points[:, 0], points[:, 1], color=colors[y].tolist(), s=10)
    pl.scatter(synthetic[:, 0], synthetic[:, 1], color='red', s=10)
    pl.show()

    # NOTE(review): the trailing 1 is presumably the majority class label;
    # the merged dataset is not used further in this script.
    X, y = ADASYN.joinwithmajorityClass(X, y, newX, newy, 1)

    # Parenthesised form prints the same text on Python 2 and parses on 3.
    print('test')
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()