Ejemplo n.º 1
0
            # 计算插值样本
            # 3.随机选取K中的一个样本
            np.random.seed(self.random_state)
            choice_all = k_sample_index.flatten()
            #             print('choice_all',choice_all)
            #             print('choice_all[choice_all != 0]',choice_all != 0)
            choosed = np.random.choice(choice_all[choice_all != 0])
            #             print('choosed',choosed)

            # 4. 在正样本和随机样本之间选出一个点
            diff = sample.iloc[choosed, ] - sample.iloc[i, ]
            #             print('diff',type(diff), diff)
            gap = np.random.rand(1, n_atters)
            #             print('gap', gap)
            #             print('sample.iloc[i,]', sample.iloc[i,])
            new.loc[i] = [x for x in sample.iloc[i, ] + gap.flatten() * diff]
            #             print('new',new)

            label_out = np.r_[label_out, tp_less]  ##给新增加的一行数据添加label标签
        print('new', new)
        new_sample = pd.concat([x_data, new])
        new_sample.reset_index(inplace=True, drop=True)
        return new_sample, label_out


if __name__ == '__main__':
    # Demo entry point: oversample a dataset with SMOTE and print the
    # resulting class distribution.
    # NOTE(review): `x`, `y` and `columns` are not defined in the visible part
    # of this file -- they must already exist at module level; confirm.

    # over_sample indexes its input with .iloc and concatenates it with
    # pd.concat, so the features are wrapped in a DataFrame first.
    x = pd.DataFrame(data=x, columns=columns)

    smt = SMOTE()
    # over_sample returns the augmented feature frame and the extended labels.
    x_new, y_new = smt.over_sample(x, y)

    # Tally how many samples each label has after oversampling.
    print(Counter(y_new))