def sample_data(X, y):
    """Bin age targets (when applicable) and rebalance (X, y).

    Behavior is driven by module-level globals:
    - ``sourceType``: when ``SourceType.age``, the continuous target ``y`` is
      discretized with ``pd.cut`` over the global ``bins`` and re-encoded to
      0-based integer labels.
    - ``sample_type``: selects under-/over-sampling via the global helpers
      ``under_sample`` / ``over_sample``; any other value leaves the data
      unsampled and, for non-age sources, reshapes ``y`` to a column vector.

    Returns the (possibly resampled) ``(X, y)`` pair.
    """
    if sourceType == SourceType.age:
        binned = pd.cut(y, bins, labels=range(len(bins) - 1))
        y = LabelEncoder().fit_transform(binned)

    # Early returns replace the original if/elif/else ladder.
    if sample_type == SampleType.under:
        return under_sample(X, y)
    if sample_type == SampleType.over:
        return over_sample(X, y)

    # No resampling requested: non-age targets become a column vector.
    if sourceType != SourceType.age:
        y = y.reshape(-1, 1)
    return X, y
def split_train_test(dataSet):
    """Split a 2-D dataset array into features X and integer target y,
    then bin/resample them via ``sample_data``.

    NOTE(review): despite the name, this does NOT produce a train/test
    split — it separates feature columns from the target column.

    Column layout (driven by the global ``sourceType``):
    - ``SourceType.race``: X = all but the last column, y = last column.
    - otherwise: X = all but the last two columns; for ``SourceType.age``
      y is column ``cut + 1`` (i.e. the final column, since cut == -2),
      for other sources y is column ``cut`` (second-to-last).
      NOTE(review): the age branch taking the *final* column while X drops
      the last two looks deliberate but is worth confirming against the
      dataset's column layout.

    Returns the (possibly binned and resampled) ``(X, y)`` pair.
    """
    cut = -1 if sourceType == SourceType.race else -2
    if sourceType == SourceType.age:
        X, y = dataSet[:, :cut], dataSet[:, cut + 1]
    else:
        X, y = dataSet[:, :cut], dataSet[:, cut]
    y = y.astype('int')
    # The binning + resampling logic that used to be duplicated here is
    # identical to sample_data(); delegate so the two paths cannot drift.
    return sample_data(X, y)
# --- Script-level setup: load file index/labels, balance classes, prepare CV ---

# Handles remaining in the source CSV; presumably used to filter files — confirm.
remainHandles = get_remain_handles(
    "/home/yaguang/new_nonstop_onefeaturesword1.csv")
# Every regular file directly under `mypath` (mypath is defined elsewhere).
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
fix_seq_len = 200  # fixed sequence length for model input
batch_size = 32
rocs = []  # presumably accumulates per-fold ROC scores in the CV loop below

#emoji processing
word2idx = read_word2idx()
pretrained_embedding = load_pretrained_vectors(word2idx)

# Map dataset indices to (file, label) pairs, then pull out parallel arrays.
index_to_file_label = map_index_to_file_label(onlyfiles)
index, labels = get_index_label(index_to_file_label)
# Columns 1..768 — presumably 768-dim embedding columns after an id column; verify.
usecols = list(np.arange(1, 769))
counter = [0, 0, 0, 0]  # per-class counts over the 4 label classes
# NOTE(review): under-sampling is applied unconditionally here, unlike the
# sample_type-gated logic in sample_data()/split_train_test() — confirm intended.
index, labels = under_sample(index.reshape(-1, 1), labels)
index = index.ravel()
for label in labels:
    counter[label] += 1

# NOTE(review): opened without a context manager and never closed in this view;
# presumably written/closed further down the file — confirm.
w = open("metric_result.txt", 'w')
skf = StratifiedKFold(n_splits=10, shuffle=True)

learning_rate = 0.0001

# Re-count class distribution after under-sampling (shadows the earlier counter).
counter = [0, 0, 0, 0]
for idx in index:
    counter[index_to_file_label[idx][1]] += 1
print(counter)
for train_index, test_index in skf.split(index, labels):