def _interleave_balanced_classes(samples, labels, classes=(-1, 0, 1)):
    """Return samples interleaved class by class, truncated to the rarest class.

    The result is ordered ``[c0[0], c1[0], c2[0], c0[1], c1[1], c2[1], ...]``
    for ``classes = (c0, c1, c2)`` and stops as soon as any class runs out of
    samples, so every class contributes the same number of items.

    ``samples`` and ``labels`` are expected to be NumPy arrays of equal length
    (``labels == value`` is used as a boolean mask over ``samples``).
    """
    # Build each class subset once instead of re-masking on every iteration.
    per_class = [samples[labels == value] for value in classes]
    balanced = []
    # zip() stops at the shortest subset, i.e. at min(len(subset)).
    for group in zip(*per_class):
        balanced.extend(group)
    return balanced


def preprocess_NewCategoryVec(newl_dic, toptarget_dic, dic_key,date = "07302015"):
    """Build class-balanced train/test targets and per-category matrices.

    Steps, in order:
      1. Build the category/document matrix for ``newl_dic[dic_key]`` with
         ``IIalgorithm_model.makecategorydocMat``.
      2. Split samples and labels with ``devide_train_test_with_date``
         (``mod_number=5``, ``mod_value=1``).
      3. Balance train and test separately: samples labelled -1, 0 and 1 are
         interleaved and truncated to the size of the rarest label.
      4. Build per-category matrices for both balanced sets, stack them
         (train rows first, then test rows) and pass the result through
         ``yahoo_data_preprocess_func.create_NewpreW_NewpreWdict_NewDimentionN``.
      5. Slice the returned matrices back into their train and test parts.

    Args:
        newl_dic: mapping indexed by ``dic_key``; the value is converted with
            ``np.array`` and used as the sample list.
        toptarget_dic: mapping indexed by ``dic_key``; the value is converted
            with ``np.array`` and compared against -1, 0 and 1.
        dic_key: key selecting the entry of both mappings.
        date: forwarded unchanged to ``devide_train_test_with_date``.

    Returns:
        Tuple ``(target_train, target_test, NewtopdocveccategoryMat_train,
        NewtopdocveccategoryMat_test, NewpreWdict, NewpreW,
        NewpreW_namelist_dic, NewDimentionN)``. The two targets are int32
        arrays made of the repeated pattern ``[-1, 0, 1]``, matching the
        interleaving order of the balanced samples.
    """
    topdocveccategoryMat = IIalgorithm_model.makecategorydocMat(
        newl_dic[dic_key], defaultMatdict, word2vecdic,
        Folda = "toptexts_kaigyou_kihon2", clusternumber = DimentionN)
    newl_train, newl_test, toptarget_train, toptarget_test = devide_train_test_with_date(
        np.array(newl_dic[dic_key]), np.array(toptarget_dic[dic_key]),
        date = date, mod_number = 5, mod_value = 1)

    newl_train_balanced = _interleave_balanced_classes(newl_train, toptarget_train)
    newl_test_balanced = _interleave_balanced_classes(newl_test, toptarget_test)

    topdocveccategoryMat3_train = create_topdocveccategoryMat3(topdocveccategoryMat, newl_train_balanced)
    topdocveccategoryMat3_test = create_topdocveccategoryMat3(topdocveccategoryMat, newl_test_balanced)
    # Stack train rows on top of test rows so both go through the same
    # preprocessing call; they are separated again by row index below.
    topdocveccategoryMat3_all = {}
    for n in range(DimentionN):
        topdocveccategoryMat3_all[n] = np.r_[topdocveccategoryMat3_train[n], topdocveccategoryMat3_test[n]]

    (NewtopdocveccategoryMat, NewpreWdict, NewpreW,
     NewpreW_namelist_dic, NewDimentionN) = yahoo_data_preprocess_func.create_NewpreW_NewpreWdict_NewDimentionN(
        topdocveccategoryMat3_all, preW, preWdict, DimentionN)

    # Floor division: plain "/" yields a float on Python 3 and list * float
    # raises TypeError. The balanced lists always hold a multiple of 3 items.
    target_train = np.array([-1, 0, 1] * (len(newl_train_balanced) // 3)).astype(np.int32)
    target_test = np.array([-1, 0, 1] * (len(newl_test_balanced) // 3)).astype(np.int32)

    n_train = len(target_train)
    NewtopdocveccategoryMat_train, NewtopdocveccategoryMat_test = {}, {}
    # NOTE(review): this iterates over DimentionN although the helper also
    # returned NewDimentionN -- confirm which bound is intended here.
    for n in range(DimentionN):
        NewtopdocveccategoryMat_train[n] = NewtopdocveccategoryMat[n][0:n_train]
        NewtopdocveccategoryMat_test[n] = NewtopdocveccategoryMat[n][n_train:]
    return target_train,target_test,NewtopdocveccategoryMat_train, NewtopdocveccategoryMat_test,NewpreWdict, NewpreW, NewpreW_namelist_dic,NewDimentionN
# Merge the 2015 documents into the main document mapping (in place).
topdoc.update(topdoc2015)

#preprocessed_docs = doc
preprocessed_docs = topdoc

# Build the gensim vocabulary from every document's token list, then drop
# tokens via filter_extremes(no_below=5). The key sets before and after the
# filtering are kept for inspection.
dct = gensim.corpora.Dictionary(preprocessed_docs.values())
unfiltered = dct.token2id.keys()
dct.filter_extremes(no_below=5)
filtered = dct.token2id.keys()
#filtered_out = set(unfiltered) - set(filtered)

# For every document name:
#   bow_docs            -> sparse bag-of-words returned by doc2bow
#   BOW_vecdic          -> dense vector of length len(dct)
#   bow_docs_all_zeros  -> True when every entry of the dense vector is 0
bow_docs, bow_docs_all_zeros, BOW_vecdic = {}, {}, {}
bow_vocabulary_size = len(dct)
for name, doc_tokens in preprocessed_docs.items():
    sparse = dct.doc2bow(doc_tokens)
    dense = IIalgorithm_model.vec2dense(sparse, num_terms=bow_vocabulary_size)
    bow_docs[name] = sparse
    BOW_vecdic[name] = dense
    bow_docs_all_zeros[name] = all(entry == 0 for entry in dense)

#k = evaluate_with_SVM_3(BOW_vecdic, toptarget_dic, newl_dic, func2,func4,func1, 
                        #mod_number = 5, mod_value = 1, date = '09302015', with_date = True)

#all_target_test_Bow_list_dic, all_target_pred_Bow_list_dic = evaluate_with_SVM_3_k_fold(BOW_vecdic,toptarget_dic, newl_dic)

# Container initialised empty before looping over the keys of newl_dic.
# NOTE(review): presumably filled per dic_key further down -- confirm.
k_bow_dic = {}
for dic_key in newl_dic:
    #dic_key = 'close_previousday_to_close_nextday'
    # Samples and labels for this key, as arrays so boolean masks can be used.
    newl = np.array(newl_dic[dic_key])
    target = np.array(toptarget_dic[dic_key])
    # Keep only the samples whose label is not 0.
    newl_zero_one = newl[target != 0]
    # Size of the smaller of the two classes labelled -1 and 1.
    length = min(len(target[target== -1]), len(target[target== 1]))