def _balance_three_classes(ids, labels):
    """Interleave equally many samples of the labels -1, 0 and 1.

    ``ids`` and ``labels`` are parallel NumPy arrays.  The result is a list
    laid out as ``[neg0, neu0, pos0, neg1, neu1, pos1, ...]`` and is
    truncated to the size of the rarest class, so its length is always a
    multiple of three.
    """
    # Build each masked view once instead of once per loop iteration.
    negatives = ids[labels == -1]
    neutrals = ids[labels == 0]
    positives = ids[labels == 1]

    balanced = []
    # zip() stops at the shortest class, which is exactly the balancing rule.
    for triple in zip(negatives, neutrals, positives):
        balanced.extend(triple)
    return balanced


def preprocess_NewCategoryVec(newl_dic, toptarget_dic, dic_key, date="07302015"):
    """Build class-balanced train/test category matrices and their targets.

    Steps, in order:

    1. Build the per-category document matrix for every id in
       ``newl_dic[dic_key]`` via ``IIalgorithm_model.makecategorydocMat``.
    2. Split ids and labels into train/test with
       ``devide_train_test_with_date`` (``mod_number=5``, ``mod_value=1``).
    3. Balance each split so the labels -1, 0 and 1 occur equally often,
       interleaved in that order.
    4. Stack train rows on top of test rows per category and hand the result
       to ``yahoo_data_preprocess_func.create_NewpreW_NewpreWdict_NewDimentionN``.
    5. Cut the returned matrices back into their train and test parts.

    Besides its arguments, the function reads the module-level names
    ``defaultMatdict``, ``word2vecdic``, ``DimentionN``, ``preW`` and
    ``preWdict``.

    Args:
        newl_dic: mapping indexed by ``dic_key``; the selected value is
            converted with ``np.array`` and treated as the sample ids.
        toptarget_dic: mapping indexed by ``dic_key``; the selected value is
            converted with ``np.array`` and compared against -1, 0 and 1.
        dic_key: key selecting the entry of both mappings.
        date: forwarded unchanged to ``devide_train_test_with_date``.

    Returns:
        An 8-tuple ``(target_train, target_test,
        NewtopdocveccategoryMat_train, NewtopdocveccategoryMat_test,
        NewpreWdict, NewpreW, NewpreW_namelist_dic, NewDimentionN)``.
        The two targets are ``int32`` arrays repeating ``[-1, 0, 1]``; the
        two matrices are dicts keyed by ``0 .. DimentionN - 1``.
    """
    topdocveccategoryMat = IIalgorithm_model.makecategorydocMat(
        newl_dic[dic_key], defaultMatdict, word2vecdic,
        Folda="toptexts_kaigyou_kihon2", clusternumber=DimentionN)

    # A commented-out call to devide_train_test_with_random(...) used to
    # precede this line; the date-based split is the one actually in effect.
    newl_train, newl_test, toptarget_train, toptarget_test = devide_train_test_with_date(
        np.array(newl_dic[dic_key]), np.array(toptarget_dic[dic_key]),
        date=date, mod_number=5, mod_value=1)

    newl_train_balanced = _balance_three_classes(newl_train, toptarget_train)
    newl_test_balanced = _balance_three_classes(newl_test, toptarget_test)

    topdocveccategoryMat3_train = create_topdocveccategoryMat3(
        topdocveccategoryMat, newl_train_balanced)
    topdocveccategoryMat3_test = create_topdocveccategoryMat3(
        topdocveccategoryMat, newl_test_balanced)

    # Train rows first, test rows second: the split further down relies on
    # this ordering.
    topdocveccategoryMat3_all = {}
    for n in range(DimentionN):
        topdocveccategoryMat3_all[n] = np.r_[
            topdocveccategoryMat3_train[n], topdocveccategoryMat3_test[n]]

    (NewtopdocveccategoryMat, NewpreWdict, NewpreW,
     NewpreW_namelist_dic, NewDimentionN) = (
        yahoo_data_preprocess_func.create_NewpreW_NewpreWdict_NewDimentionN(
            topdocveccategoryMat3_all, preW, preWdict, DimentionN))

    # The balanced lists are [-1, 0, 1] triples by construction.  Floor
    # division keeps the repeat count an int on Python 3 as well; with "/"
    # the list multiplication raises TypeError there.
    target_train = np.array(
        [-1, 0, 1] * (len(newl_train_balanced) // 3)).astype(np.int32)
    target_test = np.array(
        [-1, 0, 1] * (len(newl_test_balanced) // 3)).astype(np.int32)

    # NOTE(review): this iterates the original DimentionN although the helper
    # also returns NewDimentionN.  If the helper can change the number of
    # categories, the range below should probably follow it -- confirm.
    n_train = len(target_train)
    NewtopdocveccategoryMat_train, NewtopdocveccategoryMat_test = {}, {}
    for n in range(DimentionN):
        NewtopdocveccategoryMat_train[n] = NewtopdocveccategoryMat[n][0:n_train]
        NewtopdocveccategoryMat_test[n] = NewtopdocveccategoryMat[n][n_train:]

    return (target_train, target_test,
            NewtopdocveccategoryMat_train, NewtopdocveccategoryMat_test,
            NewpreWdict, NewpreW, NewpreW_namelist_dic, NewDimentionN)
toptarget.append(stockvaluedict[newsid]['tag_close_close'][ID]) newl.append(newsid) except: excpetl.append(ID) continue """ #IIalgorithmで2013-2015のclose(前日)-close(前日)を予測 dic_key = 'close_previousday_to_close_nextday' newl = np.array(newl_dic[dic_key]) target = np.array(toptarget_dic[dic_key]) newl_zero_one = newl[target != 0] length = min(len(target[target== -1]), len(target[target== 1])) target_balanced = np.r_[target[target== -1][0:length], target[target== 1][0:length]] target_balanced[target_balanced == -1] = 0 newl_balanced = np.r_[newl[target== -1][0:length], newl[target== 1][0:length]] topdocveccategoryMat = IIalgorithm_model.makecategorydocMat(newl, defaultMatdict,word2vecdic, Folda = "toptexts_kaigyou_kihon2",clusternumber = DimentionN) topdocveccategoryMat3 = create_topdocveccategoryMat3(topdocveccategoryMat, newl_balanced) #topdocveccategoryMat3 = create_topdocveccategoryMat3(topdocveccategoryMat, newl) #topdocveccategoryMat3[12].sum(axis = 0) """ namelist_dic ={} for label in range(DimentionN): namelist_dic[label] = np.array(topdocveccategoryMat[label].values()[0].keys()) new_preW = {} for label in range(DimentionN): new_preW[label] = np.zeros((1,len(namelist_dic[label]))) for index, word in enumerate(namelist_dic[label]): try: new_preW[label][0][index] = pne2[word]