コード例 #1
0
def _interleave_balanced(samples, labels, classes=(-1, 0, 1)):
    """Return a class-balanced, interleaved list of samples.

    For each class value in ``classes`` the matching entries of ``samples``
    are selected with a boolean mask on ``labels``.  The result contains the
    i-th entry of every class in turn (class order as given), for as many
    rounds as the smallest class allows.

    Args:
        samples: NumPy array of items to pick from.
        labels: NumPy array of class labels, aligned with ``samples``.
        classes: Class values to balance over, in output order.

    Returns:
        A plain list whose length is ``len(classes)`` times the size of the
        smallest class.
    """
    # Select each class once instead of re-evaluating the mask per iteration.
    per_class = [samples[labels == cls] for cls in classes]
    balanced = []
    # zip() stops at the shortest group, which caps every class at the size
    # of the smallest one.
    for group in zip(*per_class):
        balanced.extend(group)
    return balanced


def preprocess_NewCategoryVec(newl_dic, toptarget_dic, dic_key,date = "07302015"):
    """Build class-balanced train/test targets and per-cluster matrices.

    Steps, in order:
      1. Build the category/document matrices for every id in
         ``newl_dic[dic_key]`` with ``IIalgorithm_model.makecategorydocMat``.
      2. Split ids and targets into train and test parts with
         ``devide_train_test_with_date`` (``mod_number=5``, ``mod_value=1``).
      3. Balance each part over the classes -1, 0 and 1, interleaving them
         as -1, 0, 1, -1, 0, 1, ...
      4. Build the per-cluster matrices for both parts, stack them (train
         rows first) and pass them through
         ``yahoo_data_preprocess_func.create_NewpreW_NewpreWdict_NewDimentionN``.
      5. Split the resulting matrices back into train and test rows.

    Relies on the module-level names ``defaultMatdict``, ``word2vecdic``,
    ``DimentionN``, ``preW`` and ``preWdict``.

    Args:
        newl_dic: Mapping from ``dic_key`` to a sequence of ids.
        toptarget_dic: Mapping from ``dic_key`` to the labels aligned with
            those ids (values -1, 0 and 1 are used for balancing).
        dic_key: Key selecting the data set in both mappings.
        date: Forwarded unchanged to ``devide_train_test_with_date``.

    Returns:
        Tuple ``(target_train, target_test, NewtopdocveccategoryMat_train,
        NewtopdocveccategoryMat_test, NewpreWdict, NewpreW,
        NewpreW_namelist_dic, NewDimentionN)``.
    """
    topdocveccategoryMat = IIalgorithm_model.makecategorydocMat(
        newl_dic[dic_key], defaultMatdict, word2vecdic,
        Folda="toptexts_kaigyou_kihon2", clusternumber=DimentionN)

    # Alternative splitter left disabled in this file: devide_train_test_with_random.
    newl_train, newl_test, toptarget_train, toptarget_test = devide_train_test_with_date(
        np.array(newl_dic[dic_key]), np.array(toptarget_dic[dic_key]),
        date=date, mod_number=5, mod_value=1)

    newl_train_balanced = _interleave_balanced(newl_train, toptarget_train)
    newl_test_balanced = _interleave_balanced(newl_test, toptarget_test)

    topdocveccategoryMat3_train = create_topdocveccategoryMat3(topdocveccategoryMat, newl_train_balanced)
    topdocveccategoryMat3_test = create_topdocveccategoryMat3(topdocveccategoryMat, newl_test_balanced)

    # Stack train rows on top of test rows so both parts go through the same
    # preprocessing call; they are separated again by row count below.
    topdocveccategoryMat3_all = {}
    for n in range(DimentionN):
        topdocveccategoryMat3_all[n] = np.r_[topdocveccategoryMat3_train[n], topdocveccategoryMat3_test[n]]

    (NewtopdocveccategoryMat, NewpreWdict, NewpreW,
     NewpreW_namelist_dic, NewDimentionN) = yahoo_data_preprocess_func.create_NewpreW_NewpreWdict_NewDimentionN(
        topdocveccategoryMat3_all, preW, preWdict, DimentionN)

    # The balanced lists are interleaved as -1, 0, 1, so the targets are that
    # pattern repeated.  Floor division keeps the repeat count an int on
    # Python 3 as well (list * float raises TypeError).
    target_train = np.array([-1, 0, 1] * (len(newl_train_balanced) // 3)).astype(np.int32)
    target_test = np.array([-1, 0, 1] * (len(newl_test_balanced) // 3)).astype(np.int32)

    NewtopdocveccategoryMat_train, NewtopdocveccategoryMat_test = {}, {}
    n_train = len(target_train)
    # NOTE(review): this iterates over DimentionN although NewDimentionN is
    # also returned by the preprocessing call -- confirm which key range
    # NewtopdocveccategoryMat actually uses.
    for n in range(DimentionN):
        NewtopdocveccategoryMat_train[n] = NewtopdocveccategoryMat[n][0:n_train]
        NewtopdocveccategoryMat_test[n] = NewtopdocveccategoryMat[n][n_train:]

    return (target_train, target_test,
            NewtopdocveccategoryMat_train, NewtopdocveccategoryMat_test,
            NewpreWdict, NewpreW, NewpreW_namelist_dic, NewDimentionN)
コード例 #2
0
            toptarget.append(stockvaluedict[newsid]['tag_close_close'][ID])
            newl.append(newsid)
    except:
        excpetl.append(ID)
        continue
"""
# Predict close (previous day) - close (previous day) for 2013-2015 with IIalgorithm.
dic_key = 'close_previousday_to_close_nextday'
newl = np.array(newl_dic[dic_key])
target = np.array(toptarget_dic[dic_key])

# Masks for the two non-zero classes, computed once and reused below.
neg_mask = (target == -1)
pos_mask = (target == 1)

# Ids whose label is not 0.
newl_zero_one = newl[target != 0]

# Cut both classes down to the size of the smaller one.
length = min(len(target[neg_mask]), len(target[pos_mask]))
neg_targets = target[neg_mask][:length]
pos_targets = target[pos_mask][:length]
target_balanced = np.r_[neg_targets, pos_targets]
# Relabel -1 as 0 so the balanced targets take the values 0 and 1.
target_balanced[target_balanced == -1] = 0
# Ids in the same order as target_balanced: the -1 class first, then the 1 class.
newl_balanced = np.r_[newl[neg_mask][:length], newl[pos_mask][:length]]

# The matrices are built from every id; only the balanced ids are passed on.
topdocveccategoryMat = IIalgorithm_model.makecategorydocMat(
    newl,
    defaultMatdict,
    word2vecdic,
    Folda="toptexts_kaigyou_kihon2",
    clusternumber=DimentionN,
)
topdocveccategoryMat3 = create_topdocveccategoryMat3(topdocveccategoryMat, newl_balanced)
#topdocveccategoryMat3 = create_topdocveccategoryMat3(topdocveccategoryMat, newl)

#topdocveccategoryMat3[12].sum(axis = 0)
"""
# For every cluster label, collect the keys of the first entry stored under
# that label in topdocveccategoryMat.
namelist_dic = {}
for label in range(DimentionN):
    # dict views cannot be indexed on Python 3, so take the first value via an
    # iterator and materialise its keys as a list before handing them to
    # NumPy.  On Python 2 this selects the same entry as .values()[0].
    first_entry = next(iter(topdocveccategoryMat[label].values()))
    namelist_dic[label] = np.array(list(first_entry.keys()))

new_preW = {}
for label in range(DimentionN):
    new_preW[label] = np.zeros((1,len(namelist_dic[label])))
    for index, word in enumerate(namelist_dic[label]):
        try:
            new_preW[label][0][index] = pne2[word]