Esempio n. 1
0
    train = origin_train.iloc[trainid]
    val = origin_train.iloc[valid]
else:
    train = pd.read_json(datapath, typ='frame')
    trainflow = flow

# Load the JSON file at `datapath2` into `test`; typ='frame' requests a
# DataFrame rather than a Series (assuming `pd` is pandas).
test = pd.read_json(datapath2, typ='frame')

# Build the text and user dictionaries; the `jieba` flag selects the source.
if jieba:
    # Built by the `dp` helpers from `jiebapath` / `userpath` (presumably
    # file paths -- confirm against the `dp` module).
    # NOTE(review): `mink` / `usermink` look like minimum-count thresholds
    # ("mink or above" per the original note); confirm in `dp`.
    textdict = dp.text_jiebaDictionary(jiebapath, mink)  ## mink or above
    userdict = dp.userDictionary(userpath, usermink)
else:
    # Built directly from the "user" and "text" columns of `train`,
    # converted to NumPy arrays first.
    user = np.array(train["user"])
    text = np.array(train["text"])
    textdict = dp.textDictionary(text)
    userdict = dp.Dictionary(user)
#textdict = dp.textDictionary(text)
#train_loader =  dp.TextClassDataLoader(train, userdict,textdict, batch_size=BATCH_SIZE)
#if isval:
#    val_loader = dp.TextClassDataLoader(val, userdict,textdict, batch_size=BATCH_SIZE)

# Create the training data loader from the training frame, the two
# dictionaries, the `trainflow` value and the configured batch size.
# NOTE(review): in the visible code `trainflow` is only assigned in the
# `else` branch above; confirm the other branch assigns it as well.
train_loader = dp.TextClassDataLoader_class(train=train,
                                            userdict=userdict,
                                            textdict=textdict,
                                            flow=trainflow,
                                            batch_size=BATCH_SIZE)
if isval:
    val_loader = dp.TextClassDataLoader_class(val,
                                              userdict,
                                              textdict,
                                              flow=valflow,
Esempio n. 2
0
            valid.append(i)
        else:
            trainid.append(i)
    train = origin_train.iloc[trainid]
    val = origin_train.iloc[valid]
else:
    train = origin_train

#train.ix[1]
#df1 = train.iloc[:5]
#df1.sample(frac=1)
#df1.sample(frac=1).reset_index(drop=True)

# Build the text and user dictionaries; the `jieba` flag selects how the
# text dictionary is produced.
if jieba:
    # NOTE(review): `mink` / `usermink` look like minimum-count thresholds
    # ("mink or above" per the original note); confirm in `dp`.
    textdict = dp.text_jiebaDictionary(jiebapath, mink)  ## mink or above
    # NOTE(review): this branch passes (userpath, usermink) to
    # `dp.Dictionary`, while the else branch passes the same arguments to
    # `dp.userDictionary`; confirm which helper is intended.
    userdict = dp.Dictionary(userpath, usermink)
else:
    # NOTE(review): `user` is not used in this block now that the line
    # below is commented out; it may still be read further down the script.
    user = np.array(train["user"])
    text = np.array(train["text"])
    # The text dictionary is built from the "text" column of `train`.
    textdict = dp.textDictionary(text)
    #userdict = dp.Dictionary(user)
    userdict = dp.userDictionary(userpath, usermink)
#textdict = dp.textDictionary(text)

# Create the training data loader from the training frame, the two
# dictionaries and the configured batch size.
train_loader = dp.TextClassDataLoader_notidentify(train,
                                                  userdict,
                                                  textdict,
                                                  batch_size=BATCH_SIZE)
if isval:
    val_loader = dp.TextClassDataLoader_notidentify(val,
                                                    userdict,