def run(self):
    """Top-level training driver.

    Builds the model, optionally restores a checkpoint (resuming the epoch
    counter from the recorded history), then alternates train/test epochs
    and persists history on the test cadence.
    """
    self.prepareModel()
    log('Model Prepared')
    if args.load_model is not None:
        self.loadModel()
        # Metrics are appended once per test interval; reconstruct the
        # epoch index the restored run had reached.
        stloc = len(self.metrics['TrainLoss']) * args.tstEpoch - (
            args.tstEpoch - 1)
    else:
        stloc = 0
        init = tf.global_variables_initializer()
        self.sess.run(init)
        log('Variables Inited')
    for ep in range(stloc, args.epoch):
        test = (ep % args.tstEpoch == 0)
        reses = self.trainEpoch()
        log(self.makePrint('Train', ep, reses, test))
        if test:
            reses = self.testEpoch()
            log(self.makePrint('Test', ep, reses, test))
            # Same cadence as testing (test already means
            # ep % tstEpoch == 0), so reuse the flag instead of
            # re-evaluating the modulus.
            self.saveHistory()
        print()
    reses = self.testEpoch()
    log(self.makePrint('Test', args.epoch, reses, True))
    self.saveHistory()
def __init__(self, sess, datas, inputDim):
    """Hold the session, split the data matrices, and set up metric buffers.

    Args:
        sess: live tf.Session used for all graph execution.
        datas: (trainMat, cvMat, testMat) sparse interaction matrices.
        inputDim: nominal input dimensionality. NOTE(review): this argument
            is immediately overwritten by trainMat.shape[1] below, so the
            passed value is effectively ignored — confirm with callers.
    """
    self.sess = sess
    self.trainMat = datas[0]
    self.cvMat = datas[1]
    self.testMat = datas[2]
    # The passed-in inputDim is discarded in favour of the matrix width.
    inputDim = self.trainMat.shape[1]
    # Non-zero entries mark observed interactions; masks select which
    # label entries contribute to the loss/RMSE.
    self.trainMask = self.trainMat != 0
    self.cvMask = self.cvMat != 0
    self.testMask = self.testMat != 0
    # Legacy per-epoch buffers; presumably superseded by self.metrics —
    # TODO(review): confirm nothing else reads these before removing.
    self.train_losses = list()
    self.train_RMSEs = list()
    self.test_losses = list()
    self.test_RMSEs = list()
    self.inputDim = inputDim
    # Metric history persisted by saveHistory / restored by loadModel.
    self.metrics = dict()
    self.metrics['trainLoss'] = list()
    self.metrics['trainRMSE'] = list()
    self.metrics['cvLoss'] = list()
    self.metrics['cvRMSE'] = list()
    if FUSE_TRAIN_CV:
        # Fold CV data into training and evaluate "CV" on the test split.
        self.trainMat = self.trainMat + self.cvMat
        self.trainMask = self.trainMat != 0
        self.cvMat = self.testMat
        self.cvMask = self.testMask
    log('Matrix Size: ' + str(self.trainMat.shape))
def run(self):
    """Driver for the rating-reconstruction model.

    Trains on trainMat, validates on cvMat every 5 epochs (checkpointing
    on the same cadence), then reports final test-set loss/RMSE.
    """
    self.prepare_model()
    log('Model Prepared')
    stloc = 0
    if LOAD_MODEL is not None:
        self.loadModel()
        # Resume at the epoch count recorded in the restored history.
        stloc = len(self.metrics['trainLoss'])
    else:
        init = tf.global_variables_initializer()
        self.sess.run(init)
        log('Variables Inited')
    for ep in range(stloc, EPOCH):
        loss, rmse = self.runEpoch(inpMat=self.trainMat,
                                   labelMat=self.trainMat,
                                   labelMask=self.trainMask,
                                   train=True)
        log('Epoch %d/%d, Train: Loss = %.3f, RMSE = %.3f' %
            (ep, EPOCH, loss, rmse))
        if ep % 5 == 0:
            # Validate on a 0.9 fraction of the CV batches, and save on
            # the same cadence (the original tested ep % 5 twice).
            loss, rmse = self.runEpoch(inpMat=self.trainMat,
                                       labelMat=self.cvMat,
                                       labelMask=self.cvMask,
                                       steps=0.9)
            log('Epoch %d/%d, CV: Loss = %.3f, RMSE = %.3f' %
                (ep, EPOCH, loss, rmse))
            self.saveHistory()
        print('')
    loss, rmse = self.runEpoch(inpMat=self.trainMat,
                               labelMat=self.testMat,
                               labelMask=self.testMask)
    log('Overall, Test: Loss = %.3f, RMSE = %.5f' % (loss, rmse))
    self.saveHistory()
def saveHistory(self):
    """Persist the metric history and a model checkpoint under SAVE_PATH.

    No-op when EPOCH is 0 (nothing was trained, nothing to save).
    """
    if EPOCH == 0:
        return
    his_file = 'History/' + SAVE_PATH + '.his'
    with open(his_file, 'wb') as fs:
        pickle.dump(self.metrics, fs)
    tf.train.Saver().save(self.sess, 'Models/' + SAVE_PATH)
    log('Model Saved: %s' % SAVE_PATH)
def saveHistory(self):
    """Persist the metric history and a model checkpoint under
    args.save_path.

    No-op when args.epoch is 0 (nothing was trained, nothing to save).
    """
    if args.epoch == 0:
        return
    his_file = 'History/' + args.save_path + '.his'
    with open(his_file, 'wb') as fs:
        pickle.dump(self.metrics, fs)
    tf.train.Saver().save(self.sess, 'Models/' + args.save_path)
    log('Model Saved: %s' % args.save_path)
def trainEpoch(self):
    """One training pass: sample users, draw positive/negative item pairs
    per user, and run the optimizer batch by batch.

    Returns:
        dict with mean 'Loss' and 'preLoss' over the epoch's steps.
    """
    trnMat = self.trnMat
    num = trnMat.shape[0]
    # Mix a random subset of training users with all test users, then
    # shuffle the combined id list.
    trnSfIds = np.random.permutation(num)[:args.trn_num]
    tstSfIds = self.tstUsrs
    sfIds = np.random.permutation(np.concatenate((trnSfIds, tstSfIds)))
    # sfIds = trnSfIds
    epochLoss, epochPreLoss = [0] * 2
    num = len(sfIds)
    steps = int(np.ceil(num / args.batch))
    for i in range(steps):
        # Fresh shuffled item order used by negSamp when drawing negatives.
        curLst = list(np.random.permutation(self.inpDim))
        st = i * args.batch
        ed = min((i + 1) * args.batch, num)
        batchIds = sfIds[st:ed]
        temTrn = trnMat[batchIds].toarray()
        tembuy = self.buyMat[batchIds].toarray()
        # posbat * negsamp (positive, negative) index pairs per user.
        temPos = [[None] * (args.posbat * args.negsamp)
                  for i in range(len(batchIds))]
        temNeg = [[None] * (args.posbat * args.negsamp)
                  for i in range(len(batchIds))]
        for ii in range(len(batchIds)):
            row = batchIds[ii]
            posset = np.reshape(np.argwhere(tembuy[ii] != 0), [-1])
            negset = negSamp(tembuy[ii], curLst)
            idx = 0
            # NOTE(review): np.random.choice raises if posset is empty —
            # the disabled fallback below suggests this can happen; verify
            # all sampled users have at least one purchase.
            # if len(posset) == 0:
            # 	posset = np.random.choice(list(range(args.item)), args.posbat)
            for j in np.random.choice(posset, args.posbat):
                for k in np.random.choice(negset, args.negsamp):
                    temPos[ii][idx] = j
                    temNeg[ii][idx] = k
                    idx += 1
        target = [self.optimizer, self.preLoss, self.regLoss, self.loss]
        res = self.sess.run(target, feed_dict={
            self.interaction: (temTrn).astype('int32'),
            self.posLabel: temPos,
            self.negLabel: temNeg
        }, options=config_pb2.RunOptions(
            report_tensor_allocations_upon_oom=True))
        # res[0] is the optimizer op; keep only the loss scalars.
        preLoss, regLoss, loss = res[1:]
        epochLoss += loss
        epochPreLoss += preLoss
        log('Step %d/%d: loss = %.2f, regLoss = %.2f         ' %
            (i, steps, loss, regLoss), save=False, oneline=True)
    ret = dict()
    ret['Loss'] = epochLoss / steps
    ret['preLoss'] = epochPreLoss / steps
    return ret
def testEpoch(self):
    """Ranking evaluation: score each test user's held-out item against 99
    sampled non-interacted items and accumulate HR/NDCG.

    Returns:
        dict with 'HR' and 'NDCG' averaged over all test users.
    """
    trnMat = self.trnMat
    tstInt = self.tstInt
    epochHit, epochNdcg = [0] * 2
    ids = self.tstUsrs
    num = len(ids)
    testbatch = args.batch
    steps = int(np.ceil(num / testbatch))
    for i in range(steps):
        st = i * testbatch
        ed = min((i + 1) * testbatch, num)
        batchIds = ids[st:ed]
        temTrn = trnMat[batchIds].toarray()
        temTst = tstInt[batchIds]
        tembuy = self.buyMat[batchIds].toarray()
        # get test locations: 99 random negatives + the positive item,
        # giving a candidate list of exactly 100 per user.
        tstLocs = [None] * len(batchIds)
        for j in range(len(batchIds)):
            negset = np.reshape(np.argwhere(tembuy[j] == 0), [-1])
            rdnNegSet = np.random.permutation(negset)
            tstLocs[j] = list(rdnNegSet[:99])
            # If the positive happens to be among the sampled negatives,
            # append the 100th negative instead to keep the list at 100.
            # NOTE(review): assumes >= 100 non-interacted items per user;
            # rdnNegSet[99] would raise IndexError otherwise — confirm.
            tem = ([rdnNegSet[99]]
                   if temTst[j] in tstLocs[j] else [temTst[j]])
            tstLocs[j] = tstLocs[j] + tem
        preds = self.sess.run(self.posPred, feed_dict={
            self.interaction: temTrn.astype('int32'),
            self.posLabel: tstLocs
        }, options=config_pb2.RunOptions(
            report_tensor_allocations_upon_oom=True))
        hit, ndcg = self.calcRes(preds, temTst, tstLocs)
        epochHit += hit
        epochNdcg += ndcg
        log('Step %d/%d: hit = %d, ndcg = %d          ' %
            (i, steps, hit, ndcg), save=False, oneline=True)
    ret = dict()
    ret['HR'] = epochHit / num
    ret['NDCG'] = epochNdcg / num
    return ret
def runEpoch(self, inpMat, labelMat, labelMask, train=False, steps=-1):
    """Run one pass over (a fraction of) the data.

    Args:
        inpMat: sparse input matrix fed to the network.
        labelMat: sparse target matrix.
        labelMask: sparse mask marking which label entries are observed.
        train: when True, also run the optimizer and record metrics under
            the train keys; otherwise record CV metrics.
        steps: -1 for all batches; a float in (0, 1) runs that fraction of
            the batches; any value above the batch count is clamped.

    Returns:
        (epochLoss, epochRmse) for the pass.
    """
    num = inpMat.shape[0]
    shuffledIds = np.random.permutation(num)
    epochLoss, epochRmse, epochNum = [0] * 3
    temStep = int(np.ceil(num / BATCH_SIZE))
    if steps == -1 or steps > temStep:
        steps = temStep
    elif steps > 0 and steps < 1:
        # Fractional request: run only that share of the batches.
        steps = int(steps * temStep)
    for i in range(steps):
        st = i * BATCH_SIZE
        ed = min((i + 1) * BATCH_SIZE, num)
        batchIds = shuffledIds[st:ed]
        temTrain = inpMat[batchIds].toarray()
        temLabel = labelMat[batchIds].toarray()
        temMask = labelMask[batchIds].toarray()
        target = [self.preLoss, self.loss, self.regLoss, self.defaultErr]
        if train:
            target = [self.optimizer] + target
        res = self.sess.run(target, feed_dict={
            self.inputR: temTrain,
            self.mask: temMask,
            self.label: temLabel,
        }, options=config_pb2.RunOptions(
            report_tensor_allocations_upon_oom=True))
        # Slice from the end so the optimizer op (when present) is skipped.
        preLoss, loss, regLoss, defaultErr = res[-4:]
        epochLoss += loss
        # NOTE(review): preLoss appears to be a per-row average error, so
        # scaling by the batch size plus defaultErr reconstructs a summed
        # squared error for the RMSE — confirm against the graph build.
        epochRmse += preLoss * (ed - st) + defaultErr
        epochNum += np.sum(temMask)
        log('Step %d/%d: loss = %.2f, regLoss = %.2f' %
            (i, steps, loss, regLoss), save=False, oneline=True)
    epochRmse = np.sqrt(epochRmse / epochNum)
    epochLoss = epochLoss / steps
    if train:
        self.metrics['trainLoss'].append(epochLoss)
        self.metrics['trainRMSE'].append(epochRmse)
    else:
        self.metrics['cvLoss'].append(epochLoss)
        self.metrics['cvRMSE'].append(epochRmse)
    return epochLoss, epochRmse
def trainEpoch(self):
    """One training pass over randomly-ordered training days.

    Returns:
        dict with mean 'Loss' and 'preLoss' over the epoch's steps.
    """
    # Only days with a full temporalRange of history behind them are usable.
    ids = np.random.permutation(
        list(range(args.temporalRange, args.trnDays)))
    epochLoss, epochPreLoss, epochAcc = [0] * 3
    num = len(ids)
    steps = int(np.ceil(num / args.batch))
    for i in range(steps):
        st = i * args.batch
        ed = min((i + 1) * args.batch, num)
        batIds = ids[st:ed]
        tem = self.sampleTrainBatch(batIds)
        # Regression batches ('r') carry an extra mask tensor;
        # classification batches ('c') do not.
        if args.task == 'c':
            feats, labels = tem
        elif args.task == 'r':
            feats, labels, mask = tem
        targets = [self.optimizer, self.preLoss, self.loss]
        feeddict = {
            self.feats: feats,
            self.labels: labels,
            self.dropRate: args.dropRate
        }
        if args.task == 'r':
            feeddict[self.mask] = mask
        res = self.sess.run(targets,
                            feed_dict=feeddict,
                            options=config_pb2.RunOptions(
                                report_tensor_allocations_upon_oom=True))
        # res[0] is the optimizer op; keep only the loss scalars.
        preLoss, loss = res[1:]
        epochLoss += loss
        epochPreLoss += preLoss
        log('Step %d/%d: preLoss = %.4f         ' % (i, steps, preLoss),
            save=False, oneline=True)
    ret = dict()
    ret['Loss'] = epochLoss / steps
    ret['preLoss'] = epochPreLoss / steps
    return ret
def runEpoch(self, inpMat, labelMat, train=False):
    """Denoising pass: randomly split entries into input (mask == 1) and
    prediction targets (mask == 0); rank metrics accumulate only when
    evaluating.

    Args:
        inpMat: sparse interaction matrix fed as (masked) input.
        labelMat: sparse matrix supplying the prediction targets.
        train: run the optimizer over shuffled users when True; evaluate
            over self.testIds otherwise.

    Returns:
        (epochLoss, epochPrecision, epochNdcg); the rank metrics are only
        accumulated when train is False (near-zero otherwise).
    """
    num = inpMat.shape[0]
    if train:
        sfIds = np.random.permutation(num)
    else:
        sfIds = self.testIds
        num = len(self.testIds)
    epochLoss, epochPrcNume, epochPrcDeno, epochNdcgNume, epochNdcgDeno = [0] * 5
    steps = int(np.ceil(num / BATCH_SIZE))
    for i in range(steps):
        st = i * BATCH_SIZE
        ed = min((i+1) * BATCH_SIZE, num)
        batchIds = sfIds[st: ed]
        temTrn = inpMat[batchIds].toarray()
        temLbl = labelMat[batchIds].toarray()
        target = [self.preLoss, self.loss, self.regLoss, self.pred]
        if train:
            target = [self.optimizer] + target
        # Random 0/1 mask: kept entries form the input, dropped entries
        # become the reconstruction targets.
        mask = np.random.randint(0, 2, size=(ed-st, self.inputDim), dtype='int32')
        temTrn = temTrn * mask
        temLbl = temLbl * (1-mask)
        res = self.sess.run(target, feed_dict={
            self.inp: temTrn,
            self.label: temLbl
        }, options=config_pb2.RunOptions(report_tensor_allocations_upon_oom=True))
        # Slice from the end so the optimizer op (when present) is skipped.
        preLoss, loss, regLoss, pred = res[-4:]
        epochLoss += loss
        if not train:
            # Accumulate precision/NDCG numerators and denominators
            # separately so the epoch-level ratio is correctly weighted.
            prcNume, prcDeno, ndcgNume, ndcgDeno = self.getHit(pred, temLbl)
            epochPrcNume += prcNume
            epochPrcDeno += prcDeno
            epochNdcgNume += ndcgNume
            epochNdcgDeno += ndcgDeno
        log('Step %d/%d: loss = %.2f, regLoss = %.2f' %
            (i, steps, loss, regLoss), save=False, oneline=True)
    # 1e-6 guards against division by zero when nothing was evaluated.
    epochPrecision = epochPrcNume / (1e-6+epochPrcDeno)
    epochNdcg = epochNdcgNume / (1e-6+epochNdcgDeno)
    epochLoss /= steps
    return epochLoss, epochPrecision, epochNdcg
def run(self):
    """Driver: train on trnMat with random masking, evaluate ranking
    metrics against tstMat every 5 epochs, then report a final test pass.
    """
    self.prepare_model()
    log('Model Prepared')
    stloc = 0
    if LOAD_MODEL is not None:
        # NOTE: unlike the sibling drivers, resuming here does not restore
        # the epoch counter — training restarts from epoch 0.
        self.loadModel()
    else:
        init = tf.global_variables_initializer()
        self.sess.run(init)
        log('Variables Inited')
    for ep in range(stloc, EPOCH):
        loss, precision, ndcg = self.runEpoch(inpMat=self.trnMat,
                                              labelMat=self.trnMat,
                                              train=True)
        log('Epoch %d/%d, Train: Loss = %.3f' % (ep, EPOCH, loss))
        if ep % 5 == 0:
            loss, precision, ndcg = self.runEpoch(inpMat=self.trnMat,
                                                  labelMat=self.tstMat)
            log('Epoch %d/%d, CV, Loss = %.3f, Prc = %.3f, NDCG = %.3f' %
                (ep, EPOCH, loss, precision, ndcg))
            self.saveHistory()
        print()
    loss, precision, ndcg = self.runEpoch(inpMat=self.trnMat,
                                          labelMat=self.tstMat)
    log('Overall, Test: Loss = %.3f, Prc = %.4f, NDCG = %.4f' %
        (loss, precision, ndcg))
    self.saveHistory()
def run(self):
    """Spatio-temporal driver: train, periodically test on the held-out
    time slice, and track the best result (lowest MAPE for regression,
    highest macro-F1 for classification).
    """
    self.prepareModel()
    log('Model Prepared')
    if args.load_model is not None:
        self.loadModel()
        stloc = len(self.metrics['TrainpreLoss']) * args.tstEpoch
    else:
        stloc = 0
        init = tf.global_variables_initializer()
        self.sess.run(init)
        log('Variables Inited')

    def _is_better(new, best):
        """True when `new` beats `best` (best is None means no best yet)."""
        if best is None:
            return True
        if args.task == 'r':
            # MAPE: lower is better.
            return new['MAPE'] < best['MAPE']
        # BUG FIX: F1 is higher-is-better; the original comparison updated
        # the best result when the new macroF1 was LOWER, tracking the
        # worst epoch instead of the best.
        return new['macroF1'] > best['macroF1']

    bestRes = None
    for ep in range(stloc, args.epoch):
        test = (ep % args.tstEpoch == 0)
        reses = self.trainEpoch()
        log(self.makePrint('Train', ep, reses, test))
        if test:
            reses = self.testEpoch(
                self.handler.tstT,
                np.concatenate([self.handler.trnT, self.handler.valT],
                               axis=1))
            if _is_better(reses, bestRes):
                bestRes = reses
            # Checkpoint on the same cadence as testing.
            self.saveHistory()
        print()
    reses = self.testEpoch(
        self.handler.tstT,
        np.concatenate([self.handler.trnT, self.handler.valT], axis=1))
    log(self.makePrint('Test', args.epoch, reses, True))
    if _is_better(reses, bestRes):
        bestRes = reses
    log(self.makePrint('Best', args.epoch, bestRes, True))
    self.saveHistory()
def saveHistory(self):
    """Checkpoint the session's variables under SAVE_PATH."""
    tf.train.Saver().save(self.sess, 'Models/' + SAVE_PATH)
    log('Model Saved: %s' % SAVE_PATH)
def saveHistory(self):
    """Persist the metric history and a model checkpoint under
    args.save_path; no-op when args.epoch is 0."""
    if args.epoch == 0:
        return
    with open('History/' + args.save_path + '.his', 'wb') as fs:
        pickle.dump(self.metrics, fs)
    saver = tf.train.Saver()
    saver.save(self.sess, 'Models/' + args.save_path)
    log('Model Saved: %s' % args.save_path)

def loadModel(self):
    """Restore variables and metric history saved by saveHistory."""
    saver = tf.train.Saver()
    # BUG FIX: restore into this instance's session (self.sess) rather
    # than the module-level `sess`, which only exists while the __main__
    # `with` block below is active.
    saver.restore(self.sess, 'Models/' + args.load_model)
    with open('History/' + args.load_model + '.his', 'rb') as fs:
        self.metrics = pickle.load(fs)
    log('Model Loaded')

if __name__ == '__main__':
    logger.saveDefault = True
    config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    config.gpu_options.allow_growth = True
    log('Start')
    datas = LoadData()
    log('Load Data')
    with tf.Session(config=config) as sess:
        recom = Recommender(sess, datas, args.item)
        recom.run()
def loadModel(self):
    """Restore model variables from the LOAD_MODEL checkpoint."""
    saver = tf.train.Saver()
    # BUG FIX: restore into this instance's session (self.sess) instead of
    # relying on a module-level `sess` global.
    saver.restore(self.sess, 'Models/' + LOAD_MODEL)
    log('Model Loaded')
def testEpoch(self):
    """Evaluate HR/NDCG/MRR over all test users, processing them in large
    sub-graph chunks so each sampled graph fits in memory.

    Returns:
        dict with 'HR', 'NDCG' and 'MRR' averaged over all test users.
    """
    epochHit, epochNdcg, epochMrr = [0] * 3
    allIds = self.handler.tstUsrs
    num = len(allIds)
    tstBat = args.batch
    divSize = args.divSize
    # Outer chunks of divSize users; inner batches of tstBat users.
    bigSteps = int(np.ceil(num / divSize))
    glb_i = 0
    glb_step = int(np.ceil(num / tstBat))
    for s in range(bigSteps):
        bigSt = s * divSize
        bigEd = min((s + 1) * divSize, num)
        ids = allIds[bigSt:bigEd]
        steps = int(np.ceil((bigEd - bigSt) / tstBat))
        posItms = self.handler.tstInt[ids]
        cnt = 0
        # Retry sampling: sampleLargeGraph may fail and return None for
        # u_ut; after 5 failed attempts the whole process exits.
        while True:
            u_ut, ut_i_beh, ut_i_item, ut_i_time, i_ut_adjs, u_i_dict, pckLabel, pckLabelP, usrs, itms = self.handler.sampleLargeGraph(
                ids, list(set(posItms)))
            cnt += 1
            if cnt == 5:
                exit()
            if not u_ut is None:
                break
        # Remap global user/item ids to positions within the sampled graph.
        usrIdMap = dict(map(lambda x: (usrs[x], x), range(len(usrs))))
        itmIdMap = dict(map(lambda x: (itms[x], x), range(len(itms))))
        ids = list(map(lambda x: usrIdMap[x], ids))
        # Test items absent from the sampled graph map to None.
        itmMapping = (lambda x: None
                      if (x is None or x not in itmIdMap) else itmIdMap[x])
        pckTstInt = np.array(
            list(
                map(lambda x: itmMapping(self.handler.tstInt[usrs[x]]),
                    range(len(usrs)))))
        feeddict = {
            self.allUsrs: usrs,
            self.allItms: itms,
            self.itmNum: len(itms),
            self.u_ut: u_ut,
            self.ut_i_beh: ut_i_beh,
            self.ut_i_item: ut_i_item,
            self.ut_i_time: ut_i_time
        }
        # Per-behavior adjacency (row/col index) feeds.
        for behid in range(args.behNums):
            feeddict[self.i_ut_adjs[behid]
                     ['rows']] = i_ut_adjs[behid]['rows']
            feeddict[self.i_ut_adjs[behid]
                     ['cols']] = i_ut_adjs[behid]['cols']
        for i in range(steps):
            st = i * tstBat
            ed = min((i + 1) * tstBat, bigEd - bigSt)
            batIds = ids[st:ed]
            uLocs, iLocs, temTst, tstLocs = self.sampleTestBatch(
                batIds, pckLabel, pckLabelP, pckTstInt, u_i_dict)
            feeddict[self.utids] = uLocs
            feeddict[self.iids] = iLocs
            preds, atts = self.sess.run(
                [self.pred, self.atts],
                feed_dict=feeddict,
                options=config_pb2.RunOptions(
                    report_tensor_allocations_upon_oom=True))
            # NOTE(review): the reshape implies 100 ranked candidate
            # locations per user — confirm sampleTestBatch's contract.
            hit, ndcg, mrr = self.calcRes(
                np.reshape(preds, [ed - st, 100]), temTst, tstLocs)
            epochHit += hit
            epochNdcg += ndcg
            epochMrr += mrr
            glb_i += 1
            log('Steps %d/%d: hit = %d, ndcg = %.2f, mrr = %.2f          ' %
                (glb_i, glb_step, hit, ndcg, mrr),
                save=False, oneline=True)
    ret = dict()
    ret['HR'] = epochHit / num
    ret['NDCG'] = epochNdcg / num
    ret['MRR'] = epochMrr / num
    return ret
def trainEpoch(self):
    """One training pass over sampled users, chunked into large sub-graphs
    so each sampled graph fits in memory.

    Returns:
        dict with mean 'Loss' and 'preLoss' over all global steps.
    """
    num = args.user
    allIds = np.random.permutation(num)[:args.trnNum]
    epochLoss, epochPreLoss = [0] * 2
    num = len(allIds)
    divSize = args.divSize
    # Outer chunks of divSize users; inner batches of args.batch users.
    bigSteps = int(np.ceil(num / divSize))
    glb_i = 0
    glb_step = int(np.ceil(num / args.batch))
    for s in range(bigSteps):
        bigSt = s * divSize
        bigEd = min((s + 1) * divSize, num)
        sfIds = allIds[bigSt:bigEd]
        steps = int(np.ceil((bigEd - bigSt) / args.batch))
        cnt = 0
        # Retry sampling: sampleLargeGraph may fail and return None for
        # u_ut; after 5 failed attempts the whole process exits.
        while True:
            u_ut, ut_i_beh, ut_i_item, ut_i_time, i_ut_adjs, u_i_dict, pckLabel, _, usrs, itms = self.handler.sampleLargeGraph(
                sfIds)
            cnt += 1
            if cnt == 5:
                exit()
            if not u_ut is None:
                break
        # Remap global user ids to positions within the sampled graph.
        usrIdMap = dict(map(lambda x: (usrs[x], x), range(len(usrs))))
        sfIds = list(map(lambda x: usrIdMap[x], sfIds))
        feeddict = {
            self.allUsrs: usrs,
            self.allItms: itms,
            self.itmNum: len(itms),
            self.u_ut: u_ut,
            self.ut_i_beh: ut_i_beh,
            self.ut_i_item: ut_i_item,
            self.ut_i_time: ut_i_time
        }
        # Per-behavior adjacency (row/col index) feeds.
        for behid in range(args.behNums):
            feeddict[self.i_ut_adjs[behid]
                     ['rows']] = i_ut_adjs[behid]['rows']
            feeddict[self.i_ut_adjs[behid]
                     ['cols']] = i_ut_adjs[behid]['cols']
        for i in range(steps):
            st = i * args.batch
            ed = min((i + 1) * args.batch, bigEd - bigSt)
            batIds = sfIds[st:ed]
            uLocs, iLocs = self.sampleTrainBatch(batIds, len(itms),
                                                 pckLabel, u_i_dict)
            target = [
                self.optimizer, self.preLoss, self.regLoss, self.loss
            ]
            feeddict[self.utids] = uLocs
            feeddict[self.iids] = iLocs
            res = self.sess.run(
                target,
                feed_dict=feeddict,
                options=config_pb2.RunOptions(
                    report_tensor_allocations_upon_oom=True))
            # res[0] is the optimizer op; keep only the loss scalars.
            preLoss, regLoss, loss = res[1:]
            epochLoss += loss
            epochPreLoss += preLoss
            glb_i += 1
            log('Step %d/%d: loss = %.2f, regLoss = %.2f         ' %
                (glb_i, glb_step, loss, regLoss),
                save=False, oneline=True)
    ret = dict()
    ret['Loss'] = epochLoss / glb_step
    ret['preLoss'] = epochPreLoss / glb_step
    return ret
def loadModel(self):
    """Restore variables and metric history for args.load_model."""
    saver = tf.train.Saver()
    # BUG FIX: restore into this instance's session (self.sess) instead of
    # relying on a module-level `sess` global.
    saver.restore(self.sess, 'Models/' + args.load_model)
    with open('History/' + args.load_model + '.his', 'rb') as fs:
        self.metrics = pickle.load(fs)
    log('Model Loaded')
def testEpoch(self, tstTensor, inpTensor):
    """Evaluate on a test tensor, accumulating confusion counts (task 'c')
    or error sums (task 'r') per offset and overall.

    Args:
        tstTensor: tensor of test targets; batched along its second axis.
        inpTensor: history tensor batches are sampled from.

    Returns:
        dict of metrics: 'preLoss' always; F1_i / microF1 / macroF1 for
        classification, RMSE_i / MAE_i / MAPE_i plus overall
        RMSE / MAE / MAPE for regression.
    """
    ids = np.random.permutation(list(range(tstTensor.shape[1])))
    epochLoss, epochPreLoss, = [0] * 2
    if args.task == 'c':
        # Per-offset true/false positive/negative counters (length-4,
        # i.e. one slot per offset up to args.offNum — presumably
        # offNum <= 4; confirm).
        epochTp, epochFp, epochTn, epochFn = [
            np.zeros(4) for i in range(4)
        ]
    elif args.task == 'r':
        # Per-offset sums of squared/absolute/percentage errors and counts.
        epochSqLoss, epochAbsLoss, epochTstNum, epochApeLoss, epochPosNums = [
            np.zeros(4) for i in range(5)
        ]
    num = len(ids)
    steps = int(np.ceil(num / args.batch))
    for i in range(steps):
        st = i * args.batch
        ed = min((i + 1) * args.batch, num)
        batIds = ids[st:ed]
        tem = self.sampTestBatch(batIds, tstTensor, inpTensor)
        # Regression batches carry an extra mask tensor.
        if args.task == 'c':
            feats, labels = tem
        elif args.task == 'r':
            feats, labels, mask = tem
        if args.task == 'c':
            targets = [
                self.preLoss, self.regLoss, self.loss, self.truePos,
                self.falsePos, self.trueNeg, self.falseNeg
            ]
            feeddict = {
                self.feats: feats,
                self.labels: labels,
                self.dropRate: 0.0  # no dropout at evaluation time
            }
        elif args.task == 'r':
            targets = [
                self.preds, self.preLoss, self.regLoss, self.loss,
                self.sqLoss, self.absLoss, self.tstNums, self.apeLoss,
                self.posNums
            ]
            feeddict = {
                self.feats: feats,
                self.labels: labels,
                self.dropRate: 0.0,
                self.mask: mask
            }
        res = self.sess.run(targets,
                            feed_dict=feeddict,
                            options=config_pb2.RunOptions(
                                report_tensor_allocations_upon_oom=True))
        if args.task == 'c':
            preLoss, regLoss, loss, tp, fp, tn, fn = res
            epochTp += tp
            epochFp += fp
            epochTn += tn
            epochFn += fn
        elif args.task == 'r':
            preds, preLoss, regLoss, loss, sqLoss, absLoss, tstNums, apeLoss, posNums = res
            epochSqLoss += sqLoss
            epochAbsLoss += absLoss
            epochTstNum += tstNums
            epochApeLoss += apeLoss
            epochPosNums += posNums
        epochLoss += loss
        epochPreLoss += preLoss
        log('Step %d/%d: loss = %.2f, regLoss = %.2f         ' %
            (i, steps, loss, regLoss),
            save=False, oneline=True)
    ret = dict()
    ret['preLoss'] = epochPreLoss / steps
    if args.task == 'c':
        temSum = 0
        for i in range(args.offNum):
            ret['F1_%d' % i] = epochTp[i] * 2 / (epochTp[i] * 2 +
                                                 epochFp[i] + epochFn[i])
            temSum += ret['F1_%d' % i]
        # NOTE(review): naming looks swapped vs. standard terminology —
        # 'microF1' below is the per-class average (usually called
        # macro-F1) and 'macroF1' is the pooled-count F1 (usually
        # micro-F1). Keys are kept as-is because callers read 'macroF1'.
        ret['microF1'] = temSum / args.offNum
        ret['macroF1'] = np.sum(epochTp) * 2 / (
            np.sum(epochTp) * 2 + np.sum(epochFp) + np.sum(epochFn))
    elif args.task == 'r':
        for i in range(args.offNum):
            ret['RMSE_%d' % i] = np.sqrt(epochSqLoss[i] / epochTstNum[i])
            ret['MAE_%d' % i] = epochAbsLoss[i] / epochTstNum[i]
            ret['MAPE_%d' % i] = epochApeLoss[i] / epochPosNums[i]
        ret['RMSE'] = np.sqrt(np.sum(epochSqLoss) / np.sum(epochTstNum))
        ret['MAE'] = np.sum(epochAbsLoss) / np.sum(epochTstNum)
        ret['MAPE'] = np.sum(epochApeLoss) / np.sum(epochPosNums)
    return ret