# These methods assume the module-level imports and globals used elsewhere in
# this file: numpy as np, tqdm, pickle as pkl, loadCriteo,
# sklearn.metrics.roc_auc_score, traindata_size, testdata_size, train_path,
# test_path, and the log file handle f_log.
def vertical_attr(self, length):
    # Accumulate the model's attribution at every position of converted
    # test sequences whose length equals `length`.
    attr = [0.] * length
    num_examples = testdata_size
    f_test = open(test_path)
    for _ in range(num_examples // self.batchsize):
        data, campaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
            batchsize=self.batchsize,
            max_input=self.feature_number + 2,
            max_seq_len=self.max_seq_len,
            fin=f_test)
        feed_dict = {
            self.x: data,
            self.y: labels,
            self.time: time,
            self.seqlen: seqlen,
            # self.channel: campaign_data
        }
        fetches = [self.attribution]
        attribution = self.sess.run(fetches, feed_dict)[0]
        for i in range(len(labels)):
            if seqlen[i] == length and labels[i] == 1:
                for j in range(seqlen[i]):
                    # attribution is indexed [position][sample in batch]
                    attr[j] += attribution[j][i]
    f_test.close()
    print(attr)
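# A minimal standalone sketch (not part of the original model) of how the raw
# per-position sums printed by vertical_attr can be turned into credit
# shares; the helper name and the example numbers are assumptions.
def _position_shares(attr):
    # Normalize raw per-position credit sums so they sum to 1.
    total = sum(attr)
    return [v / total for v in attr] if total else attr

# e.g. _position_shares([0.8, 1.4, 2.1]) ~= [0.19, 0.33, 0.49]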
def train_one_epoch(self):
    num_examples = traindata_size
    print(num_examples)
    total_loss = []
    total_loss_0 = []
    total_loss_1 = []
    total_loss_2 = []
    pred = []
    label = []
    f_train = open(train_path)
    pbar = tqdm.tqdm(total=traindata_size // self.batchsize)
    for i in range(num_examples // self.batchsize):
        pbar.update(1)
        data, campaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
            batchsize=self.batchsize,
            max_input=self.feature_number + 2,
            max_seq_len=self.max_seq_len,
            fin=f_train)
        feed_dict = {
            self.x: data,
            self.y: labels,
            self.time: time,
            self.seqlen: seqlen,
            # self.channel: campaign_data
        }
        fetches = [
            self.train_step, self.cross_entropy, self.prediction, self.h,
            self.loss_0, self.loss_1, self.loss_2
        ]
        result = self.sess.run(fetches, feed_dict=feed_dict)
        _, loss, prediction, h, loss_0, loss_1, loss_2 = result
        total_loss.append(loss)
        total_loss_0.append(loss_0)
        total_loss_1.append(loss_1)
        total_loss_2.append(loss_2)
        pred += prediction.tolist()
        label += labels
    pbar.close()
    print("training finished")
    mean_loss = np.mean(total_loss)
    mean_loss_0 = np.mean(total_loss_0)
    mean_loss_1 = np.mean(total_loss_1)
    mean_loss_2 = np.mean(total_loss_2)
    auc = roc_auc_score(label, pred)
    print("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(
        mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
    f_train.close()
    f_log.write("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}\n".format(
        mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
    return mean_loss, auc, mean_loss_0, mean_loss_1, mean_loss_2
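# A hedged sketch of a driver that alternates training with evaluation;
# `run_training`, `model`, and `epochs` are illustrative names, and the model
# object is assumed to expose the train_one_epoch and test methods above.
def run_training(model, epochs=10):
    best_auc = 0.0
    for epoch in range(epochs):
        # train_one_epoch returns (mean_loss, auc, loss_0, loss_1, loss_2)
        _, auc, _, _, _ = model.train_one_epoch()
        model.test(epoch)
        best_auc = max(best_auc, auc)
    return best_auc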
def test(self, epoch=0):
    num_examples = testdata_size
    print(num_examples)
    total_loss = []
    total_loss_0 = []
    total_loss_1 = []
    total_loss_2 = []
    pred = []
    label = []
    f_test = open(test_path)
    for i in range(num_examples // self.batchsize):
        data, campaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
            batchsize=self.batchsize,
            max_input=self.feature_number + 2,
            max_seq_len=self.max_seq_len,
            fin=f_test)
        feed_dict = {
            self.x: data,
            self.y: labels,
            self.time: time,
            self.seqlen: seqlen,
            # self.channel: campaign_data
        }
        fetches = [
            self.cross_entropy, self.prediction, self.h,
            self.loss_0, self.loss_1, self.loss_2
        ]
        result = self.sess.run(fetches, feed_dict=feed_dict)
        loss, prediction, h, loss_0, loss_1, loss_2 = result
        total_loss.append(loss)
        total_loss_0.append(loss_0)
        total_loss_1.append(loss_1)
        total_loss_2.append(loss_2)
        pred += prediction.tolist()
        label += labels
    print("testing {}-th epoch finished".format(epoch))
    mean_loss = np.mean(total_loss)
    mean_loss_0 = np.mean(total_loss_0)
    mean_loss_1 = np.mean(total_loss_1)
    mean_loss_2 = np.mean(total_loss_2)
    auc = roc_auc_score(label, pred)
    print("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(
        mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
    f_log.write("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}\n".format(
        mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
    self.log(epoch, [auc], 'test')
    f_test.close()
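# Tiny sanity check of the metric used above, with made-up scores; it relies
# on the roc_auc_score already imported for this file.
def _auc_sanity_check():
    # 3 of the 4 positive/negative pairs below are ranked correctly,
    # so the AUC is 0.75.
    return roc_auc_score([0, 1, 1, 0], [0.4, 0.8, 0.3, 0.2])  # 0.75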
def attr(self):
    # Average the model's attribution per channel over converted test
    # sequences and write the result to disk.
    num_examples = testdata_size
    f_test = open(test_path)
    outfile = open('./attribute_criteo_s1/AMTA.txt', 'w')
    filec = open('index2channel.pkl', 'rb')
    Channel = pkl.load(filec)
    Channel_value = {}
    Channel_time = {}
    for _ in range(num_examples // self.batchsize):
        data, campaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
            batchsize=self.batchsize,
            max_input=self.feature_number + 2,
            max_seq_len=self.max_seq_len,
            fin=f_test)
        feed_dict = {
            self.x: data,
            self.y: labels,
            self.time: time,
            self.seqlen: seqlen,
            # self.channel: campaign_data
        }
        fetches = [self.attribution]
        attribution = self.sess.run(fetches, feed_dict)[0]
        # Iterate over len(labels) rather than self.batchsize so a short
        # final batch is handled correctly.
        for i in range(len(labels)):
            if labels[i] != 0:
                for j in range(seqlen[i]):
                    index = Channel[str(data[i][j][0])]
                    v = attribution[j][i]
                    if index in Channel_value:
                        Channel_value[index] += v
                        Channel_time[index] += 1
                    else:
                        Channel_value[index] = v
                        Channel_time[index] = 1
    for key in Channel_value:
        outfile.write(key + '\t' + str(Channel_value[key] / Channel_time[key]) + '\n')
    outfile.close()
    filec.close()
    f_test.close()
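# A hypothetical reader for the file written by attr() above: each line is
# "<channel>\t<average attribution>", so sorting by value gives a quick
# ranking of channels. The function name is illustrative.
def read_channel_attribution(path='./attribute_criteo_s1/AMTA.txt'):
    credits = {}
    with open(path) as fin:
        for line in fin:
            channel, value = line.strip().split('\t')
            credits[channel] = float(value)
    # Highest average credit first.
    return sorted(credits.items(), key=lambda kv: -kv[1])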