Example #1
0
    def vertical_attr(self, lenth):
        """Accumulate attribution mass per time-step position over converted
        test sequences of exactly `lenth` touch points, then print the result.

        Args:
            lenth: sequence length to filter on (parameter name kept as-is
                for caller compatibility).

        Side effects:
            Consumes batches from the module-level `f_test` file handle
            (assumed to be an open test-set file -- TODO confirm) and prints
            the accumulated per-position attribution list.
        """
        attr = [0.] * lenth
        num_examples = testdata_size  # module-level test-set size

        # NOTE: the original reused `i` for both the batch loop and the
        # in-batch loop; use `_` for the batch counter to avoid shadowing.
        for _ in range(num_examples // self.batchsize):
            data, compaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
                batchsize=self.batchsize,
                max_input=self.feature_number + 2,
                max_seq_len=self.max_seq_len,
                fin=f_test)
            feed_dict = {
                self.x: data,
                self.y: labels,
                self.time: time,
                self.seqlen: seqlen,
            }
            attribution = self.sess.run([self.attribution], feed_dict)[0]
            # Only converted sequences (label == 1) of the requested length
            # contribute to the per-position totals.
            for i in range(len(labels)):
                if seqlen[i] == lenth and labels[i] == 1:
                    for j in range(seqlen[i]):
                        # attribution is indexed [time_step][batch_index]
                        attr[j] += attribution[j][i]

        print(attr)
Example #2
0
 def train_one_epoch(self):
     """Run one full training epoch over the file at `train_path`.

     Feeds every batch through the train op, then reports the mean of each
     loss component and the training AUC to stdout and the module-level
     `f_log` file.

     Returns:
         Tuple (mean_loss, auc, mean_loss_0, mean_loss_1, mean_loss_2).
     """
     num_examples = traindata_size
     print(num_examples)
     total_loss = []
     total_loss_0 = []
     total_loss_1 = []
     total_loss_2 = []
     pred = []
     label = []
     # `with` guarantees the training file is closed even if a batch raises
     # (the original leaked the handle on any error before f_train.close()).
     with open(train_path) as f_train:
         pbar = tqdm.tqdm(total=traindata_size // self.batchsize)
         try:
             for _ in range(num_examples // self.batchsize):
                 pbar.update(1)
                 data, compaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
                     batchsize=self.batchsize,
                     max_input=self.feature_number + 2,
                     max_seq_len=self.max_seq_len,
                     fin=f_train)
                 feed_dict = {
                     self.x: data,
                     self.y: labels,
                     self.time: time,
                     self.seqlen: seqlen,
                 }
                 fetches = [
                     self.train_step, self.cross_entropy, self.prediction,
                     self.h, self.loss_0, self.loss_1, self.loss_2
                 ]
                 result = self.sess.run(fetches, feed_dict=feed_dict)
                 _, loss, prediction, h, loss_0, loss_1, loss_2 = result
                 total_loss.append(loss)
                 total_loss_0.append(loss_0)
                 total_loss_1.append(loss_1)
                 total_loss_2.append(loss_2)
                 pred += prediction.tolist()
                 label += labels
         finally:
             # The original never closed the progress bar, which corrupts
             # subsequent terminal output across epochs.
             pbar.close()
     print("training finished")
     mean_loss = np.mean(total_loss)
     mean_loss_0 = np.mean(total_loss_0)
     mean_loss_1 = np.mean(total_loss_1)
     mean_loss_2 = np.mean(total_loss_2)
     auc = roc_auc_score(label, pred)
     print("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(
         mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
     f_log.write("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}\n".format(
         mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
     return mean_loss, auc, mean_loss_0, mean_loss_1, mean_loss_2
Example #3
0
    def test(self, epoch=0):
        """Evaluate the model over the file at `test_path`.

        Runs every test batch through the loss/prediction ops, then reports
        the mean loss components and test AUC to stdout, the module-level
        `f_log` file, and `self.log`.

        Args:
            epoch: epoch index used only for logging. Defaults to 0.
        """
        num_examples = testdata_size
        print(num_examples)
        total_loss = []
        total_loss_0 = []
        total_loss_1 = []
        total_loss_2 = []
        pred = []
        label = []
        # `with` closes the test file even if a batch raises; the original
        # only closed it on the success path, after logging.
        with open(test_path) as f_test:
            for _ in range(num_examples // self.batchsize):
                data, compaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
                    batchsize=self.batchsize,
                    max_input=self.feature_number + 2,
                    max_seq_len=self.max_seq_len,
                    fin=f_test)
                feed_dict = {
                    self.x: data,
                    self.y: labels,
                    self.time: time,
                    self.seqlen: seqlen,
                }
                fetches = [
                    self.cross_entropy, self.prediction, self.h, self.loss_0,
                    self.loss_1, self.loss_2
                ]
                result = self.sess.run(fetches, feed_dict=feed_dict)
                loss, prediction, h, loss_0, loss_1, loss_2 = result
                total_loss.append(loss)
                total_loss_0.append(loss_0)
                total_loss_1.append(loss_1)
                total_loss_2.append(loss_2)
                pred += prediction.tolist()
                label += labels

        print("testing {}-th epoch finished".format(epoch))
        mean_loss = np.mean(total_loss)
        mean_loss_0 = np.mean(total_loss_0)
        mean_loss_1 = np.mean(total_loss_1)
        mean_loss_2 = np.mean(total_loss_2)
        auc = roc_auc_score(label, pred)
        print("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(
            mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
        f_log.write("{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}\n".format(
            mean_loss, mean_loss_0, mean_loss_1, mean_loss_2, auc))
        self.log(epoch, [auc], 'test')
Example #4
0
    def attr(self):
        """Compute the mean attribution per channel over the test set and
        write one "channel<TAB>mean_attribution" line per channel.

        Side effects:
            Reads `test_path` and 'index2channel.pkl'; writes
            './attribute_criteo_s1/AMTA.txt'.
        """
        num_examples = testdata_size

        # Context managers close all three handles; the original never closed
        # any of them, so the last buffered lines of `outfile` could be lost.
        with open(test_path) as f_test, \
                open('./attribute_criteo_s1/AMTA.txt', 'w') as outfile, \
                open('index2channel.pkl', 'rb') as filec:
            Channel = pkl.load(filec)  # maps feature index (str) -> channel name
            Channel_value = {}  # channel -> summed attribution
            Channel_time = {}   # channel -> occurrence count
            # `_` instead of `i` for the batch counter: the original shadowed
            # it with the in-batch loop variable below.
            for _ in range(num_examples // self.batchsize):
                data, compaign_data, labels, seqlen, time = loadCriteo.loadCriteoBatch_AMTA(
                    batchsize=self.batchsize,
                    max_input=self.feature_number + 2,
                    max_seq_len=self.max_seq_len,
                    fin=f_test)
                feed_dict = {
                    self.x: data,
                    self.y: labels,
                    self.time: time,
                    self.seqlen: seqlen,
                }
                attribution = self.sess.run([self.attribution], feed_dict)[0]
                for i in range(self.batchsize):
                    if labels[i] != 0:  # converted sequences only
                        for j in range(seqlen[i]):
                            # First feature of each touch point indexes the
                            # channel -- assumed from the pkl mapping; verify.
                            index = Channel[str(data[i][j][0])]
                            v = attribution[j][i]
                            if index in Channel_value:
                                Channel_value[index] += v
                                Channel_time[index] += 1
                            else:
                                Channel_value[index] = v
                                Channel_time[index] = 1

            for key in Channel_value:
                outfile.write(key + '\t' +
                              str(Channel_value[key] / Channel_time[key]) + '\n')