Example #1
0
    def join(self):
        """Convert raw (user, item, click) records into a slot-feature file.

        Options read through ``pydev.AutoArg``:
            testnum: forwarded to ``utils.readfile`` as ``test_num``
                (default ``-1``).
            f: path of the input record file.
            m: directory handed to ``MovieLensRankingFeatureExtractor``.
            s: path passed as the second argument of ``extractor.save``.
            o: path the ``SlotFileWriter`` writes to.
            c: path passed as the first argument of ``extractor.save``.

        For every record one instance is written, labelled with ``click`` and
        holding one slot per callable returned by ``extractor.processes()``.
        """
        arg = pydev.AutoArg()
        test_num = int(arg.option('testnum', -1))
        input_filename = arg.option('f')
        movie_dir = arg.option('m')
        slot_output_filename = arg.option('s')
        output_filename = arg.option('o')
        coder_output_filename = arg.option('c')

        # ``open`` replaces the Python-2-only ``file`` builtin and the ``with``
        # block guarantees the handle is closed.  The whole loop stays inside
        # the block because ``utils.readfile`` may read the file lazily.
        with open(input_filename) as input_file:
            data = utils.readfile(input_file, test_num=test_num)

            extractor = MovieLensRankingFeatureExtractor(movie_dir)
            writer = sf.SlotFileWriter(output_filename)
            for user_id, item_id, click in tqdm.tqdm(data):
                writer.begin_instance(click)

                extractor.begin(user_id, item_id)
                # Each process is a callable returning (slot, id list).
                for process in extractor.processes():
                    slot, lst = process()
                    writer.write_slot(slot, lst)

                writer.end_instance()

        extractor.save(coder_output_filename, slot_output_filename)
        writer.summary()
Example #2
0
    def lr(self):
        """Load a saved ``train_lr.LRRank`` model and evaluate it.

        The checkpoint path comes from the ``model`` option (default
        ``temp/lr.pkl``).  The restored model is moved to ``self.device`` and
        handed to ``self.test_uid_iid_model``.
        """
        import train_lr
        # NOTE(review): the arguments look like user count, item count and
        # embedding size -- confirm against train_lr.LRRank.
        model = train_lr.LRRank(138494, 131263, 8)

        auto_arg = pydev.AutoArg()
        model_path = auto_arg.option('model', 'temp/lr.pkl')

        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be restored when that device is not available on this machine.
        state_dict = torch.load(model_path, map_location=self.device)
        model.load_state_dict(state_dict)
        model.to(self.device)
        self.test_uid_iid_model(model)
Example #3
0
    def dnn(self):
        """Load a saved ``train_dnn.DNNRank`` model and evaluate it.

        The checkpoint path comes from the ``model`` option (default
        ``temp/dnn.pkl``).  The restored model is moved to ``self.device`` and
        handed to ``self.test_uid_iid_model``.
        """
        import train_dnn
        # NOTE(review): the arguments look like user count, item count and
        # embedding size -- confirm against train_dnn.DNNRank.
        model = train_dnn.DNNRank(138494, 131263, 16)

        auto_arg = pydev.AutoArg()
        model_path = auto_arg.option('model', 'temp/dnn.pkl')

        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be restored when that device is not available on this machine.
        state_dict = torch.load(model_path, map_location=self.device)
        model.load_state_dict(state_dict)
        model.to(self.device)
        self.test_uid_iid_model(model)
Example #4
0
    def test_ins_data(self, model, slot_info):
        """Score one epoch of a slot-file test set with ``model`` and log its AUC.

        Options read through ``pydev.AutoArg``:
            test: path of the slot file to read.
            batch: number of instances requested per read (default 20000).

        Args:
            model: object whose ``forward`` takes a list of ``(ids, offsets)``
                tensor pairs, one pair per entry of ``slot_info``, and returns
                a tensor of predictions.
            slot_info: re-iterable sequence of ``(slot, feature_count)`` pairs.
                Only the slot is used here; its order fixes the order of the
                model inputs.
        """
        autoarg = pydev.AutoArg()
        input_filename = autoarg.option('test')
        batch_size = int(autoarg.option('batch', 20000))
        reader = easy.slot_file.SlotFileReader(input_filename)

        y = []
        y_ = []
        reading_count = 0
        # Evaluation only: disabling autograd avoids building a graph for
        # every batch, which would only cost memory and time.
        with torch.no_grad():
            while reader.epoch() < 1:
                labels, slots = reader.next(batch_size)

                # make pytorch data.
                # slot -> (flat id list, start offset of each id group).
                # NOTE(review): an offset is recorded only when an instance
                # actually carries the slot, so the offsets can have fewer
                # entries than the batch -- confirm the model tolerates this.
                grouped = {}
                for item in slots:
                    for slot, ids in item:
                        id_list, offsets = grouped.setdefault(slot, ([], []))
                        offsets.append(len(id_list))
                        id_list.extend(ids)

                x = []
                for slot, _ in slot_info:
                    id_list, offsets = grouped.get(slot, ([], []))
                    emb_pair = (torch.tensor(id_list).to(self.device),
                                torch.tensor(offsets).to(self.device))
                    x.append(emb_pair)

                clicks_ = model.forward(x)

                # The labels are only collected for the metric, so they are
                # flattened on the CPU instead of a device round trip.
                y += torch.Tensor(labels).view(-1).tolist()
                y_ += clicks_.view(-1).tolist()

                pydev.log13('reading_count : %d' % reading_count)
                reading_count += 1

        auc = metrics.roc_auc_score(y, y_)
        # Emit a blank line (presumably to finish the progress output above).
        # print('') does this on both Python 2 and 3, whereas a bare ``print``
        # is a no-op expression on Python 3.
        print('')
        pydev.log('Valid AUC: %.3f' % auc)
Example #5
0
    def slot_dnn(self):
        """Load a saved ``train_slot_dnn.SlotDnnRank`` model and evaluate it.

        Options read through ``pydev.AutoArg``:
            emb: embedding size passed to the model (default 32).
            s: path of the slot-info file, read row by row with
                ``pydev.foreach_row(..., format='si')``.
            m: path of the saved model state.

        The restored model and the slot info are handed to
        ``self.test_ins_data``.
        """
        import train_slot_dnn
        autoarg = pydev.AutoArg()

        embedding_size = int(autoarg.option('emb', 32))
        slotinfo_filename = autoarg.option('s')
        model_path = autoarg.option('m')

        # temp get slot_info.
        # ``open`` replaces the Python-2-only ``file`` builtin; the rows are
        # consumed inside the ``with`` block so the handle is closed afterwards
        # even if ``foreach_row`` reads lazily.
        with open(slotinfo_filename) as slotinfo_file:
            slot_info = [
                (slot, slot_feanum)
                for slot, slot_feanum in pydev.foreach_row(slotinfo_file,
                                                           format='si')
            ]

        model = train_slot_dnn.SlotDnnRank(slot_info,
                                           embedding_size).to(self.device)
        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be restored when that device is not available on this machine.
        state_dict = torch.load(model_path, map_location=self.device)
        model.load_state_dict(state_dict)

        self.test_ins_data(model, slot_info)
Example #6
0
            for uid, iid, click  in self.train:
                user_ids.append(uid)
                item_ids.append(iid)
                clicks.append(float(click))

                if len(clicks)>=self.batch_size:
                    yield (torch.tensor(user_ids).to(self.device),
                            torch.tensor(item_ids).to(self.device), 
                            torch.tensor(clicks).to(self.device))

                    user_ids = []
                    item_ids = []
                    clicks = []

# Script entry point: read the run configuration through pydev.AutoArg, log
# the main hyper-parameters and create the torch device.
if __name__=='__main__':
    autoarg = pydev.AutoArg()
    # 'data' defaults to 'data/'; 'output' defaults to 'temp/dnn.pkl'
    # (presumably where the trained model is saved -- confirm with the code
    # that consumes model_save_path).
    data_dir = autoarg.option('data', 'data/')
    model_save_path = autoarg.option('output', 'temp/dnn.pkl')

    # Numeric options arrive as raw option values and are converted to int.
    # NOTE(review): TestNum defaults to -1, which looks like a "no limit"
    # sentinel -- confirm against the code that consumes it.
    TestNum = int(autoarg.option('testnum', -1))
    EmbeddingSize = int(autoarg.option('embed', 16))
    EpochCount = int(autoarg.option('epoch', 3))
    BatchSize = int(autoarg.option('batch', 1024))
    # Name handed to torch.device below; defaults to 'cuda'.
    device_name = autoarg.option('device', 'cuda')

    # Log the effective hyper-parameters for this run.
    pydev.info('EmbeddingSize=%d' % EmbeddingSize)
    pydev.info('Epoch=%d' % EpochCount)
    pydev.info('BatchSize=%d' % BatchSize)

    device = torch.device(device_name)