def init_data(self):
        """Prepare the word embeddings, the vocabulary and the datasets.

        If ``self.load`` is truthy, the embedding matrix and the vocab-to-id
        mapping are read back with ``tools.load_params`` from
        ``self.we_file_pkl`` and ``self.vocab2id_file_pkl``.  Otherwise they
        are built by ``load_embed_from_text`` from ``self.word_embed_file``
        and written to those two paths with ``tools.save_params``.

        The training file is always converted with ``sentence2id_and_pad``;
        the dev and test files only when the corresponding ``*_file``
        attribute is not ``None``.  Each converted split is stored on the
        instance as ``<split>_x``, ``<split>_y`` and ``<split>_seq_len``, and
        the sizes of ``x`` and ``y`` are printed.
        """
        if not self.load:
            # Build from the raw embedding text file, then persist the result.
            embeddings, word_to_id = load_embed_from_text(
                self.word_embed_file, self.word_dim)
            self.we = embeddings
            tools.save_params(self.we, self.we_file_pkl)
            tools.save_params(word_to_id, self.vocab2id_file_pkl)
        else:
            # Re-use the previously saved embedding matrix and vocabulary.
            self.we = tools.load_params(self.we_file_pkl)
            word_to_id = tools.load_params(self.vocab2id_file_pkl)
        print("vocab size: %d" % len(word_to_id), "we shape: ", self.we.shape)

        def encode(path):
            # Every split shares the same vocabulary and maximum length.
            return sentence2id_and_pad(path, word_to_id, self.max_seq_len)

        train_split = encode(self.train_file)
        self.train_x, self.train_y, self.train_seq_len = train_split
        print("train_x: %d " % len(self.train_x),
              "train_y: %d" % len(self.train_y))

        # The dev split is optional.
        if self.dev_file is not None:
            dev_split = encode(self.dev_file)
            self.dev_x, self.dev_y, self.dev_seq_len = dev_split
            print("dev_x: %d " % len(self.dev_x),
                  "dev_y: %d" % len(self.dev_y))

        # The test split is optional.
        if self.test_file is not None:
            test_split = encode(self.test_file)
            self.test_x, self.test_y, self.test_seq_len = test_split
            print("test_x: %d " % len(self.test_x),
                  "test_y: %d" % len(self.test_y))
Esempio n. 2
0
def to2d_pca(fname="samples_vector"):
    """Reduce the saved vectors to two components with PCA.

    Reads an ``(x, y)`` pair from ``../log/<fname>.pkl``, fits a PCA with
    ``n_components=2`` on ``x`` and prints the shape of ``x`` before and
    after the projection.  The pair ``[x_2d, y]`` is written to
    ``../log/pca_<fname>_2d.pkl``.

    Args:
        fname: Base name (without extension) of the pickle under ``../log``.

    Returns:
        A two-element list ``[x_2d, y]`` with the projected data and the
        untouched second element of the loaded pair.
    """
    source_path = "../log/{}.pkl".format(fname)
    target_path = "../log/pca_{}_2d.pkl".format(fname)

    vectors, labels = tools.load_params(source_path)
    print(vectors.shape)

    reduced = PCA(n_components=2).fit_transform(vectors)
    print(reduced.shape)

    tools.save_params([reduced, labels], target_path)
    return [reduced, labels]
Esempio n. 3
0
def to2d(fname="samples_vector"):
    """Embed the saved vectors with a default-configured t-SNE.

    Reads an ``(x, y)`` pair from ``../log/<fname>.pkl``, runs
    ``TSNE().fit_transform`` on ``x`` and prints the shape of ``x`` before
    and after the transformation.  The pair ``[x_2d, y]`` is written to
    ``../log/<fname>_2d.pkl``.

    Args:
        fname: Base name (without extension) of the pickle under ``../log``.

    Returns:
        A two-element list ``[x_2d, y]`` with the transformed data and the
        untouched second element of the loaded pair.
    """
    samples, targets = tools.load_params("../log/{}.pkl".format(fname))
    print(samples.shape)

    embedder = TSNE()
    embedded = embedder.fit_transform(samples)
    print(embedded.shape)

    out_path = "../log/{}_2d.pkl".format(fname)
    tools.save_params([embedded, targets], out_path)
    return [embedded, targets]
@author: dby_freedom
"""
import os
import pickle

import numpy as np
import tensorflow as tf

from tools import load_config, load_params
from model_trainer import train_fn

# Directory that holds the pickled output of the preprocessing step.
ProcessedDataDir = './processed_data'

# Fetch the value stored by a previous training run.  If the file backing it
# does not exist yet, train first and then read it again.
# NOTE(review): load_dir is presumably the saved-model location -- confirm
# against tools.load_params.
try:
    load_dir = load_params()
except FileNotFoundError:
    train_fn()
    load_dir = load_params()

# Load the preprocessing artefacts.  The context manager closes the file as
# soon as unpickling is done instead of leaving the handle open.
with open(os.path.join(ProcessedDataDir, 'preprocess.p'),
          mode='rb') as _preprocess_file:
    (title_count, title_set, genres2int, features, targets_values, ratings,
     users, movies, data, movies_orig, users_orig) = pickle.load(
        _preprocess_file)

# Model configuration values returned by load_config().
(embed_dim, uid_max, gender_max, age_max, job_max, movie_id_max,
 movie_categories_max, movie_title_max, combiner, sentences_size,
 window_sizes, filter_num) = load_config()

# Dictionary mapping movie ID to row index.  Movie IDs in the dataset do not
# coincide with row indices, e.g. the movie in row 5 does not necessarily
# have ID 5.
movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}


def get_tensors(loaded_graph):
    uid = loaded_graph.get_tensor_by_name("uid:0")