def __init__(self, vocab_size, embedding_size, init_scale=0.1):
    """Create the two embedding tables used by the skip-gram model.

    Args:
        vocab_size: Number of rows in each embedding table.
        embedding_size: Width of every embedding vector.
        init_scale: Accepted for interface compatibility. NOTE(review): it is
            not read anywhere in this constructor; the initialisation range
            is derived from ``embedding_size`` instead.
    """
    super(SkipGram, self).__init__()
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size

    # Both tables are drawn uniformly from
    # [-0.5 / embedding_size, 0.5 / embedding_size].
    bound = 0.5 / embedding_size

    def _uniform_attr():
        # A fresh ParamAttr per table so the two layers share no attribute object.
        return paddle.ParamAttr(
            initializer=paddle.nn.initializer.Uniform(low=-bound, high=bound))

    # Table of shape [vocab_size, embedding_size].
    self.embedding = Embedding(
        num_embeddings=self.vocab_size,
        embedding_dim=self.embedding_size,
        weight_attr=_uniform_attr())

    # Second, independent table with the same shape and initialisation.
    self.embedding_out = Embedding(
        num_embeddings=self.vocab_size,
        embedding_dim=self.embedding_size,
        weight_attr=_uniform_attr())
def __init__(self,
             hidden_size,
             vocab_size,
             num_steps=20,
             init_scale=0.1,
             is_sparse=False,
             dtype="float64"):
    """Create the embedding table and the softmax projection parameters.

    Args:
        hidden_size: Width of the embedding vectors and first dimension of
            the softmax weight.
        vocab_size: Number of embedding rows and second dimension of the
            softmax weight.
        num_steps: Stored on the instance; not used by this constructor.
        init_scale: All parameters are initialised uniformly in
            ``[-init_scale, init_scale]``.
        is_sparse: Forwarded to the embedding layer's ``sparse`` argument.
            The previous code hard-coded ``sparse=True`` and silently
            ignored this parameter.
        dtype: Data type of the softmax weight and bias. It is not passed
            to the embedding layer.
    """
    super(SimpleNet, self).__init__()
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    self.init_scale = init_scale
    self.num_steps = num_steps

    self.embedding = Embedding(
        self.vocab_size,
        self.hidden_size,
        sparse=is_sparse,  # honour the caller's choice instead of forcing True
        weight_attr=paddle.ParamAttr(
            name='embedding_param',
            initializer=paddle.nn.initializer.Uniform(
                low=-init_scale, high=init_scale)))

    # Projection from hidden state to vocabulary logits: weight then bias.
    self.softmax_weight = self.create_parameter(
        attr=paddle.ParamAttr(),
        shape=[self.hidden_size, self.vocab_size],
        dtype=dtype,
        default_initializer=paddle.nn.initializer.Uniform(
            low=-self.init_scale, high=self.init_scale))
    self.softmax_bias = self.create_parameter(
        attr=paddle.ParamAttr(),
        shape=[self.vocab_size],
        dtype=dtype,
        default_initializer=paddle.nn.initializer.Uniform(
            low=-self.init_scale, high=self.init_scale))
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# Draw two random user IDs from [0, 6040) as a 1-D int64 array.
usr_id_data = np.random.randint(0, 6040, size=2).astype('int64')
print("输入的用户ID是:", usr_id_data)

# Table size is the largest user ID plus one, so ID 0 has a row as well.
USR_ID_NUM = 6040 + 1

# Embedding layer followed by a fully connected layer for the user ID.
usr_emb = Embedding(num_embeddings=USR_ID_NUM, embedding_dim=32, sparse=False)
usr_fc = Linear(in_features=32, out_features=32)

# Look up the embeddings, project them, then apply ReLU.
usr_id_var = paddle.to_tensor(usr_id_data)
usr_id_feat = usr_emb(usr_id_var)
usr_id_feat = usr_fc(usr_id_feat)
usr_id_feat = F.relu(usr_id_feat)
print("用户ID的特征是:", usr_id_feat.numpy(), "\n其形状是:", usr_id_feat.shape)
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# Two hand-picked user ages as a 1-D int64 array.
usr_age_data = np.array([1, 18], dtype='int64')
print("输入的用户年龄是:", usr_age_data)

# The largest age ID is 56, so the embedding table needs 56 + 1 = 57 rows.
USR_AGE_DICT_SIZE = 56 + 1

# Embedding layer for the age, followed by a Linear layer.
usr_age_emb = Embedding(num_embeddings=USR_AGE_DICT_SIZE, embedding_dim=16)
usr_age_fc = Linear(in_features=16, out_features=16)

# Embedding lookup -> Linear -> ReLU.
usr_age = paddle.to_tensor(usr_age_data)
usr_age_feat = F.relu(usr_age_fc(usr_age_emb(usr_age)))

print("用户年龄特征的数据特征是:", usr_age_feat.numpy(), "\n其形状是:", usr_age_feat.shape)
print("\n年龄 1 对应的特征是:", usr_age_feat.numpy()[0, :])
print("年龄 18 对应的特征是:", usr_age_feat.numpy()[1, :])
def __init__(self, use_poster, use_mov_title, use_mov_cat, use_age_job, fc_sizes):
    """Load the MovieLen dataset and build the movie-side network layers.

    Args:
        use_poster: Stored as ``use_mov_poster`` and passed to ``MovieLen``.
        use_mov_title: Stored as ``use_mov_title``.
        use_mov_cat: Stored as ``use_mov_cat``.
        use_age_job: Stored as ``use_usr_age_job``.
        fc_sizes: List of output widths for the stacked Linear layers that
            follow the 200-wide fused movie feature.
    """
    super(MovModel, self).__init__()

    # Keep the feature switches and layer sizes on the instance.
    self.use_mov_poster = use_poster
    self.use_mov_title = use_mov_title
    self.use_usr_age_job = use_age_job
    self.use_mov_cat = use_mov_cat
    self.fc_sizes = fc_sizes

    # Load the dataset and build the train / validation data loaders.
    Dataset = MovieLen(self.use_mov_poster)
    self.Dataset = Dataset
    self.trainset = self.Dataset.train_dataset
    self.valset = self.Dataset.valid_dataset
    self.train_loader = self.Dataset.load_data(dataset=self.trainset, mode='train')
    self.valid_loader = self.Dataset.load_data(dataset=self.valset, mode='valid')

    # ---- layers for embedding movie info ----
    # Movie ID: embedding followed by a Linear layer.
    MOV_DICT_SIZE = Dataset.max_mov_id + 1
    self.mov_emb = Embedding(num_embeddings=MOV_DICT_SIZE, embedding_dim=32)
    self.mov_fc = Linear(32, 32)

    # Movie category: embedding followed by a Linear layer.
    CATEGORY_DICT_SIZE = len(Dataset.movie_cat) + 1
    self.mov_cat_emb = Embedding(num_embeddings=CATEGORY_DICT_SIZE, embedding_dim=32)
    self.mov_cat_fc = Linear(32, 32)

    # Movie title: embedding followed by two (3, 1) convolutions.
    MOV_TITLE_DICT_SIZE = len(Dataset.movie_title) + 1
    self.mov_title_emb = Embedding(num_embeddings=MOV_TITLE_DICT_SIZE, embedding_dim=32)
    self.mov_title_conv = Conv2D(in_channels=1, out_channels=1, kernel_size=(3, 1), stride=(2, 1), padding=0)
    self.mov_title_conv2 = Conv2D(in_channels=1, out_channels=1, kernel_size=(3, 1), stride=1, padding=0)

    # Linear layer that fuses the movie features (96 -> 200).
    self.mov_concat_embed = Linear(in_features=96, out_features=200)

    # Movie tower: Linear + ReLU pairs. The movie and user features use
    # separate fully connected layers and do not share parameters.
    movie_sizes = [200] + self.fc_sizes
    self._movie_layers = []
    for i, (fan_in, fan_out) in enumerate(zip(movie_sizes, movie_sizes[1:])):
        linear = paddle.nn.Linear(
            in_features=fan_in,
            out_features=fan_out,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.Normal(
                    std=1.0 / math.sqrt(fan_in))))
        # Layers held only in a plain Python list are not registered with
        # the parent Layer, so their parameters would be missing from
        # parameters() / state_dict(). Register each one explicitly.
        self.add_sublayer('linear_movie_%d' % i, linear)
        self._movie_layers.append(linear)

        act = paddle.nn.ReLU()
        self.add_sublayer('movie_act_%d' % i, act)
        self._movie_layers.append(act)
def __init__(self, use_poster, use_mov_title, use_mov_cat, use_age_job, fc_sizes):
    """Load the MovieLen dataset and build the user and movie towers.

    Args:
        use_poster: Stored as ``use_mov_poster`` and passed to ``MovieLen``.
        use_mov_title: Stored as ``use_mov_title``.
        use_mov_cat: Stored as ``use_mov_cat``.
        use_age_job: Stored as ``use_usr_age_job``.
        fc_sizes: List of output widths for the Linear layers stacked on top
            of each 200-wide fused feature.
    """
    super(Model, self).__init__()

    # Keep the feature switches and layer sizes on the instance.
    self.use_mov_poster = use_poster
    self.use_mov_title = use_mov_title
    self.use_usr_age_job = use_age_job
    self.use_mov_cat = use_mov_cat
    self.fc_sizes = fc_sizes

    # Load the dataset and build the train / validation data loaders.
    Dataset = MovieLen(self.use_mov_poster)
    self.Dataset = Dataset
    self.trainset = self.Dataset.train_dataset
    self.valset = self.Dataset.valid_dataset
    self.train_loader = self.Dataset.load_data(dataset=self.trainset, mode='train')
    self.valid_loader = self.Dataset.load_data(dataset=self.valset, mode='valid')

    # Embedding widths for every categorical feature.
    usr_dim = 32
    gender_dim = 16
    age_dim = 16
    job_dim = 16
    mov_dim = 16
    category_dim = 16
    title_dim = 32

    # ---- layers for embedding user info ----
    # User ID: embedding followed by a Linear layer.
    USR_ID_NUM = Dataset.max_usr_id + 1
    self.usr_emb = Embedding(num_embeddings=USR_ID_NUM, embedding_dim=usr_dim, sparse=False)
    self.usr_fc = Linear(in_features=usr_dim, out_features=32)

    # User gender: embedding followed by a Linear layer.
    USR_GENDER_DICT_SIZE = 2
    self.usr_gender_emb = Embedding(num_embeddings=USR_GENDER_DICT_SIZE, embedding_dim=gender_dim)
    self.usr_gender_fc = Linear(in_features=gender_dim, out_features=16)

    # User age: embedding followed by a Linear layer.
    USR_AGE_DICT_SIZE = Dataset.max_usr_age + 1
    self.usr_age_emb = Embedding(num_embeddings=USR_AGE_DICT_SIZE, embedding_dim=age_dim)
    self.usr_age_fc = Linear(in_features=age_dim, out_features=16)

    # User job: embedding followed by a Linear layer.
    USR_JOB_DICT_SIZE = Dataset.max_usr_job + 1
    self.usr_job_emb = Embedding(num_embeddings=USR_JOB_DICT_SIZE, embedding_dim=job_dim)
    self.usr_job_fc = Linear(in_features=job_dim, out_features=16)

    # Linear layer that fuses the user features (80 -> 200).
    self.usr_combined = Linear(in_features=80, out_features=200)

    # ---- layers for embedding movie info ----
    # Movie ID: embedding followed by a Linear layer.
    MOV_DICT_SIZE = Dataset.max_mov_id + 1
    self.mov_emb = Embedding(num_embeddings=MOV_DICT_SIZE, embedding_dim=mov_dim)
    self.mov_fc = Linear(in_features=mov_dim, out_features=32)

    # Movie category: embedding followed by a Linear layer.
    CATEGORY_DICT_SIZE = len(Dataset.movie_cat) + 1
    self.mov_cat_emb = Embedding(num_embeddings=CATEGORY_DICT_SIZE, embedding_dim=category_dim, sparse=False)
    self.mov_cat_fc = Linear(in_features=category_dim, out_features=32)

    # Movie title: embedding followed by two (3, 1) convolutions.
    MOV_TITLE_DICT_SIZE = len(Dataset.movie_title) + 1
    self.mov_title_emb = Embedding(num_embeddings=MOV_TITLE_DICT_SIZE, embedding_dim=title_dim, sparse=False)
    self.mov_title_conv = Conv2D(in_channels=1, out_channels=1, kernel_size=(3, 1), stride=(2, 1), padding=0)
    self.mov_title_conv2 = Conv2D(in_channels=1, out_channels=1, kernel_size=(3, 1), stride=1, padding=0)

    # Linear layer that fuses the movie features (96 -> 200).
    self.mov_concat_embed = Linear(in_features=96, out_features=200)

    def _build_tower(linear_name, act_name):
        # Stack Linear + ReLU pairs on top of a 200-wide input, registering
        # each layer under the given name templates, and return them in order.
        sizes = [200] + self.fc_sizes
        tower = []
        for idx, (fan_in, fan_out) in enumerate(zip(sizes, sizes[1:])):
            fc = paddle.nn.Linear(
                in_features=fan_in,
                out_features=fan_out,
                weight_attr=paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Normal(
                        std=1.0 / math.sqrt(fan_in))))
            self.add_sublayer(linear_name % idx, fc)
            tower.append(fc)

            relu = paddle.nn.ReLU()
            self.add_sublayer(act_name % idx, relu)
            tower.append(relu)
        return tower

    # The user and movie towers use separate fully connected layers and do
    # not share parameters; the user tower is created first.
    self._user_layers = _build_tower('linear_user_%d', 'user_act_%d')
    self._movie_layers = _build_tower('linear_movie_%d', 'movie_act_%d')
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# One 200-wide projection per user feature, created in the order
# ID (32 inputs), job (16), age (16), gender (16).
FC_ID, FC_JOB, FC_AGE, FC_GENDER = (
    Linear(in_features=width, out_features=200) for width in (32, 16, 16, 16)
)

# Draw two random user IDs from [0, 6040) as a 1-D int64 array.
usr_id_data = np.random.randint(0, 6040, size=2).astype('int64')
USR_ID_NUM = 6040 + 1

# Embedding layer and fully connected layer for the user ID.
usr_emb = Embedding(num_embeddings=USR_ID_NUM, embedding_dim=32, sparse=False)
usr_fc = Linear(in_features=32, out_features=32)

# Embedding lookup -> Linear -> ReLU for the user ID.
usr_id_var = paddle.to_tensor(usr_id_data)
usr_id_feat = F.relu(usr_fc(usr_emb(usr_id_var)))

# Two hand-picked user ages as a 1-D int64 array.
usr_age_data = np.array([1, 18], dtype='int64')

# The largest age ID is 56, so the embedding table needs 56 + 1 = 57 rows.
USR_AGE_DICT_SIZE = 56 + 1
usr_age_emb = Embedding(num_embeddings=USR_AGE_DICT_SIZE, embedding_dim=16)
usr_age_fc = Linear(in_features=16, out_features=16)
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# Two hand-picked user job IDs as a 1-D int64 array.
usr_job_data = np.array((0, 20)).reshape(-1).astype('int64')
print("输入的用户职业是:", usr_job_data)

# Embedding layer for the job, followed by a Linear layer.
# The largest job ID is 20, so the embedding table needs 20 + 1 = 21 rows.
USR_JOB_DICT_SIZE = 20 + 1
usr_job_emb = Embedding(num_embeddings=USR_JOB_DICT_SIZE, embedding_dim=16)
usr_job_fc = Linear(in_features=16, out_features=16)

# Embedding lookup -> Linear -> ReLU.
usr_job = paddle.to_tensor(usr_job_data)
usr_job_feat = usr_job_emb(usr_job)
usr_job_feat = usr_job_fc(usr_job_feat)
usr_job_feat = F.relu(usr_job_feat)

# The label previously said "用户年龄特征" (user age), copied from the age
# example; these are job features.
print("用户职业特征的数据特征是:", usr_job_feat.numpy(), "\n其形状是:", usr_job_feat.shape)
print("\n职业 0 对应的特征是:", usr_job_feat.numpy()[0, :])
print("职业 20 对应的特征是:", usr_job_feat.numpy()[1, :])
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# Two hand-picked movie IDs as a 1-D int64 array.
mov_id_data = np.array([1, 2], dtype='int64')

# Embedding layer for the movie ID, followed by a fully connected layer.
MOV_DICT_SIZE = 3952 + 1
mov_emb = Embedding(num_embeddings=MOV_DICT_SIZE, embedding_dim=32)
mov_fc = Linear(in_features=32, out_features=32)

print("输入的电影ID是:", mov_id_data)

# From here on mov_id_data is a tensor rather than a NumPy array.
mov_id_data = paddle.to_tensor(mov_id_data)

# Embedding lookup -> Linear -> ReLU.
mov_id_feat = mov_emb(mov_id_data)
mov_id_feat = mov_fc(mov_id_feat)
mov_id_feat = F.relu(mov_id_feat)

print("计算的电影ID的特征是", mov_id_feat.numpy(), "\n其形状是:", mov_id_feat.shape)
print("\n电影ID为 {} 计算得到的特征是:{}".format(mov_id_data.numpy()[0], mov_id_feat.numpy()[0]))
print("电影ID为 {} 计算得到的特征是:{}".format(mov_id_data.numpy()[1], mov_id_feat.numpy()[1]))
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# Two hand-made movie titles: four token IDs each, zero-padded to length 15,
# arranged as an int64 array of shape (2, 1, 15).
mov_title_data = np.array(
    [
        [1, 2, 3, 4] + [0] * 11,
        [2, 3, 4, 5] + [0] * 11,
    ],
    dtype='int64',
).reshape(2, 1, 15)

# Embedding for the title tokens, followed by two convolution layers.
MOV_TITLE_DICT_SIZE = 1000 + 1
mov_title_emb = Embedding(num_embeddings=MOV_TITLE_DICT_SIZE, embedding_dim=32)
mov_title_conv = Conv2D(
    in_channels=1, out_channels=1, kernel_size=(3, 1), stride=(2, 1), padding=0)
# A second (3, 1) convolution with stride 1, used in place of a fully
# connected layer.
mov_title_conv2 = Conv2D(
    in_channels=1, out_channels=1, kernel_size=(3, 1), stride=1, padding=0)

mov_title_data = paddle.to_tensor(mov_title_data)
print("电影名称数据的输入形状: ", mov_title_data.shape)
# 1. Map the movie-title data through the Embedding layer;
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# Both gender IDs as a 1-D int64 array.
usr_gender_data = np.array([0, 1], dtype='int64')
print("输入的用户性别是:", usr_gender_data)

# Gender is encoded as 0 or 1; the largest ID is 1, so the table needs
# 1 + 1 = 2 rows.
# NOTE(review): USR_ID_NUM is not read below; the layer uses
# USR_GENDER_DICT_SIZE.
USR_ID_NUM = 2

# Embedding layer for the gender, followed by a fully connected layer.
USR_GENDER_DICT_SIZE = 2
usr_gender_emb = Embedding(num_embeddings=USR_GENDER_DICT_SIZE, embedding_dim=16)
usr_gender_fc = Linear(in_features=16, out_features=16)

# Embedding lookup -> Linear -> ReLU.
usr_gender_var = paddle.to_tensor(usr_gender_data)
usr_gender_feat = usr_gender_emb(usr_gender_var)
usr_gender_feat = usr_gender_fc(usr_gender_feat)
usr_gender_feat = F.relu(usr_gender_feat)

print("用户性别特征的数据特征是:", usr_gender_feat.numpy(), "\n其形状是:", usr_gender_feat.shape)
print("\n性别 0 对应的特征是:", usr_gender_feat.numpy()[0, :])
print("性别 1 对应的特征是:", usr_gender_feat.numpy()[1, :])
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.nn import Linear, Embedding, Conv2D
import numpy as np
import paddle.nn.functional as F

# Table size: the largest user ID plus one, so ID 0 has a row as well.
USR_ID_NUM = 6040 + 1
# Embedding layer that maps each ID to a vector of length 32.
usr_emb = Embedding(num_embeddings=USR_ID_NUM, embedding_dim=32, sparse=False)

# Build the input (a single ID, 1) and convert it to a tensor.
arr_1 = np.array([1], dtype="int64")
print(arr_1)
arr_pd1 = paddle.to_tensor(arr_1)
print(arr_pd1)

# Look up the embedding and show the result.
emb_res = usr_emb(arr_pd1)
print("数字 1 的embedding结果是: ", emb_res.numpy(), "\n形状是:", emb_res.shape)

# Second example: a table with 10 rows.
USR_ID_NUM = 10
# Embedding layer that maps each ID to a vector of length 16.
usr_emb = Embedding(num_embeddings=USR_ID_NUM, embedding_dim=16, sparse=False)
# Define the input data: integer IDs that are valid rows of the 10-row table,
# converted to a tensor.