def forward(self, h: FT) -> Dict[str, FT]:
    """Map hidden states `h` to per-feature-group log-probabilities.

    Returns a dict keyed by `Name(...)` objects, one entry per predicted
    feature group, each holding log-softmaxed scores over that group's
    feature values. Assumes `h`'s last dim matches `self.linear`'s input
    size — TODO confirm against caller.
    """
    # Shared representation for all feature-group heads.
    shared_h = nn.functional.leaky_relu(self.linear(h).refine_names(..., 'shared_repr'), negative_slope=0.1)
    ret = dict()
    for name, layer in self.feat_predictors.items():
        out = layer(shared_h).refine_names(..., name)
        if not should_predict_none(name, new_style=g.new_style):
            # Suppress the NONE feature slot with a large negative logit so
            # it receives ~zero probability after the log_softmax below.
            f_idx = get_none_index(name)
            out[:, f_idx] = -999.9
        ret[Name(name, 'camel')] = out
    # Compose probs for complex feature groups if possible.
    if g.new_style:
        for e in get_needed_categories(g.feat_groups, new_style=True, breakdown=False):
            if e.num_groups() > 1:
                assert e not in ret
                part_tensors = [ret[part_enum.get_name()] for part_enum in e.parts()]
                parts = list()
                for i, part_tensor in enumerate(part_tensors):
                    # conversion_idx maps each composed feature value to the
                    # index of its i-th part — presumably precomputed in
                    # __init__; verify against the constructor.
                    conversion = self.conversion_idx[e.get_name().value][:, i]
                    bs = len(part_tensor)
                    part = part_tensor.rename(None).gather(1, conversion.rename(None).expand(bs, -1))
                    parts.append(part)
                # Sum the parts' logits to score each composed feature value.
                parts = torch.stack(parts, dim=-1)
                dim_name = e.get_name().value
                ret[e.get_name()] = parts.sum(dim=-1).refine_names('batch', dim_name)
                # The parts are now folded into the composed group; drop them.
                for part_cat in e.parts():
                    del ret[part_cat.get_name()]
    # Normalize every head into log-probabilities.
    for name in ret:
        ret[name] = torch.log_softmax(ret[name], dim=-1)
    # Deal with conditions for some categories: a conditioned category's
    # log-probs are shifted by the log-prob of the value it is conditioned on.
    for cat, index in conditions.items():
        if should_include(g.feat_groups, cat):
            # Find out the exact value to be conditioned on.
            # TODO(j_luo) ugly Category call.
            condition_e = get_enum_by_cat(Category(index.c_idx))
            # New-style enums carry an 'X' suffix in their class names.
            condition_name = condition_e.__name__ + ('X' if g.new_style else '')
            cat_name = get_enum_by_cat(cat).__name__ + ('X' if g.new_style else '')
            condition_name = Name(condition_name, 'camel')
            cat_name = Name(cat_name, 'camel')
            condition_log_probs = ret[condition_name][..., index.f_idx]
            # condition_log_probs.align_as(ret[cat_name])
            ret[cat_name] = ret[cat_name] + condition_log_probs.rename(None).unsqueeze(dim=-1)
    return ret
def __init__(self):
    """Load a pretrained LM checkpoint, remap its feature embeddings onto the
    dense embedding layout, freeze the pretrained parts, and set up the
    adapter plus optional noise/MoE components."""
    super().__init__()
    saved_dict = torch.load(g.lm_model_path)
    try:
        self.load_state_dict(saved_dict['model'])
    except RuntimeError as e:
        # Expected when the checkpoint layout differs (see NOTE below);
        # the mismatching embeddings are remapped manually afterwards.
        logging.error(str(e))
    # NOTE(j_luo) We have to map normal feature embeddings to dense feature embeddings.
    old_weights = saved_dict['model']['encoder.feat_embedding.embed_layer.weight']
    for cat in Category:
        try:
            emb_param = self.encoder.feat_embedding.embed_layer[cat.name]
            e = get_enum_by_cat(cat)
            # g_idx indexes each feature's row in the flat (non-dense) table.
            g_idx = [feat.value.g_idx for feat in e]
            emb_param.data.copy_(old_weights[g_idx])
        except KeyError:
            # Category not present in this model's embedding dict — skip.
            pass
    # Pretrained components stay fixed; only the adapter (and any noise/MoE
    # modules below) are trained.
    freeze(self.encoder)
    freeze(self.predictor)
    self.adapter = AdaptLayer()
    if g.use_prior or g.use_moe:
        noise_hs = 10
        noise_dim = 10
        self.noise_encoder = self._get_encoder(hidden_size=noise_hs, dim=noise_dim)
        self.noise_predictor = Predictor(hidden_size=noise_hs)
        if g.use_moe:
            # Gate scores [noise expert, main expert] from both encoders' states.
            self.moe_gate = nn.Linear(noise_hs + g.hidden_size, 2)
def _get_embeddings(self):
    """Build one zero-initialized embedding table per included category.

    Each table has shape (num_features_in_category, self.dim) and is keyed
    by the category's name.
    """
    tables = {
        cat.name: nn.Parameter(torch.zeros(len(get_enum_by_cat(cat)), self.dim))
        for cat in Category
        if should_include(self.feat_groups, cat)
    }
    return nn.ParameterDict(tables)
def __init__(self):
    """Create one square (nf x nf) zero-initialized adapter matrix for every
    category selected by `g.feat_groups`, stored in a ParameterDict keyed by
    category name."""
    super().__init__()
    adapters = dict()
    for cat in Category:
        if not should_include(g.feat_groups, cat):
            continue
        num_feats = len(get_enum_by_cat(cat))
        adapters[cat.name] = nn.Parameter(torch.zeros(num_feats, num_feats))
    self.adapters = nn.ParameterDict(adapters)
def _get_embeddings(self):
    """Build one uniformly-initialized embedding table per included category.

    Each table has shape (num_features_in_category, self.dim) and is keyed by
    the category's name; values are drawn from U(-0.1, 0.1).

    Fix: the init warning used to be emitted once per included category
    inside the loop; it is now logged a single time per call.
    """
    logging.warning('dense feature embedding init')
    emb_dict = dict()
    for cat in Category:
        if should_include(g.feat_groups, cat):
            e = get_enum_by_cat(cat)
            nf = len(e)
            param = nn.Parameter(torch.zeros(nf, self.dim))
            torch.nn.init.uniform_(param, -0.1, 0.1)
            emb_dict[cat.name] = param
    return nn.ParameterDict(emb_dict)
def _post_init_helper(self):
    """Precompute the dense (one-hot per category) view of `self.feat_matrix`.

    For every category, builds a (batch, length, num_features) one-hot tensor
    from the global-to-feature index map `self._g2f` and stores the results in
    `self.dense_feat_matrix`, keyed by category.

    Fix: the tensors used to be moved to GPU unconditionally via `.cuda()`,
    which crashes on CPU-only machines; the move is now guarded by
    `has_gpus()`, matching the module-level `convert_to_dense`.
    """
    super()._post_init_helper()
    names = self.feat_matrix.names
    bs = self.feat_matrix.size('batch')
    ml = self.feat_matrix.size('length')
    # Translate global feature ids into per-category feature indices.
    fm = self._g2f[self.feat_matrix.rename(None)].refine_names(*names)
    sfms = dict()
    for cat in Category:
        e = get_enum_by_cat(cat)
        sfm_idx = fm[..., cat.value]
        # One-hot encode this category's feature index along the last dim.
        sfm = get_zeros(bs, ml, len(e), cpu=True)
        sfm = sfm.scatter(2, sfm_idx.rename(None).unsqueeze(dim=-1), 1.0)
        sfms[cat] = sfm.refine_names('batch', 'length', f'{cat.name}_feat')
    if has_gpus():
        self.dense_feat_matrix = {k: v.cuda() for k, v in sfms.items()}
    else:
        self.dense_feat_matrix = sfms
def convert_to_dense(feat_matrix: LT) -> DenseFeatureMatrix:
    """Convert an index-valued feature matrix into per-category one-hot tensors.

    Returns a dict mapping each category to a (batch, length, num_features)
    one-hot tensor; tensors are moved to GPU when one is available.
    """
    batch_size = feat_matrix.size('batch')
    max_len = feat_matrix.size('length')
    # Translate global feature ids into per-category feature indices,
    # restoring the named dimensions afterwards.
    fm = _g2f[feat_matrix.rename(None)].refine_names(*feat_matrix.names)
    dfms = dict()
    for cat in Category:
        e = get_enum_by_cat(cat)
        idx = fm[..., cat.value].rename(None).unsqueeze(dim=-1)
        one_hot = get_zeros(batch_size, max_len, len(e), cpu=True).scatter(2, idx, 1.0)
        dfms[cat] = one_hot.refine_names('batch', 'length', f'{cat.name}_feat')
    if has_gpus():
        dfms = {k: v.cuda() for k, v in dfms.items()}
    return dfms
def predict(self, batch, k=-1) -> Dict[str, Tuple[FT, LT, np.ndarray]]:
    """Predict the top K results for each feature group.

    If k == -1, then everything would be sorted and returned, otherwise take
    the topk.

    Returns a dict keyed by the *lowercase category name* (a str), mapping to
    a tuple of (top log-probs, top indices, top feature enum members as an
    ndarray shaped like the indices).

    Fix: the return annotation previously claimed `Dict[Cat, ...]`, but the
    keys inserted are `cat.name.lower()` strings.
    """
    ret = dict()
    distr = self(batch)
    for cat, log_probs in distr.items():
        e = get_enum_by_cat(cat)
        name = cat.name.lower()
        # Named-tensor size lookup: the distribution's last dim carries the
        # lowercase category name.
        max_k = log_probs.size(name)
        this_k = max_k if k == -1 else min(max_k, k)
        top_values, top_indices = log_probs.topk(this_k, dim=-1)
        # Map each predicted index back to its feature enum member.
        top_cats = np.asarray([
            e.get(i) for i in top_indices.view(-1).cpu().numpy()
        ]).reshape(*top_indices.shape)
        ret[name] = (top_values, top_indices, top_cats)
    return ret
def __init__(self, feat_groups, lm_model_path):
    """Load a pretrained LM checkpoint, remap its flat feature embeddings
    onto the dense per-category embedding tables, freeze the pretrained
    encoder/predictor, and attach a fresh AdaptLayer."""
    super().__init__()
    saved_dict = torch.load(lm_model_path)
    try:
        self.load_state_dict(saved_dict['model'])
    except RuntimeError as e:
        # Layout mismatch is expected here; the embeddings are remapped below.
        logging.error(str(e))
    # NOTE(j_luo) We have to map normal feature embeddings to dense feature embeddings.
    old_weights = saved_dict['model']['encoder.feat_embedding.embed_layer.weight']
    for cat in Category:
        try:
            target = self.encoder.feat_embedding.embed_layer[cat.name]
            row_ids = [feat.value.g_idx for feat in get_enum_by_cat(cat)]
            target.data.copy_(old_weights[row_ids])
        except KeyError:
            # This category has no embedding table in the model — skip it.
            pass
    freeze(self.encoder)
    freeze(self.predictor)
    self.adapter = AdaptLayer(feat_groups)