def predict_state(state_logit, conv_lens):
    batch_size, max_conv_len, num_asv = state_logit.size()
    # Mask out positions whose logits are +/-inf: zero them before the sigmoid
    # and again afterwards so they can never be predicted as active.
    mask = (state_logit == float("-inf")) | (state_logit == float("inf"))
    pred = torch.sigmoid(state_logit.masked_fill(mask, 0)).masked_fill(mask, 0) > 0.5
    pred = utils.to_sparse(pred.view(-1, num_asv))
    return utils.DoublyStacked1DTensor(
        value=pred.value.view(batch_size, max_conv_len, -1),
        lens=conv_lens,
        lens1=pred.lens.view(batch_size, max_conv_len)
    )

def test_turn_state_encoder_decoder():
    dataset = create_dummy_dataset()
    vocabs = list(dataset.vocabs.turn.slot_values.values())
    encoder = GenericStateEncoder(
        vocabs=vocabs,
        output_dim=100,
        label_encoder=functools.partial(EmbeddingLabelEncoder),
        label_layer=feedforward.MultiLayerFeedForward,
        label_pooling=pooling.SumPooling,
        state_pooling=pooling.MaxPooling,
        output_layer=feedforward.MultiLayerFeedForward
    )
    decoder = GenericStateDecoder(
        input_dim=100,
        vocabs=vocabs,
        input_layer=feedforward.MultiLayerFeedForward,
        output_layer=feedforward.MultiLayerFeedForward,
        label_emb=EmbeddingLabelEncoder
    )
    encoder.reset_parameters()
    decoder.reset_parameters()
    encoder.train(), decoder.train()
    params = [p for p in encoder.parameters() if p.requires_grad]
    params += [p for p in decoder.parameters() if p.requires_grad]
    optimizer = op.Adam(params)
    bce = nn.BCEWithLogitsLoss(reduction="none")
    vocab_lens = torch.LongTensor(list(map(len, vocabs)))
    x_sparse = torch.randint(0, 2, (4, len(vocab_lens), max(vocab_lens))).byte()
    x_sparse = x_sparse.masked_fill(~utils.mask(vocab_lens), 0)
    x, lens = utils.to_sparse(x_sparse)
    x_sparse = x_sparse.masked_fill(~utils.mask(vocab_lens), -1)
    lens = torch.randint(0, 3, (4, len(encoder.vocabs))) + 1
    for i in range(100):
        logits = decoder(encoder(x, lens))
        loss = bce(logits, x_sparse.float())
        loss = loss.masked_fill(~utils.mask(vocab_lens), 0).sum()
        print(i, loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    encoder.eval(), decoder.eval()
    logits = decoder(encoder(x, lens))
    x_pred = torch.sigmoid(logits) > 0.5
    x_pred = x_pred.masked_fill(~utils.mask(vocab_lens), -1)
    assert (x_pred == x_sparse).all().item()

def train():
    start = time.time()
    from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
    from utils import clean_data, to_sparse, create_user_dict, create_item_dict, fit_mf_model
    from utils import items_to_user, items_to_item, create_item_emdedding_distance_matrix, users_to_item
    print('Modules loaded...')

    training_metrics = {}

    # Build the interaction matrix and the user/item lookup dictionaries.
    data = pd.read_csv(training_data)
    piv, cols, interactions_ = to_sparse(data)
    interactions_.to_csv(interactions, index=True)
    user_dict_ = create_user_dict(interactions=interactions_)
    item_dict_ = create_item_dict(df=data, id_col='StockCode', name_col='Description')
    with open(user_dict, 'w') as json_file:
        json.dump(user_dict_, json_file)
    with open(item_dict, 'w') as json_file:
        json.dump(item_dict_, json_file)
    print('Data preparations ready...')

    # Fit the LightFM matrix-factorisation model.
    mf_model = fit_mf_model(interactions=interactions_,
                            n_components=140,
                            loss='warp',
                            epoch=10,
                            n_jobs=6)
    print('Model fit...')

    # Evaluation metrics, reported as percentages.
    training_metrics["precision_at_3"] = round(precision_at_k(mf_model, piv, k=3).mean() * 100)
    training_metrics["recall_at_3"] = round(recall_at_k(mf_model, piv, k=3).mean() * 100)
    training_metrics["auc_score"] = round(auc_score(mf_model, piv).mean() * 100)

    pickle.dump(mf_model, open(str(model_directory + "/" + "recomender.pkl"), "wb"))
    print('Model trained & serialized in %.1f seconds' % (time.time() - start))
    return jsonify(training_metrics)

def e_step(votes_ij, activations_j, mean_j, stdv_j, var_j, spatial_routing_matrix):
    """The e-step in EM routing between input capsules (i) and output capsules (j).

    Update the assignment weights used in routing. The output capsules (j)
    compete for the input capsules (i).
    See Hinton et al. "Matrix Capsules with EM Routing" for a detailed
    description of the e-step.

    Author:
      Ashley Gritzman 19/10/2018

    Args:
      votes_ij:
        votes from capsules in layer i to capsules in layer j
        For conv layer:
          (N, OH, OW, kh*kw*i, o, 4x4)
          (64, 6, 6, 9*8, 32, 16)
        For FC layer:
          The kernel dimensions are equal to the spatial dimensions of the
          input layer i, and the spatial dimensions of the output layer j
          are 1x1.
          (N, 1, 1, child_space*child_space*i, output_classes, 4x4)
          (64, 1, 1, 4*4*16, 5, 16)
      activations_j:
        activations of capsules in layer j (L+1)
        (N, OH, OW, 1, o, 1)
        (64, 6, 6, 1, 32, 1)
      mean_j:
        mean of each channel in capsules of layer j (L+1)
        (N, OH, OW, 1, o, n_channels)
        (24, 6, 6, 1, 32, 16)
      stdv_j:
        standard deviation of each channel in capsules of layer j (L+1)
        (N, OH, OW, 1, o, n_channels)
        (24, 6, 6, 1, 32, 16)
      var_j:
        variance of each channel in capsules of layer j (L+1)
        (N, OH, OW, 1, o, n_channels)
        (24, 6, 6, 1, 32, 16)
      spatial_routing_matrix: ???

    Returns:
      rr:
        assignment weights between capsules in layer i and layer j
        (N, OH, OW, kh*kw*i, o, 1)
        (64, 6, 6, 9*8, 32, 1)
    """

    with tf.variable_scope("e_step") as scope:

        # AG 26/06/2018: changed stdv_j to var_j
        o_p_unit0 = - tf.reduce_sum(
            tf.square(votes_ij - mean_j, name="num") / (2 * var_j),
            axis=-1,
            keepdims=True,
            name="o_p_unit0")

        o_p_unit2 = - 0.5 * tf.reduce_sum(
            tf.log(2 * np.pi * var_j),
            axis=-1,
            keepdims=True,
            name="o_p_unit2")

        # (24, 6, 6, 288, 32, 1)
        o_p = o_p_unit0 + o_p_unit2

        zz = tf.log(activations_j + FLAGS.epsilon) + o_p

        # AG 13/11/2018: New implementation of normalising across parents
        # ----- Start -----#
        zz_shape = zz.get_shape().as_list()
        batch_size = zz_shape[0]
        parent_space = zz_shape[1]
        kh_kw_i = zz_shape[3]
        parent_caps = zz_shape[4]
        kk = int(np.sum(spatial_routing_matrix[:, 0]))
        child_caps = int(kh_kw_i / kk)

        zz = tf.reshape(zz, [batch_size, parent_space, parent_space, kk,
                             child_caps, parent_caps])

        """
        # In un-log space
        with tf.variable_scope("to_sparse_unlog") as scope:
            zz_unlog = tf.exp(zz)
            #zz_sparse_unlog = utl.to_sparse(zz_unlog, spatial_routing_matrix,
            #                                sparse_filler=1e-15)
            zz_sparse_unlog = utl.to_sparse(
                zz_unlog,
                spatial_routing_matrix,
                sparse_filler=0.0)
            # maybe this value should be even lower 1e-15
            zz_sparse_log = tf.log(zz_sparse_unlog + 1e-15)
            zz_sparse = zz_sparse_log
        """

        # In log space
        with tf.variable_scope("to_sparse_log") as scope:
            # Fill the sparse matrix with the smallest value in zz (at least -100)
            sparse_filler = tf.minimum(tf.reduce_min(zz), -100)
            # sparse_filler = -100
            zz_sparse = utl.to_sparse(
                zz,
                spatial_routing_matrix,
                sparse_filler=sparse_filler)

        with tf.variable_scope("softmax_across_parents") as scope:
            rr_sparse = utl.softmax_across_parents(zz_sparse, spatial_routing_matrix)

        with tf.variable_scope("to_dense") as scope:
            rr_dense = utl.to_dense(rr_sparse, spatial_routing_matrix)

        rr = tf.reshape(
            rr_dense,
            [batch_size, parent_space, parent_space, kh_kw_i, parent_caps, 1])
        # ----- End -----#

        # AG 02/11/2018
        # In response to a question on OpenReview, Hinton et al. wrote the
        # following:
        # "The gradient flows through EM algorithm. We do not use stop gradient. A
        #  routing of 3 is like a 3 layer network where the weights of layers are
        #  shared."
        # https://openreview.net/forum?id=HJWLfGWRb&noteId=S1eo2P1I3Q

    return rr

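# Hedged sketch, not part of the snippet above: the same e-step arithmetic in
# plain NumPy for a single spatial position, to make the TF graph readable.
# Shapes and the helper name are assumptions for illustration only; the real
# code also normalises across spatial parents via spatial_routing_matrix,
# which this sketch ignores.
import numpy as np

def e_step_sketch(votes, mean, var, activations, eps=1e-9):
    # votes: (i, o, c), mean/var: (1, o, c), activations: (1, o, 1)
    # Log-probability of each vote under each parent's diagonal Gaussian,
    # summed over the c pose channels.
    log_p = (-np.sum((votes - mean) ** 2 / (2 * var), axis=-1, keepdims=True)
             - 0.5 * np.sum(np.log(2 * np.pi * var), axis=-1, keepdims=True))
    zz = np.log(activations + eps) + log_p              # (i, o, 1)
    zz = zz - zz.max(axis=1, keepdims=True)              # stabilised softmax over o
    rr = np.exp(zz) / np.exp(zz).sum(axis=1, keepdims=True)
    return rr                                            # each child's weights sum to 1

votes = np.random.randn(72, 32, 16)
mean, var = np.random.randn(1, 32, 16), np.random.rand(1, 32, 16) + 0.1
act = np.random.rand(1, 32, 1)
assert np.allclose(e_step_sketch(votes, mean, var, act).sum(axis=1), 1.0)
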
def test_dense_sparse():
    x = torch.randint(0, 2, (3, 4, 5)).byte()
    y = utils.to_dense(*utils.to_sparse(x))
    assert (x == y).all()

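# Hedged sketch, not part of the test above: one way the dense <-> sparse
# round trip asserted by test_dense_sparse could work, for 2-D inputs only.
# The real utils.to_sparse/to_dense are not shown in this listing, so the
# packing scheme below (per-row nonzero column indices plus per-row lengths)
# and the function names are assumptions for illustration.
import torch

def to_sparse_sketch(x):
    lens = x.sum(dim=-1)                                    # nonzeros per row
    value = torch.zeros(x.size(0), int(lens.max()), dtype=torch.long)
    for i, row in enumerate(x):
        idx = row.nonzero(as_tuple=False).squeeze(-1)
        value[i, :idx.numel()] = idx
    return value, lens

def to_dense_sketch(value, lens, size):
    out = torch.zeros(value.size(0), size, dtype=torch.uint8)
    for i, n in enumerate(lens.tolist()):
        out[i, value[i, :n]] = 1
    return out

x = torch.randint(0, 2, (3, 5)).byte()
value, lens = to_sparse_sketch(x)
assert (to_dense_sketch(value, lens, x.size(-1)) == x).all()
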
do_bounce = ARG_do_bounce == 'true'

if ARG_type == 'seq':
    # Emit one semicolon-separated record for the whole sequence.
    ratings_str = ','.join([str(o.rating) for o in seq])
    vec_str = ','.join([':'.join(str(s) for s in o.vec) for o in seq])
    ret = []
    ret.append(qid)
    ret.append(str(seq_len))
    ret.append(ratings_str)
    ret.append(','.join([str(o[0]) for o in fb_seq]))
    ret.append(','.join([str(o[1]) for o in fb_seq]))
    ret.append(vec_str)
    print(';'.join(ret))
elif ARG_type == 'single':
    # Emit one record per item; stop early when bouncing is enabled and the
    # item's feedback flag is set.
    for index, o in enumerate(seq):
        ret = []
        ret.append(str(fb_seq[index][0]))
        ret.append(qid)
        ret.append(utils.to_sparse(o.vec))
        ret.append('#')
        ret.append('rating=' + str(o.rating))
        print(' '.join(ret))
        if do_bounce and fb_seq[index][1] == 1:
            break

parser.add_argument('--dropout', type=float, default=0.15)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--infusion', type=str, default='inner')
parser.add_argument('--dataset', type=str, default='cora')
parser.add_argument('--sparse', dest='sparse', action='store_true')
parser.add_argument('--no-sparse', dest='sparse', action='store_false')
parser.set_defaults(sparse=True)
args = parser.parse_args()

# Load data
adj_1, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset)
adj_3 = onp.linalg.matrix_power(adj_1, 3)
adj_5 = onp.linalg.matrix_power(adj_1, 5)

if args.sparse:
    adj_1 = to_sparse(adj_1)  # custom format
    adj_3 = to_sparse(adj_3)  # custom format
    adj_5 = to_sparse(adj_5)  # custom format

adj = (adj_3, adj_3)  # the k-hop adj used in each layer

rng_key = random.PRNGKey(args.seed)
dropout = args.dropout
step_size = args.lr
hidden = args.hidden
num_epochs = args.epochs
n_nodes = features.shape[0]
n_feats = features.shape[1]
infusion = args.infusion

init_fun, predict_fun = GHNet(nhid=hidden,

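# Hedged aside, not part of the script above: why matrix_power gives "k-hop"
# adjacencies. Entry (i, j) of A**k counts walks of length k from node i to
# node j, so nonzero entries mark nodes reachable in exactly k hops. The tiny
# triangle graph below is an assumed example for illustration only.
import numpy as np

A = np.array([[0, 1, 1],
              [1, 0, 1],
              [1, 1, 0]])
print(np.linalg.matrix_power(A, 3))   # each entry counts length-3 walks
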
if __name__ == "__main__":
    dilation = [1, 1, 1, 1]
    seq_length = [9, 9, 9, 9]
    transform_fp = os.path.join("data", "CoMA", "transform.pkl")
    with open(transform_fp, 'rb') as f:
        tmp = pickle.load(f, encoding='latin1')

    spiral_indices_list = [
        utils.preprocess_spiral(tmp['face'][idx], seq_length[idx],
                                tmp['vertices'][idx],
                                dilation[idx]).to(device)
        for idx in range(len(tmp['face']) - 1)
    ]
    down_transform_list = [
        utils.to_sparse(down_transform).to(device)
        for down_transform in tmp['down_transform']
    ]
    up_transform_list = [
        utils.to_sparse(up_transform).to(device)
        for up_transform in tmp['up_transform']
    ]

    meshdata = MeshData("data/CoMA",
                        "data/CoMA/template/template.obj",
                        split="interpolation",
                        test_exp="bareteeth")
    mean = meshdata.mean
    std = meshdata.std