Exemple #1
0
 def predict_state(state_logit, conv_lens):
     """Threshold state logits at 0.5 and pack the result as a doubly stacked tensor.

     Entries of ``state_logit`` that are +inf or -inf are treated as padding:
     they are zeroed before the sigmoid and zeroed again afterwards, so they
     can never pass the ``> 0.5`` threshold.

     Args:
         state_logit: 3-D tensor unpacked as
             (batch_size, max_conv_len, num_asv).
         conv_lens: passed through unchanged as ``lens`` of the result.

     Returns:
         A ``utils.DoublyStacked1DTensor`` built from the sparse form of the
         thresholded predictions.
     """
     batch_size, max_conv_len, num_asv = state_logit.size()

     # Positions holding an infinite logit (either sign) are masked out.
     is_neg_inf = state_logit == float("-inf")
     is_pos_inf = state_logit == float("inf")
     infinite = is_neg_inf | is_pos_inf

     finite_logit = state_logit.masked_fill(infinite, 0)
     prob = torch.sigmoid(finite_logit).masked_fill(infinite, 0)
     active = prob > 0.5

     # Flatten batch and conversation axes before converting to sparse form.
     sparse = utils.to_sparse(active.view(-1, num_asv))
     stacked_value = sparse.value.view(batch_size, max_conv_len, -1)
     stacked_lens = sparse.lens.view(batch_size, max_conv_len)
     return utils.DoublyStacked1DTensor(
         value=stacked_value,
         lens=conv_lens,
         lens1=stacked_lens)
Exemple #2
0
def test_turn_state_encoder_decoder():
    """Check that a state encoder/decoder pair can memorise one random batch.

    Builds a ``GenericStateEncoder`` and ``GenericStateDecoder`` over the turn
    slot-value vocabularies of a dummy dataset, trains both for 100 Adam steps
    with an element-wise BCE-with-logits loss, and asserts that the
    thresholded decoder output equals the input labels.
    """
    dataset = create_dummy_dataset()
    vocabs = list(dataset.vocabs.turn.slot_values.values())
    encoder = GenericStateEncoder(
        vocabs=vocabs,
        output_dim=100,
        label_encoder=functools.partial(
            EmbeddingLabelEncoder
        ),
        label_layer=feedforward.MultiLayerFeedForward,
        label_pooling=pooling.SumPooling,
        state_pooling=pooling.MaxPooling,
        output_layer=feedforward.MultiLayerFeedForward
    )
    decoder = GenericStateDecoder(
        input_dim=100,
        vocabs=vocabs,
        input_layer=feedforward.MultiLayerFeedForward,
        output_layer=feedforward.MultiLayerFeedForward,
        label_emb=EmbeddingLabelEncoder
    )
    encoder.reset_parameters()
    decoder.reset_parameters()
    encoder.train(), decoder.train()
    # Optimise every trainable parameter of both modules with a single Adam.
    params = [p for p in encoder.parameters() if p.requires_grad]
    params += [p for p in decoder.parameters() if p.requires_grad]
    optimizer = op.Adam(params)
    # reduction="none" keeps per-element losses so padding can be zeroed below.
    bce = nn.BCEWithLogitsLoss(reduction="none")
    vocab_lens = torch.LongTensor(list(map(len, vocabs)))
    # Random 0/1 labels of shape (4, number of vocabs, largest vocab size).
    x_sparse = torch.randint(0, 2, (4, len(vocab_lens), max(vocab_lens))).byte()
    # Presumably utils.mask(vocab_lens) is True on valid positions, so this
    # clears the entries beyond each vocabulary's length -- TODO confirm.
    x_sparse = x_sparse.masked_fill(~utils.mask(vocab_lens), 0)
    x, lens = utils.to_sparse(x_sparse)
    # NOTE(review): x_sparse is a byte tensor here; the effect of filling it
    # with -1 depends on the PyTorch version -- confirm on the targeted one.
    x_sparse = x_sparse.masked_fill(~utils.mask(vocab_lens), -1)
    # NOTE(review): this overwrites the `lens` returned by to_sparse with
    # random values in [1, 3] -- confirm this is intended.
    lens = torch.randint(0, 3, (4, len(encoder.vocabs))) + 1
    for i in range(100):
        logits = decoder(encoder(x, lens))
        loss = bce(logits, x_sparse.float())
        # Zero the loss on masked positions before summing.
        loss = loss.masked_fill(~utils.mask(vocab_lens), 0).sum()
        print(i, loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    encoder.eval(), decoder.eval()
    logits = decoder(encoder(x, lens))
    x_pred = torch.sigmoid(logits) > 0.5
    # Apply the same -1 fill as the target so masked positions are meant to
    # compare equal. NOTE(review): depends on x_pred's dtype matching
    # x_sparse's fill behaviour -- confirm on the targeted PyTorch version.
    x_pred = x_pred.masked_fill(~utils.mask(vocab_lens), -1)
    assert (x_pred == x_sparse).all().item()
Exemple #3
0
def train():
    """Fit the recommender model, persist its artefacts and return metrics.

    Steps, in order:
      1. Read the CSV at ``training_data`` and convert it with ``to_sparse``.
      2. Write the interactions table to ``interactions`` (CSV) and the
         user/item dictionaries to ``user_dict`` / ``item_dict`` (JSON).
      3. Fit the matrix-factorisation model with ``fit_mf_model``.
      4. Compute precision@3, recall@3 and AUC on ``piv`` as rounded
         percentages.
      5. Pickle the model to ``model_directory + "/recomender.pkl"``.

    ``training_data``, ``interactions``, ``user_dict``, ``item_dict``,
    ``model_directory``, ``pd``, ``json``, ``pickle``, ``time`` and
    ``jsonify`` are names defined outside this function.

    Returns:
        ``jsonify(training_metrics)`` where ``training_metrics`` has the keys
        ``"precision_at_3"``, ``"recall_at_3"`` and ``"auc_score"``.
    """
    start = time.time()

    # Function-scope imports, kept exactly as the original had them.
    from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
    from utils import clean_data, to_sparse, create_user_dict, create_item_dict, fit_mf_model
    from utils import items_to_user, items_to_item, create_item_emdedding_distance_matrix, users_to_item
    print('Modules loaded...')
    training_metrics = {}

    data = pd.read_csv(training_data)
    piv, cols, interactions_ = to_sparse(data)
    interactions_.to_csv(interactions, index=True)

    user_dict_ = create_user_dict(interactions=interactions_)
    item_dict_ = create_item_dict(df=data, id_col='StockCode', name_col='Description')

    with open(user_dict, 'w') as json_file:
        json.dump(user_dict_, json_file)
    with open(item_dict, 'w') as json_file:
        json.dump(item_dict_, json_file)

    print('Data preparations ready...')
    mf_model = fit_mf_model(interactions=interactions_,
                            n_components=140,
                            loss='warp',
                            epoch=10,
                            n_jobs=6)
    print('Model fit...')

    # Each metric is the mean over the returned per-row scores, expressed as
    # a rounded percentage.
    training_metrics["precision_at_3"] = round(precision_at_k(mf_model, piv, k=3).mean() * 100)
    training_metrics["recall_at_3"] = round(recall_at_k(mf_model, piv, k=3).mean() * 100)
    training_metrics["auc_score"] = round(auc_score(mf_model, piv).mean() * 100)

    # Use a context manager so the model file is flushed and closed even if
    # pickling raises; the original left the handle open.
    model_path = model_directory + "/" + "recomender.pkl"
    with open(model_path, "wb") as model_file:
        pickle.dump(mf_model, model_file)
    print('Model trained & serialized in %.1f seconds' % (time.time() - start))

    return jsonify(training_metrics)
Exemple #4
0
def e_step(votes_ij, activations_j, mean_j, stdv_j, var_j, spatial_routing_matrix):
  """The e-step in EM routing between input capsules (i) and output capsules (j).

  Update the assignment weights used in routing. The output capsules (j)
  compete for the input capsules (i).
  See Hinton et al. "Matrix Capsules with EM Routing" for a detailed
  description of the e-step.

  Author:
    Ashley Gritzman 19/10/2018

  Args:
    votes_ij:
      votes from capsules in layer i to capsules in layer j
      For conv layer:
        (N, OH, OW, kh*kw*i, o, 4x4)
        (64, 6, 6, 9*8, 32, 16)
      For FC layer:
        The kernel dimensions are equal to the spatial dimensions of the input
        layer i, and the spatial dimensions of the output layer j are 1x1.
        (N, 1, 1, child_space*child_space*i, output_classes, 4x4)
        (64, 1, 1, 4*4*16, 5, 16)
    activations_j:
      activations of capsules in layer j (L+1)
      (N, OH, OW, 1, o, 1)
      (64, 6, 6, 1, 32, 1)
    mean_j:
      mean of each channel in capsules of layer j (L+1)
      (N, OH, OW, 1, o, n_channels)
      (24, 6, 6, 1, 32, 16)
    stdv_j:
      standard deviation of each channel in capsules of layer j (L+1)
      (N, OH, OW, 1, o, n_channels)
      (24, 6, 6, 1, 32, 16)
      Not read by this function; the computation uses var_j instead.
    var_j:
      variance of each channel in capsules of layer j (L+1)
      (N, OH, OW, 1, o, n_channels)
      (24, 6, 6, 1, 32, 16)
    spatial_routing_matrix:
      2-D array indexed as [:, 0] below; the sum of its first column is used
      as kk, the number of child positions per parent position. Presumably a
      binary child-to-parent connectivity matrix -- TODO confirm. It is also
      passed to utl.to_sparse, utl.softmax_across_parents and utl.to_dense.

  Returns:
    rr:
      assignment weights between capsules in layer i and layer j
      (N, OH, OW, kh*kw*i, o, 1)
      (64, 6, 6, 9*8, 32, 1)
  """

  with tf.variable_scope("e_step") as scope:

    # AG 26/06/2018: changed stdv_j to var_j
    # o_p_unit0 + o_p_unit2 is the log of a diagonal Gaussian density of the
    # votes under (mean_j, var_j), summed over the last axis:
    #   -sum((v - mu)^2 / (2 * var)) - 0.5 * sum(log(2 * pi * var))
    o_p_unit0 = - tf.reduce_sum(
      tf.square(votes_ij - mean_j, name="num") / (2 * var_j), 
      axis=-1, 
      keepdims=True, 
      name="o_p_unit0")

    o_p_unit2 = - 0.5 * tf.reduce_sum(
      tf.log(2*np.pi * var_j), 
      axis=-1, 
      keepdims=True, 
      name="o_p_unit2"
    )

    # (24, 6, 6, 288, 32, 1)
    o_p = o_p_unit0 + o_p_unit2
    # Log of (activation * density); FLAGS.epsilon guards against log(0).
    zz = tf.log(activations_j + FLAGS.epsilon) + o_p

    # AG 13/11/2018: New implementation of normalising across parents
    #----- Start -----#
    # Static shape is required here: the values are used as Python ints in
    # the reshapes below.
    zz_shape = zz.get_shape().as_list()
    batch_size = zz_shape[0]
    parent_space = zz_shape[1]
    kh_kw_i = zz_shape[3]
    parent_caps = zz_shape[4]
    # kk: sum of the first column of the routing matrix (see Args).
    kk = int(np.sum(spatial_routing_matrix[:,0]))
    child_caps = int(kh_kw_i / kk)

    # Split the fused kh*kw*i axis into (kk, child_caps); the trailing
    # singleton axis is dropped. Assumes OH == OW (parent_space is used for
    # both spatial axes).
    zz = tf.reshape(zz, [batch_size, parent_space, parent_space, kk, 
                         child_caps, parent_caps])

    # The string literal below is a disabled alternative implementation
    # (normalising in un-log space); it has no effect at runtime.
    """
    # In un-log space
    with tf.variable_scope("to_sparse_unlog") as scope:
      zz_unlog = tf.exp(zz)
      #zz_sparse_unlog = utl.to_sparse(zz_unlog, spatial_routing_matrix, 
      # sparse_filler=1e-15)
      zz_sparse_unlog = utl.to_sparse(
          zz_unlog, 
          spatial_routing_matrix, 
          sparse_filler=0.0)
      # maybe this value should be even lower 1e-15
      zz_sparse_log = tf.log(zz_sparse_unlog + 1e-15) 
      zz_sparse = zz_sparse_log
    """


    # In log space
    with tf.variable_scope("to_sparse_log") as scope:
      # Fill the sparse matrix with the smallest value in zz (at least -100)
      sparse_filler = tf.minimum(tf.reduce_min(zz), -100)
#       sparse_filler = -100
      zz_sparse = utl.to_sparse(
          zz, 
          spatial_routing_matrix, 
          sparse_filler=sparse_filler)


    with tf.variable_scope("softmax_across_parents") as scope:
      rr_sparse = utl.softmax_across_parents(zz_sparse, spatial_routing_matrix)

    with tf.variable_scope("to_dense") as scope:
      rr_dense = utl.to_dense(rr_sparse, spatial_routing_matrix)

    # Restore the fused kh*kw*i axis and the trailing singleton axis.
    rr = tf.reshape(
        rr_dense, 
        [batch_size, parent_space, parent_space, kh_kw_i, parent_caps, 1])
    #----- End -----#

    # AG 02/11/2018
    # In response to a question on OpenReview, Hinton et al. wrote the 
    # following:
    # "The gradient flows through EM algorithm. We do not use stop gradient. A 
    # routing of 3 is like a 3 layer network where the weights of layers are 
    # shared."
    # https://openreview.net/forum?id=HJWLfGWRb&noteId=S1eo2P1I3Q

    return rr
Exemple #5
0
def test_dense_sparse():
    """Round-trip a random 0/1 byte tensor through to_sparse and to_dense."""
    original = torch.randint(0, 2, (3, 4, 5)).byte()
    sparse_parts = utils.to_sparse(original)
    restored = utils.to_dense(*sparse_parts)
    # The dense -> sparse -> dense round trip must reproduce every element.
    assert (original == restored).all()
    # NOTE(review): the lines from here on use Python 2 print statements and
    # names (ARG_do_bounce, ARG_type, seq, fb_seq, qid, seq_len) that are not
    # defined in the visible code; this looks like a fragment of a different
    # function whose header is missing -- confirm before relying on it.

    # Convert the string flag into a boolean.
    if ARG_do_bounce == 'true':
        do_bounce = True
    else:
        do_bounce = False

    if ARG_type == 'seq':
        # One output line per sequence; fields are joined with ';':
        # qid; seq_len; ratings; fb_seq[*][0]; fb_seq[*][1]; vectors.
        ratings_str = ','.join([str(o.rating) for o in seq])
        # Each vector is ':'-joined; vectors are ','-joined.
        vec_str = ','.join([':'.join(str(s) for s in o.vec) for o in seq])
        ret = []
        ret.append(qid)
        ret.append(str(seq_len))
        ret.append(ratings_str)
        ret.append(','.join([str(o[0]) for o in fb_seq]))
        ret.append(','.join([str(o[1]) for o in fb_seq]))
        ret.append(vec_str)
        print ';'.join(ret)

    elif ARG_type == 'single':
        # One space-separated output line per element of seq.
        for index, o in enumerate(seq):
            ret = []
            ret.append(str(fb_seq[index][0]))
            ret.append(qid)
            # Presumably utils.to_sparse returns a string here, since the
            # result is passed to ' '.join -- TODO confirm.
            ret.append(utils.to_sparse(o.vec))
            ret.append('#')
            ret.append('rating=' + str(o.rating))
            print ' '.join(ret)

            # With bouncing enabled, stop after the first element whose
            # second feedback field equals 1.
            if do_bounce and fb_seq[index][1] == 1:
                break
Exemple #7
0
    parser.add_argument('--dropout', type=float, default=0.15)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--infusion', type=str, default='inner')
    parser.add_argument('--dataset', type=str, default='cora')
    # --sparse / --no-sparse write the same destination; the default is True.
    parser.add_argument('--sparse', dest='sparse', action='store_true')
    parser.add_argument('--no-sparse', dest='sparse', action='store_false')
    parser.set_defaults(sparse=True)
    args = parser.parse_args()

    # Load data
    adj_1, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset)
    # 3rd and 5th matrix powers of the adjacency matrix.
    adj_3 = onp.linalg.matrix_power(adj_1, 3)
    adj_5 = onp.linalg.matrix_power(adj_1, 5)

    if args.sparse:
        adj_1 = to_sparse(adj_1) # custom format
        adj_3 = to_sparse(adj_3) # custom format
        adj_5 = to_sparse(adj_5) # custom format
    
    # NOTE(review): only adj_3 is placed in `adj` (twice); adj_1 and adj_5 are
    # not used in the lines visible here -- confirm whether that is intended.
    adj = (adj_3, adj_3) # the k-hop adj used in each layer

    # args.seed, args.hidden and args.epochs are read below; their
    # add_argument calls are not in the visible lines.
    rng_key = random.PRNGKey(args.seed)
    dropout = args.dropout
    step_size = args.lr
    hidden = args.hidden
    num_epochs = args.epochs
    n_nodes = features.shape[0]
    n_feats = features.shape[1]
    infusion = args.infusion

    init_fun, predict_fun = GHNet(nhid=hidden, 
Exemple #8
0
if __name__ == "__main__":

    # One dilation and one spiral sequence length per index used below.
    dilation = [1, 1, 1, 1]
    seq_length = [9, 9, 9, 9]

    transform_fp = os.path.join("data", "CoMA", "transform.pkl")
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load transform.pkl from a trusted source.
    with open(transform_fp, 'rb') as f:
        tmp = pickle.load(f, encoding='latin1')

    # `device` is defined outside the visible lines.
    # The range stops one short of len(tmp['face']), so the last 'face' entry
    # gets no spiral indices.
    spiral_indices_list = [
        utils.preprocess_spiral(tmp['face'][idx], seq_length[idx],
                                tmp['vertices'][idx], dilation[idx]).to(device)
        for idx in range(len(tmp['face']) - 1)
    ]
    # Convert every down/up transform with utils.to_sparse and move it to
    # `device`.
    down_transform_list = [
        utils.to_sparse(down_transform).to(device)
        for down_transform in tmp['down_transform']
    ]
    up_transform_list = [
        utils.to_sparse(up_transform).to(device)
        for up_transform in tmp['up_transform']
    ]

    meshdata = MeshData("data/CoMA",
                        "data/CoMA/template/template.obj",
                        split="interpolation",
                        test_exp="bareteeth")

    # Statistics exposed by MeshData; presumably used for normalisation later
    # in the script -- not visible here.
    mean = meshdata.mean
    std = meshdata.std