def build_rnn_model(vocab_size, embed_dim, rnn_size, num_layers, dropout_p, bidirectional):
    # Build encoder
    src_embeddings = Embeddings(
        word_vec_size=embed_dim,
        word_vocab_size=vocab_size,
        word_padding_idx=0
    )
    encoder = RNNEncoder("GRU", bidirectional, num_layers, rnn_size,
                         dropout=dropout_p, embeddings=src_embeddings)

    # Build the two decoders, each with its own target embeddings
    tgt_embeddings0 = Embeddings(
        word_vec_size=embed_dim,
        word_vocab_size=vocab_size,
        word_padding_idx=0
    )
    decoder0 = StdRNNDecoder("GRU", bidirectional, num_layers, rnn_size,
                             dropout=dropout_p, embeddings=tgt_embeddings0)

    tgt_embeddings1 = Embeddings(
        word_vec_size=embed_dim,
        word_vocab_size=vocab_size,
        word_padding_idx=0
    )
    decoder1 = StdRNNDecoder("GRU", bidirectional, num_layers, rnn_size,
                             dropout=dropout_p, embeddings=tgt_embeddings1)

    return encoder, decoder0, decoder1
def build_trans_model(vocab_size, embed_dim, model_dim, num_layers, num_heads,
                      ff_size, enc_dropout, dec_dropout):
    # Build encoder (note: embeddings are sized to model_dim; embed_dim is unused here)
    src_embeddings = Embeddings(
        word_vec_size=model_dim,
        word_vocab_size=vocab_size,
        word_padding_idx=0,
        position_encoding=True
    )
    encoder = TransformerEncoder(
        num_layers=num_layers,
        d_model=model_dim,
        heads=num_heads,
        d_ff=ff_size,
        dropout=enc_dropout,
        embeddings=src_embeddings
    )

    # Build decoders
    tgt_embeddings0 = Embeddings(
        word_vec_size=model_dim,
        word_vocab_size=vocab_size,
        word_padding_idx=0,
        position_encoding=True
    )
    decoder0 = TransformerDecoder(
        num_layers=num_layers,
        d_model=model_dim,
        heads=num_heads,
        d_ff=ff_size,
        attn_type=None,
        copy_attn=False,
        self_attn_type="scaled-dot",
        dropout=dec_dropout,
        embeddings=tgt_embeddings0
    )

    tgt_embeddings1 = Embeddings(
        word_vec_size=model_dim,
        word_vocab_size=vocab_size,
        word_padding_idx=0,
        position_encoding=True
    )
    decoder1 = TransformerDecoder(
        num_layers=num_layers,
        d_model=model_dim,
        heads=num_heads,
        d_ff=ff_size,
        attn_type=None,
        copy_attn=False,
        self_attn_type="scaled-dot",
        dropout=dec_dropout,
        embeddings=tgt_embeddings1
    )

    return encoder, decoder0, decoder1
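# Hypothetical usage sketch (not part of the sources above): the encoder/decoder
# triple returned by build_trans_model can be paired into two OpenNMT-py NMTModel
# instances that share a single encoder. NMTModel is OpenNMT-py's standard
# encoder-decoder wrapper; the hyperparameter values below are illustrative
# assumptions, not taken from the original code.
from onmt.models import NMTModel

encoder, decoder0, decoder1 = build_trans_model(
    vocab_size=32000, embed_dim=512, model_dim=512, num_layers=6,
    num_heads=8, ff_size=2048, enc_dropout=0.1, dec_dropout=0.1)
model0 = NMTModel(encoder, decoder0)  # first decoding head
model1 = NMTModel(encoder, decoder1)  # second decoding head, same encoder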
def make_embeddings(opt, word_dict, feature_dicts, for_encoder=True):
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    word_padding_idx = word_dict.stoi[onmt.io.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[onmt.io.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=opt.position_encoding,
                      feat_merge=opt.feat_merge,
                      feat_vec_exponent=opt.feat_vec_exponent,
                      feat_vec_size=opt.feat_vec_size,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      feat_padding_idx=feats_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      feat_vocab_sizes=num_feat_embeddings,
                      sparse=opt.optim == "sparseadam")
def build_embeddings(opt, text_field, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        text_field(TextMultiField): word and feats field.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    pad_indices = [f.vocab.stoi[f.pad_token] for _, f in text_field]
    word_padding_idx, feat_pad_indices = pad_indices[0], pad_indices[1:]

    num_embs = [len(f.vocab) for _, f in text_field]
    num_word_embeddings, num_feat_embeddings = num_embs[0], num_embs[1:]

    fix_word_vecs = opt.fix_word_vecs_enc if for_encoder \
        else opt.fix_word_vecs_dec

    emb = Embeddings(word_vec_size=emb_dim,
                     position_encoding=opt.position_encoding,
                     feat_merge=opt.feat_merge,
                     feat_vec_exponent=opt.feat_vec_exponent,
                     feat_vec_size=opt.feat_vec_size,
                     dropout=opt.dropout,
                     word_padding_idx=word_padding_idx,
                     feat_padding_idx=feat_pad_indices,
                     word_vocab_size=num_word_embeddings,
                     feat_vocab_sizes=num_feat_embeddings,
                     sparse=opt.optim == "sparseadam",
                     fix_word_vecs=fix_word_vecs)
    return emb
def build_embeddings(opt, word_dict, feature_dicts, for_encoder=True):
    """
    Build an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    word_padding_idx = word_dict.stoi[inputters.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[inputters.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=opt.position_encoding,
                      feat_merge=opt.feat_merge,
                      feat_vec_exponent=opt.feat_vec_exponent,
                      feat_vec_size=opt.feat_vec_size,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      feat_padding_idx=feats_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      feat_vocab_sizes=num_feat_embeddings,
                      sparse=opt.optim == "sparseadam")
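# Hypothetical call sketch for the builder above (assumptions: a legacy
# torchtext Vocab whose specials include OpenNMT's "<blank>" pad token, which is
# what inputters.PAD_WORD maps to, and an argparse-style opt carrying the
# referenced model options).
from argparse import Namespace
from collections import Counter
from torchtext.vocab import Vocab  # legacy torchtext API

opt = Namespace(src_word_vec_size=512, tgt_word_vec_size=512,
                position_encoding=False, feat_merge="concat",
                feat_vec_exponent=0.7, feat_vec_size=-1,
                dropout=0.3, optim="adam")
word_dict = Vocab(Counter(["hello", "world"]), specials=["<unk>", "<blank>"])
src_emb = build_embeddings(opt, word_dict, feature_dicts=[], for_encoder=True)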
def make_embeddings(opt, word_padding_idx, feats_padding_idx,
                    num_word_embeddings, for_encoder, num_feat_embeddings=[]):
    """
    Make an Embeddings instance.

    Args:
        opt: command-line options.
        word_padding_idx(int): padding index for words in the embeddings.
        feats_padding_idx([int]): padding indices for the list of features
            in the embeddings.
        num_word_embeddings(int): size of dictionary of embedding for words.
        for_encoder(bool): make Embeddings for Encoder or Decoder?
        num_feat_embeddings([int]): list of size of dictionary of embedding
            for each feature.
    """
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size
    return Embeddings(embedding_dim,
                      opt.position_encoding,
                      opt.feat_merge,
                      opt.feat_vec_exponent,
                      opt.feat_vec_size,
                      opt.dropout,
                      word_padding_idx,
                      feats_padding_idx,
                      num_word_embeddings,
                      num_feat_embeddings)
def build_embeddings(opt, word_field, feat_fields, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        word_field(Field): the word field.
        feat_fields([Field]): a list of feature fields.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    word_padding_idx = word_field.vocab.stoi[word_field.pad_token]
    num_word_embeddings = len(word_field.vocab)

    feat_pad_indices = [ff.vocab.stoi[ff.pad_token] for ff in feat_fields]
    num_feat_embeddings = [len(ff.vocab) for ff in feat_fields]

    emb = Embeddings(word_vec_size=emb_dim,
                     position_encoding=opt.position_encoding,
                     feat_merge=opt.feat_merge,
                     feat_vec_exponent=opt.feat_vec_exponent,
                     feat_vec_size=opt.feat_vec_size,
                     dropout=opt.dropout,
                     word_padding_idx=word_padding_idx,
                     feat_padding_idx=feat_pad_indices,
                     word_vocab_size=num_word_embeddings,
                     feat_vocab_sizes=num_feat_embeddings,
                     sparse=opt.optim == "sparseadam")
    return emb
def make_embeddings(opt, word_dict, feature_dicts, for_encoder=True):
    """
    Make an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        for_encoder(bool): make Embeddings for encoder or decoder?
    """
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    word_padding_idx = word_dict.stoi[onmt.io.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[onmt.io.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    return Embeddings(embedding_dim,
                      opt.position_encoding,
                      opt.feat_merge,
                      opt.feat_vec_exponent,
                      opt.feat_vec_size,
                      opt.dropout,
                      word_padding_idx,
                      feats_padding_idx,
                      num_word_embeddings,
                      num_feat_embeddings)
def build_embeddings(opt, text_field, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        text_field(TextMultiField): word and feats field.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    if opt.model_type == 'table' and for_encoder:
        # value field
        field = text_field[0][1]
        word_padding_idx = field.vocab.stoi[field.pad_token]
        word_vocab_size = len(field.vocab)
        # pos field
        field = text_field[1][1]
        feat_padding_idx = field.vocab.stoi[field.pad_token]
        feat_vocab_size = len(field.vocab)

        ent_idx = text_field.base_field.vocab.stoi['<ent>']

        return TableEmbeddings(word_vec_size=emb_dim,
                               word_vocab_size=word_vocab_size,
                               word_padding_idx=word_padding_idx,
                               feat_vec_exponent=opt.feat_vec_exponent,
                               feat_vec_size=opt.feat_vec_size,
                               feat_vocab_size=feat_vocab_size,
                               feat_padding_idx=feat_padding_idx,
                               merge=opt.feat_merge,
                               merge_activation=opt.feat_merge_activation,
                               dropout=opt.dropout[0] if type(opt.dropout) is list
                               else opt.dropout,
                               ent_idx=ent_idx)

    pad_indices = [f.vocab.stoi[f.pad_token] for _, f in text_field]
    word_padding_idx, feat_pad_indices = pad_indices[0], pad_indices[1:]

    num_embs = [len(f.vocab) for _, f in text_field]
    num_word_embeddings, num_feat_embeddings = num_embs[0], num_embs[1:]

    fix_word_vecs = opt.fix_word_vecs_enc if for_encoder \
        else opt.fix_word_vecs_dec

    return Embeddings(
        word_vec_size=emb_dim,
        position_encoding=opt.position_encoding,
        feat_merge=opt.feat_merge,
        feat_vec_exponent=opt.feat_vec_exponent,
        feat_vec_size=opt.feat_vec_size,
        dropout=opt.dropout[0] if type(opt.dropout) is list else opt.dropout,
        word_padding_idx=word_padding_idx,
        feat_padding_idx=feat_pad_indices,
        word_vocab_size=num_word_embeddings,
        feat_vocab_sizes=num_feat_embeddings,
        sparse=opt.optim == "sparseadam",
        fix_word_vecs=fix_word_vecs)
def build_embeddings(opt, text_field, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        text_field(TextMultiField): word and feats field.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    if opt.model_type == "vec" and for_encoder:
        return VecEmbedding(
            opt.feat_vec_size,
            emb_dim,
            position_encoding=opt.position_encoding,
            dropout=(opt.dropout[0] if type(opt.dropout) is list
                     else opt.dropout),
        )

    pad_indices = [f.vocab.stoi[f.pad_token] for _, f in text_field]
    word_padding_idx, feat_pad_indices = pad_indices[0], pad_indices[1:]

    num_embs = [len(f.vocab) for _, f in text_field]
    num_word_embeddings, num_feat_embeddings = num_embs[0], num_embs[1:]

    fix_word_vecs = opt.fix_word_vecs_enc if for_encoder \
        else opt.fix_word_vecs_dec

    # If seg_token_id is None, it indicates that segment_embedding is False.
    if opt.segment_embedding and for_encoder:
        seg_token_id = opt.seg_token_id
    else:
        seg_token_id = None

    # wei 20200723
    if opt.flat_layers > 0 and for_encoder:
        flat_layer_flag = opt.flat_layers
    else:
        flat_layer_flag = -1
    # end wei

    emb = Embeddings(
        word_vec_size=emb_dim,
        position_encoding=opt.position_encoding,
        seg_token_id=seg_token_id,
        # wei 20200723
        flat_layer_flag=flat_layer_flag,
        # end wei
        feat_merge=opt.feat_merge,
        feat_vec_exponent=opt.feat_vec_exponent,
        feat_vec_size=opt.feat_vec_size,
        dropout=opt.dropout[0] if type(opt.dropout) is list else opt.dropout,
        word_padding_idx=word_padding_idx,
        feat_padding_idx=feat_pad_indices,
        word_vocab_size=num_word_embeddings,
        feat_vocab_sizes=num_feat_embeddings,
        sparse=opt.optim == "sparseadam",
        fix_word_vecs=fix_word_vecs)
    return emb
def get_onmt_transformer():
    encoder = TransformerEncoder(
        num_layers=FLAGS.num_layers,
        d_model=FLAGS.d_model,
        heads=FLAGS.n_heads,
        d_ff=FLAGS.d_ff,
        dropout=FLAGS.dropout,
        embeddings=Embeddings(
            word_vec_size=FLAGS.d_model,
            word_vocab_size=FLAGS.input_dim,
            word_padding_idx=FLAGS.src_pad_idx,
            position_encoding=True,
            dropout=FLAGS.dropout
        ),
        attention_dropout=FLAGS.dropout,
        max_relative_positions=0,
    )
    decoder = TransformerDecoder(
        num_layers=FLAGS.num_layers,
        d_model=FLAGS.d_model,
        heads=FLAGS.n_heads,
        d_ff=FLAGS.d_ff,
        copy_attn=False,
        self_attn_type='scaled-dot',
        dropout=FLAGS.dropout,
        embeddings=Embeddings(
            word_vec_size=FLAGS.d_model,
            word_vocab_size=FLAGS.output_dim,
            word_padding_idx=FLAGS.trg_pad_idx,
            position_encoding=True,
            dropout=FLAGS.dropout
        ),
        aan_useffn=False,
        alignment_heads=0,
        alignment_layer=0,
        full_context_alignment=False,
        attention_dropout=FLAGS.dropout,
        max_relative_positions=0,
    )
    return NMTModel(encoder, decoder)
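# Hypothetical FLAGS sketch (assumption, not from the original source):
# get_onmt_transformer reads its hyperparameters from a module-level FLAGS
# object, so any namespace exposing the referenced attribute names is enough
# to drive it. The values below are illustrative.
from argparse import Namespace

FLAGS = Namespace(num_layers=6, d_model=512, n_heads=8, d_ff=2048, dropout=0.1,
                  input_dim=32000, output_dim=32000, src_pad_idx=1, trg_pad_idx=1)
model = get_onmt_transformer()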
def build_embeddings(opt, text_field, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        text_field(TextMultiField): word and feats field.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    if opt.is_bert:
        token_fields_vocab = text_field.base_field.vocab
        vocab_size = len(token_fields_vocab)
        emb_dim = opt.word_vec_size
        return BertEmbeddings(
            vocab_size, emb_dim,
            dropout=(opt.dropout[0] if type(opt.dropout) is list
                     else opt.dropout))

    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    if opt.model_type == "vec" and for_encoder:
        return VecEmbedding(
            opt.feat_vec_size,
            emb_dim,
            position_encoding=opt.position_encoding,
            dropout=(opt.dropout[0] if type(opt.dropout) is list
                     else opt.dropout),
        )

    pad_indices = [f.vocab.stoi[f.pad_token] for _, f in text_field]
    word_padding_idx, feat_pad_indices = pad_indices[0], pad_indices[1:]

    num_embs = [len(f.vocab) for _, f in text_field]
    num_word_embeddings, num_feat_embeddings = num_embs[0], num_embs[1:]

    fix_word_vecs = opt.fix_word_vecs_enc if for_encoder \
        else opt.fix_word_vecs_dec

    emb = Embeddings(
        word_vec_size=emb_dim,
        position_encoding=opt.position_encoding,
        feat_merge=opt.feat_merge,
        feat_vec_exponent=opt.feat_vec_exponent,
        feat_vec_size=opt.feat_vec_size,
        dropout=opt.dropout[0] if type(opt.dropout) is list else opt.dropout,
        word_padding_idx=word_padding_idx,
        feat_padding_idx=feat_pad_indices,
        word_vocab_size=num_word_embeddings,
        feat_vocab_sizes=num_feat_embeddings,
        sparse=opt.optim == "sparseadam",
        fix_word_vecs=fix_word_vecs)
    return emb
def build_embeddings(vocab, embed_dim):
    """
    Build an Embeddings instance.

    Args:
        vocab(Vocab): words dictionary.
        embed_dim(int): size of the word embeddings.
    """
    word_padding_idx = vocab.pad_idx
    num_word_embeddings = len(vocab)
    return Embeddings(word_vec_size=embed_dim,
                      word_padding_idx=word_padding_idx,
                      word_vocab_size=num_word_embeddings)
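# Minimal usage sketch (assumption, not from the original source): the builder
# above only touches len(vocab) and vocab.pad_idx, so any duck-typed vocabulary
# object is enough to try it.
class TinyVocab:
    pad_idx = 0

    def __len__(self):
        return 100

emb = build_embeddings(TinyVocab(), embed_dim=256)
print(emb.word_lut.weight.shape)  # torch.Size([100, 256])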
def build_embeddings(opt, text_field, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        text_field(TextMultiField): word and feats field.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    if opt.model_type == "vec" and for_encoder:
        return VecEmbedding(
            opt.feat_vec_size,
            emb_dim,
            position_encoding=opt.position_encoding,
            dropout=(opt.dropout[0] if type(opt.dropout) is list
                     else opt.dropout),
        )

    pad_indices = [f.vocab.stoi[f.pad_token] for _, f in text_field]
    word_padding_idx, feat_pad_indices = pad_indices[0], pad_indices[1:]

    num_embs = [len(f.vocab) for _, f in text_field]
    num_word_embeddings, num_feat_embeddings = num_embs[0], num_embs[1:]

    fix_word_vecs = opt.fix_word_vecs_enc if for_encoder \
        else opt.fix_word_vecs_dec

    # Continuous-output decoders tie the output layer to the target field's
    # pretrained vectors, so the output vector size comes from those vectors.
    conmt = False
    out_vec_size = None
    if ("continuous" in opt.generator_function) and not for_encoder:
        out_vec_size = text_field.base_field.vocab.vectors.size(1)
        conmt = True

    emb = Embeddings(
        word_vec_size=emb_dim,
        position_encoding=opt.position_encoding,
        feat_merge=opt.feat_merge,
        feat_vec_exponent=opt.feat_vec_exponent,
        feat_vec_size=opt.feat_vec_size,
        dropout=opt.dropout[0] if type(opt.dropout) is list else opt.dropout,
        word_padding_idx=word_padding_idx,
        feat_padding_idx=feat_pad_indices,
        word_vocab_size=num_word_embeddings,
        feat_vocab_sizes=num_feat_embeddings,
        sparse=opt.optim == "sparseadam",
        fix_word_vecs=fix_word_vecs,
        tie_embeddings=opt.share_decoder_embeddings and conmt,
        out_vec_size=out_vec_size)
    return emb
def make_embeddings(opt, word_dict, feature_dicts, for_encoder=True,
                    for_inference_network=False):
    """
    Make an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        for_encoder(bool): make Embeddings for encoder or decoder?
    """
    if for_encoder:
        if not for_inference_network:
            embedding_dim = opt.src_word_vec_size
        else:
            embedding_dim = opt.inference_network_src_word_vec_size
    else:
        if not for_inference_network:
            embedding_dim = opt.tgt_word_vec_size
        else:
            embedding_dim = opt.inference_network_tgt_word_vec_size

    word_padding_idx = word_dict.stoi[onmt.io.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[onmt.io.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    if not for_inference_network:
        dropout = opt.dropout
    else:
        dropout = opt.inference_network_dropout

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=opt.position_encoding,
                      feat_merge=opt.feat_merge,
                      feat_vec_exponent=opt.feat_vec_exponent,
                      feat_vec_size=opt.feat_vec_size,
                      dropout=dropout,
                      word_padding_idx=word_padding_idx,
                      feat_padding_idx=feats_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      feat_vocab_sizes=num_feat_embeddings,
                      sparse=opt.optim == "sparseadam")
def build_embeddings(opt, word_dict, feature_dicts, for_encoder=True):
    """
    Build an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    word_padding_idx = word_dict.stoi[inputters.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[inputters.PAD_WORD]
                         for feat_dict in feature_dicts]

    # For lattice-style feature merging ('latt'), all feature vocabularies are
    # merged into a single one, so a single embedding table covers every feature.
    if opt.feat_merge == 'latt' and len(feature_dicts) > 0:
        feature_dicts = inputters.merge_vocabs(feature_dicts)
        num_feat_embeddings = [len(feature_dicts)]
    else:
        num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=opt.position_encoding,
                      feat_merge=opt.feat_merge,
                      feat_vec_exponent=opt.feat_vec_exponent,
                      feat_vec_size=opt.feat_vec_size,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      feat_padding_idx=feats_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      feat_vocab_sizes=num_feat_embeddings,
                      sparse=opt.optim == "sparseadam")
def make_embeddings(opt, word_dict, for_encoder=True):
    """
    Make an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocabulary): words dictionary.
        for_encoder(bool): make Embeddings for encoder or decoder?
    """
    embedding_dim = opt.word_vec_size
    word_padding_idx = word_dict.to_ind(markers.PAD)
    num_word_embeddings = len(word_dict)
    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=False,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      word_vocab_size=num_word_embeddings)
def build_embeddings(opt, text_field, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        text_field(TextMultiField): word and feats field.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    pad_indices = [f.vocab.stoi[f.pad_token] for _, f in text_field]
    word_padding_idx, feat_pad_indices = pad_indices[0], pad_indices[1:]

    num_embs = [len(f.vocab) for _, f in text_field]
    num_word_embeddings, num_feat_embeddings = num_embs[0], num_embs[1:]

    fix_word_vecs = opt.fix_word_vecs_enc if for_encoder \
        else opt.fix_word_vecs_dec

    emb = Embeddings(word_vec_size=emb_dim,
                     position_encoding=opt.position_encoding,
                     feat_merge=opt.feat_merge,
                     feat_vec_exponent=opt.feat_vec_exponent,
                     feat_vec_size=opt.feat_vec_size,
                     dropout=opt.dropout,
                     word_padding_idx=word_padding_idx,
                     feat_padding_idx=feat_pad_indices,
                     word_vocab_size=num_word_embeddings,
                     feat_vocab_sizes=num_feat_embeddings,
                     sparse=opt.optim == "sparseadam",
                     emb_type=opt.emb_type if for_encoder else None,
                     gcn_vec_size=opt.gcn_vec_size,
                     gcn_dropout=opt.gcn_dropout,
                     gcn_edge_dropout=opt.gcn_edge_dropout,
                     n_gcn_layers=opt.n_gcn_layers,
                     activation=opt.activation,
                     highway=opt.highway,
                     treelstm_vec_size=opt.treelstm_vec_size,
                     geometric_layer=opt.geometric_layer)
    return emb
def build_embeddings(opt, word_dict, feature_dicts, side='src_sku'):
    """
    Build an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        side(str): which side to build Embeddings for; one of 'src_sku',
            'tgt', 'loc', 'name', 'comment' or 'both'.
    """
    if side == 'src_sku':
        embedding_dim = opt.src_word_vec_size
    elif side == 'tgt':
        embedding_dim = opt.tgt_word_vec_size
    elif side == 'loc':
        embedding_dim = 900
    elif side == 'name' or side == 'comment':
        embedding_dim = 2000
    elif side == 'both':
        embedding_dim = 4000
    else:
        raise ValueError

    word_padding_idx = word_dict.stoi[inputters.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[inputters.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=opt.position_encoding,
                      feat_merge=opt.feat_merge,
                      feat_vec_exponent=opt.feat_vec_exponent,
                      feat_vec_size=opt.feat_vec_size,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      feat_padding_idx=feats_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      feat_vocab_sizes=num_feat_embeddings,
                      sparse=opt.optim_e == "sparseadam")
def build_embeddings(opt, text_field, for_encoder=True):
    """
    Args:
        opt: the option in current environment.
        text_field(TextMultiField): word and feats field.
        for_encoder(bool): build Embeddings for encoder or decoder?
    """
    emb_dim = opt.src_word_vec_size if for_encoder else opt.tgt_word_vec_size

    pad_indices = [f.vocab.stoi[f.pad_token] for _, f in text_field]
    word_padding_idx, feat_pad_indices = pad_indices[0], pad_indices[1:]

    num_embs = [len(f.vocab) for _, f in text_field]
    num_word_embeddings, num_feat_embeddings = num_embs[0], num_embs[1:]

    fix_word_vecs = opt.fix_word_vecs_enc if for_encoder \
        else opt.fix_word_vecs_dec

    pos_enc_learned = opt.position_encoding_learned_enc if for_encoder \
        else opt.position_encoding_learned_dec

    # Use GPT representations on this side only when the configured location
    # ('src', 'tgt' or 'both') matches it.
    GPT_representation_mode = opt.GPT_representation_mode \
        if opt.GPT_representation_loc == 'both' \
        or (opt.GPT_representation_loc == 'src' and for_encoder) \
        or (opt.GPT_representation_loc == 'tgt' and not for_encoder) \
        else 'none'

    emb = Embeddings(word_vec_size=emb_dim,
                     position_encoding=opt.position_encoding,
                     position_encoding_learned=pos_enc_learned,
                     position_encoding_ctxsize=opt.position_encoding_ctxsize,
                     feat_merge=opt.feat_merge,
                     feat_vec_exponent=opt.feat_vec_exponent,
                     feat_vec_size=opt.feat_vec_size,
                     dropout=opt.dropout,
                     word_padding_idx=word_padding_idx,
                     feat_padding_idx=feat_pad_indices,
                     word_vocab_size=num_word_embeddings,
                     feat_vocab_sizes=num_feat_embeddings,
                     sparse=opt.optim == "sparseadam",
                     fix_word_vecs=fix_word_vecs,
                     GPT_representation_mode=GPT_representation_mode,
                     GPT_representation_tgt=not for_encoder)
    return emb
def make_embeddings(opt, word_dict, feature_dicts, for_encoder=True):
    """
    Make an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        for_encoder(bool): make Embeddings for encoder or decoder?
    """
    # Configs that predate linked embeddings may not define the option;
    # fall through to the standard path in that case.
    try:
        if not for_encoder and opt.linked_embeddings is not None:
            return make_linked_embeddings(opt, word_dict, feature_dicts,
                                          for_encoder)
    except AttributeError:
        pass

    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    word_padding_idx = word_dict.stoi[onmt.io.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[onmt.io.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=opt.position_encoding,
                      feat_merge=opt.feat_merge,
                      feat_vec_exponent=opt.feat_vec_exponent,
                      feat_vec_size=opt.feat_vec_size,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      feat_padding_idx=feats_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      feat_vocab_sizes=num_feat_embeddings,
                      sparse=opt.optim == "sparseadam")
def make_embeddings(opt, word_dict, for_encoder=True):
    """
    Make an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        for_encoder(bool): make Embeddings for encoder or decoder?
    """
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    word_padding_idx = word_dict.stoi['pad']
    num_word_embeddings = len(word_dict)
    print("Making embeddings for vocabulary of size %d" % num_word_embeddings)

    return Embeddings(word_vec_size=embedding_dim,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      word_vocab_size=num_word_embeddings)
def make_embeddings(opt, word_dict, feature_dicts, for_encoder=True):
    """
    Make an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        for_encoder(bool): make Embeddings for encoder or decoder?
    """
    print(opt.pre_word_vecs_enc)

    # Pretrained word vectors are assumed to be 300-dimensional and override
    # the configured embedding size.
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
        if opt.pre_word_vecs_enc is not None:
            embedding_dim = 300
    else:
        embedding_dim = opt.tgt_word_vec_size
        if opt.pre_word_vecs_dec is not None:
            embedding_dim = 300

    word_padding_idx = word_dict.stoi[onmt.io.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[onmt.io.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=opt.position_encoding,
                      feat_merge=opt.feat_merge,
                      feat_vec_exponent=opt.feat_vec_exponent,
                      feat_vec_size=opt.feat_vec_size,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      feat_padding_idx=feats_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      feat_vocab_sizes=num_feat_embeddings)
def main():
    # global args
    args = parser.parse_args()
    writer = SummaryWriter()

    # Initialization
    now = datetime.datetime.now()
    current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge", "NLLLoss"), \
        'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), \
        'Invalid Loss Function'

    mask = args.common_emb_size
    assert mask <= args.hidden_size

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    model_path = args.model_path + current_date + args.comment + "/"
    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load GloVe embeddings into an ONMT Embeddings module and freeze them.
    emb_size = args.embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    glove_emb = Embeddings(emb_size, len(vocab.word2idx), vocab.word2idx["<pad>"])
    glove_emb.word_lut.weight.data.copy_(emb)
    glove_emb.word_lut.weight.requires_grad = False

    # Build data loaders
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)
    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)

    # Network initialization
    print("Setting up the Networks...")
    encoder_Txt = TextEncoder(glove_emb, num_layers=1, bidirectional=False,
                              hidden_size=args.hidden_size)
    decoder_Txt = TextDecoder(glove_emb, num_layers=1, bidirectional=False,
                              hidden_size=args.hidden_size)
    encoder_Img = ImageEncoder(img_dimension=args.crop_size,
                               feature_dimension=args.hidden_size)
    decoder_Img = ImageDecoder(img_dimension=args.crop_size,
                               feature_dimension=args.hidden_size)

    if cuda:
        encoder_Txt = encoder_Txt.cuda()
        decoder_Img = decoder_Img.cuda()
        encoder_Img = encoder_Img.cuda()
        decoder_Txt = decoder_Txt.cuda()

    # Losses
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    if args.text_criterion == 'MSE':
        txt_criterion = nn.MSELoss()
    elif args.text_criterion == "Cosine":
        txt_criterion = nn.CosineEmbeddingLoss(size_average=False)
    else:
        txt_criterion = nn.HingeEmbeddingLoss(size_average=False)

    if args.cm_criterion == 'MSE':
        cm_criterion = nn.MSELoss()
    elif args.cm_criterion == "Cosine":
        cm_criterion = nn.CosineEmbeddingLoss()
    else:
        cm_criterion = nn.HingeEmbeddingLoss()

    if cuda:
        img_criterion = img_criterion.cuda()
        txt_criterion = txt_criterion.cuda()
        cm_criterion = cm_criterion.cuda()

    # Optimizers, one per encoder/decoder so the two pipelines can be
    # stepped independently.
    print("Setting up the Optimizers...")
    # ATTENTION: Check betas and weight decay
    # ATTENTION: Check why valid_params fails on image networks with out of memory error
    img_enc_optim = optim.Adam(encoder_Img.parameters(), lr=args.learning_rate)
    img_dec_optim = optim.Adam(decoder_Img.parameters(), lr=args.learning_rate)
    txt_enc_optim = optim.Adam(valid_params(encoder_Txt.encoder.parameters()),
                               lr=args.learning_rate)
    txt_dec_optim = optim.Adam(valid_params(decoder_Txt.decoder.parameters()),
                               lr=args.learning_rate)

    train_images = False  # Reverse 2
    for epoch in range(args.num_epochs):
        # Epoch initialization
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()
        bar = Bar('Training Net', max=len(data_loader))

        # Set training mode
        encoder_Img.train()
        decoder_Img.train()
        encoder_Txt.encoder.train()
        decoder_Txt.decoder.train()

        neg_rate = max(0, 2 * (10 - epoch) / 10)
        train_images = not train_images

        for i, (images, captions, lengths) in enumerate(data_loader):
            # ATTENTION REMOVE
            if i == len(data_loader) - 1:
                break

            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            captions = captions.transpose(0, 1).unsqueeze(2)
            lengths = torch.LongTensor(lengths)

            # Forward, Backward and Optimize
            img_dec_optim.zero_grad()
            img_enc_optim.zero_grad()
            txt_dec_optim.zero_grad()
            txt_enc_optim.zero_grad()

            # Image Auto_Encoder Forward
            img_encoder_outputs, Iz = encoder_Img(images)
            IzI = decoder_Img(img_encoder_outputs)
            img_rc_loss = img_criterion(IzI, images)

            # Text Auto Encoder Forward
            captions = captions[:-1, :, :]  # exclude last target from inputs
            lengths = lengths - 1
            dec_state = None
            encoder_outputs, memory_bank = encoder_Txt(captions, lengths)
            enc_state = decoder_Txt.decoder.init_decoder_state(
                captions, memory_bank, encoder_outputs)
            decoder_outputs, dec_state, attns = decoder_Txt.decoder(
                captions, memory_bank,
                enc_state if dec_state is None else dec_state,
                memory_lengths=lengths)
            Tz = encoder_outputs
            TzT = decoder_outputs

            # Loss accumulation
            if args.text_criterion == 'MSE':
                txt_rc_loss = txt_criterion(TzT, glove_emb(captions))
            else:
                txt_rc_loss = txt_criterion(
                    TzT, glove_emb(captions),
                    Variable(torch.ones(TzT.size(0), TzT.size(1))).cuda())

            # Computes Cross-Modal Loss on the first `mask` shared dimensions
            Tz = Tz[0]
            txt = Tz.narrow(1, 0, mask)
            im = Iz.narrow(1, 0, mask)
            if args.cm_criterion == 'MSE':
                cm_loss = mse_loss(txt, im)
            else:
                cm_loss = cm_criterion(txt, im,
                                       Variable(torch.ones(im.size(0)).cuda()))

            # K negative samples
            k = args.negative_samples
            for _ in range(k):
                if cuda:
                    perm = torch.randperm(args.batch_size).cuda()
                else:
                    perm = torch.randperm(args.batch_size)

                if args.cm_criterion == 'MSE':
                    sim = (F.cosine_similarity(txt, txt[perm]) - 1) / (2 * k)
                    cm_loss += mse_loss(txt, im[perm], sim)
                else:
                    cm_loss += neg_rate * cm_criterion(
                        txt, im[perm],
                        Variable(-1 * torch.ones(txt.size(0)).cuda())) / k

            # Computes the loss to be back-propagated
            img_loss = img_rc_loss * (1 - args.cm_loss_weight) + \
                cm_loss * args.cm_loss_weight
            txt_loss = txt_rc_loss * (1 - args.cm_loss_weight) + \
                cm_loss * args.cm_loss_weight

            txt_losses.update(txt_rc_loss.data[0], args.batch_size)
            img_losses.update(img_rc_loss.data[0], args.batch_size)
            cm_losses.update(cm_loss.data[0], args.batch_size)

            # Back Propagation: half of the time we update the image pipeline,
            # the other half the text pipeline.
            if train_images:
                img_loss.backward()
                img_enc_optim.step()
                img_dec_optim.step()
            else:
                txt_loss.backward()
                txt_enc_optim.step()
                txt_dec_optim.step()

            # Logging
            if i % args.image_save_interval == 0:
                subdir_path = os.path.join(result_path,
                                           str(i / args.image_save_interval))
                if not os.path.exists(subdir_path):
                    os.makedirs(subdir_path)
                for im_idx in range(3):
                    im_or = (images[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    im = (IzI[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '_original.A.jpg', im_or)
                    scipy.misc.imsave(filename_prefix + '.A.jpg', im)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = ('({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | '
                          'ETA: {eta:} | Loss_Img: {img_l:.3f} | '
                          'Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}').format(
                batch=i,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                img_l=img_losses.avg,
                txt_l=txt_losses.avg,
                cm_l=cm_losses.avg,
            )
            bar.next()
        bar.finish()

        # Save the models
        print('\n')
        print('Saving the models in {}...'.format(model_path))
        torch.save(decoder_Img.state_dict(),
                   os.path.join(model_path, 'decoder-img-%d-' % (epoch + 1)) +
                   current_date + ".pkl")
        torch.save(encoder_Img.state_dict(),
                   os.path.join(model_path, 'encoder-img-%d-' % (epoch + 1)) +
                   current_date + ".pkl")
        torch.save(decoder_Txt.state_dict(),
                   os.path.join(model_path, 'decoder-txt-%d-' % (epoch + 1)) +
                   current_date + ".pkl")
        torch.save(encoder_Txt.state_dict(),
                   os.path.join(model_path, 'encoder-txt-%d-' % (epoch + 1)) +
                   current_date + ".pkl")

        # Validation
        if args.validate == "true":
            print("Train Set")
            validate(encoder_Img, encoder_Txt, data_loader, mask, 10)
            print("Test Set")
            validate(encoder_Img, encoder_Txt, val_loader, mask, 10)

        writer.add_scalars('data/scalar_group', {
            'Image_RC': img_losses.avg,
            'Text_RC': txt_losses.avg,
            'CM_loss': cm_losses.avg
        }, epoch)
def __init__(self,
             model_dim=None,
             model_type=None,
             word_embedding_dim=None,
             vocab_size=None,
             initial_embeddings=None,
             fine_tune_loaded_embeddings=None,
             num_classes=None,
             embedding_keep_rate=None,
             tracking_lstm_hidden_dim=4,
             transition_weight=None,
             encode_reverse=None,
             encode_bidirectional=None,
             encode_num_layers=None,
             lateral_tracking=None,
             tracking_ln=None,
             use_tracking_in_composition=None,
             predict_use_cell=None,
             use_sentence_pair=False,
             use_difference_feature=False,
             use_product_feature=False,
             mlp_dim=None,
             num_mlp_layers=None,
             mlp_ln=None,
             classifier_keep_rate=None,
             context_args=None,
             composition_args=None,
             with_attention=False,
             data_type=None,
             target_vocabulary=None,
             onmt_module=None,
             FLAGS=None,
             data_manager=None,
             **kwargs):
    super(NMTModel, self).__init__()
    assert not (use_tracking_in_composition and not lateral_tracking), \
        "Lateral tracking must be on to use tracking in composition."

    self.kwargs = kwargs
    self.model_dim = model_dim
    self.model_type = model_type
    self.data_type = data_type
    self.target_vocabulary = target_vocabulary

    if self.model_type == "SPINN":
        encoder_builder = spinn_builder
    elif self.model_type == "RLSPINN":
        encoder_builder = rl_builder
    elif self.model_type == "LMS":
        encoder_builder = lms_builder
    elif self.model_type == "RNN":
        encoder_builder = rnn_builder

    if self.model_type in ("SPINN", "RNN", "LMS"):
        self.encoder = encoder_builder(
            model_dim=model_dim,
            word_embedding_dim=word_embedding_dim,
            vocab_size=vocab_size,
            initial_embeddings=initial_embeddings,
            fine_tune_loaded_embeddings=fine_tune_loaded_embeddings,
            num_classes=num_classes,
            embedding_keep_rate=embedding_keep_rate,
            tracking_lstm_hidden_dim=tracking_lstm_hidden_dim,
            transition_weight=transition_weight,
            use_sentence_pair=use_sentence_pair,
            lateral_tracking=lateral_tracking,
            tracking_ln=tracking_ln,
            use_tracking_in_composition=use_tracking_in_composition,
            predict_use_cell=predict_use_cell,
            use_difference_feature=use_difference_feature,
            use_product_feature=use_product_feature,
            classifier_keep_rate=classifier_keep_rate,
            mlp_dim=mlp_dim,
            num_mlp_layers=num_mlp_layers,
            mlp_ln=mlp_ln,
            context_args=context_args,
            composition_args=composition_args,
            with_attention=with_attention,
            data_type=data_type,
            onmt_module=onmt_module,
            FLAGS=FLAGS,
            data_manager=data_manager)
    else:
        self.encoder = rl_builder(data_manager=data_manager,
                                  initial_embeddings=initial_embeddings,
                                  vocab_size=vocab_size,
                                  num_classes=num_classes,
                                  FLAGS=FLAGS,
                                  context_args=context_args,
                                  composition_args=composition_args)

    if self.model_type == "LMS":
        self.model_dim **= 2

    # To-do: move this to the head of the script. The onmt_module path needs
    # to be imported to do so.
    sys.path.append(onmt_module)
    from onmt.decoders.decoder import InputFeedRNNDecoder, StdRNNDecoder, RNNDecoderBase
    from onmt.encoders.rnn_encoder import RNNEncoder
    from onmt.modules import Embeddings

    self.output_embeddings = Embeddings(self.model_dim,
                                        len(target_vocabulary) + 1,
                                        0)

    # Below, model_dim is multiplied by 2 so that the output dimension is the
    # same as the input word embedding dimension, and not half.
    # Look at TreeRNN for details (there is a down projection).
    if self.model_type == "RNN":
        self.is_bidirectional = True
        self.down_project = Linear()(2 * self.model_dim, self.model_dim,
                                     bias=True)
        self.down_project_context = Linear()(2 * self.model_dim,
                                             self.model_dim, bias=True)
    else:
        if self.model_type == "LMS":
            self.spinn = self.encoder.lms
        else:
            self.spinn = self.encoder.spinn
        self.is_bidirectional = False

    self.decoder = StdRNNDecoder("LSTM",
                                 self.is_bidirectional,
                                 1,
                                 self.model_dim,
                                 embeddings=self.output_embeddings)
    self.generator = nn.Sequential(
        nn.Linear(self.model_dim, len(self.target_vocabulary) + 1),
        nn.LogSoftmax())
def __init__(self,
             layer_num,
             head_num,
             head_size,
             vocab_size,
             start_id,
             end_id,
             weights,
             beam_search_diversity_rate=0.0,
             args=None):
    super().__init__()
    self.layer_num = layer_num
    self.hidden_dim = head_num * head_size
    self.start_id = start_id
    self.end_id = end_id
    self.vocab_size = vocab_size
    self.diversity_rate = beam_search_diversity_rate
    self.args = args

    emb = Embeddings(self.hidden_dim, vocab_size, 1, position_encoding=True)
    self.decoder = TransformerDecoder(layer_num, self.hidden_dim, head_num,
                                      4 * self.hidden_dim, False, 'scaled-dot',
                                      0, 0, emb, 0, False, False, -3, 0, args)
    self.generator = nn.Linear(self.hidden_dim, vocab_size)
    self.logsoftmax = nn.LogSoftmax(dim=-1)
    self.module_path = args.module_path

    if args.model_type == 'torch_decoding':
        # Copy the exported weights into the ONMT decoder layers; linear
        # weights are stored transposed, hence transpose(-1, -2).contiguous().
        for i in range(layer_num):
            layer = self.decoder.transformer_layers[i]
            layer.layer_norm_1.weight.data = weights.w[0][i]
            layer.layer_norm_1.bias.data = weights.w[1][i]
            layer.self_attn.linear_query.weight.data = weights.w[2][i].transpose(-1, -2).contiguous()
            layer.self_attn.linear_keys.weight.data = weights.w[3][i].transpose(-1, -2).contiguous()
            layer.self_attn.linear_values.weight.data = weights.w[4][i].transpose(-1, -2).contiguous()
            layer.self_attn.linear_query.bias.data = weights.w[5][i]
            layer.self_attn.linear_keys.bias.data = weights.w[6][i]
            layer.self_attn.linear_values.bias.data = weights.w[7][i]
            layer.self_attn.final_linear.weight.data = weights.w[8][i].transpose(-1, -2).contiguous()
            layer.self_attn.final_linear.bias.data = weights.w[9][i]
            layer.layer_norm_2.weight.data = weights.w[10][i]
            layer.layer_norm_2.bias.data = weights.w[11][i]
            layer.context_attn.linear_query.weight.data = weights.w[12][i].transpose(-1, -2).contiguous()
            layer.context_attn.linear_keys.weight.data = weights.w[13][i].transpose(-1, -2).contiguous()
            layer.context_attn.linear_values.weight.data = weights.w[14][i].transpose(-1, -2).contiguous()
            layer.context_attn.linear_query.bias.data = weights.w[15][i]
            layer.context_attn.linear_keys.bias.data = weights.w[16][i]
            layer.context_attn.linear_values.bias.data = weights.w[17][i]
            layer.context_attn.final_linear.weight.data = weights.w[18][i].transpose(-1, -2).contiguous()
            layer.context_attn.final_linear.bias.data = weights.w[19][i]
            layer.feed_forward.layer_norm.weight.data = weights.w[20][i]
            layer.feed_forward.layer_norm.bias.data = weights.w[21][i]
            layer.feed_forward.w_1.weight.data = weights.w[22][i].transpose(-1, -2).contiguous()
            layer.feed_forward.w_1.bias.data = weights.w[23][i]
            layer.feed_forward.w_2.weight.data = weights.w[24][i].transpose(-1, -2).contiguous()
            layer.feed_forward.w_2.bias.data = weights.w[25][i]
    elif args.model_type == 'torch_decoding_with_decoder_ext':
        # Detach per-layer weights, move them to the GPU (and to fp16 if
        # requested), then swap in the FasterTransformer decoder layers.
        w = []
        for i in range(layer_num):
            w.append([weights.w[j][i].clone().detach() for j in range(26)])
            for k in range(len(w[-1])):
                w[-1][k] = w[-1][k].cuda()
            if args.data_type == 'fp16':
                for k in range(len(w[-1])):
                    w[-1][k] = w[-1][k].half()

        decoder_layers = nn.ModuleList([
            FTDecoderLayer(head_num, head_size, w[i], args)
            for i in range(layer_num)
        ])
        self.decoder.transformer_layers = decoder_layers
    else:
        raise ValueError('wrong model_type')

    self.decoder.layer_norm.weight.data = weights.w[26]
    self.decoder.layer_norm.bias.data = weights.w[27]
    self.decoder.embeddings.make_embedding.emb_luts[0].weight.data = weights.w[28]
    self.generator.weight.data = weights.w[30].transpose(-1, -2).contiguous()
    self.generator.bias.data = weights.w[31]
def make_embeddings(opt, word_dict, feature_dicts, for_encoder=True,
                    hist_dict=None, use_hier_hist=False):
    """
    Make an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        feature_dicts([Vocab], optional): a list of feature dictionary.
        for_encoder(bool): make Embeddings for encoder or decoder?
    """
    feat_vec_size = opt.feat_vec_size
    if for_encoder:
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    word_padding_idx = word_dict.stoi[onmt.io.PAD_WORD]
    num_word_embeddings = len(word_dict)

    feats_padding_idx = [feat_dict.stoi[onmt.io.PAD_WORD]
                         for feat_dict in feature_dicts]
    num_feat_embeddings = [len(feat_dict) for feat_dict in feature_dicts]

    main_emb = Embeddings(
        word_vec_size=embedding_dim,
        position_encoding=opt.position_encoding,
        feat_merge=opt.feat_merge,
        feat_vec_exponent=opt.feat_vec_exponent,
        feat_vec_size=feat_vec_size,
        dropout=opt.dropout,
        word_padding_idx=word_padding_idx,
        feat_padding_idx=feats_padding_idx,
        word_vocab_size=num_word_embeddings,
        feat_vocab_sizes=num_feat_embeddings,
    )

    if use_hier_hist:
        # The hierarchical history embeddings share the first two feature
        # embedding tables with the main embeddings.
        assert for_encoder and hist_dict is not None
        hist_padding_idx = hist_dict.stoi[onmt.io.PAD_WORD]
        num_hist_embeddings = len(hist_dict)
        assert len(feats_padding_idx) == 3
        assert len(main_emb.get_feat_emb) == 3
        external_embedding = [
            nn.Embedding(num_hist_embeddings, embedding_dim,
                         padding_idx=hist_padding_idx)
        ] + main_emb.get_feat_emb[:2]
        hier_hist_emb = Embeddings(word_vec_size=embedding_dim,
                                   position_encoding=opt.position_encoding,
                                   feat_merge=opt.feat_merge,
                                   feat_vec_exponent=opt.feat_vec_exponent,
                                   feat_vec_size=feat_vec_size,
                                   dropout=opt.dropout,
                                   word_padding_idx=hist_padding_idx,
                                   feat_padding_idx=feats_padding_idx[:2],
                                   word_vocab_size=num_hist_embeddings,
                                   feat_vocab_sizes=num_feat_embeddings[:2],
                                   emb_for_hier_hist=True,
                                   external_embedding=external_embedding)
        return (main_emb, hier_hist_emb)
    else:
        return main_emb
def main():
    print("Initializing...")
    # global args
    args = parser.parse_args()

    now = datetime.datetime.now()
    current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge"), \
        'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), \
        'Invalid Loss Function'

    mask = args.common_emb_size
    assert mask <= args.hidden_size

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    result_path = args.result_path
    model_path = args.model_path + current_date + "/"
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        print("Creating model path on", model_path)
        os.makedirs(model_path)

    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load GloVe embeddings into an ONMT Embeddings module and freeze them.
    emb_size = args.embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    glove_emb = Embeddings(emb_size, len(vocab.word2idx), vocab.word2idx["<pad>"])
    glove_emb.word_lut.weight.data.copy_(emb)
    glove_emb.word_lut.weight.requires_grad = False

    # Build data loaders
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)
    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)

    print("Setting up the Networks...")
    encoder_Img = ImageEncoder(img_dimension=args.crop_size,
                               feature_dimension=args.hidden_size)
    decoder_Img = ImageDecoder(img_dimension=args.crop_size,
                               feature_dimension=args.hidden_size)

    if cuda:
        encoder_Img = encoder_Img.cuda()
        decoder_Img = decoder_Img.cuda()

    # Losses and Optimizers
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    if cuda:
        img_criterion = img_criterion.cuda()

    print("Setting up the Optimizers...")
    img_params = list(decoder_Img.parameters()) + list(encoder_Img.parameters())
    # ATTENTION: Check betas and weight decay
    # ATTENTION: Check why valid_params fails on image networks with out of memory error
    img_optim = optim.Adam(img_params, lr=0.001)  # betas=(0.5, 0.999), weight_decay=0.00001

    train_images = False  # Reverse 2
    for epoch in range(args.num_epochs):
        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        img_losses = AverageMeter()
        txt_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()
        bar = Bar('Training Net', max=len(data_loader))

        # Set training mode
        encoder_Img.train()
        decoder_Img.train()

        train_images = True
        for i, (images, captions, lengths) in enumerate(data_loader):
            # ATTENTION REMOVE
            if i == 6450:
                break

            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            captions = captions.transpose(0, 1).unsqueeze(2)
            lengths = torch.LongTensor(lengths)

            # Forward, Backward and Optimize
            encoder_Img.zero_grad()
            decoder_Img.zero_grad()

            # Image Auto_Encoder Forward
            img_encoder_outputs, Iz = encoder_Img(images)
            IzI = decoder_Img(img_encoder_outputs)
            img_rc_loss = img_criterion(IzI, images)

            img_loss = img_rc_loss
            img_losses.update(img_rc_loss.data[0], args.batch_size)
            txt_losses.update(0, args.batch_size)
            cm_losses.update(0, args.batch_size)

            # Image Network Training and Backpropagation
            img_loss.backward()
            img_optim.step()

            if i % args.image_save_interval == 0:
                subdir_path = os.path.join(result_path,
                                           str(i / args.image_save_interval))
                if not os.path.exists(subdir_path):
                    os.makedirs(subdir_path)
                for im_idx in range(3):
                    im_or = (images[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    im = (IzI[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '_original.A.jpg', im_or)
                    scipy.misc.imsave(filename_prefix + '.A.jpg', im)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = ('({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | '
                          'ETA: {eta:} | Loss_Img: {img_l:.3f} | '
                          'Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}').format(
                batch=i,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                img_l=img_losses.avg,
                txt_l=txt_losses.avg,
                cm_l=cm_losses.avg,
            )
            bar.next()
        bar.finish()

        # Save the models
        print('\n')
        print('Saving the models in {}...'.format(model_path))
        torch.save(decoder_Img.state_dict(),
                   os.path.join(model_path, 'decoder-img-%d-' % (epoch + 1)) +
                   current_date + ".pkl")
        torch.save(encoder_Img.state_dict(),
                   os.path.join(model_path, 'encoder-img-%d-' % (epoch + 1)) +
                   current_date + ".pkl")
def main():
    # global args
    args = parser.parse_args()

    assert args.criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    result_path = args.result_path
    model_path = args.model_path
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Load vocabulary wrapper.
    print('\n')
    print("\033[94mLoading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load GloVe embeddings into an ONMT Embeddings module and freeze them.
    emb_size = args.embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    glove_emb = Embeddings(emb_size, len(vocab.word2idx), vocab.word2idx["<pad>"])
    glove_emb.word_lut.weight.data.copy_(emb)
    glove_emb.word_lut.weight.requires_grad = False

    # Build data loaders
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)
    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)

    print("Setting up the Networks...")
    encoder_Txt = TextEncoderOld(glove_emb, num_layers=1, bidirectional=False,
                                 hidden_size=args.hidden_size)
    encoder_Img = ImageEncoder(img_dimension=args.crop_size,
                               feature_dimension=args.hidden_size)

    if cuda:
        encoder_Txt = encoder_Txt.cuda()
        encoder_Img = encoder_Img.cuda()

    for epoch in range(args.num_epochs):
        # VALIDATION TIME
        print('\033[92mEPOCH ::: VALIDATION ::: ' + str(epoch + 1))

        # Load the models. The prefix/suffix/mask assignments below select
        # which checkpoint run to evaluate; the last assignment wins (earlier
        # candidate runs were kept commented out in the source).
        print("Loading the models...")
        prefix = ""
        suffix = '-{}-05-28-09-23.pkl'.format(epoch + 1)
        mask = 200
        date = "06-30-14-22"
        date = "07-01-12-49"  # bad
        date = "07-01-16-38"
        date = "07-01-18-16"
        date = "07-02-15-38"
        date = "07-08-15-12"
        prefix = "{}/".format(date)
        suffix = '-{}-{}.pkl'.format(epoch + 1, date)
        mask = 100
        print(suffix)

        try:
            encoder_Img.load_state_dict(torch.load(
                os.path.join(args.model_path, prefix + 'encoder-img' + suffix)))
            encoder_Txt.load_state_dict(torch.load(
                os.path.join(args.model_path, prefix + 'encoder-txt' + suffix)))
        except FileNotFoundError:
            print("\n\033[91mFile not found...\nTerminating Validation Procedure!")
            break

        # (The batch loop over val_loader that encodes captions and images into
        #  txt_emb and img_emb, and initializes i, batch_time, end, bar and
        #  limit, is missing from the source at this point; the statements
        #  below are its surviving body.)
        current_embeddings = np.concatenate(
            (txt_emb.cpu().data.numpy(),
             img_emb.unsqueeze(0).cpu().data.numpy()),
            0)
        if i:
            result_embeddings = np.concatenate(
                (result_embeddings, current_embeddings), 1)
        else:
            result_embeddings = current_embeddings

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            batch=i,
            size=len(val_loader),
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
        )
        bar.next()
        bar.finish()

        a = [((result_embeddings[0][i] - result_embeddings[1][i]) ** 2).mean()
             for i in range(limit * args.batch_size)]
        print("Validation MSE: ", np.mean(a))

        print("Computing Nearest Neighbors...")
        i = 0
        topk = []
        kss = [1, 10, 50]
        for k in kss:
            if i:
                print("Normalized ")
                result_embeddings[0] = result_embeddings[0] / result_embeddings[0].sum()
                result_embeddings[1] = result_embeddings[1] / result_embeddings[1].sum()

            neighbors = NearestNeighbors(k, metric='cosine')
            neigh = neighbors
            neigh.fit(result_embeddings[1])
            kneigh = neigh.kneighbors(result_embeddings[0], return_distance=False)

            ks = set()
            for n in kneigh:
                ks.update(set(n))
            print(len(ks) / result_embeddings.shape[1])

            topk.append(np.mean([int(i in nn) for i, nn in enumerate(kneigh)]))

        print("Top-{k:},{k2:},{k3:} accuracy for Image Retrieval:\n\n"
              "\t\033[95m {tpk: .3f}% \t {tpk2: .3f}% \t {tpk3: .3f}% \n".format(
                  k=kss[0], k2=kss[1], k3=kss[2],
                  tpk=100 * topk[0], tpk2=100 * topk[1], tpk3=100 * topk[2]))