def __init__(self, params, vocab_en, vocab_trans):
    super(Lstm, self).__init__()
    self.emb_dim = params.emb_dim
    self.hidden_dim = params.hidden_dim
    self.dropout = params.dropout
    self.bidirection = params.bidirection
    self.emb_file_en = params.emb_file_en
    self.emb_file_trans = params.emb_file_trans
    self.n_words_en = vocab_en.n_words
    self.n_words_trans = vocab_trans.n_words

    # English embedding layer
    self.embedding_en = nn.Embedding(self.n_words_en, self.emb_dim, padding_idx=PAD_INDEX)
    # load English embedding
    embedding_en = load_embedding(self.emb_file_en)
    self.embedding_en.weight.data.copy_(torch.FloatTensor(embedding_en))

    # transfer-language embedding layer
    self.embedding_trans = nn.Embedding(self.n_words_trans, self.emb_dim, padding_idx=PAD_INDEX)
    # load transfer-language embedding
    embedding_trans = load_embedding(self.emb_file_trans)
    self.embedding_trans.weight.data.copy_(torch.FloatTensor(embedding_trans))

    # LSTM layers
    self.lstm = nn.LSTM(self.emb_dim, self.hidden_dim, dropout=self.dropout,
                        bidirectional=self.bidirection, batch_first=True)
def __init__(self, params, vocab):
    super(Lstm, self).__init__()
    self.n_layer = params.n_layer
    self.emb_dim = params.emb_dim
    self.n_words = vocab.n_words
    self.hidden_dim = params.hidden_dim
    self.dropout = params.dropout
    self.bidirection = params.bidirection
    self.freeze_emb = params.freeze_emb
    self.emb_file = params.emb_file

    # embedding layer
    self.embedding = nn.Embedding(self.n_words, self.emb_dim, padding_idx=PAD_INDEX)
    # load embedding (either a cached .npy matrix or a raw vector file)
    if self.emb_file.endswith("npy"):
        embedding = load_embedding_from_npy(self.emb_file)
    else:
        embedding = load_embedding(vocab, self.emb_dim, self.emb_file)
    self.embedding.weight.data.copy_(torch.FloatTensor(embedding))

    # LSTM layers
    self.lstm = nn.LSTM(self.emb_dim, self.hidden_dim, num_layers=self.n_layer,
                        dropout=self.dropout, bidirectional=self.bidirection, batch_first=True)
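# Neither loader helper is shown in these snippets. Below is a minimal sketch of what
# they could look like, inferred only from the call sites above: load_embedding_from_npy(path)
# and load_embedding(vocab, emb_dim, emb_file). The real src.utils implementations may differ
# (several snippets also pass an OOV file or a usechar flag), and vocab.word2index is an
# assumed attribute alongside the vocab.n_words seen in the code.
import numpy as np

def load_embedding_from_npy(emb_file):
    # cached embedding matrix previously saved with np.save
    return np.load(emb_file)

def load_embedding(vocab, emb_dim, emb_file):
    # start from small random vectors, then overwrite rows for words found in the vector file
    embedding = np.random.uniform(-0.1, 0.1, (vocab.n_words, emb_dim)).astype("float32")
    with open(emb_file, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            word, vec = parts[0], parts[1:]
            if len(vec) == emb_dim and word in vocab.word2index:
                embedding[vocab.word2index[word]] = np.asarray(vec, dtype="float32")
    return embedding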
def __initialize(self):
    # symbol dictionary and embedding table paths come from the config object
    sd_path = self.configs.symbol_dict.path + '.yml'
    emb_path = self.configs.embedding_table.path + '.parquet'
    logger.info('Loading symbol_dict from {}'.format(sd_path))
    self.sd = read_yaml(sd_path)
    logger.info('Loading emb_table from {}'.format(emb_path))
    self.emb = load_embedding(emb_path)
def __init__(self, params, vocab_en):
    super(Lstm4pretr, self).__init__()
    self.n_layer = params.n_layer
    self.n_words_en = vocab_en.n_words
    self.emb_dim = params.emb_dim
    self.hidden_dim = params.hidden_dim
    self.dropout = params.dropout
    self.bidirection = params.bidirection
    self.embnoise = params.embnoise
    self.emb_file_en = params.emb_file_en

    # embedding layer
    self.embedding_en = nn.Embedding(self.n_words_en, self.emb_dim, padding_idx=PAD_INDEX)
    # load embedding
    embedding_en = load_embedding(vocab_en, self.emb_dim, self.emb_file_en)
    self.embedding_en.weight.data.copy_(torch.FloatTensor(embedding_en))

    # LSTM layers
    self.lstm = nn.LSTM(self.emb_dim, self.hidden_dim, num_layers=self.n_layer,
                        dropout=self.dropout, bidirectional=self.bidirection, batch_first=True)
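# params.embnoise is stored but never used in this constructor. One plausible use, offered
# purely as an assumption about the (unshown) forward pass, is to perturb the embedded
# inputs with Gaussian noise while pretraining:
def _embed_with_optional_noise(self, x):  # hypothetical helper, not part of the original class
    embedded = self.embedding_en(x)
    if self.embnoise and self.training:
        embedded = embedded + torch.randn_like(embedded) * 0.1  # noise scale is a guess
    return embedded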
def gen_embs_for_vocab():
    import numpy as np
    from src.datareader import datareader
    from src.utils import load_embedding, init_experiment
    from config import get_params

    params = get_params()
    logger = init_experiment(params, logger_filename=params.logger_filename)
    _, vocab = datareader()
    # build the embedding matrix for the vocabulary and cache it as a .npy file
    embedding = load_embedding(vocab, 300,
                               "/data/sh/glove.6B.300d.txt",
                               "/data/sh/coachdata/snips/emb/oov_embs.txt")
    np.save("/data/sh/coachdata/snips/emb/slu_embs.npy", embedding)
def __init__(self, params, vocab):
    super(BiLSTMTagger, self).__init__()
    # embedding layer (index 0 is the padding token)
    self.embedding = nn.Embedding(vocab.n_words, params.emb_dim, padding_idx=0)
    # load pretrained word embeddings
    embedding = load_embedding(vocab, params.emb_dim, params.emb_file, params.usechar)
    self.embedding.weight.data.copy_(torch.FloatTensor(embedding))

    self.dropout = params.dropout
    # bidirectional LSTM encoder
    self.lstm = nn.LSTM(params.emb_dim, params.lstm_hidden_dim, num_layers=params.n_layer,
                        dropout=params.dropout, bidirectional=True, batch_first=True)
    # projection from LSTM hidden states to tag space, followed by a CRF
    self.linear = nn.Linear(params.lstm_hidden_dim * 2, params.num_tag)
    self.crf_layer = CRF(params.num_tag)
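# Hypothetical instantiation of BiLSTMTagger. The attribute names on `params` mirror the
# ones read above, but the concrete values are made up, and the vocab is assumed to come
# from the repo's data reader (as in the ner gen_embs_for_vocab snippet below). Also
# assumes params.emb_file points at an existing pretrained-vector file.
from argparse import Namespace

params = Namespace(emb_dim=300, emb_file="PATH_OF_THE_WIKI_EN_VEC", usechar=False,
                   dropout=0.3, lstm_hidden_dim=200, n_layer=2, num_tag=72)
_, _, _, vocab = datareader()          # assumed to expose vocab.n_words
tagger = BiLSTMTagger(params, vocab)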
def gen_embs_for_vocab():
    import numpy as np
    from src.slu.datareader import datareader
    from src.utils import load_embedding, init_experiment
    from config import get_params

    params = get_params()
    logger = init_experiment(params, logger_filename=params.logger_filename)
    _, vocab = datareader()
    # build the embedding matrix for the vocabulary and cache it as a .npy file
    embedding = load_embedding(vocab, 300,
                               "PATH_OF_THE_WIKI_EN_VEC",
                               "../data/snips/emb/oov_embs.txt")
    np.save("../data/snips/emb/slu_embs.npy", embedding)
def __init__(self, params, vocab_en, vocab_trans):
    super(Lstm, self).__init__()
    self.n_layer = params.n_layer
    self.vocab_en = vocab_en
    self.vocab_trans = vocab_trans
    self.emb_dim = params.emb_dim
    self.hidden_dim = params.hidden_dim
    self.dropout = params.dropout
    self.bidirection = params.bidirection
    self.embnoise = params.embnoise
    self.emb_file_en = params.emb_file_en
    self.emb_file_trans = params.emb_file_trans

    if not params.tar_only or params.zs:
        # English embedding layer
        self.embedding_en = nn.Embedding(self.vocab_en.n_words, self.emb_dim, padding_idx=PAD_INDEX)
        # load embedding
        embedding_en = load_embedding(vocab_en, self.emb_dim, self.emb_file_en)
        self.embedding_en.weight.data.copy_(torch.FloatTensor(embedding_en))

    # transfer-language embedding layer
    self.embedding_trans = nn.Embedding(self.vocab_trans.n_words, self.emb_dim, padding_idx=PAD_INDEX)
    # load embedding
    embedding_trans = load_embedding(vocab_trans, self.emb_dim, self.emb_file_trans)
    self.embedding_trans.weight.data.copy_(torch.FloatTensor(embedding_trans))

    # LSTM layers
    self.lstm = nn.LSTM(self.emb_dim, self.hidden_dim, num_layers=self.n_layer,
                        dropout=self.dropout, bidirectional=self.bidirection, batch_first=True)
def transfer(params, trans_lang):
    # initialize experiment
    logger = init_experiment(params, logger_filename=params.logger_filename)
    logger.info("============== Evaluate Zero-Shot on %s ==============" % trans_lang)

    # dataloader
    _, _, dataloader_test, vocab = get_dataloader(params, lang=trans_lang)

    # get word embedding for the target language
    emb_file = params.emb_file_es if trans_lang == "es" else params.emb_file_th
    embedding = load_embedding(vocab, params.emb_dim, emb_file)

    # evaluate zero-shot
    evaluate_transfer = EvaluateTransfer(params, dataloader_test, embedding, vocab.n_words)
    intent_acc, slot_f1 = evaluate_transfer.evaluate()
    logger.info("Intent ACC: %.4f. Slot F1: %.4f." % (intent_acc, slot_f1))
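# transfer() evaluates one target language at a time. A minimal driver (assuming the same
# get_params helper imported in the gen_embs_for_vocab snippets) would loop over the two
# languages handled by the emb_file_es / emb_file_th switch above:
if __name__ == "__main__":
    from config import get_params
    params = get_params()
    for trans_lang in ("es", "th"):
        transfer(params, trans_lang)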
def gen_embs_for_vocab():
    _, _, _, vocab = datareader()
    embedding = load_embedding(vocab, 300,
                               "PATH_OF_THE_WIKI_EN_VEC",
                               "../data/ner/emb/oov_embs.txt")
    np.save("../data/ner/emb/ner_embs.npy", embedding)
def get_oov_words():
    _, _, _, vocab = datareader()
    _ = load_embedding(vocab, 300, "PATH_OF_THE_WIKI_EN_VEC")
def main():
    """Load the graph, create the embeddings, evaluate them with link prediction and save the results."""
    args = parse_args()

    # load the graph and drop isolated nodes
    graph = utils.load_graph(args.weighted, args.directed, args.input)
    utils.print_graph_info(graph, "original graph")
    graph.remove_nodes_from(list(nx.isolates(graph)))
    utils.print_graph_info(graph, "graph without isolates")

    # hold out edges for the test set, then split the rest into train / model selection
    edge_splitter_test = EdgeSplitter(graph)
    graph_test, X_test_edges, y_test = edge_splitter_test.train_test_split(
        p=args.test_percentage, method="global")
    edge_splitter_train = EdgeSplitter(graph_test, graph)
    graph_train, X_edges, y = edge_splitter_train.train_test_split(
        p=args.train_percentage, method="global")
    X_train_edges, X_model_selection_edges, y_train, y_model_selection = train_test_split(
        X_edges, y, train_size=0.75, test_size=0.25)

    # embed the training graph
    logger.info('\nEmbedding algorithm started.')
    start = time.time()
    embedding.create_embedding(args, graph_train)
    time_diff = time.time() - start
    logger.info(f'\nEmbedding algorithm finished in {time_diff:.2f} seconds.')
    embeddings = utils.load_embedding(args.output)

    # select the best classifier / binary operator on the model-selection split
    logger.info('\nEmbedding evaluation started.')
    start = time.time()
    results = evaluation.evaluate(args.classifier, embeddings, X_train_edges, y_train,
                                  X_model_selection_edges, y_model_selection)
    time_diff = time.time() - start
    logger.info(f'Embedding evaluation finished in {time_diff:.2f} seconds.')
    best_result = max(results, key=lambda result: result["roc_auc"])
    logger.info(
        f"\nBest roc_auc_score on train set using '{best_result['binary_operator'].__name__}': {best_result['roc_auc']}."
    )

    # re-embed the test graph and evaluate the selected model on the held-out edges
    logger.info('\nEmbedding algorithm started.')
    start = time.time()
    embedding.create_embedding(args, graph_test)
    time_diff = time.time() - start
    logger.info(f'\nEmbedding algorithm finished in {time_diff:.2f} seconds.')
    embedding_test = utils.load_embedding(args.output)
    roc_auc, average_precision, accuracy, f1 = evaluation.evaluate_model(
        best_result["classifier"], embedding_test, best_result["binary_operator"],
        X_test_edges, y_test)
    logger.info(f"Scores on test set using '{best_result['binary_operator'].__name__}'.")
    logger.info(f"roc_auc_score: {roc_auc}")
    logger.info(f"average_precision_score: {average_precision}")
    logger.info(f"accuracy_score: {accuracy}")
    logger.info(f"f1_score: {f1}\n")

    if args.results:
        evaluation.save_evaluation_results(
            args.dataset, args.method, args.classifier,
            (roc_auc, average_precision, accuracy, f1), args.results)
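# best_result["binary_operator"] suggests edge features are built by combining the embeddings
# of the two endpoint nodes. A common choice (and only an assumption about what
# evaluation.evaluate tries internally) is the element-wise Hadamard product:
import numpy as np

def hadamard(u, v):
    # edge feature = element-wise product of the two node embedding vectors
    return u * v

def edge_features(embeddings, edges, binary_operator=hadamard):
    # embeddings: mapping node id -> vector; edges: iterable of (u, v) pairs
    return np.array([binary_operator(embeddings[u], embeddings[v]) for u, v in edges])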