def __init__(self, model_name, vocab, lr, lr_decay, batch_size=64):
    """
    PyTorch Lightning module that creates the overall model.
    Inputs:
        model_name - String denoting which encoder class to use. Either 'AWE',
                     'UniLSTM', 'BiLSTM', or 'BiLSTMMax'
        vocab - Vocabulary from the alignment between the SNLI dataset and GloVe vectors
        lr - Learning rate to use for the optimizer
        lr_decay - Learning rate decay factor to apply each epoch
        batch_size - Size of the batches. Default is 64
    """
    super().__init__()
    self.save_hyperparameters()

    # create an embedding layer from the pretrained GloVe vocabulary vectors
    self.glove_embeddings = nn.Embedding.from_pretrained(vocab.vectors)

    # select the encoder model and size the classifier input accordingly
    if model_name == 'AWE':
        self.encoder = AWEEncoder()
        self.classifier = Classifier()
    elif model_name == 'UniLSTM':
        self.encoder = UniLSTM()
        self.classifier = Classifier(input_dim=4 * 2048)
    elif model_name == 'BiLSTM':
        self.encoder = BiLSTM()
        self.classifier = Classifier(input_dim=4 * 2 * 2048)
    else:
        self.encoder = BiLSTMMax()
        self.classifier = Classifier(input_dim=4 * 2 * 2048)

    # create the loss function
    self.loss_function = nn.CrossEntropyLoss()

    # save the last validation accuracy (used for learning-rate decay decisions)
    self.last_val_acc = None
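# A minimal sketch of a matching forward pass, assuming the InferSent-style
# feature vector [u; v; |u - v|; u * v] implied by the 4x factor in the
# classifier input_dim above. The argument names and the exact encoder call
# are hypothetical; assumes `import torch` at module level.
def forward(self, premise, hypothesis):
    # encode both sentences from their GloVe embeddings
    u = self.encoder(self.glove_embeddings(premise))
    v = self.encoder(self.glove_embeddings(hypothesis))
    # concatenate the four interaction features and classify them
    features = torch.cat([u, v, torch.abs(u - v), u * v], dim=1)
    return self.classifier(features)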
def main():
    sess = tf.Session()
    data = u.return_data()
    model = BiLSTM(u.Config, data[2])
    logger = Logger(sess, u.Config)
    trainer = Train(sess, model, data, u.Config, logger)
    trainer.train()
def __init__(self, config):
    super(Sequence_Label, self).__init__()
    self.config = config
    # embedding
    self.embed_num = config.embed_num
    self.embed_dim = config.embed_dim
    self.label_num = config.class_num
    self.paddingId = config.paddingId
    # dropout
    self.dropout_emb = config.dropout_emb
    self.dropout = config.dropout
    # lstm
    self.lstm_hiddens = config.lstm_hiddens
    self.lstm_layers = config.lstm_layers
    # pretrained embeddings
    self.pretrained_embed = config.pretrained_embed
    self.pretrained_weight = config.pretrained_weight
    # character-level features
    self.use_char = config.use_char
    self.char_embed_num = config.char_embed_num
    self.char_paddingId = config.char_paddingId
    self.char_dim = config.char_dim
    self.conv_filter_sizes = self._conv_filter(config.conv_filter_sizes)
    self.conv_filter_nums = self._conv_filter(config.conv_filter_nums)
    assert len(self.conv_filter_sizes) == len(self.conv_filter_nums)
    # use crf
    self.use_crf = config.use_crf
    # cuda or cpu
    self.device = config.device
    # the CRF layer reserves two extra slots for start/stop transition tags
    self.target_size = self.label_num if not self.use_crf else self.label_num + 2
    if self.use_char:
        self.encoder_model = BiLSTM_CNN(embed_num=self.embed_num, embed_dim=self.embed_dim,
                                        label_num=self.target_size, paddingId=self.paddingId,
                                        dropout_emb=self.dropout_emb, dropout=self.dropout,
                                        lstm_hiddens=self.lstm_hiddens, lstm_layers=self.lstm_layers,
                                        pretrained_embed=self.pretrained_embed,
                                        pretrained_weight=self.pretrained_weight,
                                        char_embed_num=self.char_embed_num, char_dim=self.char_dim,
                                        char_paddingId=self.char_paddingId,
                                        conv_filter_sizes=self.conv_filter_sizes,
                                        conv_filter_nums=self.conv_filter_nums,
                                        device=self.device)
    else:
        self.encoder_model = BiLSTM(embed_num=self.embed_num, embed_dim=self.embed_dim,
                                    label_num=self.target_size, paddingId=self.paddingId,
                                    dropout_emb=self.dropout_emb, dropout=self.dropout,
                                    lstm_hiddens=self.lstm_hiddens, lstm_layers=self.lstm_layers,
                                    pretrained_embed=self.pretrained_embed,
                                    pretrained_weight=self.pretrained_weight,
                                    device=self.device)
    if self.use_crf:
        args_crf = {'target_size': self.label_num, 'device': self.device}
        self.crf_layer = CRF(**args_crf)
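# A minimal sketch of the Sequence_Label._conv_filter helper referenced above,
# assuming it turns a comma-separated option string such as "3,4,5" into a
# list of ints; the parsing rules in the original codebase may differ.
def _conv_filter(self, option_string):
    # split on commas and cast each filter size / filter count to int
    return [int(item) for item in str(option_string).split(',')]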
def setup(opt):
    # factory that maps the model option to the matching architecture
    if opt.model == "cnn":
        model = CNN(opt)
    elif opt.model == "bilstm":
        model = BiLSTM(opt)
    elif opt.model == "transformer":
        model = Transformer(opt)
    elif opt.model == "gah":
        model = GAH(opt)
    elif opt.model == "gahs":
        model = GAHs(opt)
    else:
        raise Exception("model not supported: {}".format(opt.model))
    return model
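# A hedged usage sketch for the factory above, assuming opt comes from
# argparse; the extra fields a concrete model would read from opt (vocab size,
# hidden size, ...) are omitted for brevity.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model", default="bilstm",
                    choices=["cnn", "bilstm", "transformer", "gah", "gahs"])
opt = parser.parse_args()
model = setup(opt)  # dispatches to BiLSTM(opt) for the default choice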
def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # keep the configured batch size around, then evaluate one sequence at a
    # time; note the checkpoint names below are built with the overridden value
    batch_size = args.batch_size
    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(
        args, save=True)
    label_num = exp_info.get_label_num(args)
    hidden_size = 256
    hidden_layers = 2

    # locate and load the detection checkpoint
    args.resume = os.path.join(
        paths.checkpoint_root,
        'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate))
    if args.model == 'lstm':
        detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
    else:
        detection_model = MLP(feature_size, hidden_size, label_num)
    detection_model = torch.nn.DataParallel(detection_model)
    logutils.load_checkpoint(args, detection_model)

    # locate and load the frame-prediction checkpoint
    args.resume = os.path.join(
        paths.checkpoint_root,
        'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate, args.using_pred_duration))
    if args.model == 'lstm':
        prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    else:
        prediction_model = MLP(feature_size, hidden_size, label_num)
    prediction_model = torch.nn.DataParallel(prediction_model)
    logutils.load_checkpoint(args, prediction_model)

    validate(test_loader, detection_model, prediction_model, args=args)
def setup(opt):
    if opt.contatenate == 1:
        opt.max_sequence_length = opt.max_sequence_length_contatenate
    if opt.model == "lstm_2L":
        model = LSTM2L(opt)
    elif opt.model == "cnn":
        model = CNN(opt)
    elif opt.model == "bilstm":
        model = BiLSTM(opt)
    elif opt.model == "bilstm_2L":
        model = BiLSTM2L(opt)
    else:
        raise Exception("model not supported: {}".format(opt.model))
    return model
from models.BiLSTM import BiLSTM
from models.Center_Net import Center_Net

# model = Center_Net()
model = BiLSTM()
model.build()
def main():
    args = parse_args()

    train_iter = DataIterator(args.train_file, args.max_seq_len, args.batch_size)
    dev_iter = DataIterator(args.dev_file, args.max_seq_len, args.batch_size)
    test_iter = DataIterator(args.test_file, args.max_seq_len, args.batch_size)

    train_data = DataLoader(args.train_file)
    vocab_size = len(train_data.vocab)
    weights = train_data.get_pretrained_weights(args.pretrained_embeddings)
    args.pretrained_weights = weights

    # update the args
    args.vocab_size = vocab_size
    # kernel sizes arrive from the command line as strings, so cast them to ints
    args.kernel_sizes = [int(x) for x in args.kernel_sizes]

    logger = getLogger(args.run_log)
    print("\nParameters:")
    for attr, value in sorted(args.__dict__.items()):
        logger.info("\t{}={}".format(attr.upper(), value))

    if args.model == "CNN":
        model = CNN(args)
    elif args.model == "BiLSTM":
        model = BiLSTM(args)
    elif args.model == "CNN_BiLSTM":
        model = CNN_BiLSTM(args)
    elif args.model == "CNN_BiLSTM_ATT":
        model = CNN_BiLSTM_ATTENTION(args)
    else:
        raise ValueError("model not supported: {}".format(args.model))
    print(model)

    # start training; this branch is skipped when loading a pretrained model
    if args.test is False:
        # track the best test accuracy across epochs, not per epoch
        best_acc = 0.0
        for epoch in range(args.num_epochs):
            steps = train(epoch, train_iter, model, args, logger)
            for mode in ['development', 'testing']:
                if mode == 'development':
                    evaluate(dev_iter, model, args, logger, mode)
                elif mode == 'testing':
                    test_acc, predictions = evaluate(test_iter, model, args, logger, mode)
                    if test_acc > best_acc:
                        best_acc = test_acc
                        save(model, args.saved_model, 'best', steps)
                        save_predictions(args.test_file, args.prediction_file, predictions)
    else:
        model = torch.load(args.saved_model, map_location=lambda storage, loc: storage)
        evaluate(test_iter, model, args, logger, mode='testing')
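# A minimal sketch of the save helper called above. Its real signature is not
# shown in this file; this version simply mirrors the call
# save(model, args.saved_model, 'best', steps) and the later
# torch.load(args.saved_model), so it pickles the whole module. Treat the
# unused save_prefix/steps arguments as an assumption about the original API.
import torch

def save(model, save_path, save_prefix, steps):
    # persist the full model object so torch.load(save_path) can restore it;
    # save_prefix and steps could alternatively be folded into the filename
    torch.save(model, save_path)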
def train():
    config_path = sys.argv[1]
    with codecs.open(config_path, encoding='utf8') as fp:
        config = json.loads(fp.read().strip())

    model_name = config['model_name']
    print(model_name)
    if model_name == 'BiGRU':
        network = BiGRU()
    elif model_name == 'BiLSTM':
        network = BiLSTM()
    elif model_name == 'BasicCNN':
        network = BasicCNN()
    elif model_name == 'BiLstmPool':
        network = BiLstmPool()
    elif model_name == 'BiGruPool':
        network = BiGruPool()
    elif model_name == 'BiGruConv':
        network = BiGruConv()
    elif model_name == 'BiLstmConv':
        network = BiLstmConv()
    elif model_name == 'BiLstmConv3':
        network = BiLstmConv3()
    elif model_name == 'CnnPooling':
        network = CnnPooling()
    elif model_name == 'CnnLstmPooling':
        network = CnnLstmPooling()
    else:
        print('Unknown model name: %s' % model_name)
        return

    with open(config['word_index_path'], 'r') as fp:
        word_index = json.load(fp)
    with open(config['filter_json_path'], 'r') as fp:
        filter_json = json.load(fp)

    network.nb_words = min(len(word_index), config['network_config']['num_words']) + 1
    network.set_optimizer(optimizer_name=config['network_config']['optimizer_name'],
                          lr=config['network_config']['lr'])
    embedding_matrix = get_embedding_matrix(
        config['embedding_path'], word_index,
        max_features=config['network_config']['num_words'],
        embedding_dims=config['network_config']['embedding_dims'])
    network.build(embedding_matrix)

    paded_sequences, labels, _, _ = get_data(
        config['data_path'], filter_json=filter_json,
        num_words=config['network_config']['num_words'],
        maxlen=config['network_config']['maxlen'])
    categ_labels = to_categorical(labels)

    # data split
    train_feature, train_target, dev_feature, dev_target, test_feature, test_target = split_data(
        paded_sequences, categ_labels, 0.1)

    # train the model on the training data and validate on the development data
    network.train(train_feature, train_target, dev_feature, dev_target)

    # evaluate the model on the test set
    loss, accu = network.evaluate(test_feature, test_target, batch_size=512)
    print('The total loss is %s and the total accuracy is %s' % (loss, accu))

    # get the predicted labels from the softmax output
    y = network.inference(test_feature, batch_size=512)
    # map() returns an iterator in Python 3, so materialize it as a list first
    y_class = np.array(list(map(np.argmax, y)))
    target_class = np.array(list(map(np.argmax, test_target)))

    # per-category accuracy, precision, recall and F1 score
    for category in range(5):
        accuracy, precision, recall, f1score = get_a_p_r_f(target_class, y_class, category)
        print('For category %s: the accuracy is %s, the precision is %s, '
              'the recall is %s and the f1score is %s'
              % (category, accuracy, precision, recall, f1score))
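# For comparison only: a hedged equivalent of the per-category metrics using
# scikit-learn; get_a_p_r_f remains the codebase's own implementation.
from sklearn.metrics import precision_recall_fscore_support

precision, recall, f1, _ = precision_recall_fscore_support(
    target_class, y_class, labels=list(range(5)), zero_division=0)
for category in range(5):
    print('Category %s: precision=%.4f recall=%.4f f1=%.4f'
          % (category, precision[category], recall[category], f1[category]))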
    batch_size=args.batch_size, device='cuda', sort=False)

print("Building Model...")
if args.model == "cnn":
    model = TextCNN(vocab_size=len(REVIEW.vocab),
                    embedding_size=args.embedding_size,
                    hidden_size=args.hidden_size,
                    filter_sizes=[3, 4, 5],
                    dropout=args.dropout,
                    pretrain_wordvector=REVIEW.vocab.vectors)
elif args.model == "bilstm":
    model = BiLSTM(vocab_size=len(REVIEW.vocab),
                   embedding_size=args.embedding_size,
                   hidden_size=args.hidden_size,
                   dropout=args.dropout,
                   pretrained_wordvector=REVIEW.vocab.vectors)
elif args.model == "bilstm_attn":
    model = BiLSTMAttn(vocab_size=len(REVIEW.vocab),
                       embedding_size=args.embedding_size,
                       hidden_size=args.hidden_size,
                       dropout=args.dropout,
                       pretrained_wordvector=REVIEW.vocab.vectors)
else:
    raise ValueError(
        "Model should be cnn, bilstm or bilstm_attn, {} is invalid.".format(args.model))

if torch.cuda.is_available():
    model = model.cuda()
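# A device-agnostic alternative to the cuda() call above; this is the standard
# PyTorch idiom rather than part of the original script.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)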
def evaluate():
    config_path = sys.argv[1]
    with codecs.open(config_path, encoding='utf8') as fp:
        config = json.loads(fp.read().strip())

    model_name = config['model_name']
    print(model_name)
    if model_name == 'BiGRU':
        network = BiGRU()
    elif model_name == 'BiLSTM':
        network = BiLSTM()
    elif model_name == 'BasicCNN':
        network = BasicCNN()
    elif model_name == 'BiLstmPool':
        network = BiLstmPool()
    elif model_name == 'BiGruPool':
        network = BiGruPool()
    elif model_name == 'BiGruConv':
        network = BiGruConv()
    elif model_name == 'BiLstmConv':
        network = BiLstmConv()
    elif model_name == 'BiLstmConv3':
        network = BiLstmConv3()
    else:
        print('Unknown model name: %s' % model_name)
        return

    with open(config['word_index_path'], 'r') as fp:
        word_index = json.load(fp)
    with open(config['filter_json_path'], 'r') as fp:
        filter_json = json.load(fp)

    network.nb_words = min(len(word_index), config['network_config']['num_words']) + 1
    network.set_optimizer(optimizer_name=config['network_config']['optimizer_name'],
                          lr=config['network_config']['lr'])
    network.build()

    paded_sequences, labels, _, _ = get_data(
        config['data_path'], filter_json=filter_json,
        num_words=config['network_config']['num_words'],
        maxlen=config['network_config']['maxlen'])
    categ_labels = to_categorical(labels)
    _, _, _, _, test_feature, test_target = split_data(paded_sequences, categ_labels, 0.05)

    # restore the trained weights before evaluating
    model_path = sys.argv[2]
    network.load_model(model_path)

    loss, accu = network.evaluate(test_feature, test_target, batch_size=512)
    print('The total loss is %s and the total accuracy is %s' % (loss, accu))

    y = network.inference(test_feature, batch_size=512)
    # map() returns an iterator in Python 3, so materialize it as a list first
    y_class = np.array(list(map(np.argmax, y)))
    target_class = np.array(list(map(np.argmax, test_target)))

    for category in range(5):
        accuracy, precision, recall, f1score = get_a_p_r_f(target_class, y_class, category)
        print('For category %s: the accuracy is %s, the precision is %s, '
              'the recall is %s and the f1score is %s'
              % (category, accuracy, precision, recall, f1score))