Esempio n. 1
0
    def __init__(self, model_name, vocab, lr, lr_decay, batch_size=64):
        """
        Assemble the embedding layer, sentence encoder, classifier and loss.

        Inputs:
            model_name - Name of the encoder to build. 'AWE', 'UniLSTM' and
                         'BiLSTM' select the matching encoder; any other
                         value selects BiLSTMMax.
            vocab - Object whose `vectors` attribute is passed to
                    nn.Embedding.from_pretrained
            lr - Learning rate. Not read in this method; presumably captured
                 by self.save_hyperparameters() for later use
            lr_decay - Learning rate decay factor. Not read in this method
                       (same remark as lr)
            batch_size - Size of the batches. Default is 64. Not read in this
                         method (same remark as lr)
        """
        super().__init__()
        self.save_hyperparameters()

        # embedding layer initialised from the vocabulary's pretrained vectors
        self.glove_embeddings = nn.Embedding.from_pretrained(vocab.vectors)

        if model_name == 'AWE':
            # AWE is the only variant that uses the classifier's default input size
            self.encoder = AWEEncoder()
            self.classifier = Classifier()
        else:
            # pick the encoder class and the classifier input width that goes with it
            if model_name == 'UniLSTM':
                encoder_cls, classifier_input_dim = UniLSTM, 4 * 2048
            elif model_name == 'BiLSTM':
                encoder_cls, classifier_input_dim = BiLSTM, 4 * 2 * 2048
            else:
                # every unrecognised name ends up here as well
                encoder_cls, classifier_input_dim = BiLSTMMax, 4 * 2 * 2048
            self.encoder = encoder_cls()
            self.classifier = Classifier(input_dim=classifier_input_dim)

        # loss used for training
        self.loss_function = nn.CrossEntropyLoss()

        # placeholder for the most recent validation accuracy
        self.last_val_acc = None
def main():
    """Create a TensorFlow session, load the data and train a BiLSTM model.

    The configuration comes from ``u.Config`` and the data from
    ``u.return_data()``; both are forwarded to the model, logger and trainer.
    """
    session = tf.Session()
    dataset = u.return_data()
    config = u.Config

    # The model only receives the third element of the loaded data;
    # the trainer gets the whole thing.
    network = BiLSTM(config, dataset[2])
    run_logger = Logger(session, config)
    Train(session, network, dataset, config, run_logger).train()
Esempio n. 3
0
    def __init__(self, config):
        """
        Copy the settings from ``config`` onto the module and build its sub-layers.

        The encoder is BiLSTM_CNN when ``config.use_char is True`` and BiLSTM
        otherwise; a CRF layer is created only when ``config.use_crf is True``.

        Args:
            config: settings object. Attributes read here: embed_num,
                embed_dim, class_num, paddingId, dropout_emb, dropout,
                lstm_hiddens, lstm_layers, pretrained_embed,
                pretrained_weight, use_char, char_embed_num, char_paddingId,
                char_dim, conv_filter_sizes, conv_filter_nums, use_crf, device.
        """
        super(Sequence_Label, self).__init__()
        self.config = config

        # word embedding
        self.embed_num = config.embed_num
        self.embed_dim = config.embed_dim
        self.label_num = config.class_num
        self.paddingId = config.paddingId

        # dropout
        self.dropout_emb = config.dropout_emb
        self.dropout = config.dropout

        # lstm
        self.lstm_hiddens = config.lstm_hiddens
        self.lstm_layers = config.lstm_layers

        # pretrained embeddings
        self.pretrained_embed = config.pretrained_embed
        self.pretrained_weight = config.pretrained_weight

        # character-level features
        self.use_char = config.use_char
        self.char_embed_num = config.char_embed_num
        self.char_paddingId = config.char_paddingId
        self.char_dim = config.char_dim
        self.conv_filter_sizes = self._conv_filter(config.conv_filter_sizes)
        self.conv_filter_nums = self._conv_filter(config.conv_filter_nums)
        # one filter count is required per filter size
        assert len(self.conv_filter_sizes) == len(self.conv_filter_nums)

        # crf switch and target device
        self.use_crf = config.use_crf
        self.device = config.device

        # Identity comparisons (is True / is False) are kept on purpose:
        # a truthy non-bool value is not treated as True here.
        if self.use_crf is False:
            self.target_size = self.label_num
        else:
            # two extra output slots when the CRF is enabled
            self.target_size = self.label_num + 2

        # keyword arguments shared by both encoder variants
        encoder_kwargs = {
            'embed_num': self.embed_num,
            'embed_dim': self.embed_dim,
            'label_num': self.target_size,
            'paddingId': self.paddingId,
            'dropout_emb': self.dropout_emb,
            'dropout': self.dropout,
            'lstm_hiddens': self.lstm_hiddens,
            'lstm_layers': self.lstm_layers,
            'pretrained_embed': self.pretrained_embed,
            'pretrained_weight': self.pretrained_weight,
        }
        if self.use_char is True:
            # the char-aware encoder additionally needs the char/conv settings
            encoder_kwargs['char_embed_num'] = self.char_embed_num
            encoder_kwargs['char_dim'] = self.char_dim
            encoder_kwargs['char_paddingId'] = self.char_paddingId
            encoder_kwargs['conv_filter_sizes'] = self.conv_filter_sizes
            encoder_kwargs['conv_filter_nums'] = self.conv_filter_nums
            encoder_cls = BiLSTM_CNN
        else:
            encoder_cls = BiLSTM
        encoder_kwargs['device'] = self.device
        self.encoder_model = encoder_cls(**encoder_kwargs)

        if self.use_crf is True:
            # the CRF receives label_num, while the encoder above was sized with target_size
            self.crf_layer = CRF(target_size=self.label_num, device=self.device)
def setup(opt):
    if opt.model == "cnn":
        model = CNN(opt)
    elif opt.model == "bilstm":
        model = BiLSTM(opt)
    elif opt.model == "transformer":
        model = Transformer(opt)
    elif opt.model == "gah":
        model = GAH(opt)
    elif opt.model == 'gahs':
        model = GAHs(opt)
    else:
        raise Exception("model not supported: {}".format(opt.model))
    return model
Esempio n. 5
0
def main(args):
    """Restore the detection and frame-prediction models and validate them.

    Both checkpoints are located under ``paths.checkpoint_root`` using a run
    name derived from the hyper-parameters in ``args``. ``args`` is mutated:
    ``paths``, ``metadata``, ``batch_size`` (forced to 1) and ``resume`` are
    overwritten along the way.

    Args:
        args: parsed options. Attributes read here: dataset, seed,
            batch_size, task, model, epochs, lr, lr_decay, subsample,
            dropout_rate, using_pred_duration.
    """
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata

    # seed both RNGs
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # NOTE(review): the original batch size is saved here but never used;
    # the checkpoint names below are built from args.batch_size, which is 1
    # by then. Confirm whether the saved value was meant to go into the name.
    batch_size = args.batch_size
    args.batch_size = 1
    loaders = exp_info.get_dataset(args, save=True)
    feature_size, train_loader, val_loader, test_loader, all_loader = loaders
    label_num = exp_info.get_label_num(args)

    hidden_size, hidden_layers = 256, 2

    # ---- detection model ----
    detection_run = 'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(
        args.task, args.model, args.epochs, args.lr, args.batch_size,
        args.lr_decay, args.subsample or 1, args.dropout_rate)
    args.resume = os.path.join(paths.checkpoint_root, detection_run)
    if args.model != 'lstm':
        detection_model = MLP(feature_size, hidden_size, label_num)
    else:
        detection_model = BiLSTM(feature_size, hidden_size, hidden_layers,
                                 label_num)
    detection_model = torch.nn.DataParallel(detection_model)
    logutils.load_checkpoint(args, detection_model)

    # ---- frame-prediction model ----
    prediction_run = 'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
        args.task, args.model, args.epochs, args.lr, args.batch_size,
        args.lr_decay, args.subsample or 1, args.dropout_rate,
        args.using_pred_duration)
    args.resume = os.path.join(paths.checkpoint_root, prediction_run)
    if args.model != 'lstm':
        prediction_model = MLP(feature_size, hidden_size, label_num)
    else:
        prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers,
                                     label_num)
    prediction_model = torch.nn.DataParallel(prediction_model)
    logutils.load_checkpoint(args, prediction_model)

    # only the test split is used for validation
    validate(test_loader, detection_model, prediction_model, args=args)
Esempio n. 6
0
def setup(opt):

    if opt.contatenate == 1:
        opt.max_sequence_length = opt.max_sequence_length_contatenate

    if opt.model == "lstm_2L":
        model = LSTM2L(opt)
    elif opt.model == "cnn":
        model = CNN(opt)
    elif opt.model == "bilstm":
        model = BiLSTM(opt)
    elif opt.model == "bilstm_2L":
        model = BiLSTM2L(opt)
    else:
        raise Exception("model not supported: {}".format(opt.model))

    return model
Esempio n. 7
0
from models.BiLSTM import BiLSTM
from models.Center_Net import Center_Net

# Alternative kept for reference: construct Center_Net() here instead of BiLSTM().
# model=Center_Net()
# Instantiate BiLSTM with its default arguments and call its build() method;
# `model` stays bound at module level.
model = BiLSTM()
model.build()
Esempio n. 8
0
def main():
    """Train or test a text classifier selected by the command-line arguments.

    Data iterators are built for the train/dev/test files, the vocabulary
    size and pretrained weights are attached to ``args``, and the model named
    by ``args.model`` ("CNN", "BiLSTM", "CNN_BiLSTM" or "CNN_BiLSTM_ATT") is
    constructed.

    When ``args.test is False`` the model is trained for ``args.num_epochs``
    epochs; after each epoch it is evaluated on the development and test
    data, and the model plus its test predictions are saved whenever the test
    accuracy beats the best value seen so far in this run. Otherwise a saved
    model is loaded from ``args.saved_model`` and evaluated on the test data.

    Raises:
        ValueError: if ``args.model`` is not one of the supported names.
    """
    args = parse_args()
    train_iter = DataIterator(args.train_file, args.max_seq_len,
                              args.batch_size)
    dev_iter = DataIterator(args.dev_file, args.max_seq_len, args.batch_size)
    test_iter = DataIterator(args.test_file, args.max_seq_len, args.batch_size)

    train_data = DataLoader(args.train_file)
    vocab_size = len(train_data.vocab)
    weights = train_data.get_pretrained_weights(args.pretrained_embeddings)
    args.pretrained_weights = weights
    # update the args
    args.vocab_size = vocab_size
    # command-line values arrive as strings, so convert the kernel sizes to int
    args.kernel_sizes = [int(x) for x in args.kernel_sizes]

    logger = getLogger(args.run_log)

    print("\nParameters:")

    for attr, value in sorted(args.__dict__.items()):
        logger.info("\t{}={}".format(attr.upper(), value))

    if args.model == "CNN":
        model = CNN(args)
    elif args.model == "BiLSTM":
        model = BiLSTM(args)
    elif args.model == "CNN_BiLSTM":
        model = CNN_BiLSTM(args)
    elif args.model == "CNN_BiLSTM_ATT":
        model = CNN_BiLSTM_ATTENTION(args)
    else:
        # Previously an unknown name left `model` unbound and the function
        # crashed at print(model); fail with an explicit message instead.
        raise ValueError("model not supported: {}".format(args.model))
    print(model)

    if args.test is False:
        # The best accuracy must persist across epochs. Resetting it inside
        # the loop made every epoch with a positive accuracy overwrite the
        # saved "best" model.
        best_acc = 0.0

        for epoch in range(args.num_epochs):
            steps = train(epoch, train_iter, model, args, logger)

            evaluate(dev_iter, model, args, logger, 'development')
            test_acc, predictions = evaluate(test_iter, model, args, logger,
                                             'testing')
            if test_acc > best_acc:
                best_acc = test_acc
                save(model, args.saved_model, 'best', steps)
                save_predictions(args.test_file, args.prediction_file,
                                 predictions)

    else:
        # NOTE: torch.load unpickles the file; only load checkpoints from a
        # trusted source. Storages are kept where deserialisation puts them
        # (CPU) by the identity map_location.
        model = torch.load(args.saved_model,
                           map_location=lambda storage, loc: storage)
        evaluate(test_iter, model, args, logger, mode='testing')
Esempio n. 9
0
def train():
    """Train the network described by a JSON config file and report test metrics.

    ``sys.argv[1]`` is the path of a UTF-8 JSON config. Keys read here:
    ``model_name``, ``word_index_path``, ``filter_json_path``,
    ``embedding_path``, ``data_path`` and, under ``network_config``,
    ``num_words``, ``optimizer_name``, ``lr``, ``embedding_dims`` and
    ``maxlen``.

    If ``model_name`` is not recognised a message is printed and the function
    returns without doing anything else. Otherwise the network is built with
    the embedding matrix, trained on the training split (with the development
    split passed alongside), evaluated on the test split, and per-category
    accuracy/precision/recall/F1 are printed for categories 0-4.
    """
    config_path = sys.argv[1]
    with codecs.open(config_path, encoding='utf8') as fp:
        config = json.loads(fp.read().strip())

    model_name = config['model_name']
    print(model_name)
    if model_name == 'BiGRU':
        network = BiGRU()
    elif model_name == 'BiLSTM':
        network = BiLSTM()
    elif model_name == 'BasicCNN':
        network = BasicCNN()
    elif model_name == 'BiLstmPool':
        network = BiLstmPool()
    elif model_name == 'BiGruPool':
        network = BiGruPool()
    elif model_name == 'BiGruConv':
        network = BiGruConv()
    elif model_name == 'BiLstmConv':
        network = BiLstmConv()
    elif model_name == 'BiLstmConv3':
        network = BiLstmConv3()
    elif model_name == 'CnnPooling':
        network = CnnPooling()
    elif model_name == 'CnnLstmPooling':
        network = CnnLstmPooling()
    else:
        print("What the F**K!")
        return

    with open(config['word_index_path'], 'r') as fp:
        word_index = json.load(fp)

    with open(config['filter_json_path'], 'r') as fp:
        filter_json = json.load(fp)

    network_config = config['network_config']

    # vocabulary size is capped at num_words, plus one extra slot
    network.nb_words = min(len(word_index), network_config['num_words']) + 1

    network.set_optimizer(
        optimizer_name=network_config['optimizer_name'],
        lr=network_config['lr'])

    embedding_matrix = get_embedding_matrix(
        config['embedding_path'],
        word_index,
        max_features=network_config['num_words'],
        embedding_dims=network_config['embedding_dims'])

    network.build(embedding_matrix)

    paded_sequences, labels, _, _ = get_data(
        config['data_path'],
        filter_json=filter_json,
        num_words=network_config['num_words'],
        maxlen=network_config['maxlen'])
    categ_labels = to_categorical(labels)

    # split the data into train / development / test parts
    train_feature, train_target, dev_feature, dev_target, test_feature, test_target = split_data(
        paded_sequences, categ_labels, 0.1)

    # train on the training data, passing the development data alongside
    network.train(train_feature, train_target, dev_feature, dev_target)

    # evaluate on the test data
    loss, accu = network.evaluate(test_feature, test_target, batch_size=512)
    print('The total loss is %s and the total accuracy is %s' % (loss, accu))

    # predicted scores for the test data
    y = network.inference(test_feature, batch_size=512)
    # Index of the largest entry per row. A list comprehension is used because
    # np.array(map(...)) wraps the lazy map object in a 0-d object array on
    # Python 3 instead of producing the array of labels.
    y_class = np.array([np.argmax(row) for row in y])
    target_class = np.array([np.argmax(row) for row in test_target])

    # accuracy, precision, recall and f1score per category
    for category in range(5):
        accuracy, precision, recall, f1score = get_a_p_r_f(
            target_class, y_class, category)
        print(
            'For category %s: the accuracy is %s, the precision is %s, the recall is %s and the f1score is %s'
            % (category, accuracy, precision, recall, f1score))
Esempio n. 10
0
        batch_size=args.batch_size,
        device='cuda',
        sort=False)

    print("Building Model...")
    if args.model == "cnn":
        model = TextCNN(vocab_size=len(REVIEW.vocab),
                        embedding_size=args.embedding_size,
                        hidden_size=args.hidden_size,
                        filter_sizes=[3, 4, 5],
                        dropout=args.dropout,
                        pretrain_wordvector=REVIEW.vocab.vectors)
    elif args.model == "bilstm":
        model = BiLSTM(vocab_size=len(REVIEW.vocab),
                       embedding_size=args.embedding_size,
                       hidden_size=args.hidden_size,
                       dropout=args.dropout,
                       pretrained_wordvector=REVIEW.vocab.vectors)
    elif args.model == "bilstm_attn":
        model = BiLSTMAttn(vocab_size=len(REVIEW.vocab),
                           embedding_size=args.embedding_size,
                           hidden_size=args.hidden_size,
                           dropout=args.dropout,
                           pretrained_wordvector=REVIEW.vocab.vectors)
    else:
        raise ValueError(
            "Model should be cnn, bilstm or bilstm_attn, {} is invalid.".
            format(args.model))

    if torch.cuda.is_available():
        model = model.cuda()
def evaluate():
    """Load a saved model and report its metrics on the test split.

    ``sys.argv[1]`` is the path of a UTF-8 JSON config and ``sys.argv[2]`` is
    the model path handed to ``network.load_model``. Config keys read here:
    ``model_name``, ``word_index_path``, ``filter_json_path``, ``data_path``
    and, under ``network_config``, ``num_words``, ``optimizer_name``, ``lr``
    and ``maxlen``.

    If ``model_name`` is not recognised a message is printed and the function
    returns without doing anything else. Otherwise the network is built, the
    saved model is loaded, total loss/accuracy are printed, and per-category
    accuracy/precision/recall/F1 are printed for categories 0-4.
    """
    config_path = sys.argv[1]
    with codecs.open(config_path, encoding='utf8') as fp:
        config = json.loads(fp.read().strip())

    model_name = config['model_name']
    print(model_name)
    if model_name == 'BiGRU':
        network = BiGRU()
    elif model_name == 'BiLSTM':
        network = BiLSTM()
    elif model_name == 'BasicCNN':
        network = BasicCNN()
    elif model_name == 'BiLstmPool':
        network = BiLstmPool()
    elif model_name == 'BiGruPool':
        network = BiGruPool()
    elif model_name == 'BiGruConv':
        network = BiGruConv()
    elif model_name == 'BiLstmConv':
        network = BiLstmConv()
    elif model_name == 'BiLstmConv3':
        network = BiLstmConv3()
    else:
        print("What the F**K!")
        return

    with open(config['word_index_path'], 'r') as fp:
        word_index = json.load(fp)

    with open(config['filter_json_path'], 'r') as fp:
        filter_json = json.load(fp)

    network_config = config['network_config']

    # vocabulary size is capped at num_words, plus one extra slot
    network.nb_words = min(len(word_index), network_config['num_words']) + 1

    network.set_optimizer(
        optimizer_name=network_config['optimizer_name'],
        lr=network_config['lr'])

    # built without an embedding matrix; the saved model is loaded further down
    network.build()

    paded_sequences, labels, _, _ = get_data(
        config['data_path'],
        filter_json=filter_json,
        num_words=network_config['num_words'],
        maxlen=network_config['maxlen'])

    categ_labels = to_categorical(labels)

    # only the test part of the split is used here
    _, _, _, _, test_feature, test_target = split_data(paded_sequences,
                                                       categ_labels, 0.05)

    model_path = sys.argv[2]
    network.load_model(model_path)

    loss, accu = network.evaluate(test_feature, test_target, batch_size=512)
    print('The total loss is %s and the total accuracy is %s' % (loss, accu))

    y = network.inference(test_feature, batch_size=512)
    # Index of the largest entry per row. A list comprehension is used because
    # np.array(map(...)) wraps the lazy map object in a 0-d object array on
    # Python 3 instead of producing the array of labels.
    y_class = np.array([np.argmax(row) for row in y])
    target_class = np.array([np.argmax(row) for row in test_target])

    for category in range(5):
        accuracy, precision, recall, f1score = get_a_p_r_f(
            target_class, y_class, category)
        print(
            'For category %s: the accuracy is %s, the precision is %s, the recall is %s and the f1score is %s'
            % (category, accuracy, precision, recall, f1score))