Code Example #1
def getModel(args):

    if args.model == "rnn":
        model = RNN(input_dim=args.input_dims,
                    nclasses=args.nclasses,
                    hidden_dims=args.hidden_dims,
                    num_rnn_layers=args.num_layers,
                    dropout=args.dropout,
                    bidirectional=True)

    if args.model == "msresnet":
        model = MSResNet(input_channel=args.input_dims,
                         layers=[1, 1, 1, 1],
                         num_classes=args.nclasses,
                         hidden_dims=args.hidden_dims)

    if args.model == "tempcnn":
        model = TempCNN(input_dim=args.input_dims,
                        nclasses=args.nclasses,
                        sequence_length=args.samplet,
                        hidden_dims=args.hidden_dims,
                        kernel_size=args.kernel_size)

    elif args.model == "transformer":

        hidden_dims = args.hidden_dims  # 256
        n_heads = args.n_heads  # 8
        n_layers = args.n_layers  # 6
        len_max_seq = args.samplet
        dropout = args.dropout
        d_inner = hidden_dims * 4

        model = TransformerEncoder(in_channels=args.input_dims,
                                   len_max_seq=len_max_seq,
                                   d_word_vec=hidden_dims,
                                   d_model=hidden_dims,
                                   d_inner=d_inner,
                                   n_layers=n_layers,
                                   n_head=n_heads,
                                   d_k=hidden_dims // n_heads,
                                   d_v=hidden_dims // n_heads,
                                   dropout=dropout,
                                   nclasses=args.nclasses)

    else:
        raise ValueError("unknown model: {}".format(args.model))

    if torch.cuda.is_available():
        model = model.cuda()

    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("initialized {} model ({} parameters)".format(
        args.model, pytorch_total_params))

    return model
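
As a usage sketch (the argument values below are hypothetical; RNN, MSResNet, TempCNN, and TransformerEncoder come from the surrounding project):

# Hypothetical invocation of getModel; the field values are illustrative only.
from argparse import Namespace

args = Namespace(
    model="transformer",  # one of "rnn", "msresnet", "tempcnn", "transformer"
    input_dims=13,        # e.g. input features per timestep
    nclasses=10,
    hidden_dims=256,
    n_heads=8,
    n_layers=6,
    samplet=70,           # sequence length
    dropout=0.2,
)

model = getModel(args)  # prints "initialized transformer model (... parameters)"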
Code Example #2
File: train.py  Project: ssss1029/code-transformers
def main():

    ####################################################################
    ## Data
    ####################################################################

    all_datasets = []
    for dataroot in args.dataroot:
        curr_dataset = BinaryDataset(root_dir=dataroot,
                                     binary_format='elf',
                                     targets=args.targets,
                                     mode='random-chunks',
                                     chunk_length=args.sequence_len)
        all_datasets.append(curr_dataset)

    train_data = torch.utils.data.ConcatDataset(all_datasets)
    logging.info("Train dataset len() = {0}".format(len(train_data)))
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=2)

    val_datasets = []
    for dataroot in args.val_dataroot:
        curr_dataset = BinaryDataset(root_dir=dataroot,
                                     binary_format='elf',
                                     targets=args.targets,
                                     mode='random-chunks',
                                     chunk_length=args.sequence_len)
        val_datasets.append(curr_dataset)

    val_data = torch.utils.data.ConcatDataset(val_datasets)
    logging.info("Validation dataset len() = {0}".format(len(val_data)))
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=2)

    ####################################################################
    ## Model
    ####################################################################

    if args.targets in ('start', 'end'):
        num_classes = 2
    elif args.targets == 'both':
        # TODO: Make sure if this really is 4 or if it is only 3 in practice
        num_classes = 4
    else:
        raise NotImplementedError()

    # Define model
    # For now, embedding dimension = hidden dimension

    if args.arch == 'gru':
        gru = torch.nn.GRU(input_size=args.hidden_size,
                           hidden_size=args.hidden_size,
                           num_layers=args.num_layers,
                           bias=True,
                           batch_first=True,
                           bidirectional=True)

        embedder = torch.nn.Embedding(num_embeddings=256,
                                      embedding_dim=args.hidden_size)

        model = RNN(rnn=gru, embedder=embedder, output_size=num_classes).cuda()
    elif args.arch == 'bert':
        config = BertConfig(
            vocab_size=256,
            hidden_size=args.hidden_size,
            num_hidden_layers=args.num_layers,
            num_attention_heads=args.num_attn_heads,
            intermediate_size=args.hidden_size * 4,  # BERT originally uses 4x hidden size here, so copying that
            hidden_act='gelu',
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=args.sequence_len,  # maximum sequence length
            type_vocab_size=1,
            initializer_range=0.02,
            layer_norm_eps=1e-12,
            pad_token_id=0,
            gradient_checkpointing=False,
            num_labels=num_classes)

        model = BertForTokenClassification(config=config).cuda()
    else:
        raise NotImplementedError()

    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=args.lr,
                                        alpha=0.99,
                                        eps=1e-08,
                                        weight_decay=0,
                                        momentum=0,
                                        centered=False)
    else:
        raise NotImplementedError()

    if args.lr_scheduler == 'cosine':

        def cosine_annealing(step, total_steps, lr_max, lr_min):
            return lr_min + (lr_max - lr_min) * 0.5 * (
                1 + np.cos(step / total_steps * np.pi))

        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lambda step: cosine_annealing(
                step,
                args.epochs * len(train_loader),
                1,  # since lr_lambda computes multiplicative factor
                1e-6 / (args.lr * args.batch_size / 256.)))
    elif args.lr_scheduler == 'none':
        scheduler = None
    else:
        raise NotImplementedError()

    with open(os.path.join(args.savedir, 'training_log.csv'), 'w') as f:
        f.write('epoch,train_loss,train_f1_average,val_loss,val_f1_average\n')

    logging.info("Beginning training")
    for epoch in range(args.epochs):
        train_loss_avg, train_f1_avg = train(model, optimizer, scheduler,
                                             train_loader, epoch, num_classes)

        val_loss_avg, val_f1_avg = validate(model, val_loader, num_classes)

        # torch.save(
        #     model.state_dict(),
        #     os.path.join(save_dir, "model.pth")
        # )

        # TODO: Save results and model

        with open(os.path.join(args.savedir, 'training_log.csv'), 'a') as f:
            f.write('%03d,%0.5f,%0.5f,%0.5f,%0.5f\n' %
                    ((epoch + 1), train_loss_avg, train_f1_avg, val_loss_avg,
                     val_f1_avg))
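
The LambdaLR above multiplies the base learning rate by a cosine factor at each step. A minimal standalone check of that factor (total_steps and the minimum factor below are hypothetical values, not from the source):

# Standalone check of the cosine factor computed inside the LambdaLR above.
import numpy as np

def cosine_annealing(step, total_steps, lr_max, lr_min):
    return lr_min + (lr_max - lr_min) * 0.5 * (
        1 + np.cos(step / total_steps * np.pi))

total_steps = 1000  # hypothetical stand-in for args.epochs * len(train_loader)
for step in (0, 250, 500, 750, 1000):
    print(step, round(cosine_annealing(step, total_steps, 1.0, 1e-4), 4))
# the factor decays smoothly from 1.0 at step 0 to 1e-4 at the final step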
Code Example #3
File: run.py  Project: sailfish009/medal
    # NOTE: this excerpt begins inside a model constructor call; the
    # preceding lines (and the constructor's name) are omitted in the source.
            dropout_rate=args.dropout,
        )
    elif MODEL_TYPE == "electra":
        net = Electra(
            output_size=len(label_to_ix),
            device=DEVICE,
        )
    print('model: {}'.format(net))
    if USE_PRETRAIN:
        net = load_model(net, args.pretrained_model, DEVICE)
    if torch.cuda.device_count() > 1:
        net.to(DEVICE)
        print("Using", torch.cuda.device_count(), "GPUs")
        net = nn.DataParallel(net)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.2, patience=8) \
        if args.use_scheduler else None

    # Create save directory
    time_stamp = time.strftime("%m-%d-%H-%M", time.localtime())
    save_dir = os.path.join(EXPERIMENT_DIR, time_stamp)
    os.makedirs(save_dir, exist_ok=True)

    # Save configs
    model_desc_output = [
        ": ".join([str(k), str(v)]) for k, v in vars(args).items()
    ]
    with open(os.path.join(save_dir, 'configs.txt'), 'w') as file:
        file.writelines("\n".join(model_desc_output))