Example #1
    print()
    string_labels = list(map(lambda x: label_to_cat[x], dataset.labels))
    print(f"Labels distribution: {Counter(string_labels)}")
    print()

    print(f"Dataset size: {len(dataset)}")
    print()
    LABELS_TO_ID = dataset.label_to_id
    print(f"Labels mapping: {LABELS_TO_ID}")
    print()

    metrics = {"validation": [AccuracyMeter]}

    model_config = config.SenseModelParameters(model_name=args.config_name,
                                               hidden_size=args.hidden_size,
                                               num_classes=len(LABELS_TO_ID),
                                               freeze_weights=False,
                                               context_layers=(-1, ))

    configuration = config.Configuration(
        model_parameters=model_config,
        model=args.model,
        save_path=args.save_path,
        sequence_max_len=args.seq_len,
        batch_size=args.batch_size,
        epochs=args.epochs,
        device=torch.device(args.device),
        tokenizer=transformers.AutoTokenizer.from_pretrained(args.model),
    )
    """
    model = TransformerWrapper.load_pretrained(
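For reference, a self-contained sketch of the label-inspection pattern used above, with hypothetical stand-ins for the dataset's `labels` and `label_to_cat` fields:

from collections import Counter

# Hypothetical stand-ins for the project's dataset fields.
labels = [0, 1, 1, 2, 0, 1]
label_to_cat = {0: "NOUN", 1: "VERB", 2: "ADJ"}

# Same pattern as the snippet above: map integer labels to category
# names, then count them to check class balance before training.
string_labels = [label_to_cat[x] for x in labels]
print(f"Labels distribution: {Counter(string_labels)}")
# Labels distribution: Counter({'VERB': 3, 'NOUN': 2, 'ADJ': 1})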
Example #2
    parser.add_argument('--direction',
                        type=str,
                        dest="direction",
                        default="minimize")
    #parser.add_argument('--n_splits', type=int, dest="n_splits", default=3, required=False)
    #parser.add_argument('--fold', type=int, dest="fold", default=0, required=True)

    args = parser.parse_args()

    random.seed(43)

    metrics = {"training": [AccuracyMeter], "validation": [AccuracyMeter]}

    model_config = config.SenseModelParameters(
        model_name=args.config_name,
        hidden_size=args.hidden_size,
        num_classes=len(CATEGORIES),
        use_pretrained_embeddings=args.use_pretrained_embeddings,
        freeze_weights=False,
        context_layers=(-1, ))

    configuration = config.Configuration(
        model_parameters=model_config,
        model=args.model,
        save_path=args.save_path,
        sequence_max_len=args.seq_len,
        dropout_prob=args.dropout,
        lr=args.lr,
        batch_size=args.batch_size,
        epochs=args.epochs,
        device=torch.device(args.device),
        tokenizer=transformers.AutoTokenizer.from_pretrained(args.model),
    )
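Note that `random.seed(43)` above seeds only Python's own RNG. A fuller seeding helper for a torch training run, as a sketch (assuming numpy and CUDA may also be in play):

import random

import numpy as np
import torch

def set_seed(seed: int = 43) -> None:
    # Seed every RNG the training loop can touch; seeding only the
    # `random` module leaves numpy and torch nondeterministic.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # no-op on CPU-only machines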
Example #3
    args = parser.parse_args()

    processor = dataset.WicProcessor()
    train_dataset = processor.build_dataset(args.train_path,
                                            args.gold_train_path)
    valid_dataset = processor.build_dataset(args.valid_path,
                                            args.gold_valid_path)
    train_data_loader = dataset.WiCDataLoader.build_batches(
        train_dataset, args.batch_size)
    valid_data_loader = dataset.WiCDataLoader.build_batches(
        valid_dataset, args.batch_size)

    model_config = config.SenseModelParameters(
        model_name=args.config_name,
        hidden_size=args.hidden_size,
        num_classes=2,
        use_pretrained_embeddings=args.use_pretrained_embeddings,
        freeze_weights=args.freeze_weights,
        context_layers=(-1, -2, -3, -4))

    configuration = config.Configuration(
        model_parameters=model_config,
        model=args.model,
        save_path=args.save_path,
        sequence_max_len=args.seq_len,
        dropout_prob=args.dropout,
        lr=args.lr,
        batch_size=args.batch_size,
        epochs=args.epochs,
        device=torch.device(args.device),
        embedding_map=config.CONFIG.embedding_map,
    )
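`context_layers=(-1, -2, -3, -4)` suggests the model pools the last four encoder layers. A minimal sketch of that technique with the plain Hugging Face API (not the project's own wrapper; `bert-base-cased` is just a placeholder checkpoint):

import torch
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-cased")
model = transformers.AutoModel.from_pretrained("bert-base-cased",
                                               output_hidden_states=True)

batch = tokenizer("The bank raised its rates.", return_tensors="pt")
with torch.no_grad():
    out = model(**batch)

# out.hidden_states is a tuple (embeddings, layer 1, ..., top layer);
# negative indices select the top layers, mirroring context_layers.
stacked = torch.stack([out.hidden_states[i] for i in (-1, -2, -3, -4)])
features = stacked.sum(dim=0)  # (batch, seq_len, hidden_size)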
Example #4
                        default="standard")
    # argparse's type=bool treats any non-empty string as True, so a
    # store_true flag is used instead (it also defaults to False).
    parser.add_argument('--sense_features',
                        action="store_true",
                        dest="senses_as_features")

    POOLING_STRATEGIES = {
        "standard": SequencePoolingStrategy,
        "sense": WordSensePoolingStrategy
    }

    args = parser.parse_args()

    model_config = config.SenseModelParameters(
        model_name=args.config_name,
        hidden_size=args.hidden_size,
        num_classes=len(LABELS_MAPPING),
        use_pretrained_embeddings=args.use_pretrained_embeddings)

    configuration = config.WordModelConfiguration(
        model_parameters=model_config,
        model=args.model,
        save_path=args.save_path,
        sequence_max_len=args.seq_len,
        dropout_prob=args.dropout,
        lr=args.lr,
        batch_size=args.batch_size,
        epochs=args.epochs,
        device=torch.device(args.device),
        embedding_map=None,
        bnids_map=None,
    )
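A hypothetical minimal version of the two strategies the `POOLING_STRATEGIES` table switches between; the project's `SequencePoolingStrategy` and `WordSensePoolingStrategy` may well differ:

import torch

class MeanSequencePooling:
    """Pool a whole sequence: masked mean over token states."""

    def __call__(self, hidden: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        # hidden: (batch, seq_len, dim); mask: (batch, seq_len), 1 = real token.
        mask = mask.unsqueeze(-1).float()
        return (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

class TargetWordPooling:
    """Pool only the target word's position, as word-sense models do."""

    def __call__(self, hidden: torch.Tensor, target_idx: torch.Tensor) -> torch.Tensor:
        # target_idx: (batch,) index of the disambiguated word per sequence.
        rows = torch.arange(hidden.size(0), device=hidden.device)
        return hidden[rows, target_idx]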
Example #5
    #save_file(train_data_loader, "../dataset/cached/", "train_jp-pawsx-16-softmax")
    #save_file(valid_data_loader, "../dataset/cached/", "valid_jp-pawsx-16-softmax")

    metrics = ({
        "training": [AccuracyMeter],
        "validation": [AccuracyMeter]
    } if args.loss == "softmax" else {
        "training": [SimilarityAveragePrecisionMeter, SimilarityAccuracyMeter],
        "validation":
        [SimilarityAveragePrecisionMeter, SimilarityAccuracyMeter]
    })

    model_config = config.SenseModelParameters(
        model_name=args.config_name,
        hidden_size=args.hidden_size,
        freeze_weights=args.freeze_weights,
        context_layers=(-1, ))

    configuration = config.Configuration(
        model_parameters=model_config,
        model=args.model,
        save_path=args.save_path,
        sequence_max_len=args.seq_len,
        dropout_prob=args.dropout,
        lr=args.lr,
        batch_size=args.batch_size,
        epochs=args.epochs,
        device=torch.device(args.device),
        tokenizer=transformers.AutoTokenizer.from_pretrained(args.model),
    )
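The meters above are passed as classes and instantiated elsewhere; a hypothetical minimal meter with the usual update/value interface (the project's `AccuracyMeter` may differ):

import torch

class AccuracyMeter:
    """Running classification accuracy accumulated over batches."""

    def __init__(self) -> None:
        self.correct = 0
        self.total = 0

    def update(self, predictions: torch.Tensor, targets: torch.Tensor) -> None:
        # Both tensors hold class ids of shape (batch,).
        self.correct += (predictions == targets).sum().item()
        self.total += targets.numel()

    @property
    def value(self) -> float:
        return self.correct / max(self.total, 1)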