print()
# Map numeric labels back to category strings for a readable distribution.
string_labels = [label_to_cat[x] for x in dataset.labels]
print(f"Labels distribution: {Counter(string_labels)}")
print()
print(f"Dataset size: {len(dataset)}")
print()

LABELS_TO_ID = dataset.label_to_id
print(f"Labels mapping: {LABELS_TO_ID}")
print()

metrics = {"validation": [AccuracyMeter]}

model_config = config.SenseModelParameters(
    model_name=args.config_name,
    hidden_size=args.hidden_size,
    num_classes=len(LABELS_TO_ID),
    freeze_weights=False,
    context_layers=(-1,))

configuration = config.Configuration(
    model_parameters=model_config,
    model=args.model,
    save_path=args.save_path,
    sequence_max_len=args.seq_len,
    batch_size=args.batch_size,
    epochs=args.epochs,
    device=torch.device(args.device),
    tokenizer=transformers.AutoTokenizer.from_pretrained(args.model),
)

""" model = TransformerWrapper.load_pretrained(
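# For reference, collections.Counter yields a frequency map over the string
# labels, e.g. Counter(["NOUN", "VERB", "NOUN"]) == Counter({"NOUN": 2, "VERB": 1});
# that is what the "Labels distribution" line above prints.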
parser.add_argument('--direction',  # flag name inferred from dest=; the start of this call is cut off in the excerpt
                    type=str, dest="direction", default="minimize")
# parser.add_argument('--n_splits', type=int, dest="n_splits", default=3, required=False)
# parser.add_argument('--fold', type=int, dest="fold", default=0, required=True)
args = parser.parse_args()

random.seed(43)

metrics = {"training": [AccuracyMeter], "validation": [AccuracyMeter]}

model_config = config.SenseModelParameters(
    model_name=args.config_name,
    hidden_size=args.hidden_size,
    num_classes=len(CATEGORIES),
    use_pretrained_embeddings=args.use_pretrained_embeddings,
    freeze_weights=False,
    context_layers=(-1,))

configuration = config.Configuration(
    model_parameters=model_config,
    model=args.model,
    save_path=args.save_path,
    sequence_max_len=args.seq_len,
    dropout_prob=args.dropout,
    lr=args.lr,
    batch_size=args.batch_size,
    epochs=args.epochs,
    device=torch.device(args.device),
    tokenizer=transformers.AutoTokenizer.from_pretrained(args.model),
)
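# random.seed above fixes only Python's built-in RNG; torch draws from its own
# generator, so a common companion call for reproducible runs is:
torch.manual_seed(43)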
args = parser.parse_args()

processor = dataset.WicProcessor()
train_dataset = processor.build_dataset(args.train_path, args.gold_train_path)
valid_dataset = processor.build_dataset(args.valid_path, args.gold_valid_path)
train_data_loader = dataset.WiCDataLoader.build_batches(
    train_dataset, args.batch_size)
valid_data_loader = dataset.WiCDataLoader.build_batches(
    valid_dataset, args.batch_size)

model_config = config.SenseModelParameters(
    model_name=args.config_name,
    hidden_size=args.hidden_size,
    num_classes=2,
    use_pretrained_embeddings=args.use_pretrained_embeddings,
    freeze_weights=args.freeze_weights,
    context_layers=(-1, -2, -3, -4))

configuration = config.Configuration(
    model_parameters=model_config,
    model=args.model,
    save_path=args.save_path,
    sequence_max_len=args.seq_len,
    dropout_prob=args.dropout,
    lr=args.lr,
    batch_size=args.batch_size,
    epochs=args.epochs,
    device=torch.device(args.device),
    embedding_map=config.CONFIG.embedding_map,
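# context_layers=(-1, -2, -3, -4) points at the last four encoder hidden
# states. A sketch of the usual way such layers are merged into a contextual
# embedding (illustrative only; this repo's pooling code may differ):
#     def combine_layers(hidden_states, layers=(-1, -2, -3, -4)):
#         # Sum the selected transformer layers token-wise (the feature-based
#         # recipe from the BERT paper).
#         return torch.stack([hidden_states[i] for i in layers]).sum(dim=0)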
parser.add_argument('--pooling_strategy',  # flag and dest names assumed; the start of this call is cut off in the excerpt
                    type=str, dest="pooling_strategy", default="standard")
parser.add_argument('--sense_features', dest="senses_as_features",
                    action="store_true")  # was type=bool, which argparse treats as True for any non-empty string

POOLING_STRATEGIES = {
    "standard": SequencePoolingStrategy,
    "sense": WordSensePoolingStrategy
}

args = parser.parse_args()

model_config = config.SenseModelParameters(
    model_name=args.config_name,
    hidden_size=args.hidden_size,
    num_classes=len(LABELS_MAPPING),
    use_pretrained_embeddings=args.use_pretrained_embeddings)

configuration = config.WordModelConfiguration(
    model_parameters=model_config,
    model=args.model,
    save_path=args.save_path,
    sequence_max_len=args.seq_len,
    dropout_prob=args.dropout,
    lr=args.lr,
    batch_size=args.batch_size,
    epochs=args.epochs,
    device=torch.device(args.device),
    embedding_map=None,
    bnids_map=None,
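# The strategy class is presumably resolved from POOLING_STRATEGIES by the CLI
# value, e.g. (constructor arguments omitted, names assumed):
#     pooling_class = POOLING_STRATEGIES[args.pooling_strategy]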
# save_file(train_data_loader, "../dataset/cached/", "train_jp-pawsx-16-softmax")
# save_file(valid_data_loader, "../dataset/cached/", "valid_jp-pawsx-16-softmax")

metrics = ({
    "training": [AccuracyMeter],
    "validation": [AccuracyMeter]
} if args.loss == "softmax" else {
    "training": [SimilarityAveragePrecisionMeter, SimilarityAccuracyMeter],
    "validation": [SimilarityAveragePrecisionMeter, SimilarityAccuracyMeter]
})

model_config = config.SenseModelParameters(
    model_name=args.config_name,
    hidden_size=args.hidden_size,
    freeze_weights=args.freeze_weights,
    context_layers=(-1,))

configuration = config.Configuration(
    model_parameters=model_config,
    model=args.model,
    save_path=args.save_path,
    sequence_max_len=args.seq_len,
    dropout_prob=args.dropout,
    lr=args.lr,
    batch_size=args.batch_size,
    epochs=args.epochs,
    device=torch.device(args.device),
    tokenizer=transformers.AutoTokenizer.from_pretrained(args.model),
)
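# The two metric sets above mirror the two objectives selected via args.loss:
# a softmax head is scored as plain classification (AccuracyMeter), while the
# similarity objective is scored with similarity-based meters (average
# precision plus a similarity accuracy).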