import torch

from datasets.conll2003 import Conll2003Dataset
from models.bilstm_crf.bilstm_crf import BilstmCRF

if __name__ == "__main__":
    device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
    batch_size = 200
    print(f"Device: {device}")
    models_dir = 'experiments/models/saves/'

    # ----------------------------------- Prepare dataset ------------------------------------
    TASK_TYPE = 'NER'
    LABELED_UNLABELED_RATIO = 0.1
    ID_sim_beta = 1

    dataset = Conll2003Dataset(
        save_file_path='datasets/saves/conll2003NER.pkl', task='NER')
    train_dataset, test_dataset = dataset.train_test_split()

    # Split the training data into labelled and unlabelled pools at the given ratio.
    ltrain_dataset, utrain_dataset = train_dataset.split(
        LABELED_UNLABELED_RATIO)

    labeled_train_x, labeled_train_y = ltrain_dataset.x_embeddings, ltrain_dataset.y
    unlabeled_train_x, unlabeled_train_y = utrain_dataset.x_embeddings, utrain_dataset.y
    test_dataset_x, test_dataset_y = test_dataset.x_embeddings, test_dataset.y

    print(f"Labelled size: {len(labeled_train_y)}",
          f"Unlabelled size: {len(unlabeled_train_y)}",
          f"Test size: {len(test_dataset_y)}")

    # ------------------------------------ create model ---------------------------------------
    dropout_rate = 0.5
    model = BilstmCRF(dataset.max_word_idx + 1, dataset.max_tag_idx + 1, 300,
import sys

import torch

from active_learning.utils import active_learning_loop_limited_tokens
from datasets.conll2003 import Conll2003Dataset
from models.supervised.supervised_mc import SupervisedModelAL

if __name__ == "__main__":
    device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
    print(f"Device: {device}")
    models_dir = 'experiments/models/saves/'

    # ----------------------------------- Prepare dataset ------------------------------------
    # Task defaults to NER; an alternative task can be passed as the first command-line argument.
    TASK_TYPE = 'NER' if len(sys.argv) < 2 else sys.argv[1]
    LABELED_UNLABELED_RATIO = 0.1
    ID_sim_beta = 1

    train_dataset, test_dataset = Conll2003Dataset(
        task=TASK_TYPE).train_test_split()
    ltrain_dataset, utrain_dataset = train_dataset.split(
        LABELED_UNLABELED_RATIO)

    labeled_train_x, labeled_train_y = ltrain_dataset.x_embeddings, ltrain_dataset.y
    unlabeled_train_x, unlabeled_train_y = utrain_dataset.x_embeddings, utrain_dataset.y
    test_dataset_x, test_dataset_y = test_dataset.x_embeddings, test_dataset.y

    print(f"Labelled size: {len(labeled_train_y)}",
          f"Unlabelled size: {len(unlabeled_train_y)}",
          f"Test size: {len(test_dataset_y)}")

    # ------------------------------------ create model ---------------------------------------
    dropout_rate = 0.4
    model = SupervisedModelAL(train_dataset.max_tag_idx + 1, 100, 2,
                              dropout_rate, dropout_rate)
    if device.type == 'cuda':
import torch

from datasets.conll2003 import Conll2003Dataset
from models.bilstm_crf.bilstm_crf import BilstmCRF

if __name__ == "__main__":
    device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
    batch_size = 200
    print(f"Device: {device}")
    models_dir = 'experiments/models/saves/'

    # ----------------------------------- Prepare dataset ------------------------------------
    TASK_TYPE = 'POS'
    LABELED_UNLABELED_RATIO = 0.1
    ID_sim_beta = 1

    dataset = Conll2003Dataset(
        save_file_path='datasets/saves/conll2003POS.pkl', task='POS')
    train_dataset, test_dataset = dataset.train_test_split()

    # Split the training data into labelled and unlabelled pools at the given ratio.
    ltrain_dataset, utrain_dataset = train_dataset.split(
        LABELED_UNLABELED_RATIO)

    # Note: this script reads the datasets' x attribute rather than x_embeddings.
    labeled_train_x, labeled_train_y = ltrain_dataset.x, ltrain_dataset.y
    unlabeled_train_x, unlabeled_train_y = utrain_dataset.x, utrain_dataset.y
    test_dataset_x, test_dataset_y = test_dataset.x, test_dataset.y

    print(f"Labelled size: {len(labeled_train_y)}",
          f"Unlabelled size: {len(unlabeled_train_y)}",
          f"Test size: {len(test_dataset_y)}")

    # ------------------------------------ create model ---------------------------------------
    dropout_rate = 0.5
    model = BilstmCRF(dataset.max_word_idx + 1, dataset.max_tag_idx + 1, 30,