def test_tuner_and_logging(self):
        Xs, Ys = self.single_problem

        # Set up RandomSearchTuner
        tuner = RandomSearchTuner(EndModel, log_writer_class=LogWriter)

        # Run the search
        init_kwargs = {
            "seed": 1,
            "input_batchnorm": False,
            "middle_batchnorm": False,
            "layer_out_dims": [2, 10, 2],
            "verbose": False,
        }
        search_space = {"middle_dropout": [0.0, 1.0]}
        tuner.search(
            search_space,
            (Xs[1], Ys[1]),
            init_kwargs=init_kwargs,
            train_args=[(Xs[0], Ys[0])],
            train_kwargs={"n_epochs": 10},
            verbose=False,
        )

        # Load the log
        with open(tuner.report_path, "r") as f:
            tuner_report = json.load(f)

        # Confirm that the run with middle_dropout = 1.0 scores poorly, while the
        # run with middle_dropout = 0.0 does well
        # (in the tuner report, index 0 has dropout = 0.0 and index 1 has dropout = 1.0)
        self.assertLess(tuner_report[1]["score"], 0.65)
        self.assertGreater(tuner_report[0]["score"], 0.95)

        # Clean up
        rmtree(tuner.log_rootdir)
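
A minimal sketch, not part of the test above, of how the JSON report could be inspected before the clean-up step; it assumes only what the assertions already rely on, namely that the report is a list of dicts that each carry a "score" key:

import json

with open(tuner.report_path, "r") as f:
    report = json.load(f)

best_run = max(report, key=lambda run: run["score"])
print(best_run)
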
Example #2
def train_model(args):

    #global args
    #args = parser.parse_args()

    hidden_size = 128
    num_classes = 2
    encode_dim = 108  # using get_frm_output_size()

    L, Y = load_labels(args)

    # Label Model
    # labelling functions analysis
    print(lf_summary(L["dev"], Y=Y["dev"]))

    # majority vote of LFs
    mv = MajorityLabelVoter(seed=123)
    print('Majority Label Voter Metrics:')
    mv.score((L["dev"], Y["dev"]),
             metric=['accuracy', 'precision', 'recall', 'f1'])

    # training label model
    #label_model = LabelModel(k=num_classes, seed=123)
    #label_model.train_model(L["train"], Y["dev"], n_epochs = 500, log_train_every = 50)

    # evaluating label model
    #print('Trained Label Model Metrics:')
    #label_model.score((L["dev"], Y["dev"]), metric=['accuracy','precision', 'recall', 'f1'])

    print('Performing Hyperparameter Search:')
    train_args = [L["train"], Y["dev"]]
    train_kwargs = {}
    init_args = [num_classes]
    init_kwargs = {
        "optimizer": "sgd",
        #"input_batchnorm": True,
        #"use_cuda":torch.cuda.is_available(),
        'seed': 123
    }

    search_space = {
        'seed': [123],
        'n_epochs': [500],
        'learn_class_balance': [False, True],
        'lr': {
            'range': [1e-2, 1e-1],
            'scale': 'log'
        },
        'momentum': {
            'range': [0.7, 0.95],
            'scale': 'log'
        },
        #'l2':{'range': [1e-5, 1e-3], 'scale': 'log'},
        'log_train_every': [50],
        #'validation_metric': 'accuracy',
    }
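    # Note on the search space format (inferred from how it is used here, not quoted
    # from the library documentation): list values appear to enumerate discrete choices
    # sampled per trial, while dicts with a 'range' (and an optional 'scale': 'log')
    # appear to define a continuous interval that is sampled, log-uniformly when the
    # log scale is requested.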

    log_config = {"log_dir": "./run_logs", "run_name": 'oc_label_model'}

    max_search = 25
    tuner_config = {"max_search": max_search}

    validation_metric = 'accuracy'

    # Set up logger and searcher
    tuner = RandomSearchTuner(
        LabelModel,
        #**log_config,
        #log_writer_class=TensorBoardWriter,
        validation_metric=validation_metric,
        seed=1701)

    disc_model = tuner.search(search_space,
                              valid_data=(L["dev"], Y["dev"]),
                              train_args=train_args,
                              init_args=init_args,
                              init_kwargs=init_kwargs,
                              train_kwargs=train_kwargs,
                              max_search=tuner_config["max_search"],
                              clean_up=False)

    print('Trained Label Model Metrics:')
    disc_model.score((L["dev"], Y["dev"]),
                     metric=['accuracy', 'precision', 'recall', 'f1'])

    Ytrain_p = disc_model.predict_proba(L["train"])
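
If the probabilistic training labels are needed as hard labels downstream, a small sketch along these lines could follow; it assumes MeTaL's 1-indexed label convention (0 reserved for abstains) and is not part of the original snippet:

import numpy as np

# column j of the probability matrix corresponds to class j + 1
Ytrain_hard = np.argmax(Ytrain_p, axis=1) + 1
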
Example #3
def train_model(args):

    #global args
    #args = parser.parse_args()

    hidden_size = 128
    num_classes = 2
    encode_dim = 1000  # using get_frm_output_size()

    L, Y = load_labels(args)

    # creating 1 split
    data_list, Ytrain, Ydev = cv_1_split(args, Y["dev"])
    Y["train"] = Ytrain
    Y["dev"] = Ydev

    # End Model
    # Create datasets and dataloaders
    train, dev, test = load_dataset(args, data_list, Y)
    data_loader = get_data_loader(train, dev, test, args.batch_size,
                                  args.num_workers)
    #print(len(data_loader["train"])) # 18850 / batch_size
    #print(len(data_loader["dev"])) # 1500 / batch_size
    #print(len(data_loader["test"])) # 1000 / batch_size
    #import ipdb; ipdb.set_trace()

    # Define input encoder
    cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    #import ipdb; ipdb.set_trace()

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
        encoder_class=cnn_encoder,
    )

    train_args = [data_loader["train"]]

    train_kwargs = {
        'seed': args.seed,
        'progress_bar': True,
        'log_train_every': 1
    }

    init_args = [[hidden_size, num_classes]]

    init_kwargs = {
        "input_module": lstm_module,
        "optimizer": "adam",
        "verbose": False,
        "input_batchnorm": True,
        "use_cuda": torch.cuda.is_available(),
        #'checkpoint_dir':args.checkpoint_dir,
        'checkpoint': False,
        'checkpoint_best': False,
        #'log_valid_metrics':["accuracy"],
        'checkpoint_every': -1,
        'seed': args.seed,
        'device': device,
        #'task_metrics':["accuracy"]
    }
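    # Checkpointing is disabled above (checkpoint=False, checkpoint_best=False,
    # checkpoint_every=-1); presumably this keeps the short one-epoch search runs
    # from writing checkpoint files (an inference, not something stated in the snippet).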

    search_space = {
        'n_epochs': [1],
        'batchnorm': [True],
        'dropout': [0.1, 0.25, 0.4],
        'lr': {
            'range': [1e-3, 1e-2],
            'scale': 'log'
        },
        'l2': {
            'range': [1e-5, 1e-4],
            'scale': 'log'
        },  #[ 1.21*1e-5],
        'checkpoint_metric': ['accuracy'],
    }

    log_config = {"log_dir": "./run_logs", "run_name": 'cnn_lstm_oc'}

    max_search = 2
    tuner_config = {"max_search": max_search}

    validation_metric = 'accuracy'

    # Set up logger and searcher
    tuner = RandomSearchTuner(EndModel,
                              **log_config,
                              log_writer_class=TensorBoardWriter,
                              validation_metric=validation_metric,
                              seed=1701)

    disc_model = tuner.search(
        search_space,
        valid_data=data_loader["dev"],
        train_args=train_args,
        init_args=init_args,
        init_kwargs=init_kwargs,
        train_kwargs=train_kwargs,
        max_search=tuner_config["max_search"],
        clean_up=False,
    )

    # evaluate end model
    disc_model.score(data_loader["dev"],
                     verbose=True,
                     metric=['accuracy', 'precision', 'recall', 'f1'])
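
The snippet stops at the dev-set evaluation. A natural follow-up, sketched here rather than taken from the original, would score the selected model on the held-out test loader built above in the same way:

disc_model.score(data_loader["test"],
                 verbose=True,
                 metric=['accuracy', 'precision', 'recall', 'f1'])
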
Example #4
def train_model(args):

    #global args
    #args = parser.parse_args()

    hidden_size = 128
    num_classes = 2
    encode_dim = 1000  # using get_frm_output_size()

    L, Y = load_labels(args)

    # Label Model
    # labeling function analysis
    print(lf_summary(L["dev"], Y=Y["dev"]))

    # training label model
    label_model = LabelModel(k=num_classes, seed=123)
    label_model.train_model(L["train"], Y["dev"], n_epochs=500, log_train_every=50)

    # evaluating label model
    print('Trained Label Model Metrics:')
    label_model.score((L["dev"], Y["dev"]),
                      metric=['accuracy', 'precision', 'recall', 'f1'])

    # comparison with majority vote of LFs
    mv = MajorityLabelVoter(seed=123)
    print('Majority Label Voter Metrics:')
    mv.score((L["dev"], Y["dev"]),
             metric=['accuracy', 'precision', 'recall', 'f1'])

    Ytrain_p = label_model.predict_proba(L["train"])
    #print(Ytrain_p.shape) # (377*50, 2)
    #Ydev_p = label_model.predict_proba(L["dev"])

    # test models
    #label_model.score((Ltest, Ytest), metric=['accuracy', 'precision', 'recall', 'f1'])

    # End Model
    # Create datasets and dataloaders
    train, dev, test = load_dataset(args, Ytrain_p, Y["dev"], Y["test"])
    data_loader = get_data_loader(train, dev, test, args.batch_size,
                                  args.num_workers)
    #print(len(data_loader["train"])) # 18850 / batch_size
    #print(len(data_loader["dev"])) # 1500 / batch_size
    #print(len(data_loader["test"])) # 1000 / batch_size
    #import ipdb; ipdb.set_trace()

    # Define input encoder
    cnn_encoder = FrameEncoderOC

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    #import ipdb; ipdb.set_trace()

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
        encoder_class=cnn_encoder,
    )

    train_args = [data_loader["train"]]

    train_kwargs = {
        'seed': args.seed,
        'progress_bar': True,
        'log_train_every': 1
    }

    init_args = [[hidden_size, num_classes]]

    init_kwargs = {
        "input_module": lstm_module,
        "optimizer": "adam",
        "verbose": False,
        "input_batchnorm": True,
        "use_cuda": torch.cuda.is_available(),
        'checkpoint_dir': args.checkpoint_dir,
        'seed': args.seed,
        'device': device
    }

    search_space = {
        'n_epochs': [10],
        'batchnorm': [True],
        'dropout': [0.1, 0.25, 0.4],
        'lr': {
            'range': [1e-3, 1e-2],
            'scale': 'log'
        },
        'l2': {
            'range': [1e-5, 1e-4],
            'scale': 'log'
        },  #[ 1.21*1e-5],
        #'checkpoint_metric': ['f1'],
    }

    log_config = {"log_dir": "./run_logs", "run_name": 'cnn_lstm_oc'}

    max_search = 5
    tuner_config = {"max_search": max_search}

    validation_metric = 'accuracy'

    # Set up logger and searcher
    tuner = RandomSearchTuner(EndModel,
                              **log_config,
                              log_writer_class=TensorBoardWriter,
                              validation_metric=validation_metric,
                              seed=1701)

    disc_model = tuner.search(
        search_space,
        valid_data=data_loader["dev"],
        train_args=train_args,
        init_args=init_args,
        init_kwargs=init_kwargs,
        train_kwargs=train_kwargs,
        max_search=tuner_config["max_search"],
        clean_up=False,
    )

    # evaluate end model
    disc_model.score(data_loader["dev"],
                     verbose=True,
                     metric=['accuracy', 'precision', 'recall', 'f1'])
Example #5
    def test_tuner_with_lstm(self):
        """Test basic functionality *and* determinism/seeding of the tuner
        with a more complex EndModel having an input module"""
        # From tests/metal/modules/test_lstm.py; TODO: Refactor this
        n = 1000
        SEQ_LEN = 5
        MAX_INT = 8
        X = torch.randint(1, MAX_INT + 1, (n, SEQ_LEN)).long()
        Y = torch.zeros(n).long()
        needles = np.random.randint(1, SEQ_LEN - 1, n)
        for i in range(n):
            X[i, needles[i]] = MAX_INT + 1
            Y[i] = X[i, needles[i] + 1]
        Xs = [X[:800], X[800:900], X[900:]]
        Ys = [Y[:800], Y[800:900], Y[900:]]

        embed_size = 4
        hidden_size = 10

        # Set up RandomSearchTuner
        tuner = RandomSearchTuner(
            EndModel,
            module_classes={"input_module": LSTMModule},
            log_writer_class=LogWriter,
            seed=123,
        )

        # EndModel init kwargs
        init_kwargs = {
            "seed": 123,
            "batchnorm": True,
            "k": MAX_INT,
            "layer_out_dims": [hidden_size * 2, MAX_INT],
            "input_batchnorm": True,
            "verbose": False,
        }

        # LSTMModule args & kwargs
        module_args = {}
        module_args["input_module"] = (embed_size, hidden_size)
        module_kwargs = {}
        module_kwargs["input_module"] = {
            "seed": 123,
            "bidirectional": True,
            "verbose": False,
            "lstm_reduction": "attention",
            "encoder_class": EmbeddingsEncoder,
            "encoder_kwargs": {"vocab_size": MAX_INT + 2},
        }

        # Set up search space
        # NOTE: No middle layers here, so these should return the same scores!
        search_space = {"middle_dropout": [0.0, 1.0]}

        # Run random grid search
        tuner.search(
            search_space,
            (Xs[1], Ys[1]),
            init_kwargs=init_kwargs,
            train_args=[(Xs[0], Ys[0])],
            train_kwargs={"n_epochs": 2},
            module_args=module_args,
            module_kwargs=module_kwargs,
            verbose=False,
        )

        # Load the log
        with open(tuner.report_path, "r") as f:
            tuner_report = json.load(f)

        # Confirm determinism
        self.assertEqual(tuner_report[0]["score"], tuner_report[1]["score"])

        # Clean up
        rmtree(tuner.log_rootdir)
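
For reference, a single trial of the search above presumably constructs the input module roughly as follows; this is a sketch reconstructed from module_args and module_kwargs, not code taken from the tuner itself:

lstm_module = LSTMModule(
    embed_size,
    hidden_size,
    seed=123,
    bidirectional=True,
    verbose=False,
    lstm_reduction="attention",
    encoder_class=EmbeddingsEncoder,
    encoder_kwargs={"vocab_size": MAX_INT + 2},
)
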
Example #6
def train_model(args):

    #global args
    #args = parser.parse_args()

    # Create datasets and dataloaders
    train, dev, test, classes = load_dataset(args)
    #print('train size:',len(train)) # 106
    #print('dev size:',len(dev)) # 216
    #print('test size:',len(test)) # 90
    # data in tuple of the form (series,label)
    # series shape [30,3,32,32]

    train_loader, dev_loader, test_loader = data_loader(
        train, dev, test, args.batch_size)

    hidden_size = 128
    num_classes = 2
    encode_dim = 132  # using get_frm_output_size()

    # Define input encoder
    cnn_encoder = FrameEncoderBAV

    # Define LSTM module
    lstm_module = LSTMModule(
        encode_dim,
        hidden_size,
        bidirectional=False,
        verbose=False,
        lstm_reduction="attention",
        encoder_class=cnn_encoder,
    )

    #import ipdb; ipdb.set_trace()

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    train_args = [train_loader]

    train_kwargs = {
        'seed': 123,
        'log_train_every': 1,
        'checkpoint_metric': 'f1',
        'log_valid_metrics': ['accuracy', 'f1']
    }

    init_args = [[hidden_size, num_classes]]

    init_kwargs = {
        "input_module": lstm_module,
        "optimizer": "adam",
        "verbose": False,
        "input_batchnorm": True,
        #"use_cuda":torch.cuda.is_available(),
        'device': device,
        'seed': 123
    }
    '''
    search_space = {
        'seed': [123],
        'n_epochs': [5],
        'batchnorm': [True],
        'dropout': [0],
        'lr': [1e-3],
        'log_train_every': 1,
        'validation_metric': 'f1',
    }
    search_space = {
        'seed': [123],
        'n_epochs': [30],
        'batchnorm': [True, False],
        'dropout': [0.1, 0.25, 0.5],
        'lr': {'range': [1e-3, 1], 'scale': 'log'},
        'l2': {'range': [1e-5, 1e-3], 'scale': 'log'},
        'log_train_every': 1,
        'loss_weights': [[0.2, 0.8], [0.4, 0.6], [0.6, 0.4], [0.8, 0.2]],
        #'validation_metric': ['f1'],
        'validation_metric': [['roc-auc', 'accuracy', 'precision', 'recall', 'f1']],
    }
    '''
    search_space = {
        'n_epochs': [100],
        'batchnorm': [True],
        'dropout': [0.1, 0.25, 0.4],
        'lr': {
            'range': [1e-3, 1e-1],
            'scale': 'log'
        },
        'l2': {
            'range': [1e-5, 1e-3],
            'scale': 'log'
        },  #[ 1.21*1e-5],
        'loss_weights': [[0.04, 0.96]],
    }
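    # The single loss_weights candidate [0.04, 0.96] up-weights the positive class
    # heavily, presumably to compensate for class imbalance; this reading is an
    # inference (it also fits the choice of 'f1' as the validation metric below),
    # not something stated in the snippet.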

    log_config = {"log_dir": "./run_logs", "run_name": 'cnn_lstm_bav'}

    max_search = 5
    tuner_config = {"max_search": max_search}

    validation_metric = 'f1'

    # Set up logger and searcher
    tuner = RandomSearchTuner(EndModel,
                              **log_config,
                              log_writer_class=TensorBoardWriter,
                              validation_metric=validation_metric,
                              seed=1701)

    disc_model = tuner.search(
        search_space,
        valid_data=dev_loader,
        train_args=train_args,
        init_args=init_args,
        init_kwargs=init_kwargs,
        train_kwargs=train_kwargs,
        max_search=tuner_config["max_search"],
        clean_up=False,
    )