def __init__(self, config_file):
    """Load the tuning configuration and prepare the trainer and searcher.

    Args:
        config_file: Passed straight to ``load_config_data``; the loaded
            result is indexed with the keys ``'subset_config'``,
            ``'search_algo'``, ``'space'``, ``'metric'`` and ``'mode'``.
    """
    settings = load_config_data(config_file)
    self.config_data = settings

    # The trainer is built from the nested subset-selection configuration.
    self.train_class = TrainClassifier(settings['subset_config'])

    self.search_algo = self.get_search_algo(
        settings['search_algo'],
        settings['space'],
        settings['metric'],
        settings['mode'],
    )

    # Save the subset method; it is used in the log directory name.
    strategy = self.train_class.configdata['dss_strategy']
    self.subset_method = strategy['type']
class TagsClassifier:
    """Classify whitespace-tokenised text with a lazily trained classifier.

    All state (``algorithm``, ``trainer``, ``classifier``) lives on the class
    rather than on individual instances, because ``train`` is a classmethod
    that reads and writes those attributes through ``cls``.  Creating a new
    instance therefore resets the shared state for every instance.
    """

    # Shared state; populated by ``__init__`` and ``train``.
    algorithm = None
    trainer = None
    classifier = None

    def __init__(self):
        """Reset the shared state: fresh trainer, no trained classifier."""
        # Assign on the class explicitly.  The original achieved the same
        # effect by decorating ``__init__`` with ``@classmethod``, which hid
        # the fact that this is class-wide state.
        cls = type(self)
        cls.algorithm = NaiveBayesClassifier
        cls.trainer = TrainClassifier()
        cls.classifier = None

    def classify(self, text):
        """Return the classifier's result for ``text``.

        Args:
            text: String to classify; it is split on whitespace before
                feature extraction.

        Returns:
            Whatever ``self.classifier.classify`` returns for the extracted
            features.
        """
        # Train only if not previously trained, instead of on every call.
        # TODO: the original note also planned to load a cached classifier
        # via redis().get('nosy:tagsClassifier'); that is not implemented.
        if self.classifier is None:
            self.train()

        features = self.trainer.feature_extractor(text.split())
        return self.classifier.classify(features)

    @classmethod
    def train(cls):
        """Train a new classifier and store it on the class."""
        training_set = cls.trainer.train()
        cls.classifier = cls.algorithm.train(training_set)
class TagsClassifier:
    """Text classifier that trains its model on first use.

    ``algorithm``, ``trainer`` and ``classifier`` are class-level attributes:
    ``train`` is a classmethod and accesses them through ``cls``, so they are
    shared by every instance and reset whenever a new instance is created.
    """

    # Class-wide state; set by ``__init__`` and ``train``.
    algorithm = None
    trainer = None
    classifier = None

    def __init__(self):
        """Install a fresh trainer and clear any previously trained model."""
        # The original decorated ``__init__`` with ``@classmethod`` so these
        # assignments landed on the class; do that explicitly instead so the
        # sharing is visible to the reader.
        owner = type(self)
        owner.algorithm = NaiveBayesClassifier
        owner.trainer = TrainClassifier()
        owner.classifier = None

    def classify(self, text):
        """Classify ``text`` and return the classifier's result.

        Args:
            text: Input string; split on whitespace into tokens that are fed
                to the trainer's ``feature_extractor``.

        Returns:
            The value produced by ``self.classifier.classify``.
        """
        # Only train when no classifier exists yet; the original retrained
        # on every call despite its "if not previously trained" comment.
        # TODO: loading a cached model via redis().get('nosy:tagsClassifier')
        # was mentioned in the original comment but never implemented.
        if self.classifier is None:
            self.train()

        tokens = text.split()
        features = self.trainer.feature_extractor(tokens)
        return self.classifier.classify(features)

    @classmethod
    def train(cls):
        """Build the training set and store the trained classifier on the class."""
        training_set = cls.trainer.train()
        cls.classifier = cls.algorithm.train(training_set)
def __init__(self):
    """Set the learning algorithm, create a trainer, and start untrained."""
    self.algorithm = NaiveBayesClassifier
    self.trainer = TrainClassifier()
    # No classifier exists until one is assigned elsewhere.
    self.classifier = None
# Train a LogReg model on the 'digits' data and optionally export it.
args = parser.parse_args()

n_classes = 10
n_epochs = 200

# Load the training table; column '0' is the target, the rest are features.
pre = Preprocessing('digits')
pre.load_data(filename='train.csv', name='train')
X_df = pre.get(name='train').drop(columns=['0'])
y_df = pre.get(name='train')['0']

dtype = torch.float
device = torch.device("cpu")

model_name = 'logreg_digits'
# NOTE(review): 256 is presumably the input feature count -- confirm.
model = LogReg(model_name, 256, n_classes)

learning_rate = 0.0001
batch_size = 32

train_classifier = TrainClassifier(model, X_df, y_df)
(
    trained_model,
    optimizer,
    criterion,
    loss_hist,
    loss_val_hist,
    best_param,
) = train_classifier.run_train(
    n_epochs=n_epochs,
    lr=learning_rate,
    batch_size=batch_size,
)

pre.save_results(loss_hist, loss_val_hist, f'{model_name}')

# Restore the parameters returned as ``best_param`` and switch to eval mode.
trained_model.load_state_dict(state_dict=best_param)
trained_model.eval()

# Export only when the ``s_model`` command-line option is set.
if args.s_model:
    m_exporter = ModelExporter('digits')
    m_exporter.save_nn_model(
        trained_model,
        optimizer,
        0,
        n_classes,
        n_epochs,
        trained_model.get_args(),
    )
# Train a LogReg model on the 'IMDB' data, export it, then load the test set.
args = parser.parse_args()
pre = Preprocessing('IMDB')

n_classes = 2
n_features = int(args.n_feat)
n_epochs = 100

# The training file name depends on the requested number of features.
pre.load_data(
    filename=f'training_data_{n_features}.csv',
    name='training_data',
)
X_df = pre.get(name='training_data').drop(columns=['target'])
y_df = pre.get(name='training_data')['target']

model = LogReg('log_reg', n_features, n_classes)
train_classifier = TrainClassifier(model, X_df, y_df)
(
    trained_model,
    optimizer,
    criterion,
    loss_hist,
    loss_validate_hist,
) = train_classifier.run_train(n_epochs=n_epochs)

# NOTE(review): the suffix is the literal 100, not a variable -- confirm
# whether n_epochs or n_features was intended.
pre.save_results(loss_hist, loss_validate_hist, f'log_reg_{100}')

m_exporter = ModelExporter('IMDB')
m_exporter.save_nn_model(
    trained_model, optimizer, n_features, n_classes, n_epochs
)

# Test part: load the held-out data with the same feature count.
pre.load_data(filename=f'test_data_{n_features}.csv', name='test_data')
X_test_df = pre.get(name='test_data').drop(columns=['target'])
y_test_df = pre.get(name='test_data')['target']

dtype = torch.float
device = torch.device("cpu")
from train import TrainClassifier

# Launch training from the gradmatch CIFAR-10 configuration, overriding
# the subset-selection interval before starting.
config_file = "configs/config_gradmatch_cifar10.py"
classifier = TrainClassifier(config_file)

# NOTE(review): presumably the subset is re-selected at every interval of 1
# (epoch?) -- confirm against the dss_strategy implementation.
classifier.configdata['dss_strategy']['select_every'] = 1

classifier.train()
class HyperParamTuning:
    """Drive a Ray Tune hyper-parameter search around a ``TrainClassifier``.

    The tuning configuration is loaded from a file.  Each trial patches the
    trainer's configuration with the sampled parameters and runs a training
    pass; optionally a final training run is done with the best parameters
    and the ``'Full'`` strategy.
    """

    def __init__(self, config_file):
        """Load the configuration and build the trainer and search algorithm.

        Args:
            config_file: Passed to ``load_config_data``.  The loaded data is
                indexed with ``'subset_config'``, ``'search_algo'``,
                ``'space'``, ``'metric'`` and ``'mode'`` here, and with
                further keys in ``start_eval``.
        """
        self.config_data = load_config_data(config_file)
        self.train_class = TrainClassifier(self.config_data['subset_config'])
        self.search_algo = self.get_search_algo(
            self.config_data['search_algo'],
            self.config_data['space'],
            self.config_data['metric'],
            self.config_data['mode'],
        )

        # Save the subset method; it is used in the log directory name.
        self.subset_method = self.train_class.configdata['dss_strategy']['type']

    def param_tune(self, config):
        """Run one trial: apply ``config`` to the trainer and train.

        Args:
            config: Mapping of sampled parameters; see ``update_parameters``
                for the keys that are recognised.
        """
        # Update parameters in the trainer's config dict.
        new_config = self.update_parameters(self.train_class.configdata, config)
        self.train_class.configdata = new_config

        # Turn on reporting to Ray every time.
        self.train_class.configdata['report_tune'] = True

        self.train_class.train()

    def start_eval(self):
        """Run the search, print the best configuration, optionally retrain."""
        analysis = tune.run(
            self.param_tune,
            num_samples=self.config_data['num_evals'],
            config=self.config_data['space'],
            search_alg=self.search_algo,
            resources_per_trial={'gpu': 1},
            local_dir=self.config_data['log_dir'] + self.subset_method + '/',
            log_to_file=True,
            name=self.config_data['name'],
            resume=self.config_data['resume'],
        )

        best_config = analysis.get_best_config(
            metric=self.config_data['metric'],
            mode=self.config_data['mode'],
        )
        print("Best Config: ", best_config)

        if self.config_data['final_train']:
            self.final_train(best_config)

    def get_search_algo(self, method, space, metric, mode):
        """Return the search algorithm for ``method``.

        Args:
            method: Name of the search method; ``"hyperopt"`` and ``"TPE"``
                both select ``HyperOptSearch``.
            space: Search space handed to the searcher.
            metric: Name of the metric the searcher optimises.
            mode: Optimisation direction for ``metric``.

        Returns:
            A ``HyperOptSearch`` for the supported methods, otherwise
            ``None``.  NOTE(review): ``tune.run`` is expected to fall back to
            its default search when ``search_alg`` is ``None`` -- confirm for
            the Ray version in use.
        """
        if method in ("hyperopt", "TPE"):
            # Honour the configured metric/mode.  The original hard-coded
            # "mean_accuracy"/"max" and ignored both arguments, which could
            # disagree with the metric/mode used to pick the best config.
            return HyperOptSearch(space, metric=metric, mode=mode)

        # Unsupported method: return None explicitly rather than relying on
        # an unbound local or an implicit fall-through.
        return None

    def final_train(self, best_params):
        """Retrain with ``best_params`` and the ``'Full'`` strategy.

        Args:
            best_params: Mapping of optimised parameters, in the same format
                accepted by ``update_parameters``.
        """
        # Update (optimised) parameters.
        new_config = self.update_parameters(
            self.train_class.configdata, best_params
        )
        self.train_class.configdata = new_config

        # Change strategy to Full (i.e. use the whole dataset).
        self.train_class.configdata['dss_strategy']['type'] = 'Full'

        self.train_class.train()

    def update_parameters(self, config, new_config):
        """Copy recognised tuning parameters from ``new_config`` into ``config``.

        Recognised keys in ``new_config`` and where they are written:

        * ``'learning_rate'`` -> ``config['optimizer']['lr']``
        * ``'optimizer'`` -> ``config['optimizer']['type']``
        * ``'trn_batch_size'`` -> ``config['dataloader']['batch_size']``

        Args:
            config: Trainer configuration; modified in place.
            new_config: Mapping of parameters to apply; other keys are
                ignored.

        Returns:
            The same ``config`` object that was passed in.
        """
        if 'learning_rate' in new_config:
            config['optimizer']['lr'] = new_config['learning_rate']
        if 'optimizer' in new_config:
            config['optimizer']['type'] = new_config['optimizer']
        if 'trn_batch_size' in new_config:
            config['dataloader']['batch_size'] = new_config['trn_batch_size']
        return config
from train import TrainClassifier

# Launch training from the gradmatch CIFAR-10 configuration with a few
# settings overridden in the loaded configuration before starting.
config_file = "configs/config_gradmatch_cifar10.py"
classifier = TrainClassifier(config_file)

overrides = classifier.configdata
overrides['dss_strategy']['select_every'] = 1
overrides['model']['architecture'] = 'MobileNet2'
overrides['optimizer']['weight_decay'] = 4e-5

classifier.train()