Example 1
0
 def __init__(self, config_file):
     """Load the tuning configuration and set up the trainer and search algorithm."""
     cfg = load_config_data(config_file)
     self.config_data = cfg
     self.train_class = TrainClassifier(cfg['subset_config'])
     # Remember the subset-selection strategy; it is used in the log dir name.
     self.subset_method = self.train_class.configdata['dss_strategy']['type']
     self.search_algo = self.get_search_algo(cfg['search_algo'], cfg['space'],
                                             cfg['metric'], cfg['mode'])
Example 2
0
File: tags.py Project: joacar/nosy
class TagsClassifier:
    """Classify text into tags with a Naive Bayes model trained on demand."""

    def __init__(self):
        # BUG FIX: __init__ was decorated with @classmethod, so `self` was
        # actually the class object and all state became shared class
        # attributes. It is now a normal instance initializer.
        self.algorithm = NaiveBayesClassifier
        self.trainer = TrainClassifier()
        self.classifier = None  # trained model, built lazily by train()

    def classify(self, text):
        """Train the model and return the predicted tag for *text*.

        NOTE(review): this retrains on every call; consider caching the
        trained classifier (e.g. in redis) as the original comment suggested.
        """
        self.train()
        features = self.trainer.feature_extractor(text.split())
        return self.classifier.classify(features)

    def train(self):
        # Was a @classmethod that relied on class-level state produced by the
        # broken __init__; now an instance method using instance state.
        training_set = self.trainer.train()
        self.classifier = self.algorithm.train(training_set)
Example 3
0
class TagsClassifier:
    """Tag classifier backed by a lazily trained Naive Bayes model."""

    def __init__(self):
        # BUG FIX: the original __init__ carried @classmethod, which made
        # `self` the class itself and turned every attribute into shared
        # class-level state. Plain instance attributes are used instead.
        self.algorithm = NaiveBayesClassifier
        self.trainer = TrainClassifier()
        self.classifier = None  # set by train()

    def classify(self, text):
        """Train and then classify whitespace-split *text*.

        NOTE(review): training happens on every call; a cached/persisted
        classifier (the original comment mentioned redis) would avoid this.
        """
        self.train()
        features = self.trainer.feature_extractor(text.split())
        return self.classifier.classify(features)

    def train(self):
        # Previously a @classmethod depending on the broken class-level
        # __init__; rewritten as an instance method.
        training_set = self.trainer.train()
        self.classifier = self.algorithm.train(training_set)
Example 4
0
File: tags.py Project: joacar/nosy
 def __init__(self):
     """Prepare the classifier: algorithm class, trainer instance, empty model."""
     self.classifier = None  # filled in once training has run
     self.trainer = TrainClassifier()
     self.algorithm = NaiveBayesClassifier
Example 5
0
    # Train a logistic-regression digit classifier and optionally export it.
    args=parser.parse_args()

    # 10 digit classes; epoch count is fixed for this experiment.
    n_classes = 10
    n_epochs = 200

    # Load the training CSV through the project's Preprocessing helper.
    pre = Preprocessing('digits')
    pre.load_data(filename='train.csv', name='train')

    # Column '0' holds the label; the remaining columns are the features.
    X_df = pre.get(name='train').drop(columns=['0'])
    y_df = pre.get(name='train')['0']

    # NOTE(review): dtype/device are defined but not used in this excerpt —
    # presumably consumed further below or inside the training code; confirm.
    dtype = torch.float
    device = torch.device("cpu")

    # 256 input features — presumably 16x16 flattened digit images; confirm.
    model_name = 'logreg_digits'
    model = LogReg(model_name, 256, n_classes)

    learning_rate = 0.0001
    batch_size = 32

    # Run training; run_train returns the model plus optimizer, criterion,
    # train/validation loss histories and the best parameter state dict.
    train_classifier = TrainClassifier(model, X_df, y_df)
    trained_model , optimizer, criterion, loss_hist, loss_val_hist, best_param = train_classifier.run_train(n_epochs = n_epochs, lr=learning_rate, batch_size=batch_size)
    pre.save_results(loss_hist, loss_val_hist, f'{model_name}')

    # Restore the best (lowest validation loss, presumably) parameters
    # and switch to evaluation mode.
    trained_model.load_state_dict(state_dict=best_param)
    trained_model.eval()

    # Export the trained network only when requested on the command line.
    if args.s_model:
        m_exporter = ModelExporter('digits')
        m_exporter.save_nn_model(trained_model, optimizer, 0, n_classes, n_epochs, trained_model.get_args())
Example 6
0
    # Train a logistic-regression sentiment classifier on IMDB features,
    # export the model, then load the matching test split.
    args=parser.parse_args()

    pre = Preprocessing('IMDB')

    # Binary sentiment task; feature count comes from the command line.
    n_classes = 2
    n_features = int(args.n_feat)
    n_epochs = 100
    pre.load_data(filename=f'training_data_{n_features}.csv', name='training_data')

    # 'target' is the label column; everything else is a feature.
    X_df = pre.get(name='training_data').drop(columns=['target'])
    y_df = pre.get(name='training_data')['target']

    model = LogReg('log_reg', n_features, n_classes)

    train_classifier = TrainClassifier(model, X_df, y_df)
    trained_model, optimizer, criterion, loss_hist, loss_validate_hist = train_classifier.run_train(n_epochs = n_epochs)
    # NOTE(review): the literal 100 in the results name looks like it was
    # meant to be n_features (or n_epochs) — confirm before relying on it.
    pre.save_results(loss_hist, loss_validate_hist, f'log_reg_{100}')

    # Persist the trained network together with its optimizer state.
    m_exporter = ModelExporter('IMDB')
    m_exporter.save_nn_model(trained_model, optimizer, n_features, n_classes, n_epochs)

    # --- test part: load the held-out split for evaluation ---
    pre.load_data(filename=f'test_data_{n_features}.csv', name='test_data')

    X_test_df = pre.get(name='test_data').drop(columns=['target'])
    y_test_df = pre.get(name='test_data')['target']


    # NOTE(review): dtype/device are unused within this excerpt — presumably
    # used by evaluation code below it; confirm.
    dtype = torch.float
    device = torch.device("cpu")
Example 7
0
from train import TrainClassifier

# Drive a GradMatch-on-CIFAR10 run, re-selecting the data subset every epoch.
cfg_path = "configs/config_gradmatch_cifar10.py"
runner = TrainClassifier(cfg_path)
runner.configdata['dss_strategy']['select_every'] = 1
runner.train()
Example 8
0
class HyperParamTuning:
    """Hyper-parameter search over a subset-selection trainer using Ray Tune.

    Reads a config file describing the search space, search algorithm,
    metric/mode and the trainer's subset config, drives ``tune.run``, and
    optionally retrains on the full dataset with the best parameters found.
    """

    def __init__(self, config_file):
        self.config_data = load_config_data(config_file)
        self.train_class = TrainClassifier(self.config_data['subset_config'])
        self.search_algo = self.get_search_algo(
            self.config_data['search_algo'], self.config_data['space'],
            self.config_data['metric'], self.config_data['mode'])
        # Save the subset-selection method; it is used in the log dir name.
        self.subset_method = self.train_class.configdata['dss_strategy'][
            'type']

    def param_tune(self, config):
        """Trainable for Ray Tune: apply *config* overrides, then train once."""
        new_config = self.update_parameters(self.train_class.configdata,
                                            config)
        self.train_class.configdata = new_config
        # Turn on reporting to Ray for every trial.
        self.train_class.configdata['report_tune'] = True
        self.train_class.train()

    def start_eval(self):
        """Run the search, print the best config, optionally do a final full train."""
        analysis = tune.run(self.param_tune,
                            num_samples=self.config_data['num_evals'],
                            config=self.config_data['space'],
                            search_alg=self.search_algo,
                            resources_per_trial={'gpu': 1},
                            local_dir=self.config_data['log_dir'] +
                            self.subset_method + '/',
                            log_to_file=True,
                            name=self.config_data['name'],
                            resume=self.config_data['resume'])

        best_config = analysis.get_best_config(
            metric=self.config_data['metric'], mode=self.config_data['mode'])
        print("Best Config: ", best_config)

        if self.config_data['final_train']:
            self.final_train(best_config)

    def get_search_algo(self, method, space, metric, mode):
        """Return the Ray Tune search algorithm for *method*.

        BUG FIX: *metric* and *mode* were previously ignored (hard-coded to
        "mean_accuracy"/"max", inconsistent with get_best_config which uses
        the config values), and an unknown *method* crashed with an opaque
        UnboundLocalError. Raises ValueError for unsupported methods.
        """
        if method in ("hyperopt", "TPE"):
            return HyperOptSearch(space, metric=metric, mode=mode)
        raise ValueError("unsupported search_algo: {!r}".format(method))

    def final_train(self, best_params):
        """Retrain with the best parameters on the whole dataset ('Full' strategy)."""
        new_config = self.update_parameters(self.train_class.configdata,
                                            best_params)
        self.train_class.configdata = new_config
        self.train_class.configdata['dss_strategy']['type'] = 'Full'

        self.train_class.train()

    def update_parameters(self, config, new_config):
        """Copy recognized hyper-parameters from *new_config* into *config*.

        Mutates and returns *config*; unrecognized keys are ignored.
        """
        if 'learning_rate' in new_config:
            config['optimizer']['lr'] = new_config['learning_rate']
        if 'optimizer' in new_config:
            config['optimizer']['type'] = new_config['optimizer']
        if 'trn_batch_size' in new_config:
            config['dataloader']['batch_size'] = new_config['trn_batch_size']

        return config
Example 9
0
from train import TrainClassifier

# GradMatch CIFAR-10 run with a MobileNet2 backbone and per-epoch
# subset re-selection; weight decay tuned for MobileNet-style training.
config_path = "configs/config_gradmatch_cifar10.py"
trainer = TrainClassifier(config_path)
trainer.configdata['dss_strategy']['select_every'] = 1
trainer.configdata['model']['architecture'] = 'MobileNet2'
trainer.configdata['optimizer']['weight_decay'] = 4e-5
trainer.train()
Example 10
0
 def __init__(self):
     """Initialize with a Naive Bayes algorithm, a fresh trainer, and no model yet."""
     self.algorithm = NaiveBayesClassifier
     self.classifier = None  # assigned once training completes
     self.trainer = TrainClassifier()