Example no. 1
def load_and_get_model_for_inference(trained_model_arch,
                                     trained_checkpoint_dir, filetype,
                                     input_shape, num_classes):
    model_factory = ModelFactory()
    model = model_factory.get_model(
        trained_model_arch,
        input_shape,
        is_training=False,
        num_classes=num_classes,
        learning_rate=0.001)  # A dummy learning rate since it is test mode.
    # The ModelCheckpoint in train pipeline saves the weights inside the checkpoint directory as follows.
    if filetype == '.h5':
        weights_path = trained_checkpoint_dir + "best_model_dir-auc.h5"
        model = tf.keras.models.load_model(weights_path)
    elif filetype == 'tf':
        weights_path = os.path.join(trained_checkpoint_dir, "variables",
                                    "variables")
        model.load_weights(weights_path)
    else:
        raise ValueError(
            "The provided saved model filetype is not recognized: %s" % filetype)

    print(
        "The model has been created and the weights have been loaded from: %s"
        % weights_path)
    model.summary()
    return model
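A minimal call sketch for the helper above; the architecture key, checkpoint directory, and input shape are assumptions for illustration, not values taken from the source.

import numpy as np

# Hedged usage sketch: argument values below are illustrative assumptions.
model = load_and_get_model_for_inference(
    trained_model_arch='densenet121',            # assumed architecture key known to ModelFactory
    trained_checkpoint_dir='/tmp/checkpoints/',  # assumed checkpoint directory
    filetype='tf',
    input_shape=(224, 224, 3),
    num_classes=14)
batch = np.zeros((1, 224, 224, 3), dtype='float32')  # placeholder input batch
predictions = model.predict(batch)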
Example no. 2
 def __init__(self, config):
     self.manager = ModelManager(
         config.flavor,
         config.server,
         config.database,
         config.driver,
         config.port,
         config.schema
     )
     self.factory = ModelFactory(
         config.environment,
         config.flavor,
         FieldFactory()
     )
Example no. 3
def createModel():
    json_data = request.get_json(force=True)

    # check if all fields are there
    if json_data.get('model_name') is None:
        abort(make_response("model_name field is missing.\n", 422))

    if json_data.get('model_type') is None:
        abort(make_response("model_type field is missing.\n", 422))

    if json_data.get('retrain_counter') is None:
        abort(make_response("no retrain information set.\n", 422))

    # add model to list of models
    app.r.sadd('models', json_data.get('model_name'))

    # save model definition
    mdl = ModelFactory.createModel(json_data.get('model_type'),
                                   json_data.get('model_name'),
                                   json_data.get('retrain_counter'))

    if mdl is None:
        return abort(make_response("No model available of type " +
                                   json_data.get('model_type') + "\n",
                     422))

    app.r.set(json_data.get('model_name') + '_object', pickle.dumps(mdl))

    return "created model: " + str(mdl) + "\n", 201
Example no. 4
def load_model():
    model_file_path = 'src/best_weights_1555982768.7076797.h5'
    #model_file_path = 'best_weights_1555982768.7076797.h5'
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=model_type,
                                    use_base_weights=False,
                                    weights_path=model_file_path,
                                    input_shape=(img_height, img_width, 3))
    optimizer = keras.optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer,
                  loss="binary_crossentropy",
                  metrics=["accuracy", "binary_accuracy"])
    model.load_weights(model_file_path)

    return model
Example no. 5
def createModel():
    json_data = request.get_json(force=True)

    # check if all fields are there
    if json_data.get('model_name') is None:
        abort(make_response("model_name field is missing.\n", 422))

    if json_data.get('model_type') is None:
        abort(make_response("model_type field is missing.\n", 422))

    if json_data.get('retrain_counter') is None:
        abort(make_response("no retrain information set.\n", 422))

    # add model to list of models
    app.r.sadd('models', json_data.get('model_name'))

    # save model definition
    mdl = ModelFactory.createModel(json_data.get('model_type'),
                                   json_data.get('model_name'),
                                   json_data.get('retrain_counter'))

    if mdl is None:
        return abort(make_response("No model available of type " +
                                   json_data.get('model_type') + "\n",
                     422))

    app.r.set(json_data.get('model_name') + '_object', pickle.dumps(mdl))

    return "created model: " + str(mdl) + "\n", 201
Example no. 6
class Main:
    def __init__(self, config):
        self.manager = ModelManager(
            config.flavor,
            config.server,
            config.database,
            config.driver,
            config.port,
            config.schema
        )
        self.factory = ModelFactory(
            config.environment,
            config.flavor,
            FieldFactory()
        )


    def __call__(self):
        # Connect to the server and query model metadata
        with self.manager as meta:
            tables = meta.tables()
            columns = [meta.columns(table=table[3]) for table in tables]

        # Zip the serialized values together
        zipped = zip(tables, columns)

        # Map the Model.__repr__ onto the zipped records.
        results = map(
            lambda x: str(self.factory.make(x[0][1], x[0][2], x[0][3], fields=x[1])),
            zipped
        )

        # Send the table creation back to the server.
        for result in results:
            print(result)
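A hedged construction sketch for Main; the attribute names read from the config object come from the code above, while the values and the use of SimpleNamespace are assumptions.

from types import SimpleNamespace

# Hedged sketch: attribute names mirror what Main reads from config; values are illustrative.
config = SimpleNamespace(environment='dev', flavor='mssql', server='localhost',
                         database='reporting', driver='ODBC Driver 17 for SQL Server',
                         port=1433, schema='dbo')
Main(config)()  # queries metadata and prints the generated model representations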
Example no. 7
def main(dataset_name, model_name, epochs=50, batch_size=128):
    dataset = DatasetFactory.get_by_name(dataset_name, train_percentage=0.95)
    checkpoint = 'checkpoint/{}_{}'.format(dataset_name, model_name)
    solver = Solver(dataset, checkpoint, train_batch_size=batch_size)
    model = ModelFactory.get_by_name(model_name, dataset)
    solver.train_model(model, epochs=epochs, warmup_epochs=10, num_epoch_to_log=5, learning_rate=1e-3, weight_decay=1e-4)
    solver.test(model)
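A hedged invocation sketch; the dataset and model names depend on what DatasetFactory and ModelFactory actually register, so the strings below are assumptions.

if __name__ == '__main__':
    # Hedged sketch: 'cifar10' and 'resnet18' are assumed factory names.
    main('cifar10', 'resnet18', epochs=50, batch_size=128)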
Example no. 8
    def __init__(self, parent=None):
        super(MaeBird, self).__init__(parent)
        
        self.models = ModelFactory()

        self.table = None
        self.languages = {'perimary': 'Fin', 'secondary': 'Eng', 
                         'tertiary': 'Swe'}
        
        self.dbtype = __DB__
        self.dbfile = None
        self.db = None
        
        self.matches = []
        self.currentsearchitem = 0
        
        self.fullscreen = False
        self.setupUi(self)
        
        self.setWindowTitle(__APPNAME__ + ' ' + __VERSION__)
        
        # TODO: loading settings should be moved to a separate method
        settings = QSettings()
        
        # Set up logging
        loggingdir = settings.value("Logging/loggingDir")
        if loggingdir is None:
            loggingdir = __USER_DATA_DIR__
        self.logger = Logger('root', loggingdir=loggingdir)
        if settings.value("Settings/debugging"):
            self.logger.debugging = int(settings.value("Settings/debugging"))
            self.logger.debug('Logging initialized')
        
        # Try to load previous session
        if settings.value("Settings/saveSettings"):
            self.saveSettings = int(settings.value("Settings/saveSettings"))
        else:
            self.saveSettings = 1
                      
        if self.saveSettings:
            QTimer.singleShot(0, self.load_initial_data)
            #QTimer.singleShot(0, self.load_initial_model)
        
        self.header = self.tableView.horizontalHeader()
        self.header.sectionDoubleClicked.connect(self.sort_table)
        
        self.search.textEdited.connect(self.update_ui)
        self.search.setFocus()
        self.searchNextButton.clicked.connect(self.update_ui)
        self.searchPrevButton.clicked.connect(self.update_ui)
        
        self.tableView.pressed.connect(self.update_ui)
        
        self.tableView.doubleClicked.connect(
                    lambda: self.handle_observation(ObservationDialog.SHOW))
        self.addButton.clicked.connect(
                    lambda: self.handle_observation(ObservationDialog.ADD))
        self.deleteButton.clicked.connect(
                    lambda: self.handle_observation(ObservationDialog.DELETE))
Example no. 9
def get_parser():
    """
    Constructs and returns a parser object to handle the command-line arguments.

    :return: argparse.ArgumentParser
    """
    # Get parsers for various model architectures.
    model_parser = ModelFactory.get_all_parsers()
    # Get parsers for various optimizers.
    optimizer_parser = OptimizerFactory.get_all_parsers()
    # Add parent parsers.
    parent_parsers = model_parser + optimizer_parser
    parser = argparse.ArgumentParser(parents=parent_parsers)

    # Generic options
    parser.add_argument('--checkpoint-step', type=int, default=1,
                        help='Number of epochs between successive checkpoint creations')
    parser.add_argument('--config-file', type=str, default=[], nargs='*',
                        help='File(s) to read the command-line arguments from')
    parser.add_argument('--continue', action='store_true',
                        help='Continue the execution of the last experiment saved into the export directory')
    parser.add_argument('--debug', action='store_true', help='Show debug messages')
    parser.add_argument('--export-dir', type=str, required=True, help='Export directory')
    parser.add_argument('--no-gpu', action='store_true', help='Use CPU')
    
    parser.add_argument("--wandb-directory", type=str, default="../wandb")
    parser.add_argument("--disable-wandb", action="store_true", help="No Wandb logging")

    # Data options
    parser.add_argument('--batch-size', type=int, default=[16], nargs='*', help='Batch size(s)')
    parser.add_argument('--dataset', type=str, default=[consts.SIGMORPHON2020], nargs='*',
                        choices=[consts.SIGMORPHON2020], help='Dataset(s) to train on')
    parser.add_argument('--sigmorphon2020-root', type=str, help='Root directory for the SIGMORPHON 2020 dataset')

    # Language options
    parser.add_argument('--language-families', type=str, nargs='*', default=None,
                        help='The families of languages to load the data for.'
                             ' If not provided, all available families will be used.')
    parser.add_argument('--language-info-file', type=str, default='lang_config.tsv',
                        help='The language information file.')
    parser.add_argument('--languages', type=str, nargs='*', default=None,
                        help='The languages to load the data for.'
                             ' If not provided, all available languages will be used.')

    # Optimizer options
    parser.add_argument('--optimizer', type=str, default=[OptimizerFactory.optimizers[0]],
                        choices=OptimizerFactory.optimizers, nargs='*', help='Optimizer algorithm(s)')
    parser.add_argument('--num-epochs', type=int, default=30, help='Number(s) of epochs')

    # Model options
    parser.add_argument('--model-architecture', type=str, default=[ModelFactory.architectures[0]], nargs='*',
                        choices=ModelFactory.architectures, help='Model architecture(s)')
    
    # Parallelism Optoions, affect various
    parser.add_argument('--loader-threads', type=int, default=0, help='Data loading threads. Default to 0 (load in main)')
    parser.add_argument('--use-dataparallel', action='store_true', help='Use torch.nn.DataParallel to wrap the model?')

    return parser
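A hedged parsing sketch for the parser above; the flag values are illustrative, and only --export-dir is required among the generic options.

# Hedged sketch: argument values are illustrative.
parser = get_parser()
args = parser.parse_args(['--export-dir', 'exports/run1',
                          '--batch-size', '32',
                          '--num-epochs', '5'])
print(args.export_dir, args.batch_size, args.num_epochs)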
Example no. 10
    def test_factory_creation(self):
        model_name = 'lin_reg'
        retrain_counter = 10
        model_obj = StandardModels.LinearRegression(model_name, retrain_counter)
        model_obj2 = ModelFactory.createModel('LinearRegression',
                                              model_name,
                                              retrain_counter)

        self.assertEqual(model_obj, model_obj2)
Example no. 11
    def test_online_linear_regression(self):
        model_name = 'onl_lin_reg'
        retrain_counter = 1
        model_obj = ModelFactory.createModel('OnlineLinearRegression',
                                             model_name, retrain_counter)

        self.assertEqual(model_obj.model_name, model_name)
        self.assertEqual(model_obj.retrain_counter, retrain_counter)
        self.assertEqual(model_obj.model_type, "OnlineLinearRegression")
Example no. 12
    def test_factory_creation(self):
        model_name = 'lin_reg'
        retrain_counter = 10
        model_obj = StandardModels.LinearRegression(model_name,
                                                    retrain_counter)
        model_obj2 = ModelFactory.createModel('LinearRegression', model_name,
                                              retrain_counter)

        self.assertEqual(model_obj, model_obj2)
Example no. 13
    def test_online_linear_regression(self):
        model_name = 'onl_lin_reg'
        retrain_counter = 1
        model_obj = ModelFactory.createModel('OnlineLinearRegression',
                                              model_name,
                                              retrain_counter)

        self.assertEqual(model_obj.model_name, model_name)
        self.assertEqual(model_obj.retrain_counter, retrain_counter)
        self.assertEqual(model_obj.model_type, "OnlineLinearRegression")
Example no. 14
 def __init__(self, dataset_config: Config.Dataset, buffer_size: int = 64):
     super(SyntheticDataset, self).__init__()
     self.device = torch.device(
         "cuda" if torch.cuda.is_available() else "cpu")
     self.label = dataset_config.classes[-1]
     self.model = ModelFactory.create(
         dataset_config.gan_model,
         n_classes=len(dataset_config.classes)).to(self.device)
     self.buffer_size = buffer_size
     self.pointer = 0
     self.buffer = self.generate_buffer()
Example no. 15
 def train(self):
     """Enqueue an event to train person group and report train progress"""
     group = ModelFactory.registered_group()
     group.cognitive.train()
     # Keep querying training status every 1 second
     while True:
         train_result = group.cognitive.trainingStatus()
         # Completed training event will have status succeeded or failed
         if (train_result['status'] == 'succeeded' or
                 train_result['status'] == 'failed'):
             break
         time.sleep(1)
Example no. 16
    def __init__(self, api, model_factory=None):
        self.api = api
        self.model_factory = model_factory or ModelFactory()
        self.mockups = {}

        for resource_name, resource in self.api._registry.items():
            model_class = resource._meta.object_class
            self.register(resource)
            try:
                model_factory[model_class]
            except:
                self.model_factory.register(model_class)
Example no. 17
 def identify(self, register_new):
     """Identify a person or a group of people captured by camera"""
     registered_group = ModelFactory.registered_group()
     faces = self.record(False)
     recognized_people = registered_group.identify(faces)
     if len(recognized_people) > 0:
         for person in recognized_people:
             print 'Xin chao %s' % person
             Speaker.speak(person)
     # If register-on-premise mode is turned on then register the person
     # in case we cannot identify who they are
     if register_new is True and len(recognized_people) == 0:
         self.record(True)
Example no. 18
 def __init__(self, env: MultiEnv, model_factory: ModelFactory, curiosity_factory: CuriosityFactory,
              normalize_state: bool, normalize_reward: bool, reporter: Reporter = NoReporter()) -> None:
     self.env = env
     self.reporter = reporter
     self.state_converter = Converter.for_space(self.env.observation_space)
     self.action_converter = Converter.for_space(self.env.action_space)
     self.model = model_factory.create(self.state_converter, self.action_converter)
     self.curiosity = curiosity_factory.create(self.state_converter, self.action_converter)
     self.reward_normalizer = StandardNormalizer() if normalize_reward else NoNormalizer()
     self.state_normalizer = self.state_converter.state_normalizer() if normalize_state else NoNormalizer()
     self.normalize_state = normalize_state
     self.device: torch.device = None
     self.dtype: torch.dtype = None
     self.numpy_dtype: object = None
Example no. 19
    def register(self, data_path):
        """Register a group of people specified in data folder and train

            Args:
                data_path (string): train data folder
        """
        if data_path == 'default':
            data_path = Utilities.train_data_path()
        if Utilities.file_exists(data_path):
            group = ModelFactory.registered_group()
            group.save()
            for alias_name in os.listdir(data_path):
                # Ignore gitkeep file and collect data from all folders
                if alias_name != '.gitkeep':
                    logger.log('Registering %s...' % alias_name)
                    Person.register(group, alias_name)
            # After everything is done, call api to train newly created group
            self.train()
Example no. 20
class MaeBird(QMainWindow, Ui_MainWindow):
    def __init__(self, parent=None):
        super(MaeBird, self).__init__(parent)
        
        self.models = ModelFactory()

        self.table = None
        self.languages = {'perimary': 'Fin', 'secondary': 'Eng', 
                         'tertiary': 'Swe'}
        
        self.dbtype = __DB__
        self.dbfile = None
        self.db = None
        
        self.matches = []
        self.currentsearchitem = 0
        
        self.fullscreen = False
        self.setupUi(self)
        
        self.setWindowTitle(__APPNAME__ + ' ' + __VERSION__)
        
        # TODO: loading settings should be moved to a separate method
        settings = QSettings()
        
        # Set up logging
        loggingdir = settings.value("Logging/loggingDir")
        if loggingdir is None:
            loggingdir = __USER_DATA_DIR__
        self.logger = Logger('root', loggingdir=loggingdir)
        if settings.value("Settings/debugging"):
            self.logger.debugging = int(settings.value("Settings/debugging"))
            self.logger.debug('Logging initialized')
        
        # Try to load previous session
        if settings.value("Settings/saveSettings"):
            self.saveSettings = int(settings.value("Settings/saveSettings"))
        else:
            self.saveSettings = 1
                      
        if self.saveSettings:
            QTimer.singleShot(0, self.load_initial_data)
            #QTimer.singleShot(0, self.load_initial_model)
        
        self.header = self.tableView.horizontalHeader()
        self.header.sectionDoubleClicked.connect(self.sort_table)
        
        self.search.textEdited.connect(self.update_ui)
        self.search.setFocus()
        self.searchNextButton.clicked.connect(self.update_ui)
        self.searchPrevButton.clicked.connect(self.update_ui)
        
        self.tableView.pressed.connect(self.update_ui)
        
        self.tableView.doubleClicked.connect(
                    lambda: self.handle_observation(ObservationDialog.SHOW))
        self.addButton.clicked.connect(
                    lambda: self.handle_observation(ObservationDialog.ADD))
        self.deleteButton.clicked.connect(
                    lambda: self.handle_observation(ObservationDialog.DELETE))
        
    def closeEvent(self, event):
        settings = QSettings()
        if self.saveSettings:
            db = self.dbfile if self.db is not None else ''
            settings.setValue("Database/LastDb", db)
            
            if self.tableView.model() is not None:
                settings.setValue("Database/DefaultModel", self.tableView.model().name)
            
                visible_fields = [not bool(self.tableView.isColumnHidden(i)) for i in range(0, self.tableView.model().columnCount())]
                settings.setValue("Database/visibleFields", visible_fields)
            settings.setValue("Settings/debugging", int(self.logger.debugging))
        
        settings.setValue("Settings/saveSettings", int(self.saveSettings))
    
    def load_initial_data(self):
        settings = QSettings()
        dbfile = unicode(settings.value("Database/LastDb"))
        modelname = unicode(settings.value("Database/DefaultModel"))
        if dbfile and QFile.exists(dbfile):
            self.load_db(dbfile, modelname=modelname)
            self.logger.debug("Loaded database %s with model %s" % (dbfile,
                                                                     modelname))
        
        if settings.value("Database/visibleFields"):
            visible_fields = [item for item in settings.value("Database/visibleFields")]
        
            # FIXME: in absence of QVariant, deal with values
            visible_fields = [False if item == 'false' else True for item in visible_fields]
            if not all(visible_fields):
                self.logger.debug("Hiding fields %s" % visible_fields)
            self.show_fields(visible_fields)

    def load_db(self, dbname, modelname=None):
        self.db = QSqlDatabase.addDatabase(self.dbtype)
        self.db.setDatabaseName(dbname)
        if not self.db.open():
            QMessageBox.warning(self, "Batabase connection",
                "Database Error: %s" % (self.db.lastError().text()))
            return
        self.dbfile = dbname
        
        if modelname not in self.models.model_names:
            modeldlg = ModelDialog(self.models.model_names)
            if modeldlg.exec_():
                modelname = modeldlg.selected_model()
        
        if modelname:
            self.load_model(modelname)
    
    def load_model(self, modelname):
        ''' Loads a specific database model and sets it to view.  
        '''
        try:
            model = self.models.get_model(modelname)
        except NotImplementedError, e:
            QMessageBox.warning(self, "Database model",
                "Database Model Error: %s" % str(e))
            return
        self.tableView.setModel(model(self))
        self.tableView.setItemDelegate(QSqlRelationalDelegate(self))
        self.tableView.setSelectionMode(QTableView.SingleSelection)
        self.tableView.setSelectionBehavior(QTableView.SelectRows)
        self.tableView.setColumnHidden(0, True)
        self.tableView.resizeColumnsToContents()
        self.update_ui()
Example no. 21
class Trainer():
    def __init__(self, image_shape, io):
        self.mf = ModelFactory(image_shape)
        self.io = io

    def train(self, path1, path2):
        print('path to imgs:', path1, " path to arts:", path2)
        g1, g2, d1, d2, c1, c2 = self.mf.training_models()
        photo_imgs = self.io.imgs_to_np(path1)
        impr_imgs = self.io.imgs_to_np(path2)
        #photo_imgs, impr_imgs = IOHandler().load_npz('real-images.npz')
        self._model_train(d1, d2, g1, g2, c1, c2, (photo_imgs, impr_imgs))

    def _generate_real_samples(self, dataset, n_samples, patch_shape):
        ix = np.random.randint(0, dataset.shape[0], n_samples)
        X = dataset[ix]
        y = np.ones((n_samples, patch_shape, patch_shape, 1))
        return X, y

    def _summarize_performance(self, step, g_model, trainX, name, n_samples=5):
        # select a sample of input images
        X_in, _ = self._generate_real_samples(trainX, n_samples, 0)
        # generate translated images
        X_out, _ = g_model.generate_fake_samples(X_in, 0)
        # scale all pixels from [-1,1] to [0,1]
        X_in = (X_in + 1) / 2.0
        X_out = (X_out + 1) / 2.0
        self.io.save_fig('figs', X_in, X_out)

    #image pool abstraction function
    def _update_image_pool(self, pool, images, max_size=50):
        selected = list()
        for image in images:
            if len(pool) < max_size:
                # stock the pool
                pool.append(image)
                selected.append(image)
            elif np.random.random() < 0.5:
                # use image, but don't add it to the pool
                selected.append(image)
            else:
                # replace an existing image and use replaced image
                ix = np.random.randint(0, len(pool))
                selected.append(pool[ix])
                pool[ix] = image
        return np.asarray(selected)

    # train cyclegan models
    def _model_train(self, d_model_A, d_model_B, g_model_AtoB, g_model_BtoA,
                     c_model_AtoB, c_model_BtoA, dataset):
        # define properties of the training run
        n_epochs, n_batch = 25, 1  # * tpu_strategy.num_replicas_in_sync
        # determine the output square shape of the discriminator
        n_patch = d_model_A.model.output_shape[1]
        # unpack dataset
        trainA, trainB = dataset
        # prepare image pool for fakes
        poolA, poolB = list(), list()
        # calculate the number of batches per training epoch
        bat_per_epo = int(len(trainA) / n_batch)
        # calculate the number of training iterations
        n_steps = bat_per_epo * n_epochs

        print("doing {} steps".format(n_steps))
        print(bat_per_epo)
        # manually enumerate epochs
        for i in range(n_steps):
            # select a batch of real samples
            X_realA, y_realA = self._generate_real_samples(
                trainA, n_batch, n_patch)
            X_realB, y_realB = self._generate_real_samples(
                trainB, n_batch, n_patch)
            # generate a batch of fake samples
            X_fakeA, y_fakeA = g_model_BtoA.generate_fake_samples(
                X_realB, n_patch)
            X_fakeB, y_fakeB = g_model_AtoB.generate_fake_samples(
                X_realA, n_patch)
            # update fakes from pool
            X_fakeA = self._update_image_pool(poolA, X_fakeA)
            X_fakeB = self._update_image_pool(poolB, X_fakeB)

            # update generator B->A via adversarial and cycle loss
            c_model_BtoA.set_trainable(True)
            g_loss2, _, _, _, _ = c_model_BtoA.model.train_on_batch(
                [X_realB, X_realA], [y_realA, X_realA, X_realB, X_realA])
            c_model_BtoA.set_trainable(False)

            # update discriminator for A -> [real/fake]
            dA_loss1 = d_model_A.model.train_on_batch(X_realA, y_realA)
            dA_loss2 = d_model_A.model.train_on_batch(X_fakeA, y_fakeA)
            # update generator A->B via adversarial and cycle loss
            c_model_AtoB.set_trainable(True)
            g_loss1, _, _, _, _ = c_model_AtoB.model.train_on_batch(
                [X_realA, X_realB], [y_realB, X_realB, X_realA, X_realB])
            c_model_AtoB.set_trainable(False)

            # update discriminator for B -> [real/fake]
            dB_loss1 = d_model_B.model.train_on_batch(X_realB, y_realB)
            dB_loss2 = d_model_B.model.train_on_batch(X_fakeB, y_fakeB)
            # summarize performance
            s = ('>%d, dA[%.3f,%.3f] dB[%.3f,%.3f] g[%.3f,%.3f]' %
                 (i + 1, dA_loss1, dA_loss2, dB_loss1, dB_loss2, g_loss1,
                  g_loss2))
            sys.stdout.write(s)
            sys.stdout.flush()
            # evaluate the model performance every so often
            if (i + 1) % int(300) == 0:
                # plot A->B translation
                self._summarize_performance(i, g_model_AtoB, trainA, 'AtoB')
                # plot B->A translation
                self._summarize_performance(i, g_model_BtoA, trainB, 'BtoA')
            if (i + 1) % (1000) == 0:
                # save the models
                self.io.save_models('models',
                                    step=i,
                                    models=[
                                        d_model_A, d_model_B, g_model_AtoB,
                                        g_model_BtoA, c_model_AtoB,
                                        c_model_BtoA
                                    ])
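A hedged driver sketch for the Trainer above; IOHandler is referenced in the commented-out line of train(), and the image shape and directory paths are assumptions.

# Hedged sketch: shape and paths are illustrative; IOHandler is assumed to expose
# imgs_to_np / save_fig / save_models as used by the Trainer above.
trainer = Trainer(image_shape=(256, 256, 3), io=IOHandler())
trainer.train('data/photos', 'data/paintings')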
Example no. 22
from keras.models import Model, Sequential, load_model
from keras.utils.vis_utils import plot_model
import numpy as np
import pandas as pd
from utils import RgbGenerator
from models import ModelFactory, ModelType
train = pd.read_csv('20bnjester_csv_files/train.csv')
valid = pd.read_csv('20bnjester_csv_files/valid.csv')

model = ModelFactory(rgbpath='trained_models/rgblstm.h5',
                     trained=True).getModel(ModelType.RGB)
model.summary()

plot_model(model,
           to_file='model_plot.png',
           show_shapes=True,
           show_layer_names=True)

predict_gen = RgbGenerator(valid)

y_pred = model.predict_generator(predict_gen, 1750)

y_pred = np.argmax(y_pred, axis=1)

print(y_pred)

y_true = np.argmax(valid.iloc[:14000, 1:].values, axis=1)

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
Example no. 23
class Engine:
    def __init__(self,
                 args: Args,
                 cfg: ConfigTree,
                 local_rank: int,
                 final_validate=False):
        self.args = args
        self.cfg = cfg
        self.local_rank = local_rank

        self.model_factory = ModelFactory(cfg)
        self.data_loader_factory = DataLoaderFactoryV3(cfg, final_validate)
        self.final_validate = final_validate

        self.device = torch.device(
            f'cuda:{local_rank}' if torch.cuda.is_available() else 'cpu')

        model_type = cfg.get_string('model_type')
        if model_type == '1stream':
            self.model = self.model_factory.build(local_rank)  # basic model
        elif model_type == 'multitask':
            self.model = self.model_factory.build_multitask_wrapper(local_rank)
        else:
            raise ValueError(f'Unrecognized model_type "{model_type}"')
        if not final_validate:
            self.train_loader = self.data_loader_factory.build(
                vid=False,  # need label to gpu
                split='train',
                device=self.device)
        self.validate_loader = self.data_loader_factory.build(
            vid=False, split='val', device=self.device)

        if final_validate:
            self.n_crop = cfg.get_int(
                'temporal_transforms.validate.final_n_crop')
        else:
            self.n_crop = cfg.get_int('temporal_transforms.validate.n_crop')

        self.criterion = nn.CrossEntropyLoss()

        self.learning_rate = self.cfg.get_float('optimizer.lr')
        optimizer_type = self.cfg.get_string('optimizer.type', default='sgd')
        if optimizer_type == 'sgd':
            self.optimizer = torch.optim.SGD(
                self.model.parameters(),
                lr=self.learning_rate,
                momentum=self.cfg.get_float('optimizer.momentum'),
                dampening=self.cfg.get_float('optimizer.dampening'),
                weight_decay=self.cfg.get_float('optimizer.weight_decay'),
                nesterov=self.cfg.get_bool('optimizer.nesterov'),
            )
        elif optimizer_type == 'adam':
            self.optimizer = torch.optim.Adam(
                self.model.parameters(),
                lr=self.learning_rate,
                eps=self.cfg.get_float('optimizer.eps'),
            )
        else:
            raise ValueError(f'Unknown optimizer {optimizer_type}')

        self.num_epochs = cfg.get_int('num_epochs')
        self.schedule_type = self.cfg.get_string('optimizer.schedule')
        if self.schedule_type == "plateau":
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer=self.optimizer,
                mode='min',
                patience=self.cfg.get_int('optimizer.patience'),
                verbose=True)
        elif self.schedule_type == "multi_step":
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer=self.optimizer,
                milestones=self.cfg.get("optimizer.milestones"),
            )
        elif self.schedule_type == "cosine":
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer=self.optimizer,
                T_max=self.num_epochs,
                eta_min=self.learning_rate / 1000)
        elif self.schedule_type == 'none':
            self.scheduler = torch.optim.lr_scheduler.LambdaLR(
                optimizer=self.optimizer,
                lr_lambda=lambda epoch: 1,
            )
        else:
            raise ValueError("Unknow schedule type")

        self.arch = cfg.get_string('model.arch')

        if local_rank == 0:
            self.summary_writer = SummaryWriter(
                log_dir=str(args.experiment_dir))
        else:
            self.summary_writer = None

        self.best_acc1 = 0.
        self.current_epoch = 0
        self.next_epoch = None
        logger.info('Engine: n_crop=%d', self.n_crop)

        self.checkpoint_manager = CheckpointManager(self.args.experiment_dir,
                                                    keep_interval=None)
        self.loss_meter = None

    def has_next_epoch(self):
        return not self.final_validate and self.current_epoch < self.num_epochs - 1

    def load_checkpoint(self, checkpoint_path):
        states = torch.load(checkpoint_path, map_location=self.device)
        if states['arch'] != self.arch:
            raise ValueError(
                f'Loading checkpoint arch {states["arch"]} does not match current arch {self.arch}'
            )

        logger.info('Loading checkpoint from %s', checkpoint_path)
        self.model.module.load_state_dict(states['model'])
        logger.info('Checkpoint loaded')

        self.optimizer.load_state_dict(states['optimizer'])
        self.scheduler.load_state_dict(states['scheduler'])
        self.current_epoch = states['epoch']
        self.best_acc1 = states['best_acc1']

    def load_moco_checkpoint(self, checkpoint_path: str):
        cp = torch.load(checkpoint_path, map_location=self.device)
        if 'model' in cp and 'arch' in cp:
            logger.info('Loading MoCo checkpoint from %s (epoch %d)',
                        checkpoint_path, cp['epoch'])
            moco_state = cp['model']
            prefix = 'encoder_q.'
        else:
            # This checkpoint is from third-party
            logger.info('Loading third-party model from %s', checkpoint_path)
            if 'state_dict' in cp:
                moco_state = cp['state_dict']
            else:
                # For c3d
                moco_state = cp
                logger.warning(
                    'if you are not using c3d sport1m, you may be using the wrong checkpoint'
                )
            if next(iter(moco_state.keys())).startswith('module'):
                prefix = 'module.'
            else:
                prefix = ''
        """
        fc -> fc. for c3d sport1m, because fc6 and fc7 are in use.
        """
        blacklist = ['fc.', 'linear', 'head', 'new_fc', 'fc8']
        blacklist += ['encoder_fuse']

        def filter(k):
            return k.startswith(prefix) and not any(
                k.startswith(f'{prefix}{fc}') for fc in blacklist)

        model_state = {
            k[len(prefix):]: v
            for k, v in moco_state.items() if filter(k)
        }
        msg = self.model.module.load_state_dict(model_state, strict=False)
        # assert set(msg.missing_keys) == {"fc.weight", "fc.bias"} or \
        #        set(msg.missing_keys) == {"linear.weight", "linear.bias"} or \
        #        set(msg.missing_keys) == {'head.projection.weight', 'head.projection.bias'} or \
        #        set(msg.missing_keys) == {'new_fc.weight', 'new_fc.bias'},\
        #     msg

        logger.warning(
            f'Missing keys: {msg.missing_keys}, Unexpected keys: {msg.unexpected_keys}'
        )

    def train_context(self):
        return EpochContext(self,
                            name='Train',
                            n_crop=1,
                            dataloader=self.train_loader,
                            tensorboard_prefix='train')

    def validate_context(self):
        return EpochContext(self,
                            name='Validate',
                            n_crop=self.n_crop,
                            dataloader=self.validate_loader,
                            tensorboard_prefix='val')

    def train_epoch(self):
        epoch = self.next_epoch
        if epoch is None:
            epoch = self.train_context()
        self.next_epoch = self.validate_context()

        self.model.train()
        with epoch:
            for loss, *_ in epoch.forward():
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
        self.loss_meter = epoch.loss_meter

    def validate_epoch(self):
        epoch = self.next_epoch
        if epoch is None:
            epoch = self.validate_context()
        if self.has_next_epoch():
            self.next_epoch = self.train_context()
        else:
            self.next_epoch = None

        self.model.eval()
        all_logits = torch.empty(0,
                                 device=next(self.model.parameters()).device)
        indices = []
        with epoch:
            with torch.no_grad():
                for _, logits, others in epoch.forward():
                    all_logits = torch.cat((all_logits, logits), dim=0)
                    if others:
                        assert len(others[0]) == logits.size(0), \
                            f'Length of indices and logits not match. {others[0]} vs {logits.size(0)}'
                        indices.extend(others[0])

            epoch.sync_meters()
            logger.info(
                'Validation finished.\n\tLoss = %f\n\tAcc@1 = %.2f%% (%d/%d)\n\tAcc@5 = %.2f%% (%d/%d)',
                epoch.loss_meter.avg.item(),
                epoch.top1_meter.avg.item(),
                epoch.top1_meter.sum.item() / 100,
                epoch.top1_meter.count.item(),
                epoch.top5_meter.avg.item(),
                epoch.top5_meter.sum.item() / 100,
                epoch.top5_meter.count.item(),
            )

        if self.final_validate:
            ds = self.validate_loader.dataset
            if hasattr(ds, 'save_results'):
                assert indices, 'Dataset should return indices to sort logits'
                assert len(indices) == all_logits.size(0), \
                    f'Length of indices and logits not match. {len(indices)} vs {all_logits.size(0)}'
                with (self.args.experiment_dir /
                      f'results_{self.local_rank}.json').open('w') as f:
                    ds.save_results(f, indices, all_logits)
        return epoch.top1_meter.avg.item()

    def run(self):

        num_epochs = 1 if self.args.debug else self.num_epochs

        self.model.train()

        while self.current_epoch < num_epochs:
            logger.info("Current LR:{}".format(self.scheduler._last_lr))
            if self.summary_writer is not None:
                self.summary_writer.add_scalar('train/lr',
                                               utils.get_lr(self.optimizer),
                                               self.current_epoch)
            self.train_epoch()
            acc1 = self.validate_epoch()
            if self.schedule_type == "plateau":
                self.scheduler.step(self.loss_meter.val.item())
            else:
                self.scheduler.step()

            self.current_epoch += 1

            if self.local_rank == 0:
                is_best = acc1 > self.best_acc1
                self.best_acc1 = max(acc1, self.best_acc1)

                # save_checkpoint({
                #     'epoch': self.current_epoch,
                #     'arch': self.arch,
                #     'model': self.model.module.state_dict(),
                #     'best_acc1': self.best_acc1,
                #     'optimizer': self.optimizer.state_dict(),
                #     'scheduler': self.scheduler.state_dict(),
                # }, is_best, self.args.experiment_dir)
                self.checkpoint_manager.save(
                    {
                        'epoch': self.current_epoch,
                        'arch': self.arch,
                        'model': self.model.module.state_dict(),
                        'best_acc1': self.best_acc1,
                        'optimizer': self.optimizer.state_dict(),
                        'scheduler': self.scheduler.state_dict(),
                    }, is_best, self.current_epoch)

        if self.summary_writer is not None:
            self.summary_writer.flush()
Example no. 24
 def __init__(self, image_shape, io):
     self.mf = ModelFactory(image_shape)
     self.io = io
Example no. 25
    def __init__(self,
                 args: Args,
                 cfg: ConfigTree,
                 local_rank: int,
                 final_validate=False):
        self.args = args
        self.cfg = cfg
        self.local_rank = local_rank

        self.model_factory = ModelFactory(cfg)
        self.data_loader_factory = DataLoaderFactoryV3(cfg, final_validate)
        self.final_validate = final_validate

        self.device = torch.device(
            f'cuda:{local_rank}' if torch.cuda.is_available() else 'cpu')

        model_type = cfg.get_string('model_type')
        if model_type == '1stream':
            self.model = self.model_factory.build(local_rank)  # basic model
        elif model_type == 'multitask':
            self.model = self.model_factory.build_multitask_wrapper(local_rank)
        else:
            raise ValueError(f'Unrecognized model_type "{model_type}"')
        if not final_validate:
            self.train_loader = self.data_loader_factory.build(
                vid=False,  # need label to gpu
                split='train',
                device=self.device)
        self.validate_loader = self.data_loader_factory.build(
            vid=False, split='val', device=self.device)

        if final_validate:
            self.n_crop = cfg.get_int(
                'temporal_transforms.validate.final_n_crop')
        else:
            self.n_crop = cfg.get_int('temporal_transforms.validate.n_crop')

        self.criterion = nn.CrossEntropyLoss()

        self.learning_rate = self.cfg.get_float('optimizer.lr')
        optimizer_type = self.cfg.get_string('optimizer.type', default='sgd')
        if optimizer_type == 'sgd':
            self.optimizer = torch.optim.SGD(
                self.model.parameters(),
                lr=self.learning_rate,
                momentum=self.cfg.get_float('optimizer.momentum'),
                dampening=self.cfg.get_float('optimizer.dampening'),
                weight_decay=self.cfg.get_float('optimizer.weight_decay'),
                nesterov=self.cfg.get_bool('optimizer.nesterov'),
            )
        elif optimizer_type == 'adam':
            self.optimizer = torch.optim.Adam(
                self.model.parameters(),
                lr=self.learning_rate,
                eps=self.cfg.get_float('optimizer.eps'),
            )
        else:
            raise ValueError(f'Unknown optimizer {optimizer_type}')

        self.num_epochs = cfg.get_int('num_epochs')
        self.schedule_type = self.cfg.get_string('optimizer.schedule')
        if self.schedule_type == "plateau":
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer=self.optimizer,
                mode='min',
                patience=self.cfg.get_int('optimizer.patience'),
                verbose=True)
        elif self.schedule_type == "multi_step":
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer=self.optimizer,
                milestones=self.cfg.get("optimizer.milestones"),
            )
        elif self.schedule_type == "cosine":
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer=self.optimizer,
                T_max=self.num_epochs,
                eta_min=self.learning_rate / 1000)
        elif self.schedule_type == 'none':
            self.scheduler = torch.optim.lr_scheduler.LambdaLR(
                optimizer=self.optimizer,
                lr_lambda=lambda epoch: 1,
            )
        else:
            raise ValueError("Unknow schedule type")

        self.arch = cfg.get_string('model.arch')

        if local_rank == 0:
            self.summary_writer = SummaryWriter(
                log_dir=str(args.experiment_dir))
        else:
            self.summary_writer = None

        self.best_acc1 = 0.
        self.current_epoch = 0
        self.next_epoch = None
        logger.info('Engine: n_crop=%d', self.n_crop)

        self.checkpoint_manager = CheckpointManager(self.args.experiment_dir,
                                                    keep_interval=None)
        self.loss_meter = None
Example no. 26
    def __init__(self, config, experiment_id, load_from_directory=None):
        """
        Initializes an experiment by initializing its variables, loading the corresponding dataset, loading the
        language information, and instantiating the model and the optimizer.

        :param config: The configurations of this experiment as a dictionary object.
        :param experiment_id: A unique identifier for this experiment.
        :param load_from_directory: A string indicating the path to a directory. If omitted, a new experiment is
        created. Else, an already serialized experiment will be read from the directory that is pointed to by this
        argument.
        """
        self.config = config
        self.id = experiment_id
        self.current_epoch = 0
        self.best_test_score = float('inf')
        self.best_epoch_number = -1
        self.loss_function = self.AutoPaddedLoss(
            torch.nn.CrossEntropyLoss(ignore_index=Alphabet.unknown_integer))
        self.model = None
        self.optimizer = None

        # Find the corresponding dataset.
        assert config[consts.DATASET] in [consts.SIGMORPHON2020]
        if config[consts.DATASET] == consts.SIGMORPHON2020:
            # Create a data loader factory.
            data_loader_factory = SigmorphonData_Factory(
                config[consts.SIGMORPHON2020_ROOT],
                config[consts.LANGUAGE_INFO_FILE])
            # Create a data loader for the training data.
            logging.getLogger(
                consts.MAIN).info('Creating the training dataset.')
            dataloader_kwargs = {
                'batch_size': self.config[consts.BATCH_SIZE],
                'collate_type': 'unpacked'
            }
            self.train_loader = data_loader_factory.get_dataset(
                type=[consts.TRAIN],
                families=self.config[consts.LANGUAGE_FAMILIES],
                languages=self.config[consts.LANGUAGES],
                dataloader_kwargs=dataloader_kwargs)
            # Create a data loader for the testing data.
            logging.getLogger(
                consts.MAIN).info('Creating the testing dataset.')
            self.test_loader = data_loader_factory.get_dataset(
                type=[consts.DEV],
                families=self.config[consts.LANGUAGE_FAMILIES],
                languages=self.config[consts.LANGUAGES],
                dataloader_kwargs=dataloader_kwargs)
        else:
            raise Exception('Unsupported dataset.')

        # Instantiate the model indicated by the configurations.
        self.model = ModelFactory.create_model(
            config[consts.MODEL_ARCHITECTURE], self.config,
            self.train_loader.dataset.get_dimensionality())

        # Move to the preferred device
        self.loss_function = self.loss_function.to(self.config[consts.DEVICE])
        self.model = self.model.to(self.config[consts.DEVICE])
        if self.config[consts.DATA_PARALLEL]:
            self.model = torch.nn.DataParallel(self.model)

        # Instantiate the optimizer indicated by the configurations.
        self.optimizer = OptimizerFactory.create_optimizer(
            config[consts.OPTIMIZER], self.model, self.config)

        # If the `load_from_directory` argument is given, load the state of the experiment from a file.
        if load_from_directory is not None:
            self.deserialize(load_from_directory)
Example no. 27
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import cv2

# In[2]:

from models import ModelType, ModelFactory

# In[3]:

model = ModelFactory(rgbpath='trained_models/rgblstm.h5',
                     trained=True).getModel(ModelType.RGB)

# In[4]:

model.summary()

# In[5]:

from keras.layers import Input
from keras.models import Model

# In[6]:

rgbinput = Input((150, 100, 3))

x = model.layers[1].layer(rgbinput)
for layer in model.layers[2:-3]:
Example no. 28
def main():

    print("\n###############################################################")
    print("##########################DATA PREPARATION#####################")
    print("###############################################################\n")
    ROOT_DIR = os.getcwd()
    print(ROOT_DIR)
    INPUT_DIR = os.path.join(ROOT_DIR, config.INPUT_FOLDER)
    print(INPUT_DIR)
    PATIENTS_INFO = os.path.join(INPUT_DIR, config.INFO_PATIENTS)
    print(PATIENTS_INFO)

    IMAGES_REGEX = os.path.join(INPUT_DIR, config.IMAGES_ACESS)
    images_paths = config_func.getImages(IMAGES_REGEX)
    print(images_paths[:5])

    data = pd.read_csv(PATIENTS_INFO)
    print(data.iloc[0])
    data = data.sort_values(config.IMAGE_ID, ascending=True)
    print(data.head(5))

    #ADD NEW COLUMN (PATH IMAGE) AND POPULATE WITH COHERENT PATH FOR EACH IMAGE
    data = config_func.addNewColumn_Populate_DataFrame(data, config.PATH, images_paths)
    data = data.sort_index()
    print(data.head(5))
    print(data.iloc[0][config.PATH])

    # IMPUTE NULL VALUES
    data = config_func.impute_null_values(data, config.AGE, mean=True)
    print(data.isnull().sum())
    print(data.head(5))
    data.dx = data.dx.astype('category')
    print(data.info())

    #GET IMAGE DATASET WITH SPECIFIC SIZE
    X, Y = config_func.getDataFromImages(dataframe=data, size=config.WANTED_IMAGES)
    print(X.shape)
    print(Y.shape)
    #number_by_perc = [sum(Y == i) for i in range(len(data.dx.unique()))]

    # STRATIFY X_TRAIN, X_VAL AND X_TEST
    indexes = np.arange(X.shape[0])
    X_train, X_val, y_train, y_val, indeces_train, indices_val = train_test_split(X, Y, indexes, test_size=config.VALIDATION_SPLIT, shuffle=True,
                                                      random_state=config.RANDOM_STATE, stratify=Y)
    indexes = indeces_train
    X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(X_train, y_train, indexes, test_size=config.TEST_SPLIT,
                                                        shuffle=True, random_state=config.RANDOM_STATE, stratify=y_train)

    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)
    print(X_test.shape)
    print(y_test.shape)


    if config.FLAG_SEGMENT_IMAGES == 1:
        ## ---------------------------U-NET APPLICATION ------------------------------------
        dataset = Data.Data(X_train=X_train, X_val=X_val, X_test=X_test,
                         y_train=y_train, y_val=y_val, y_test=y_test)
        unet_args = (0, 0)  # args don't matter --> any tuple is valid here for the U-Net model

        fact = ModelFactory.ModelFactory()
        unet = fact.getModel(config.U_NET, dataset, *unet_args) # args doesn't matter

        ## check save and load predictions array to file
        PREDICTIONS_TEMP_FILE_PATH = os.path.join(INPUT_DIR, config.TEMP_ARRAYS)
        if os.path.exists(PREDICTIONS_TEMP_FILE_PATH):
            with open(PREDICTIONS_TEMP_FILE_PATH, 'rb') as f:
                predictions = np.load(f)
        else: ## if not exists
            with open(PREDICTIONS_TEMP_FILE_PATH, 'wb') as f:
                model, predictions, history = unet.template_method()
                predictions = np.array(predictions) ## transform list to numpy array
                np.save(f, predictions)

        ## create folder if not exists
        masks_path_folder = os.path.join(INPUT_DIR, config.MASKS_FOLDER)
        if not os.path.exists(masks_path_folder):
            os.makedirs(masks_path_folder)
        if not os.listdir(masks_path_folder): ## if folder is empty (no images inside)
            ## insert mask images in mask folder
            for i in range(predictions.shape[0]):
                cv2.imwrite(os.path.join(masks_path_folder, data.at[indices_train[i], config.IMAGE_ID]+'.jpg'), predictions[i])

        # plt.figure(figsize=(16, 16))
        # plt.imshow(cv2.cvtColor(self.data.X_train[2], cv2.COLOR_BGR2RGB))
        # plt.title('Original Image')
        # plt.show()
        # plt.imshow(mask, plt.cm.binary_r)
        # plt.title('Binary Mask')
        # plt.show()
        # plt.imshow(cv2.cvtColor(concatenated_mask, cv2.COLOR_BGR2RGB))
        # plt.title('Segmented Image')
        # plt.show()

    # NORMALIZE DATA
    X_train, X_val, X_test = config_func.normalize(X_train, X_val, X_test)

    # ONE HOT ENCODING TARGETS
    y_train, y_val, y_test = config_func.one_hot_encoding(y_train, y_val, y_test)

    print("\n###############################################################")
    print("##########################CLASSIFICATION#######################")
    print("###############################################################\n")

    # CREATION OF DATA OBJECT
    data_obj = Data.Data(X_train=X_train, X_val=X_val, X_test=X_test,
                         y_train=y_train, y_val=y_val, y_test=y_test)

    ## INSTANCE OF MODEL FACTORY
    model_fact = ModelFactory.ModelFactory()

    ## STRATEGIES OF TRAIN INSTANCES
    undersampling = UnderSampling.UnderSampling()
    oversampling = OverSampling.OverSampling()
    data_augment = DataAugmentation.DataAugmentation()

    ## ---------------------------ALEXNET APPLICATION ------------------------------------

    ## DEFINITION OF NUMBER OF CNN AND DENSE LAYERS
    args = (6,1)

    # CREATE MODEL
    alexNet = model_fact.getModel(config.ALEX_NET, data_obj, *args)

    # APPLY STRATEGIES OF TRAIN
    #alexNet.addStrategy(undersampling)
    alexNet.addStrategy(oversampling)
    alexNet.addStrategy(data_augment)

    # VALUES TO POPULATE ON CONV AND DENSE LAYERS
    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    alex_args = (
        3, # number of normal convolutional layer (+init conv)
        1, # number of stack cnn layers
        73, # number of feature maps of initial conv layer
        23, # growth rate
        1, # number of FCL Layers
        65, # number neurons of Full Connected Layer
        12# batch size
    )

    # APPLY BUILD, TRAIN AND PREDICT
    #model, predictions, history = alexNet.template_method(*alex_args)
    #alexNet.save(model, config.ALEX_NET_WEIGHTS_FILE)

    ## PLOT FINAL RESULTS
    #config_func.print_final_results(data_obj.y_test, predictions, history, dict=False)

    ## ---------------------------VGGNET APPLICATION ------------------------------------

    ## DEFINITION OF NUMBER OF CNN AND DENSE LAYERS
    vggLayers = (5, 1)

    ## GET VGGNET MODEL
    vggnet = model_fact.getModel(config.VGG_NET, data_obj, *vggLayers)

    ## ATTRIBUTION OF TRAIN STRATEGIES
    vggnet.addStrategy(oversampling)
    vggnet.addStrategy(data_augment)

    # VALUES TO POPULATE ON CONV AND DENSE LAYERS
    vgg_args = (
        4,  # number of stack cnn layers (+ init stack)
        71,  # number of feature maps of initial conv layer
        18,  # growth rate
        1, # number of FCL Layers
        61,  # number neurons of Full Connected Layer
        12 # batch size
    )

    # APPLY BUILD, TRAIN AND PREDICT
    #model, predictions, history = vggnet.template_method(*vgg_args)
    #vggnet.save(model, config.VGG_NET_WEIGHTS_FILE)

    ## PLOT FINAL RESULTS
    #config_func.print_final_results(data_obj.y_test, predictions, history, dict=False)

    ## ---------------------------RESNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    number_cnn_dense = (5, 1)

    # creation of ResNet instance
    resnet = model_fact.getModel(config.RES_NET, data_obj, *number_cnn_dense)

    # apply strategies to resnet
    resnet.addStrategy(oversampling)
    resnet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    resnet_args = (
        56,  # number of filters of initial CNN layer
        4,  # number of consecutive conv+identity blocks
        2, # number of identity block in each (conv+identity) block
        42,  # growth rate
        12,  # batch size
    )

    # APPLY BUILD, TRAIN AND PREDICT
    #model, predictions, history = resnet.template_method(*resnet_args)
    #resnet.save(model, config.RES_NET_WEIGHTS_FILE)

    ## PLOT FINAL RESULTS
    #config_func.print_final_results(data_obj.y_test, predictions, history, dict=False)

    ## ---------------------------DENSENET APPLICATION ------------------------------------

    # TUPLE DEFINITIONS
    numberLayers = (
        4,  # BLOCKS
        1  # DENSE LAYERS
    )

    valuesLayers = (
        59,  # initial number of Feature Maps
        4,  # number of dense blocks
        5,  # number of layers in each block
        11,  # growth rate
        1.0,  # compression rate
        21  # batch size
    )

    densenet = model_fact.getModel(config.DENSE_NET, data_obj, *numberLayers)

    densenet.addStrategy(oversampling)
    densenet.addStrategy(data_augment)


    #model, predictions, history = densenet.template_method(*valuesLayers)
    #densenet.save(model, config.DENSE_NET_WEIGHTS_FILE)

    #config_func.print_final_results(data_obj.y_test, predictions, history)

    ## --------------------------- ENSEMBLE OF MODELS ------------------------------------

    # load the trained models from their saved weight files
    alexNet2 = load_model(config.ALEX_NET_WEIGHTS_FILE)
    vggnet2 = load_model(config.VGG_NET_WEIGHTS_FILE)
    #vggnet2.name = 'model_2'
    #vggnet.save(vggnet2, config.VGG_NET_WEIGHTS_FILE)
    resnet2 = load_model(config.RES_NET_WEIGHTS_FILE)
    #resnet2.name = 'model_3'
    #resnet.save(resnet2, config.RES_NET_WEIGHTS_FILE)
    densenet2 = load_model(config.DENSE_NET_WEIGHTS_FILE)
    #densenet2.name = 'model_4'
    #densenet.save(densenet2, config.DENSE_NET_WEIGHTS_FILE)

    models = [alexNet2, vggnet2, resnet2, densenet2]

    ##call ensemble method
    ensemble_model = config_func.ensemble(models=models)
    predictions = ensemble_model.predict(data_obj.X_test)
    argmax_preds = np.argmax(predictions, axis=1)  # BY ROW, BY EACH SAMPLE
    argmax_preds = keras.utils.to_categorical(argmax_preds)

    ## print final results
    config_func.print_final_results(data_obj.y_test, argmax_preds, history=None, dict=True)

    # save ensemble model
    ensemble_model.save(config.ENSEMBLE_ALL)
    del ensemble_model
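The ensemble helper used above (config_func.ensemble) is not shown in this snippet. As a point of reference, a minimal sketch of an averaging ensemble over Keras models that share the same input shape might look like the following; the function name and body are assumptions, not the repository's actual implementation:

from tensorflow import keras

def average_ensemble(models):
    # Assumes every model accepts tensors of the same shape and has a unique name.
    inputs = keras.Input(shape=models[0].input_shape[1:])
    outputs = [m(inputs) for m in models]          # per-model class probabilities
    avg = keras.layers.Average()(outputs)          # element-wise mean across the models
    return keras.Model(inputs=inputs, outputs=avg, name="ensemble")

The argmax/to_categorical step above then turns the averaged probabilities back into one-hot predictions.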
Example n. 29
0
def _choose_slave(args):
    # prob not threadsafe
    X, y, X_val, y_val, model_type, m, params = args
    model = ModelFactory.create_model(model_type, m, **params)
    model.fit(X, y)
    return (model.score(X_val, y_val), model, params)
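The helper above is meant to be mapped over many hyperparameter combinations at once (hence the "prob not threadsafe" note). A hypothetical driver, not part of the snippet, that fans a parameter grid out over a process pool and keeps the best validation score could look like this; the function name, param_grid format and n_jobs default are assumptions:

from itertools import product
from multiprocessing import Pool

def choose_best_model(X, y, X_val, y_val, model_type, m, param_grid, n_jobs=4):
    # param_grid maps each parameter name to a list of candidate values.
    keys = list(param_grid)
    arg_list = [(X, y, X_val, y_val, model_type, m, dict(zip(keys, combo)))
                for combo in product(*(param_grid[k] for k in keys))]
    with Pool(n_jobs) as pool:                       # processes sidestep the thread-safety concern
        results = pool.map(_choose_slave, arg_list)
    return max(results, key=lambda r: r[0])          # (score, model, params) with the best score

Using processes instead of threads also assumes the data and the fitted models are picklable.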
Example n. 30
0
def main():

    print("\n###############################################################")
    print("##########################DATA PREPARATION#####################")
    print("###############################################################\n")

    # access image data
    PROJECT_DIR = os.getcwd()
    INPUT_DIR = os.path.join(PROJECT_DIR,
                             config.INPUT_DIR)  # path of input directory
    IMAGES_DIR = os.path.join(INPUT_DIR, config.IMAGES_ACESS)

    # define paths for all classes (stroma, tumor, mucosa, empty, lympho, adipose, complex, debris)
    STROMA_FOLDER = os.path.join(IMAGES_DIR, config.STROMA_DIR,
                                 config.IMAGES_REGEX)
    TUMOR_FOLDER = os.path.join(IMAGES_DIR, config.TUMOR_DIR,
                                config.IMAGES_REGEX)
    MUCOSA_FOLDER = os.path.join(IMAGES_DIR, config.MUCOSA_DIR,
                                 config.IMAGES_REGEX)
    EMPTY_FOLDER = os.path.join(IMAGES_DIR, config.EMPTY_DIR,
                                config.IMAGES_REGEX)
    LYMPHO_FOLDER = os.path.join(IMAGES_DIR, config.LYMPHO_DIR,
                                 config.IMAGES_REGEX)
    ADIPOSE_FOLDER = os.path.join(IMAGES_DIR, config.ADIPOSE_DIR,
                                  config.IMAGES_REGEX)
    COMPLEX_FOLDER = os.path.join(IMAGES_DIR, config.COMPLEX_DIR,
                                  config.IMAGES_REGEX)
    DEBRIS_FOLDER = os.path.join(IMAGES_DIR, config.DEBRIS_DIR,
                                 config.IMAGES_REGEX)
    LIST_CLASSES_FOLDER = [
        STROMA_FOLDER, TUMOR_FOLDER, MUCOSA_FOLDER, EMPTY_FOLDER,
        LYMPHO_FOLDER, ADIPOSE_FOLDER, COMPLEX_FOLDER, DEBRIS_FOLDER
    ]

    # get images from all folders
    # classes targets --> 0: Stroma, 1: Tumor, 2: Mucosa, 3: Empty, 4: Lympho, 5: Adipose, 6: Complex, 7: Debris
    images = []
    labels = []
    for i, j in zip(LIST_CLASSES_FOLDER, range(config.NUMBER_CLASSES)):
        images.append(config_func.getImages(i))
        labels.extend([j for _ in range(len(images[j]))])

    # flatten images list
    images = [path for sublist in images for path in sublist]

    # construct DataFrame with two columns: (image_path, target)
    data = pd.DataFrame(list(zip(images, labels)),
                        columns=[config.IMAGE_PATH, config.TARGET])

    # subsample data; if no subsampling is wanted, a rate of 1 should be configured
    if config.SUBSAMPLE_PERCENTAGE != 1:
        data = config_func.get_subsample_of_data(config.SUBSAMPLE_PERCENTAGE, data)  # pass the configured rate rather than a hard-coded 1
        print(data.head(5))
        print(data.shape)
        print(data[config.TARGET].value_counts())

    # get pixel data from images and respective targets
    X, Y = config_func.resize_images(config.WIDTH, config.HEIGHT, data)
    print(X.shape)
    print(Y.shape)

    # STRATIFIED SPLIT INTO X_TRAIN, X_VAL AND X_TEST
    X_train, X_val, y_train, y_val = train_test_split(
        X,
        Y,
        test_size=config.VALIDATION_SPLIT,
        shuffle=True,
        random_state=config.RANDOM_STATE,
        stratify=Y)

    X_train, X_test, y_train, y_test = train_test_split(
        X_train,
        y_train,
        test_size=config.TEST_SPLIT,
        shuffle=True,
        random_state=config.RANDOM_STATE,
        stratify=y_train)

    # normalization of data
    X_train, X_val, X_test = config_func.normalize(X_train, X_val, X_test)

    # one-hot encoding targets
    y_train, y_val, y_test = config_func.one_hot_encoding(y_train=y_train,
                                                          y_val=y_val,
                                                          y_test=y_test)

    print("\n###############################################################")
    print("##########################CLASSIFICATION#######################")
    print("###############################################################\n")

    # creation of Data instance
    data_obj = Data.Data(X_train=X_train,
                         X_val=X_val,
                         X_test=X_test,
                         y_train=y_train,
                         y_val=y_val,
                         y_test=y_test)

    # creation of the model factory instance
    model_factory = ModelFactory.ModelFactory()

    # creation of the optimizer factory instance
    optimization_factory = OptimizerFactory.OptimizerFactory()

    # definition of training strategy instances
    data_augment = DataAugmentation.DataAugmentation()

    ## ---------------------------ALEXNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    alex_number_layers = (5, 1)

    # creation of AlexNet instance
    alexNet = model_factory.getModel(config.ALEX_NET, data_obj,
                                     *alex_number_layers)

    # apply strategies to alexNet
    alexNet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    alex_args = (
        2,  # number of normal convolutional layers (+ init conv)
        2,  # number of stacked cnn layers
        70,  # number of feature maps of the initial conv layer
        19,  # growth rate
        1,  # number of FCL layers
        43,  # number of neurons of the fully connected layer
        9  # batch size
    )

    # apply build, train and predict
    #model, predictions, history = alexNet.template_method(*alex_args)
    ##alexNet.save(model, config.ALEX_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=data_obj.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------VGGNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    vgg_number_layers = (4, 1)

    # creation of VGGNet instance
    vggnet = model_factory.getModel(config.VGG_NET, data_obj,
                                    *vgg_number_layers)

    # apply strategies to vggnet
    vggnet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)

    vgg_args = (
        4,  # number of stacked cnn layers (+ init stack)
        64,  # number of feature maps of the initial conv layer
        12,  # growth rate
        1,  # number of FCL layers
        16,  # number of neurons of the fully connected layer
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    # apply build, train and predict
    #model, predictions, history = vggnet.template_method(*vgg_args)
    ##vggnet.save(model, config.VGG_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=data_obj.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------RESNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    number_cnn_dense = (5, 1)

    # creation of ResNet instance
    resnet = model_factory.getModel(config.RES_NET, data_obj,
                                    *number_cnn_dense)

    # apply strategies to resnet
    resnet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    resnet_args = (
        48,  # number of filters of initial CNN layer
        4,  # number of consecutive conv+identity blocks
        0,  # repetitions of the identity block; by default ResNet-18 uses 1 (1 conv block + 1 identity block) for all layers
        8,  # growth rate
        config.BATCH_SIZE_ALEX_AUG,  # batch size
    )

    # apply build, train and predict
    #model, predictions, history = resnet.template_method(*resnet_args)
    ##resnet.save(model, config.RES_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=data_obj.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------DENSENET APPLICATION ------------------------------------

    # TUPLE DEFINITIONS
    numberLayers = (
        4,  #BLOCKS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        24,  # initial number of Feature Maps
        4,  # number of dense blocks
        5,  # number of layers in each block
        12,  # growth rate
        0.5,  # compression rate
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    densenet = model_factory.getModel(config.DENSE_NET, data_obj,
                                      *numberLayers)

    densenet.addStrategy(data_augment)

    model, predictions, history = densenet.template_method(*valuesLayers)

    config_func.print_final_results(data_obj.y_test, predictions, history)
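The snippets above all follow the same flow: getModel builds a wrapper, addStrategy registers resampling or augmentation strategies, and template_method builds, trains and predicts in one call. A minimal sketch of that template-method/strategy structure is given below; the class and hook names are assumptions chosen to mirror the calls, not the repository's actual classes:

from abc import ABC, abstractmethod

class TrainStrategy(ABC):
    @abstractmethod
    def apply(self, data):
        """Return a transformed copy of the data (e.g. oversampling, augmentation)."""

class BaseModel(ABC):
    def __init__(self, data):
        self.data = data
        self.strategies = []

    def addStrategy(self, strategy):
        self.strategies.append(strategy)

    def template_method(self, *args):
        model = self.build(*args)                 # architecture-specific construction
        for strategy in self.strategies:          # apply the registered train strategies
            self.data = strategy.apply(self.data)
        history = self.train(model, *args)        # fit on the (possibly resampled) data
        predictions = self.predict(model)         # predict on the held-out test split
        return model, predictions, history

    @abstractmethod
    def build(self, *args): ...

    @abstractmethod
    def train(self, model, *args): ...

    @abstractmethod
    def predict(self, model): ...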
Example n. 31
0
def main(
        config_path: str = 'configs/classification.yaml',
        dataset_name: str = 'svhn',
        imbalance_ratio: int = 1,
        oversampling: str = 'none',  # none, oversampling, gan
        ada: bool = False,  # only for gan training
        seed: int = 1,  # No seed if 0
        wandb_logs: bool = False,
        test: bool = False,
        load_model: bool = False):
    # Ensure output directory exists
    if not os.path.exists(OUTPUT_PATH):
        os.mkdir(OUTPUT_PATH)

    # Set a seed
    if seed:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)

    # Load configuration
    logger.info(f'Loading config at "{config_path}"...')
    config = load_config(config_path, dataset_name, imbalance_ratio,
                         oversampling, ada, load_model)

    if config.trainer.task == 'generation' and test:
        raise ValueError('Cannot test the generation models')

    # Init logging with WandB
    mode = 'offline' if wandb_logs else 'disabled'
    wandb.init(mode=mode,
               dir=OUTPUT_PATH,
               entity=WANDB_TEAM,
               project=PROJECT_NAME,
               group=config.trainer.task,
               config=dataclasses.asdict(config))

    # Load model
    logger.info('Loading model...')
    model = ModelFactory.create(model_config=config.model,
                                n_classes=len(config.dataset.classes))

    # Load dataset
    logger.info('Loading dataset...')
    train_dataset, valid_dataset, test_dataset = DatasetFactory.create(
        dataset_config=config.dataset)

    # Instantiate trainer
    logger.info('Loading trainer...')
    trainer = TrainerFactory.create(trainer_config=config.trainer,
                                    train_dataset=train_dataset,
                                    valid_dataset=valid_dataset,
                                    model=model,
                                    classes=config.dataset.classes)

    if test:
        logger.info('Testing...')
        trainer.test(test_dataset)
    else:
        logger.info('Training...')
        trainer.train()

    # Cleanup
    wandb.finish()

    logger.info('done :)')
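The keyword arguments with defaults suggest this main is driven from the command line. A possible entry point, assuming a typer-style wrapper (the snippet itself does not show how main is invoked):

import typer

if __name__ == '__main__':
    # Every keyword argument of main becomes a CLI option, e.g.:
    #   python classification.py --dataset-name svhn --imbalance-ratio 10 --oversampling gan --wandb-logs
    typer.run(main)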
Example n. 32
0
def train(train_metadata_file_path,
          val_metadata_file_path,
          images_dir_path,
          out_dir,
          model_arch,
          num_classes,
          label_name=None,
          sequence_image_count=1,
          data_pipeline_mode="mode_flat_all",
          class_weight=None,
          whole_epochs=100,
          batch_size=32,
          learning_rate=0.001,
          patience=2,
          min_delta_auc=0.01,
          input_size=(224, 224, 3)):
    """
    Train a VGG16 model based on a single image.

    :param train_metadata_file_path: The path to the metadata '.csv' file containing training image names.
    :param val_metadata_file_path: The path to the metadata '.csv' file containing validation image names.
    :param images_dir_path: The path containing the images.
    :param out_dir: The path to which the saved models need to be written.
    :param model_arch: The model architecture provided as string, which are present in the 'models' module.
    :param num_classes: The number of classes present in the data. If num_classes=1, it requires the 'label_name'.
    :param label_name: Required if num_classes=1. The name of the label to pick from the data.
    :param sequence_image_count: The number of images in the sequence dataset. Default: 1.
    :param data_pipeline_mode: The mode of the data pipeline. Default: "mode_flat_all".
    :param class_weight: The class_weights for imbalanced data. Example: {0: 1.0, 1: 0.5}, if class "0" is represented
        half as often as class "1" in your data. Default: None.
    :param whole_epochs: The maximum number of epochs to be trained. Note that the model may be early-stopped. Default: 100.
    :param batch_size: The batch size used for the data. Ensure that it fits within the GPU memory. Default: 32.
    :param learning_rate: The constant learning rate to be used for the Adam optimizer. Default: 0.001.
    :param patience: The number of epochs (full train dataset) to wait before early stopping. Default: 2.
    :param min_delta_auc: The minimum delta of validation auc for early stopping after patience. Default: 0.01.
    :param input_size: The shape of the tensors returned by the data pipeline mode. Default: (224, 224, 3).

    """
    if num_classes == 1 and label_name is None:
        raise ValueError(
            "Since num_classes equals 1, the label_name must be provided.")

    train_data_epoch_subdivisions = 4
    early_stop_monitor = "val_auc"
    early_stop_min_delta = min_delta_auc
    early_stop_patience = patience * train_data_epoch_subdivisions  # Patience is counted in sub-epochs, i.e. 'patience' full passes through the train dataset.
    prefetch_buffer_size = 3  # Can also be set to tf.data.experimental.AUTOTUNE

    os.makedirs(out_dir)

    # Build model architecture.
    model_factory = ModelFactory()
    model = model_factory.get_model(model_arch,
                                    input_size,
                                    is_training=True,
                                    num_classes=num_classes,
                                    learning_rate=learning_rate)
    print("Created the model architecture: %s" % model.name)
    model.summary()

    # Prepare the training dataset.
    print("Preparing training and validation datasets.")
    train_data_pipeline = PipelineGenerator(
        train_metadata_file_path,
        images_dir_path,  # XXX: This function call requires the path to end with a slash.
        # This needs to be handled in the PipelineGenerator.
        is_training=True,
        sequence_image_count=sequence_image_count,
        label_name=label_name,
        mode=data_pipeline_mode)
    train_dataset = train_data_pipeline.get_pipeline()
    train_dataset = train_dataset.batch(batch_size).prefetch(
        prefetch_buffer_size)

    # Prepare the validation dataset
    val_data_pipeline = PipelineGenerator(
        val_metadata_file_path,
        images_dir_path,
        is_training=False,
        sequence_image_count=sequence_image_count,
        label_name=label_name,
        mode=data_pipeline_mode)
    val_dataset = val_data_pipeline.get_pipeline()
    val_dataset = val_dataset.batch(batch_size).prefetch(prefetch_buffer_size)

    # TODO: Find a way to log the activation maps, either during training, or after the training has completed.

    # Prepare the callbacks.
    print("Preparing Tensorflow Keras Callbacks.")
    earlystop_callback = keras.callbacks.EarlyStopping(
        monitor=early_stop_monitor,
        min_delta=early_stop_min_delta,
        patience=early_stop_patience)

    # XXX: We use the HDF5 format to store the sequence models due to a bug in the tensorflow TimeDistributed wrapper.
    if data_pipeline_mode in PipelineGenerator.TIMESTEP_MODES:
        model_extension = ".h5"
    else:
        model_extension = ".ckpt"

    best_model_checkpoint_auc_callback = keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(out_dir, "best_model_dir-auc" + model_extension),
        mode='max',
        monitor='val_auc',
        save_best_only=True,
        save_weights_only=False,
        verbose=1)
    best_model_checkpoint_loss_callback = keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(out_dir,
                              "best_model_dir-loss" + model_extension),
        mode='min',
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1)

    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=os.path.join(
        out_dir, "TBGraph"),
                                                       write_graph=True,
                                                       write_images=True)

    callbacks = [
        earlystop_callback, best_model_checkpoint_auc_callback,
        best_model_checkpoint_loss_callback, tensorboard_callback
    ]

    # Start model training.
    # Defining an 'epoch' to be a quarter of the train dataset.
    num_train_samples = train_data_pipeline.get_size()
    num_val_samples = val_data_pipeline.get_size()
    # Number of batches per one run through the train dataset.
    num_training_steps_per_whole_dataset = int(num_train_samples / batch_size)
    num_val_steps_per_whole_dataset = int(num_val_samples / batch_size)
    steps_per_epoch = int(num_training_steps_per_whole_dataset /
                          train_data_epoch_subdivisions)
    max_num_epochs = int(whole_epochs * train_data_epoch_subdivisions)
    max_train_steps = int(max_num_epochs * steps_per_epoch)
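    # Worked example with hypothetical numbers (an illustration, not values from this code base):
    # num_train_samples=12800 and batch_size=32 give 400 batches per full pass over the train data;
    # with train_data_epoch_subdivisions=4 that yields steps_per_epoch=100, and whole_epochs=100
    # then gives max_num_epochs=400 sub-epochs, i.e. max_train_steps=40000.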

    print(
        "Number of train samples: %s, which correspond to  ~%s batches for one complete run through the "
        "train dataset. Number of validation samples: %s, which correspond to ~%s batches for complete iteration. "
        "Considering a 1/%s fraction of the train dataset as an epoch (steps_per_epoch: %s) "
        "after which validation and model checkpoints are saved. Running training for a maximum of %s steps, "
        "which correspond to max_num_epochs: %s (whole_epochs: %s). "
        "Early stopping has been set based on '%s' of min_delta of %s with a patience of %s."
        % (num_train_samples, num_training_steps_per_whole_dataset,
           num_val_samples, num_val_steps_per_whole_dataset,
           train_data_epoch_subdivisions, steps_per_epoch, max_train_steps,
           max_num_epochs, whole_epochs, early_stop_monitor,
           early_stop_min_delta, early_stop_patience))

    print("\nStarting the model training.")
    start_time = time.time()

    model.fit(train_dataset,
              epochs=max_num_epochs,
              steps_per_epoch=steps_per_epoch,
              validation_data=val_dataset,
              validation_steps=num_val_steps_per_whole_dataset,
              callbacks=callbacks,
              class_weight=class_weight)

    time_taken = time.time() - start_time
    print(
        "Training completed and the output has been saved in %s. Time taken: %s seconds."
        % (out_dir, time_taken))
Example n. 33
0
                                  pin_memory=True)

    dataloader_valid = DataLoader(dset_valid,
                                  batch_size=BATCH_SIZE,
                                  sampler=sampler_valid,
                                  num_workers=N_WORKERS,
                                  pin_memory=True)

    dataloader_test = DataLoader(dset_test,
                                 batch_size=BATCH_SIZE,
                                 sampler=sampler_test,
                                 num_workers=N_WORKERS,
                                 pin_memory=True)

    # Create model    
    model = ModelFactory.create(config)    
    model.to(device, dtype=dtype)
    lr = float(LEARNING_RATE)
    optimizer = model.get_optimizer(model, lr)
    lr_scheduler = model.get_lr_scheduler(optimizer)       

    if DPATH_LOAD_CKPT:
        if not fpath_load_ckpt:
            fpath_load_ckpt = get_ckpt(DPATH_LOAD_CKPT, LOAD_POLICY) #get_best_ckpt_with_criterion(dpath_load_ckpt, LOAD_POLICY)
        load_model(fpath_load_ckpt, model)
        print("[%s]"%(LOAD_POLICY.upper()), fpath_load_ckpt, "has been loaded...")
    # end of if
    model = nn.DataParallel(model)

    loss_ce = nn.CrossEntropyLoss()   
    def classification_loss(logits, target_labels):
Example n. 34
0
def test_with_cover_stego_biased_proportions(prob_cover=0.99, prob_stego=0.01):
    print("[Testing with prob_stego: %.3f" % (prob_stego))
    # Create dataset and dataloader
    dset_test = TimitTestSet(dpath_cover,
                             dpath_stego,
                             seed=SEED,
                             prob_cover=prob_cover,
                             prob_stego=prob_stego,
                             dtype=np.float32)
    n_data = len(dset_test)
    n_train = math.floor(0.8 * n_data)
    ix_end_valid = n_train
    indices = np.arange(n_data)
    sampler_test = SubsetRandomSampler(indices[ix_end_valid:])

    # Create dataloader_train
    dataloader_test = DataLoader(dset_test,
                                 batch_size=BATCH_SIZE,
                                 sampler=sampler_test,
                                 num_workers=N_WORKERS,
                                 pin_memory=True)

    # Create model
    model = ModelFactory.create(config)
    model.to(device, dtype=dtype)
    model = nn.DataParallel(model)

    global fpath_load_ckpt
    if dpath_load_ckpt:
        if not fpath_load_ckpt:
            fpath_load_ckpt = get_ckpt(dpath_load_ckpt, policy=LOAD_POLICY)

        load_model(fpath_load_ckpt, model)
        print("[%s]" % LOAD_POLICY.upper(), fpath_load_ckpt,
              "has been loaded...")
    elif fpath_load_ckpt:
        load_model(fpath_load_ckpt, model)

    loss_ce = nn.CrossEntropyLoss()

    def classification_loss(logits, target_labels):
        return loss_ce(logits, target_labels)

    def classify(model, batch):
        audios, labels = batch
        audios = audios.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        logits = model(audios)
        loss = classification_loss(logits, labels)
        acc = accuracy_top1(logits, labels)
        ps = labels.to(torch.float32).mean().item()

        return (logits, loss.item(), acc, ps)

    def compute_rates(labels_pred, labels_true):
        # Calculate confusion matrix
        cm = confusion_matrix(labels_pred, labels_true, labels=(1, 0))

        tp = cm[0, 0]  # True stegos  (stegos)
        tn = cm[1, 1]  # True covers  (covers)
        fp = cm[0, 1]  # False stegos (covers)
        fn = cm[1, 0]  # False covers (stegos)

        p = tp + fn
        n = tn + fp
        tpr = tp / p  # Sensitivity
        fpr = fp / n  # False alarm (1 - specificity)
        tnr = tn / n  # Specificity
        fnr = fn / p  # Miss rate

        return tpr, fpr, tnr, fnr

    # Lists for statistics
    list_stats = []
    list_acc = []
    list_loss = []
    list_prob = []

    # Lists for true labels, scores, predictions
    list_scores = []
    list_labels = []

    num_audios = 0
    model.eval()

    for epoch in tqdm.tqdm(range(N_REPEATS)):
        # Testing model
        sum_acc = 0
        sum_loss = 0
        sum_prob_stego = 0
        list_single_test_preds = []
        list_single_test_labels = []

        for step, batch in enumerate(dataloader_test):
            num_audios += 2 * len(batch)
            with torch.no_grad():
                # ps denotes prob. of fetching stegos
                logits, loss, acc, ps = classify(model, batch)
                sum_acc += acc
                sum_loss += loss
                sum_prob_stego += ps

                # Compute score for roc_curve
                sm = torch.softmax(logits, dim=1)  # softmax over the class dimension, not the batch
                list_scores.append(sm[:, 1].cpu().numpy())

                _, labels = batch
                list_single_test_labels.append(labels.cpu().numpy())

                preds = logits.topk(1).indices.view(-1)  # Predictions
                list_single_test_preds.append(preds.cpu().numpy())

        # end of for

        avg_acc = sum_acc / len(dataloader_test)
        avg_loss = sum_loss / len(dataloader_test)
        avg_prob_stego = sum_prob_stego / len(dataloader_test)

        # Compute the rates
        labels_pred = np.concatenate(list_single_test_preds)
        labels_true = np.concatenate(list_single_test_labels)
        tpr, fpr, tnr, fnr = compute_rates(labels_pred, labels_true)

        fstr = "- Acc:%.4f, Loss:%.6f, Ps:%.4f, " \
               "FA(fpr):%.4f, MD(fnr):%.4f, PE:%.4f"
        print()
        print(fstr % (avg_acc, avg_loss, avg_prob_stego, fpr, fnr, 0.5 *
                      (fpr + fnr)))
        # end of for
        list_acc.append(avg_acc)
        list_loss.append(avg_loss)
        list_prob.append(avg_prob_stego)
        list_labels.append(labels_true)
        list_stats.append({
            "test_avg_acc": avg_acc,
            "test_avg_loss": avg_loss,
            "test_avg_prob_stego": avg_prob_stego,
            "test_avg_prob_cover": 1 - avg_prob_stego,
            "test_tpr": tpr,
            "test_fpr": fpr,
            "test_tnr": tnr,
            "test_fnr": fnr,
        })
    # end of for

    # Compute ROC
    labels_true = np.concatenate(list_labels)
    y_score = np.concatenate(list_scores)
    roc_fpr, roc_tpr, roc_thr = roc_curve(labels_true, y_score)
    roc_auc = roc_auc_score(labels_true, y_score)

    print()
    print("- Avg. acc:", "%.4f ± %.4f" % (np.mean(list_acc), np.std(list_acc)))
    print("- Avg. loss:",
          "%.6f ± %.4f" % (np.mean(list_loss), np.std(list_loss)))
    print("- Avg. prob:",
          "%.4f ± %.4f" % (np.mean(list_prob), np.std(list_prob)))
    print("- Total num. tested audios:", num_audios)
    print()

    df_stats = pd.DataFrame(list_stats)

    dict_stats = {
        "model": MODEL,
        "steganography": STEGANOGRAPHY.lower(),
        "ps": ps,
        "stats": df_stats,
        "roc": {
            "roc_auc": roc_auc,
            "roc_tpr": roc_tpr,
            "roc_fpr": roc_fpr,
            "roc_thr": roc_thr
        }
    }

    return dict_stats
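A short usage sketch for the statistics returned above, assuming matplotlib is available (the plotting code is an illustration, not part of the original test script):

import matplotlib.pyplot as plt

stats = test_with_cover_stego_biased_proportions(prob_cover=0.99, prob_stego=0.01)
roc = stats["roc"]
plt.plot(roc["roc_fpr"], roc["roc_tpr"], label="AUC = %.3f" % roc["roc_auc"])
plt.plot([0, 1], [0, 1], linestyle="--", color="grey")  # chance-level reference line
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()
plt.savefig("roc_curve.png")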
Example n. 35
0
def main():
    """
    DATA PREPARATION (PRE-PROCESSING, CLEAN, TRANSFORM)
    """
    print("#################", "DATA PREPARATION", "####################\n")
    # CREATION OF DATAFRAME WITH ALL IMAGES --> [ID_PATIENT, PATH_IMAGE, TARGET]
    data = pd.DataFrame(index=np.arange(0, config.SIZE_DATAFRAME),
                        columns=[config.ID, config.IMAGE_PATH, config.TARGET])

    # POPULATE DATAFRAME
    data = config_func.populate_DataFrame(data)

    # TRANSFORM DATA INTO NUMPY ARRAYS
    X, Y = config_func.resize_images(config.WIDTH, config.HEIGHT, data)

    # DIVISION OF DATASET INTO TRAIN, VALIDATION AND TEST --> NEEDS ATTENTION BECAUSE CLASSES ARE UNBALANCED
    indexes = np.arange(X.shape[0])
    X_train, X_val, y_train, y_val, indices_train, indices_val = train_test_split(
        X,
        Y,
        indexes,
        test_size=config.VALIDATION_SIZE,
        stratify=Y,
        shuffle=True,
        random_state=config.RANDOM_STATE
    )  # RANDOM STATE IS NEEDED TO GUARANTEE REPRODUCIBILITY
    indexes = indices_train
    X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(
        X_train,
        y_train,
        indexes,
        test_size=config.TEST_SIZE,
        stratify=y_train,
        shuffle=True,
        random_state=config.RANDOM_STATE)
    print(X_train.shape)
    print(X_val.shape)
    print(X_test.shape)

    #NORMALIZE DATA
    X_train, X_val, X_test = config_func.normalize(X_train, X_val, X_test)

    #ONE HOT ENCODING TARGETS
    y_train, y_val, y_test = config_func.one_hot_encoding(
        y_train, y_val, y_test)
    print("#################", "DATA PREPARATION CONCLUDED",
          "####################\n")

    # CREATE DATA OBJECT
    d = Data.Data(X_train=X_train,
                  X_val=X_val,
                  X_test=X_test,
                  y_train=y_train,
                  y_val=y_val,
                  y_test=y_test)

    factoryModel = ModelFactory.ModelFactory()
    numberLayers = (
        4,  #CNN LAYERS
        1  #DENSE LAYERS
    )

    ## TRAINING STRATEGY INSTANCES

    underSampling = UnderSampling.UnderSampling()
    data_aug = DataAugmentation.DataAugmentation()

    ## ---------------------------ALEXNET APPLICATION ------------------------------------

    ## TUPLE DEFINITIONS
    numberLayers = (
        4,  #CNN LAYERS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        2,  ## number of normal convolutional layers
        2,  ## number of stacked cnn layers
        16,  ## number of feature maps of first conv layer
        16,  ## growth rate
        2,  ## number of FCL's preceding output layer (sigmoid layer)
        16,  ## number of neurons of the fully connected layer
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    # CREATION OF MODEL
    alexNetModel = factoryModel.getModel(config.ALEX_NET, d, *numberLayers)

    ## APPLY TRAINING STRATEGIES
    alexNetModel.addStrategy(underSampling)
    alexNetModel.addStrategy(data_aug)

    #model, predictions, history = alexNetModel.template_method(*valuesLayers)

    #config_func.print_final_results(d.y_test, predictions, history)

    ## ---------------------------VGGNET APPLICATION ------------------------------------

    ## TUPLE DEFINITIONS
    numberLayers = (
        4,  #CNN LAYERS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        5,  # conv stacks
        24,  # number of feature maps of initial convolution layer
        16,  # growth rate
        1,  ## number of FCL's preceding output layer (sigmoid layer)
        16,  # number of neurons of the fully connected layer
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    vggNetModel = factoryModel.getModel(config.VGG_NET, d, *numberLayers)

    vggNetModel.addStrategy(underSampling)
    vggNetModel.addStrategy(data_aug)

    #model, predictions, history = vggNetModel.template_method(*valuesLayers)

    #config_func.print_final_results(d.y_test, predictions, history)

    ## ---------------------------RESNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    number_cnn_dense = (5, 1)

    # creation of ResNet instance
    resnet = factoryModel.getModel(config.RES_NET, d, *number_cnn_dense)

    # apply strategies to resnet
    resnet.addStrategy(underSampling)
    resnet.addStrategy(data_aug)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    resnet_args = (
        48,  # number of filters of initial CNN layer
        4,  # number of consecutive conv+identity blocks
        1,  # repetitions of the identity block; by default ResNet-18 uses 1 (1 conv block + 1 identity block) for all layers
        8,  # growth rate
        config.BATCH_SIZE_ALEX_AUG,  # batch size
    )

    # apply build, train and predict
    #model, predictions, history = resnet.template_method(*resnet_args)
    ##resnet.save(model, config.RES_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=d.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------DENSENET APPLICATION ------------------------------------

    # TUPLE DEFINITIONS
    numberLayers = (
        4,  #BLOCKS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        24,  # initial number of Feature Maps
        5,  # number of dense blocks
        2,  # number of layers in each block
        12,  # growth rate
        0.5,  # compression rate
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    densenet = factoryModel.getModel(config.DENSE_NET, d, *numberLayers)

    densenet.addStrategy(underSampling)
    densenet.addStrategy(data_aug)

    #model, predictions, history = densenet.template_method(*valuesLayers)

    #config_func.print_final_results(d.y_test, predictions, history)

    ## ------------------------PSO OPTIMIZATION ------------------------------------------

    #PSO OPTIMIZATION
    optFact = OptimizerFactory.OptimizerFactory()

    # definition of optimizers for the models
    pso_alex = optFact.createOptimizer(config.PSO_OPTIMIZER, alexNetModel,
                                       *config.pso_init_args_alex)
    pso_vgg = optFact.createOptimizer(config.PSO_OPTIMIZER, vggNetModel,
                                      *config.pso_init_args_vgg)
    pso_res = optFact.createOptimizer(config.PSO_OPTIMIZER, resnet,
                                      *config.pso_init_args_resnet)
    pso_dense = optFact.createOptimizer(config.PSO_OPTIMIZER, densenet,
                                        *config.pso_init_args_densenet)

    # call optimize function
    cost, pos, optimizer = pso_alex.optimize()

    #plot cost history and plot position history
    print("Custo: {}".format(cost))
    config_func.print_Best_Position_PSO(pos, config.ALEX_NET)  # print position
    pso_alex.plotCostHistory(optimizer=optimizer)
    pso_alex.plotPositionHistory(optimizer, np.array(config.X_LIMITS),
                                 np.array(config.Y_LIMITS), config.POS_VAR_EXP,
                                 config.LABEL_X_AXIS, config.LABEL_Y_AXIS)
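
The plotCostHistory/plotPositionHistory helpers suggest a pyswarms-style optimizer behind the factory. For orientation, a minimal standalone particle-swarm search with pyswarms is sketched below, with a toy quadratic objective standing in for the real "train the CNN and return its validation error" function; the bounds, options and dimensions are illustrative assumptions:

import numpy as np
import pyswarms as ps

def objective(positions):
    # positions has shape (n_particles, dimensions); return one cost per particle.
    # Toy stand-in for training a model with these hyperparameters and returning 1 - accuracy.
    return np.sum((positions - 3.0) ** 2, axis=1)

bounds = (np.array([0.0, 0.0]), np.array([10.0, 10.0]))
optimizer = ps.single.GlobalBestPSO(n_particles=10, dimensions=2,
                                    options={'c1': 0.5, 'c2': 0.3, 'w': 0.9},
                                    bounds=bounds)
cost, pos = optimizer.optimize(objective, iters=30)
print("Cost: {}, best position: {}".format(cost, pos))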