Code example #1
    def test_transformers1(self):
        '''
        Prepare labeled data for single sentence BERT classification problem
        COVERAGE: bert_prepare_data() in bert_utils.py
                  class BertDMH() in bert_utils.py
        '''

        if self.data_dir is None:
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR is not set in the environment variables")

        if (self.data_dir_local is None) or (not os.path.isdir(
                self.data_dir_local)):
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR_LOCAL is not set in the environment "
                "variables or it does not exist.")

        if not self.necessary_packages_installed:
            unittest.TestCase.skipTest(self, "missing transformers package")

        if not os.path.isfile(
                os.path.join(self.data_dir_local, 'imdb_master.csv')):
            unittest.TestCase.skipTest(
                self, "cannot locate imdb_master.csv in DLPY_DATA_DIR_LOCAL")

        from transformers import BertTokenizer
        model_name = 'bert-base-uncased'

        # instantiate BERT tokenizer
        tokenizer = BertTokenizer.from_pretrained(
            model_name, cache_dir=self.data_dir_local)

        # read dataset for IMDB movie review sentiment classification
        reviews = pd.read_csv(os.path.join(self.data_dir_local,
                                           'imdb_master.csv'),
                              header=0,
                              names=['type', 'review', 'label', 'file'],
                              encoding='latin_1')

        input_label = 'review'  # input data is review text
        target_label = 'label'  # target data is sentiment label

        # extract "train" data
        t_idx1 = reviews['type'] == 'train'
        t_idx2 = reviews['label'] != 'unsup'
        inputs = reviews[t_idx1 & t_idx2][input_label].to_list()
        targets = reviews[t_idx1 & t_idx2][target_label].to_list()

        # limit the number of observations to 1000
        if len(inputs) > 1000:
            inputs = inputs[:1000]
            targets = targets[:1000]

        # strip HTML line breaks and map sentiment labels to numeric targets
        for ii, val in enumerate(targets):
            inputs[ii] = inputs[ii].replace("<br />", "")
            if val == 'neg':
                targets[ii] = 1
            elif val == 'pos':
                targets[ii] = 2

        # prepare data
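        # the third positional argument (128) is the maximum token sequence
        # length; bert_prepare_data returns the number of target variables
        # and the name of the CAS training table it creates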
        num_tgt_var, train = bert_prepare_data(self.s,
                                               tokenizer,
                                               128,
                                               input_a=list(inputs),
                                               target=list(targets),
                                               classification_problem=True)

        # check for the existence of the training table
        res = self.s.retrieve('table.tableexists',
                              _messagelevel='error',
                              name=train)
        self.assertTrue(res['exists'] != 0, "Training table not created.")

        # ensure table has the proper number of columns
        res = self.s.retrieve('table.columninfo',
                              _messagelevel='error',
                              table=train)
        self.assertTrue(
            len(res['ColumnInfo']['Column'].to_list()) == 5,
            "Training table has extra/missing columns.")

        # clean up data table if it exists
        try:
            model_tbl_opts = input_table_check(train)
            self.s.table.droptable(quiet=True, **model_tbl_opts)
        except TypeError:
            self.assertTrue(False, "BERT data preparation failed")

        # clean up tokenizer
        del tokenizer
Code example #2
    def test_transformers3(self):
        '''
        Prepare test data (no labels) for two sentence BERT classification problem
        COVERAGE: bert_prepare_data() in bert_utils.py
                  class BertDMH() in bert_utils.py
        '''

        if self.data_dir is None:
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR is not set in the environment variables")

        if (self.data_dir_local is None) or (not os.path.isdir(
                self.data_dir_local)):
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR_LOCAL is not set in the environment "
                "variables or it does not exist.")

        if not self.necessary_packages_installed:
            unittest.TestCase.skipTest(self, "missing transformers package")

        if not os.path.isfile(
                os.path.join(self.data_dir_local, 'qnli_train.tsv')):
            unittest.TestCase.skipTest(
                self, "cannot locate qnli_train.tsv in DLPY_DATA_DIR_LOCAL")

        from transformers import BertTokenizer
        model_name = 'bert-base-uncased'

        # instantiate BERT tokenizer
        tokenizer = BertTokenizer.from_pretrained(
            model_name, cache_dir=self.data_dir_local)

        # read QNLI dataset
        train_data = pd.read_csv(
            os.path.join(self.data_dir_local, 'qnli_train.tsv'),
            header=0,
            sep='\t',
            error_bad_lines=False,
            warn_bad_lines=False,
            names=['index', 'question', 'sentence', 'label'])

        input_a_label = 'question'
        input_b_label = 'sentence'

        input_a = train_data[input_a_label].to_list()
        input_b = train_data[input_b_label].to_list()

        # limit the number of observations to 1000
        if len(input_a) > 1000:
            input_a = input_a[:1000]
            input_b = input_b[:1000]

        # prepare data
        num_tgt_var, test = bert_prepare_data(self.s,
                                              tokenizer,
                                              128,
                                              input_a=input_a,
                                              input_b=input_b,
                                              classification_problem=True)

        # check for the existence of the test table
        res = self.s.retrieve('table.tableexists',
                              _messagelevel='error',
                              name=test)
        self.assertTrue(res['exists'] != 0, "Test table not created.")

        # ensure table has the proper number of columns
        res = self.s.retrieve('table.columninfo',
                              _messagelevel='error',
                              table=test)
        self.assertTrue(
            len(res['ColumnInfo']['Column'].to_list()) == 3,
            "Test table has extra/missing columns.")

        # clean up data table if it exists
        try:
            model_tbl_opts = input_table_check(test)
            self.s.table.droptable(quiet=True, **model_tbl_opts)
        except TypeError:
            self.assertTrue(False, "BERT data preparation failed")

        # clean up tokenizer
        del tokenizer
Code example #3
    def test_transformers4(self):
        '''
        Load a base BERT model and add classification head.
        COVERAGE: BERT_Model() class in bert_model.py
                  all private class functions (e.g. _XXX) in bert_model.py
                  compile() in bert_model.py
                  load_weights() in bert_model.py
                  write_block_information() in bert_utils.py
                  get_data_spec() in bert_model.py
                  create_data_spec() in bert_utils.py
                  generate_target_var_names() in bert_utils.py
                  extract_pytorch_parms() in bert_utils.py
                  find_pytorch_tensor() in bert_utils.py
        '''

        model_name = 'bert-base-uncased'
        cache_dir = self.data_dir_local

        # check whether h5py is installed, suppressing any import noise on stderr
        stderr = sys.stderr
        devnull = open(os.devnull, 'w')
        try:
            sys.stderr = devnull
            import h5py
            h5py_installed = True
        except ImportError:
            h5py_installed = False
        finally:
            sys.stderr = stderr
            devnull.close()

        if self.data_dir is None:
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR is not set in the environment variables")

        if (self.data_dir_local is None) or (not os.path.isdir(
                self.data_dir_local)):
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR_LOCAL is not set in the environment "
                "variables or it does not exist.")

        if (not self.necessary_packages_installed) or (not h5py_installed):
            unittest.TestCase.skipTest(self,
                                       "missing transformers or h5py package")

        # test case parameters
        n_classes = 2
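        # only two encoder layers are requested here (full BERT-base has 12)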
        num_encoder_layers = 2
        num_tgt_var = 1

        # instantiate BERT model
        bert = BERT_Model(self.s,
                          cache_dir,
                          model_name,
                          n_classes,
                          num_hidden_layers=num_encoder_layers,
                          verbose=False)

        # compile model
        bert.compile(num_target_var=num_tgt_var)

        if not os.path.isfile(
                os.path.join(cache_dir, model_name + '.kerasmodel.h5')):
            self.assertTrue(False, "HDF5 file not written.")

        # check for the existence of the model table
        res = self.s.retrieve('table.tableexists',
                              _messagelevel='error',
                              name=bert.model_name)
        self.assertTrue(res['exists'] != 0, "Model table not created.")

        # attempt to create CASLIB to cache directory
        try:
            caslib, extra_path, newlib = caslibify(self.s,
                                                   cache_dir,
                                                   task='save')
            do_load_weights = True
        except DLPyError:
            do_load_weights = False

        # attach model weights - skip if server unable to "see" cache directory
        if do_load_weights:
            bert.load_weights(os.path.join(cache_dir,
                                           model_name + '.kerasmodel.h5'),
                              num_target_var=num_tgt_var,
                              freeze_base_model=False)

            # check for the existence of the weight table
            res = self.s.retrieve('table.tableexists',
                                  _messagelevel='error',
                                  name=bert.model_name + '_weights')
            self.assertTrue(res['exists'] != 0, "Weight table not created.")

        # create data spec for model
        data_spec = bert.get_data_spec(num_tgt_var)

        # drop table(s)
        try:
            model_tbl_opts = input_table_check(bert.model_name)
            self.s.table.droptable(quiet=True, **model_tbl_opts)
        except TypeError:
            self.assertTrue(False, "Unable to drop model table.")

        if do_load_weights:
            try:
                model_tbl_opts = input_table_check(bert.model_name +
                                                   '_weights')
                self.s.table.droptable(quiet=True, **model_tbl_opts)
            except TypeError:
                self.assertTrue(False, "Unable to drop weight table.")

        # remove HDF5 file
        if os.path.isfile(
                os.path.join(cache_dir, model_name + '.kerasmodel.h5')):
            os.remove(os.path.join(cache_dir, model_name + '.kerasmodel.h5'))

        # clean up BERT model
        del bert
Code example #4
    def test_transformers2(self):
        '''
        Prepare labeled data for single sentence BERT regression problem
        COVERAGE: bert_prepare_data() in bert_utils.py
                  class BertDMH() in bert_utils.py
        '''

        if self.data_dir is None:
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR is not set in the environment variables")

        if (self.data_dir_local is None) or (not os.path.isdir(
                self.data_dir_local)):
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR_LOCAL is not set in the environment "
                "variables or it does not exist.")

        if not self.necessary_packages_installed:
            unittest.TestCase.skipTest(self, "missing transformers package")

        if not os.path.isfile(
                os.path.join(self.data_dir_local,
                             'task1_training_edited.csv')):
            unittest.TestCase.skipTest(
                self,
                "cannot locate task1_training_edited.csv in DLPY_DATA_DIR_LOCAL"
            )

        from transformers import BertTokenizer
        model_name = 'bert-base-uncased'

        # instantiate BERT tokenizer
        tokenizer = BertTokenizer.from_pretrained(
            model_name, cache_dir=self.data_dir_local)

        # read regression data set
        reviews = pd.read_csv(
            os.path.join(self.data_dir_local, 'task1_training_edited.csv'),
            header=None,
            names=['id', 'original', 'edit', 'grades', 'meanGrade'])

        # drop the first list element: header=None causes the file's header
        # row to be read as data
        inputs = reviews['original'].tolist()[1:]
        reviews['meanGrade'] = pd.to_numeric(reviews['meanGrade'],
                                             errors='coerce').fillna(0)
        targets = reviews['meanGrade'].tolist()[1:]
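        # round each mean grade to the nearest integer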
        for ii, val in enumerate(targets):
            targets[ii] = round(val)

        # limit the number of observations to 1000
        if len(inputs) > 1000:
            inputs = inputs[:1000]
            targets = targets[:1000]

        # prepare data
        num_tgt_var, train, valid = bert_prepare_data(
            self.s,
            tokenizer,
            128,
            input_a=list(inputs),
            target=list(targets),
            train_fraction=0.8,
            classification_problem=False)

        # check for the existence of the training table
        res = self.s.retrieve('table.tableexists',
                              _messagelevel='error',
                              name=train)
        self.assertTrue(res['exists'] != 0, "Training table not created.")

        # ensure table has the proper number of columns
        res = self.s.retrieve('table.columninfo',
                              _messagelevel='error',
                              table=train)
        self.assertTrue(
            len(res['ColumnInfo']['Column'].to_list()) == 5,
            "Training table has extra/missing columns.")

        # check for the existence of the validation table
        res = self.s.retrieve('table.tableexists',
                              _messagelevel='error',
                              name=valid)
        self.assertTrue(res['exists'] != 0, "Validation table not created.")

        # ensure table has the proper number of columns
        res = self.s.retrieve('table.columninfo',
                              _messagelevel='error',
                              table=valid)
        self.assertTrue(
            len(res['ColumnInfo']['Column'].to_list()) == 5,
            "Validation table has extra/missing columns.")

        # clean up training table if it exists
        try:
            model_tbl_opts = input_table_check(train)
            self.s.table.droptable(quiet=True, **model_tbl_opts)
        except TypeError:
            self.assertTrue(False, "BERT data preparation failed")

        # clean up validation table if it exists
        try:
            model_tbl_opts = input_table_check(valid)
            self.s.table.droptable(quiet=True, **model_tbl_opts)
        except TypeError:
            self.assertTrue(False, "BERT data preparation failed")

        # clean up models
        del tokenizer
Code example #5
    def test_model_conversion3(self):
        '''
        Import CNN image classification model and override attributes
          - instantiate a Keras LeNet model, translate it to a DLPy/Viya model,
            then override the CNN model attributes with RNN attributes; this
            would never be done in practice and only verifies that the new
            attributes are written
        NOTE: cannot attach weights unless both client and server share
              the same file system
        COVERAGE: from_keras_model(), load_weights() in network.py
                  keras_to_sas() in sas_keras_parse.py
                  write_keras_hdf5() in write_keras_model_parm.py
                  all functions in model_conversion_utils.py
                  CNN-related functions in write_sas_code.py
        '''

        if self.data_dir is None:
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR is not set in the environment variables")

        if (self.data_dir_local is None) or (not os.path.isfile(
                os.path.join(self.data_dir_local, 'lenet.h5'))):
            unittest.TestCase.skipTest(
                self,
                "DLPY_DATA_DIR_LOCAL is not set in the environment variables or lenet.h5 file is missing"
            )

        if self.keras_installed:
            from keras.models import Sequential
            from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
        else:
            unittest.TestCase.skipTest(self, "keras is not installed")

        model = Sequential()
        model.add(
            Conv2D(20,
                   kernel_size=(5, 5),
                   strides=(1, 1),
                   activation='relu',
                   input_shape=(28, 28, 1),
                   padding="same"))
        model.add(
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
        model.add(
            Conv2D(50,
                   kernel_size=(5, 5),
                   strides=(1, 1),
                   activation='relu',
                   padding='same'))
        model.add(
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
        model.add(Flatten())
        model.add(Dense(500, activation='relu'))
        model.add(Dense(10, activation='softmax'))

        model.load_weights(os.path.join(self.data_dir_local, 'lenet.h5'))
        model.summary()

        model_name = 'lenet'
        model1, use_gpu = Model.from_keras_model(
            conn=self.s,
            keras_model=model,
            output_model_table=model_name,
            include_weights=True,
            scale=1.0 / 255.0,
            input_weights_file=os.path.join(self.data_dir_local, 'lenet.h5'))

        if os.path.isdir(self.data_dir):
            try:
                copyfile(
                    os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5'),
                    os.path.join(self.data_dir, 'lenet_weights.kerasmodel.h5'))
                copy_success = True
            except:
                print(
                    'Unable to copy weights file, skipping test of overriding attributes'
                )
                copy_success = False

            if copy_success:
                self.s.table.addcaslib(activeonadd=False,
                                       datasource={'srctype': 'path'},
                                       name='MODEL_CONVERT',
                                       path=self.data_dir,
                                       subdirectories=True)

                model1.load_weights(path=os.path.join(
                    self.data_dir, 'lenet_weights.kerasmodel.h5'),
                                    labels=False,
                                    use_gpu=use_gpu)

                os.remove(
                    os.path.join(self.data_dir, 'lenet_weights.kerasmodel.h5'))

                # parameters for the (nonexistent) RNN layers
                rnn_size = 10
                feature_dim = 4

                # output classes
                output_dim = 29

                # maximum sequence length
                max_seq_len = 100

                # define data specs needed to import Keras model weights
                tokensize = feature_dim

                inputs = []
                for fi in range(max_seq_len):
                    for vi in range(tokensize):
                        inputs.append('_f%d_v%d_' % (fi, vi))
                targets = ['y%d' % i for i in range(0, max_seq_len)]

                data_spec = []
                data_spec.append(
                    DataSpec(type_='NUMERICNOMINAL',
                             layer=model.layers[0].name + "_input",
                             data=inputs,
                             numeric_nominal_parms=DataSpecNumNomOpts(
                                 length='_num_frames_',
                                 token_size=feature_dim)))
                data_spec.append(
                    DataSpec(type_='NUMERICNOMINAL',
                             layer=model.layers[-1].name,
                             data=targets,
                             nominals=targets,
                             numeric_nominal_parms=DataSpecNumNomOpts(
                                 length='ylen', token_size=1)))

                # override model attributes
                from dlpy.attribute_utils import create_extended_attributes
                create_extended_attributes(self.s, model_name, model1.layers,
                                           data_spec)

        if os.path.isfile(
                os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5')):
            os.remove(os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5'))

        # clean up model table
        model_tbl_opts = input_table_check(model_name)
        self.s.table.droptable(quiet=True, **model_tbl_opts)

        # clean up models
        del model
        del model1
Code example #6
    def test_model_conversion2(self):
        '''
        Import RNN sequence to sequence models
          - instantiate Keras RNN models and translate to DLPy/Viya models
        NOTE: cannot attach weights unless both client and server share
              the same file system
        COVERAGE: from_keras_model(), load_weights() in network.py
                  keras_to_sas() in sas_keras_parse.py
                  write_keras_hdf5() in write_keras_model_parm.py
                  all functions in model_conversion_utils.py
                  RNN-related functions in write_sas_code.py
        '''

        if self.data_dir is None:
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR is not set in the environment variables")

        if (self.data_dir_local is None) or (not os.path.isfile(
                os.path.join(self.data_dir_local, 'lenet.h5'))):
            unittest.TestCase.skipTest(
                self,
                "DLPY_DATA_DIR_LOCAL is not set in the environment variables or lenet.h5 file is missing"
            )

        if not self.keras_installed:
            unittest.TestCase.skipTest(self, "keras is not installed")

        # parameters for the RNN layers
        rnn_size = 10
        feature_dim = 4

        # output classes
        output_dim = 29

        # maximum sequence length
        max_seq_len = 100

        # define data specs needed to import Keras model weights
        tokensize = feature_dim

        # input variables are named _f<frame>_v<element>_ (one per frame and
        # per token element); targets are named y0 ... y<max_seq_len-1>
        inputs = []
        for fi in range(max_seq_len):
            for vi in range(tokensize):
                inputs.append('_f%d_v%d_' % (fi, vi))
        targets = ['y%d' % i for i in range(0, max_seq_len)]

        data_spec = []
        data_spec.append(
            DataSpec(type_='NUMERICNOMINAL',
                     layer='the_input',
                     data=inputs,
                     numeric_nominal_parms=DataSpecNumNomOpts(
                         length='_num_frames_', token_size=feature_dim)))
        data_spec.append(
            DataSpec(type_='NUMERICNOMINAL',
                     layer='out',
                     data=targets,
                     nominals=targets,
                     numeric_nominal_parms=DataSpecNumNomOpts(length='ylen',
                                                              token_size=1)))

        # try all RNN model types
        for layer_type in [
                'simplernn', 'lstm', 'gru', 'cudnnlstm', 'cudnngru'
        ]:
            for bidirectional in [True, False]:
                model = define_keras_rnn_model(layer_type, bidirectional,
                                               rnn_size, feature_dim,
                                               output_dim)

                model_name = 'dlpy_model'
                model1, use_gpu = Model.from_keras_model(
                    conn=self.s,
                    keras_model=model,
                    max_num_frames=max_seq_len,
                    include_weights=True,
                    output_model_table=model_name)

                model1.print_summary()

                # try to load weights, but skip any GPU-based models because worker/soloist may not have GPU
                if os.path.isdir(self.data_dir) and (not use_gpu):
                    try:
                        copyfile(
                            os.path.join(os.getcwd(),
                                         'dlpy_model_weights.kerasmodel.h5'),
                            os.path.join(self.data_dir,
                                         'dlpy_model_weights.kerasmodel.h5'))
                        copy_success = True
                    except:
                        print(
                            'Unable to copy weights file, skipping test of attaching weights'
                        )
                        copy_success = False

                    if copy_success:
                        model1.load_weights(path=os.path.join(
                            self.data_dir, 'dlpy_model_weights.kerasmodel.h5'),
                                            labels=False,
                                            use_gpu=use_gpu)
                        os.remove(
                            os.path.join(self.data_dir,
                                         'dlpy_model_weights.kerasmodel.h5'))
                else:
                    print('GPU model or data directory not accessible, '
                          'skipping test of attaching weights')

                if os.path.isfile(
                        os.path.join(os.getcwd(),
                                     'dlpy_model_weights.kerasmodel.h5')):
                    os.remove(
                        os.path.join(os.getcwd(),
                                     'dlpy_model_weights.kerasmodel.h5'))

                # clean up models
                del model
                del model1

                # clean up model table
                model_tbl_opts = input_table_check(model_name)
                self.s.table.droptable(quiet=True, **model_tbl_opts)
Code example #7
    def test_model_conversion1(self):
        '''
        Import CNN image classification model
          - instantiate a Keras LeNet model and translate to DLPy/Viya model
        NOTE: cannot attach weights unless both client and server share
              the same file system
        COVERAGE: from_keras_model(), load_weights() in network.py
                  keras_to_sas() in sas_keras_parse.py
                  write_keras_hdf5_from_file() in write_keras_model_parm.py
                  all functions in model_conversion_utils.py
                  all functions in keras_utils.py
                  CNN-related functions in write_sas_code.py
        '''

        if self.data_dir is None:
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR is not set in the environment variables")

        if (self.data_dir_local is None) or (not os.path.isfile(
                os.path.join(self.data_dir_local, 'lenet.h5'))):
            unittest.TestCase.skipTest(
                self, "DLPY_DATA_DIR_LOCAL is not set in the environment "
                "variables or lenet.h5 file is missing")

        if self.keras_installed:
            from keras.models import Sequential
            from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
        else:
            unittest.TestCase.skipTest(self, "keras is not installed")

        model = Sequential()
        model.add(
            Conv2D(20,
                   kernel_size=(5, 5),
                   strides=(1, 1),
                   activation='relu',
                   input_shape=(28, 28, 1),
                   padding="same"))
        model.add(
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
        model.add(
            Conv2D(50,
                   kernel_size=(5, 5),
                   strides=(1, 1),
                   activation='relu',
                   padding='same'))
        model.add(
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
        model.add(Flatten())
        model.add(Dense(500, activation='relu'))
        model.add(Dense(10, activation='softmax'))

        model.load_weights(os.path.join(self.data_dir_local, 'lenet.h5'))
        model.summary()

        model_name = 'lenet'
        model1, use_gpu = Model.from_keras_model(
            conn=self.s,
            keras_model=model,
            output_model_table=model_name,
            include_weights=True,
            scale=1.0 / 255.0,
            input_weights_file=os.path.join(self.data_dir_local, 'lenet.h5'))

        if os.path.isdir(self.data_dir):
            try:
                copyfile(
                    os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5'),
                    os.path.join(self.data_dir, 'lenet_weights.kerasmodel.h5'))
                copy_success = True
            except:
                print(
                    'Unable to copy weights file, skipping test of attaching weights'
                )
                copy_success = False

            if copy_success:
                model1.load_weights(path=os.path.join(
                    self.data_dir, 'lenet_weights.kerasmodel.h5'),
                                    labels=False,
                                    use_gpu=use_gpu)
                os.remove(
                    os.path.join(self.data_dir, 'lenet_weights.kerasmodel.h5'))

        model1.print_summary()

        if os.path.isfile(
                os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5')):
            os.remove(os.path.join(os.getcwd(), 'lenet_weights.kerasmodel.h5'))

        # clean up model table
        model_tbl_opts = input_table_check(model_name)
        self.s.table.droptable(quiet=True, **model_tbl_opts)

        # clean up models
        del model
        del model1
Code example #8
def produce_object_detections(conn,
                              table,
                              coord_type,
                              max_objects=9999,
                              num_plot=9999,
                              fig_size=None):
    '''
    Extract images with drawn bounding boxes from an object detection table
    and return them as a list of ImageRecord objects (image id, timestamp,
    object count, occupancy rate, and base64-encoded image).

    conn : CAS
        CAS connection object
    table : string or CASTable
        Specifies the object detection castable to be processed.
    coord_type : string
        Specifies the coordinate type of the input table.
    max_objects : int, optional
        Specifies the maximum number of bounding boxes extracted per image.
        Default: 9999
    num_plot : int, optional
        Specifies the maximum number of images sampled from the table.
        Default: 9999
    fig_size : int, optional
        Specifies the size of the figure (not used by this function).
    '''
    conn.retrieve('loadactionset', _messagelevel='error', actionset='image')

    input_tbl_opts = input_table_check(table)
    input_table = conn.CASTable(**input_tbl_opts)
    img_num = input_table.shape[0]
    num_plot = num_plot if num_plot < img_num else img_num
    input_table = input_table.sample(num_plot)
    det_label_image_table = random_name('detLabelImageTable')

    num_max_obj = input_table['_nObjects_'].max()
    max_objects = max_objects if num_max_obj > max_objects else num_max_obj

    with sw.option_context(print_messages=False):
        res = conn.image.extractdetectedobjects(casout={
            'name': det_label_image_table,
            'replace': True
        },
                                                coordtype=coord_type,
                                                maxobjects=max_objects,
                                                table=input_table)
        if res.severity > 0:
            for msg in res.messages:
                print(msg)

    outtable = conn.CASTable(det_label_image_table)
    imageRecordList = list()
    in_df = input_table.fetch()['Fetch']
    out_df = outtable.fetch()['Fetch']

    if len(out_df) == len(in_df):
        print("Input and output tables have the same length (" +
              str(len(out_df)) + " rows); building image record list")
        for i in range(len(out_df)):
            imageId = str(uuid4())
            timestamp = round(datetime.now().microsecond)
            nbrOfBoats = int(in_df['_nObjects_'][i])
            imgStr = out_df['_image_'][i]
            nparr = np.frombuffer(imgStr, np.uint8)
            #img_np = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            base_img = str(base64.b64encode(nparr))

            occupancy_rate = 0
            if nbrOfBoats > 0:
                surface_list = list()
                index = 5
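                # each detected object is assumed to span six table columns,
                # with the box width and height at offsets +4 and +5; the
                # occupancy rate is the summed width*height over all boxes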
                for ix in range(nbrOfBoats):
                    surface_list.append(in_df.iloc[i, index + 4] *
                                        in_df.iloc[i, index + 5])
                    index = index + 6
                occupancy_rate = sum(surface_list)

            imageRecordList.append(
                ImageRecord(imageId, timestamp, nbrOfBoats, occupancy_rate,
                            base_img))

    with sw.option_context(print_messages=False):
        conn.table.droptable(det_label_image_table)

    return imageRecordList
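
A minimal usage sketch (hypothetical; it assumes an active swat.CAS session s and a scored object detection table named 'detections' that uses YOLO-style coordinates):

    records = produce_object_detections(s, 'detections', coord_type='yolo',
                                        max_objects=20, num_plot=4)
    print('%d image records produced' % len(records))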