def setUp(self):
    self.maxDiff = None
    self.contamination = 0.1
    self.window_size = 2
    self.roc_floor = 0.  # 0.8

    # With window_size=2 the detector scores sliding subsequences, so the
    # label arrays have len(data) - window_size + 1 entries.
    self.X_train = d3m_dataframe(
        {'data': [3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78, 100]},
        columns=['data'],
        generate_metadata=True)
    self.y_train = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    self.X_test = d3m_dataframe(
        {'data': [3., 4., 8.6, 13.4, 22.5, 17, 19.2, 36.1, 127, -23, 59.2]},
        columns=['data'],
        generate_metadata=True)
    self.y_test = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

    hyperparams_default = KDiscordODetector.metadata.get_hyperparams().defaults()
    hyperparams = hyperparams_default.replace({
        'contamination': self.contamination,
        'window_size': self.window_size,
        'return_subseq_inds': True,
    })

    self.primitive = KDiscordODetector(hyperparams=hyperparams)
    self.primitive.set_training_data(inputs=self.X_train)
    self.primitive.fit()
    self.prediction_labels = self.primitive.produce(inputs=self.X_test).value
    self.prediction_score = self.primitive.produce_score(inputs=self.X_test).value

    self.collective_common_test = CollectiveCommonTest(
        model=self.primitive._clf,
        X_train=self.X_train,
        y_train=self.y_train,
        X_test=self.X_test,
        y_test=self.y_test,
        roc_floor=self.roc_floor,
    )
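# Why 14 training points but only 13 labels: with return_subseq_inds=True the
# detector emits one score per sliding window, and a window of size w over n
# points yields n - w + 1 subsequences. A minimal sketch of that arithmetic
# (helper name is hypothetical, not part of the primitive):
def n_subsequences(n_points, window_size):
    """Number of sliding windows of length `window_size` over `n_points`."""
    return n_points - window_size + 1

assert n_subsequences(14, 2) == 13  # matches len(self.y_train)
assert n_subsequences(11, 2) == 10  # matches len(self.y_test)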
def set_training_data(self, *, inputs: Inputs) -> None:
    inputs_timeseries = d3m_dataframe(inputs.iloc[:, -1])
    inputs_d3mIndex = d3m_dataframe(inputs.iloc[:, 0])
    if len(inputs_timeseries) == 0:
        print("Warning: timeseries input to the timeseries_featurization primitive has length 0.")
        return
    column_name = inputs_timeseries.columns[0]
    self._training_inputs, self._target_names = inputs_timeseries, column_name
    self._training_outputs = inputs_timeseries
def log_likelihoods(self, *, outputs: Outputs, inputs: Inputs, timeout: float = None,
                    iterations: int = None) -> CallResult[Sequence[float]]:
    inputs = inputs.iloc[:, self._training_indices]  # Get ndarray
    outputs = outputs.iloc[:, self._target_column_indices]

    if len(inputs.columns) and len(outputs.columns):
        if outputs.shape[1] != self._n_classes:
            raise exceptions.InvalidArgumentValueError(
                "\"outputs\" argument does not have the correct number of target columns."
            )

        log_proba = self._predict_log_proba(inputs, self._weights)

        # Making it always a list, even when only one target.
        if self._n_classes == 1:
            log_proba = [log_proba]
            classes = [self._classes_]
        else:
            classes = self._classes_

        samples_length = inputs.shape[0]

        log_likelihoods = []
        for k in range(self._n_classes):
            # We have to map each class to its internal (numerical) index used in the learner.
            # This allows "outputs" to contain string classes.
            outputs_column = outputs.iloc[:, k]
            classes_map = pandas.Series(np.arange(len(classes[k])), index=classes[k])
            mapped_outputs_column = outputs_column.map(classes_map)

            # For each target column (column in "outputs"), for each sample (row) we pick the log
            # likelihood for a given class.
            log_likelihoods.append(log_proba[k][np.arange(samples_length), mapped_outputs_column])

        results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True)
        results.columns = outputs.columns

        for k in range(self._n_classes):
            column_metadata = outputs.metadata.query_column(k)
            if 'name' in column_metadata:
                results.metadata = results.metadata.update_column(
                    k, {'name': column_metadata['name']})
    else:
        results = d3m_dataframe(generate_metadata=True)

    return CallResult(results)
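# The per-sample pick above relies on NumPy fancy indexing: given an
# (n_samples, n_classes) log-probability matrix and one class index per row,
# indexing with (row_range, class_indices) selects one entry per row. A
# minimal standalone illustration (values are made up):
import numpy as np

log_proba_demo = np.log(np.array([[0.9, 0.1],
                                  [0.2, 0.8],
                                  [0.6, 0.4]]))
class_idx = np.array([0, 1, 0])  # the labeled class for each sample
picked = log_proba_demo[np.arange(3), class_idx]  # log P(y_i = class_idx_i)
assert picked.shape == (3,)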
def produce(self, *, inputs: Inputs, timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
    with stopit.ThreadingTimeout(timeout) as timer:
        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            sk_inputs = inputs.iloc[:, self._training_indices]

        sk_output = self._model.predict(sk_inputs)
        if sparse.issparse(sk_output):
            sk_output = sk_output.toarray()

        output = d3m_dataframe(
            sk_output,
            columns=self._target_names if self._target_names else None,
            generate_metadata=False)
        output.metadata = inputs.metadata.clear(for_value=output, generate_metadata=True)
        output.metadata = self._add_target_semantic_types(
            metadata=output.metadata, target_names=self._target_names, source=self)

        outputs = common_utils.combine_columns(
            return_result=self.hyperparams['return_result'],
            add_index_columns=self.hyperparams['add_index_columns'],
            inputs=inputs,
            column_indices=[],
            columns_list=[output])

    if timer.state == timer.EXECUTED:
        return CallResult(outputs)
    else:
        raise TimeoutError('BBNMLPClassifier exceeded time limit')
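# The timeout guard above uses the `stopit` package: the context manager
# interrupts the block once `timeout` seconds elapse, and `timer.state`
# records whether the block ran to completion. A minimal sketch of the
# pattern, with a sleep standing in for model inference:
import time
import stopit

with stopit.ThreadingTimeout(2.0) as timer:
    time.sleep(0.1)  # stand-in for the actual work

if timer.state == timer.EXECUTED:
    print('finished within the time limit')
else:
    print('timed out')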
def _can_accept(
        cls, *, self, method_name: str,
        arguments: typing.Dict[str, typing.Union[metadata_module.Metadata, type]],
        hyperparams: Hyperparams,
        outputs: Outputs) -> typing.Optional[metadata_module.DataMetadata]:
    output_metadata = super().can_accept(
        method_name=method_name, arguments=arguments, hyperparams=hyperparams)

    if 'inputs' not in arguments:
        return output_metadata

    inputs_metadata = typing.cast(metadata_module.DataMetadata, arguments['inputs'])
    metadata_lookup = cls._parse_metadata(metadata=inputs_metadata)
    # try:
    #     cls._parse_metadata(metadata=inputs_metadata)
    # except:
    #     return None

    num_data = inputs_metadata.query(
        metadata_lookup['primary_resource_id']['selector'])['dimension']['length']

    outputs = d3m_dataframe(data={})
    metadata = outputs.metadata.update(
        (metadata_module.ALL_ELEMENTS, 0),
        inputs_metadata.query(metadata_lookup['targets']['selector']))

    return metadata
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
    outputs = d3m_dataframe(predictions, generate_metadata=True)
    target_columns_metadata = self._copy_inputs_metadata(
        inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams)
    outputs.metadata = self._update_predictions_metadata(
        inputs.metadata, outputs, target_columns_metadata)
    return outputs
def produce(self, *, inputs: Inputs, timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
    with stopit.ThreadingTimeout(timeout) as timer:
        x = self._tfidf.transform(inputs).toarray()
        outputs = d3m_dataframe(x, generate_metadata=False)
        metadata = inputs.metadata.clear(
            {
                'schema': metadata_module.CONTAINER_SCHEMA_VERSION,
                'structural_type': type(outputs),
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'length': outputs.shape[0],
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow']
                }
            },
            for_value=outputs
        ).update(
            (metadata_base.ALL_ELEMENTS,),
            {
                'dimension': {
                    'length': outputs.shape[1],
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn']
                }
            }
        ).update(
            (metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS),
            {
                # 'structural_type': self._v.dtype,
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
            }
        )

        # Set metadata attribute.
        outputs.metadata = metadata

    if timer.state == timer.EXECUTED:
        return CallResult(outputs)
    else:
        raise TimeoutError('BBNTfidfTransformer exceeded time limit')
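# `self._tfidf` behaves like scikit-learn's TfidfTransformer (an assumption
# based on the name): `transform` returns a sparse matrix of tf-idf weights,
# which the primitive densifies with `.toarray()` before wrapping it in a
# DataFrame. A standalone sketch of that call pattern:
from sklearn.feature_extraction.text import TfidfTransformer

counts = [[3, 0, 1],   # term counts per document
          [2, 0, 0],
          [3, 0, 2]]
tfidf = TfidfTransformer().fit(counts)
dense = tfidf.transform(counts).toarray()  # shape: (n_docs, n_terms)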
def setUp(self):
    self.maxDiff = None
    self.n_train = 200
    self.n_test = 100
    self.contamination = 0.1
    self.roc_floor = 0.8

    self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
        n_train=self.n_train, n_test=self.n_test,
        contamination=self.contamination, random_state=42)
    self.X_train = d3m_dataframe(self.X_train, generate_metadata=True)
    self.X_test = d3m_dataframe(self.X_test, generate_metadata=True)

    hyperparams_default = IsolationForest.metadata.get_hyperparams().defaults()
    hyperparams = hyperparams_default.replace({
        'contamination': self.contamination,
        'return_subseq_inds': True,
    })

    self.primitive = IsolationForest(hyperparams=hyperparams)
    self.primitive.set_training_data(inputs=self.X_train)
    self.primitive.fit()
    self.prediction_labels = self.primitive.produce(inputs=self.X_test).value
    self.prediction_score = self.primitive.produce_score(inputs=self.X_test).value

    self.uodbase_test = UODCommonTest(
        model=self.primitive._clf,
        X_train=self.X_train,
        y_train=self.y_train,
        X_test=self.X_test,
        y_test=self.y_test,
        roc_floor=self.roc_floor,
    )
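# `generate_data` here appears to be PyOD's synthetic-benchmark helper
# (pyod.utils.data.generate_data): it draws clustered inliers plus
# `contamination * n` scattered outliers and returns train/test splits with
# binary labels (0 = inlier, 1 = outlier). A quick shape check, assuming the
# PyOD helper and the same unpacking order as the test above:
from pyod.utils.data import generate_data

X_train, y_train, X_test, y_test = generate_data(
    n_train=200, n_test=100, contamination=0.1, random_state=42)
assert X_train.shape[0] == 200 and X_test.shape[0] == 100
assert set(y_train) <= {0, 1}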
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
    """
    Wrap predictions into dataframe

    Args:
        inputs: Container Dataframe
        predictions: array-like data (n_samples, n_features)

    Returns:
        Dataframe
    """
    outputs = d3m_dataframe(predictions, generate_metadata=True)
    target_columns_metadata = self._add_target_columns_metadata(
        outputs.metadata, self.hyperparams)
    outputs.metadata = self._update_predictions_metadata(
        inputs.metadata, outputs, target_columns_metadata)
    return outputs
def produce(self, *, inputs: Inputs, timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
    """
    Arguments:
        - inputs: List( # Data
              List( # Segments
                  [ context0, ..., contextN ], ...  # for N-gram
              )
          )

    Returns:
        - List(d3m_ndarray)
    """
    with stopit.ThreadingTimeout(timeout) as timer:
        x = seq_to_tokenfreq_csr(inputs, self._vocab).toarray()
        # outputs = List([ x[i] for i in range(x.shape[0]) ])
        outputs = d3m_dataframe(x, generate_metadata=False)
        metadata = inputs.metadata.clear(
            {
                'schema': metadata_module.CONTAINER_SCHEMA_VERSION,
                'structural_type': type(outputs),
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'length': outputs.shape[0],
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow']
                }
            },
            for_value=outputs
        ).update(
            (metadata_base.ALL_ELEMENTS,),
            {
                'dimension': {
                    'length': outputs.shape[1],
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn']
                }
            }
        ).update(
            (metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS),
            {
                # 'structural_type': self._v.dtype,
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
            }
        )

        # Set metadata attribute.
        outputs.metadata = metadata

    if timer.state == timer.EXECUTED:
        return CallResult(outputs)
    else:
        raise TimeoutError('SequenceToBagOfTokens exceeded time limit')
def produce(self, *, inputs: Inputs, timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
    if not self._fitted:
        _logger.info("Please run fit() first!")
        return CallResult(None, self._has_finished, self._iterations_done)

    self.n_predict_step = inputs.shape[0]
    self.pred_point_test = tf.slice(
        self.prediction_method,
        (self.n_total - self.n_predict_step, 0), (self.n_predict_step, 1))
    self.pred_lower_test = tf.slice(
        self.prediction_method,
        (self.n_total - self.n_predict_step, 1), (self.n_predict_step, 1))
    self.pred_upper_test = tf.slice(
        self.prediction_method,
        (self.n_total - self.n_predict_step, 2), (self.n_predict_step, 1))

    with tf.Session(config=self.tf_config) as sess:
        sess.run(tf.global_variables_initializer())
        self._load_weights(sess)
        self._current_cell_state = np.zeros(
            (self.hyperparams["n_batch"], self.hyperparams["n_neurons"]),
            dtype=np.float32)
        pred_test, pred_test_lower, pred_test_upper = sess.run(
            [self.pred_point_test, self.pred_lower_test, self.pred_upper_test],
            feed_dict={
                self.batchX_placeholder: self.x,
                self.cell_state: self._current_cell_state,
            })

    pred_test = self.scaler.inverse_transform(pred_test)
    pred_test_lower = self.scaler.inverse_transform(pred_test_lower)
    pred_test_upper = self.scaler.inverse_transform(pred_test_upper)

    # Ensure lower <= point <= upper after the inverse transform.
    pred_test_lower = np.minimum(pred_test, np.minimum(pred_test_lower, pred_test_upper))
    pred_test_upper = np.maximum(pred_test, np.maximum(pred_test_upper, pred_test_lower))

    _logger.info(pred_test.tolist())
    res = pred_test.ravel().tolist()

    output = d3m_dataframe({'d3mIndex': inputs['d3mIndex'], self._target_name: res})
    output.metadata = inputs.metadata.clear(
        source=self, for_value=output, generate_metadata=True)
    meta_d3mIndex = {
        "name": "d3mIndex",
        "structural_type": int,
        "semantic_types": ["http://schema.org/Integer",
                           "https://metadata.datadrivendiscovery.org/types/PrimaryKey"]}
    meta_target = {
        "name": self._target_name,
        "structural_type": float,
        "semantic_types": ["https://metadata.datadrivendiscovery.org/types/Target",
                           "https://metadata.datadrivendiscovery.org/types/PredictedTarget"]}
    output.metadata = output.metadata.update(selector=(ALL_ELEMENTS, 0), metadata=meta_d3mIndex)
    output.metadata = output.metadata.update(selector=(ALL_ELEMENTS, 1), metadata=meta_target)

    self._has_finished = True
    self._iterations_done = True
    return CallResult(output, self._has_finished, self._iterations_done)
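# The min/max clamp above guards against interval bounds crossing the point
# forecast after inverse scaling. A tiny numeric illustration of the same
# element-wise repair (values are made up):
import numpy as np

point = np.array([10.0, 12.0])
lower = np.array([11.0, 9.0])   # first bound erroneously above the point
upper = np.array([9.5, 14.0])   # first bound erroneously below the point
lower_fixed = np.minimum(point, np.minimum(lower, upper))  # -> [ 9.5,  9.0]
upper_fixed = np.maximum(point, np.maximum(upper, lower))  # -> [11.0, 14.0]
assert (lower_fixed <= point).all() and (point <= upper_fixed).all()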
def produce(self, *, inputs: Inputs, timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
    arima_inputs = inputs
    if self.hyperparams['use_semantic_types']:
        sk_inputs = inputs.iloc[:, self._training_indices]

    sk_output = self._clf.predict(n_periods=len(arima_inputs))
    output = d3m_dataframe(sk_output, generate_metadata=False, source=self)
    output.metadata = inputs.metadata.clear(
        source=self, for_value=output, generate_metadata=True)
    output.metadata = self._add_target_semantic_types(
        metadata=output.metadata, target_names=self._target_names, source=self)

    if not self.hyperparams['use_semantic_types']:
        return CallResult(output)

    # outputs = common_utils.combine_columns(
    #     return_result=self.hyperparams['return_result'],
    #     add_index_columns=self.hyperparams['add_index_columns'],
    #     inputs=inputs, column_indices=self._training_indices,
    #     columns_list=[output], source=self)
    return CallResult(output)
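# `self._clf.predict(n_periods=...)` follows the pmdarima-style ARIMA API,
# where prediction asks for a forecast horizon rather than feature rows; the
# horizon here is simply the number of input rows. A standalone sketch,
# assuming pmdarima is the backing library:
import numpy as np
import pmdarima as pm

series = np.sin(np.linspace(0, 20, 100)) + np.random.normal(0, 0.1, 100)
model = pm.ARIMA(order=(2, 0, 1)).fit(series)
forecast = model.predict(n_periods=10)  # the next 10 values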
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
    """
    Wrap predictions into dataframe

    Args:
        inputs: Container Dataframe
        predictions: array-like data (n_samples, n_features)

    Returns:
        Dataframe
    """
    outputs = d3m_dataframe(predictions, generate_metadata=True)
    # target_columns_metadata = self._copy_inputs_metadata(
    #     inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams)
    target_columns_metadata = self._add_target_columns_metadata(
        outputs.metadata, self.hyperparams, self.primitiveNo)
    outputs.metadata = self._update_predictions_metadata(
        inputs.metadata, outputs, target_columns_metadata)
    # print(outputs.metadata.to_internal_simple_structure())
    return outputs
def produce(self, *, inputs: Inputs, timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
    """
    Arguments:
        - inputs: Dataset

    Returns:
        - [ num_samples, num_channels ]
    """
    with stopit.ThreadingTimeout(timeout) as timer:
        metadata_lookup = self.__class__._parse_metadata(metadata=inputs.metadata)
        if not metadata_lookup:
            return None

        primary_key_name = inputs.metadata.query(
            metadata_lookup['primary_key']['selector'])['name']
        targets_name = inputs.metadata.query(
            metadata_lookup['targets']['selector'])['name']

        # outputs = d3m_dataframe(inputs[metadata_lookup['targets']['selector'][0]][metadata_lookup['targets']['selector'][-1]])
        outputs = d3m_dataframe({
            targets_name: inputs[metadata_lookup['targets']['selector'][0]][targets_name]
        })
        outputs.metadata = outputs.metadata.update(
            (metadata_module.ALL_ELEMENTS, 0),
            inputs.metadata.query(metadata_lookup['targets']['selector']))

    if timer.state == timer.EXECUTED:
        return CallResult(outputs)
    else:
        raise TimeoutError('TargetsReader exceeded time limit')
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
    outputs = d3m_dataframe(predictions, generate_metadata=False)
    outputs.metadata = self._update_predictions_metadata(
        inputs.metadata, outputs, self._target_columns_metadata)
    return outputs
def test_basic(self):
    self.maxDiff = None
    dataset_fname = os.path.join(
        this_path,
        '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv')
    dataset = pd.read_csv(dataset_fname)
    main = d3m_dataframe(dataset, generate_metadata=True)

    hyperparams_class = SKPowerTransformer.SKPowerTransformer.metadata.get_hyperparams()
    primitive = SKPowerTransformer.SKPowerTransformer(
        hyperparams=hyperparams_class.defaults())
    primitive.set_training_data(inputs=main)
    primitive.fit()
    new_main = primitive.produce(inputs=main).value
    test_data = new_main.values[:, 2]
    std_normal_samples = np.random.randn(len(test_data))

    # Optionally plot the transformed distribution against a standard Gaussian:
    # import matplotlib.pyplot as plt
    # plt.hist(test_data, bins=100, alpha=0.6)
    # plt.hist(std_normal_samples, bins=100, alpha=0.6)
    # plt.legend(labels=['PowerTransformer', 'Standard Gaussian'], loc='best')
    # plt.savefig('./fig/test_SKPowerTransformer.png')
    # plt.close()

    # Check that the transformed column is centered and standardized.
    new_mean, new_std = test_data.mean(), test_data.std()
    mean_mse = new_mean ** 2
    std_mse = (new_std - 1) ** 2
    self.assertAlmostEqual(float(mean_mse), 0., delta=1e-5)
    self.assertAlmostEqual(float(std_mse), 0., delta=1e-5)

    self.assertEqual(
        utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
        [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 7027,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'd3mIndex',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {
                'name': 'timestamp',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 2],
            'metadata': {
                'name': 'value',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 3],
            'metadata': {
                'name': 'ground_truth',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }])

    params = primitive.get_params()
    primitive.set_params(params=params)
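# The mean/std assertions work because scikit-learn's PowerTransformer (which
# SKPowerTransformer presumably wraps, given the name) applies a Yeo-Johnson
# transform followed by zero-mean, unit-variance standardization by default.
# A standalone sketch of that property:
import numpy as np
from sklearn.preprocessing import PowerTransformer

rng = np.random.RandomState(42)
skewed = rng.exponential(scale=2.0, size=(5000, 1))  # heavily right-skewed
transformed = PowerTransformer().fit_transform(skewed)
print(transformed.mean(), transformed.std())  # both close to 0 and 1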
def test_basic(self):
    self.maxDiff = None
    curr_path = os.path.dirname(__file__)
    dataset_fname = os.path.join(
        curr_path,
        '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv')
    dataset = pd.read_csv(dataset_fname)
    value = dataset['value']
    main = d3m_dataframe(value, generate_metadata=True)

    ################## Test Wavelet transform ##################
    hyperparams_default = WaveletTransformer.metadata.get_hyperparams().defaults()
    hyperparams = hyperparams_default.replace({
        'wavelet': 'db8',
        'level': 2,
        'inverse': 0,
        'return_result': 'new'
    })

    primitive = WaveletTransformer(hyperparams=hyperparams)
    new_main = primitive.produce(inputs=main).value

    self.assertEqual(
        utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
        [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 3521,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'value',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {
                'name': 'output_1',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 2],
            'metadata': {
                'name': 'output_2',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }])

    ################## Test inverse transform ##################
    hyperparams = hyperparams_default.replace({'inverse': 1})
    primitive = WaveletTransformer(hyperparams=hyperparams)
    main_recover = primitive.produce(inputs=main).value
    # unittest's assertAlmostEqual cannot compare lists with a delta, so
    # check the reconstruction element-wise instead.
    np.testing.assert_allclose(main_recover.values, main.values, atol=1e-6)

    self.assertEqual(
        utils.to_json_structure(main_recover.metadata.to_internal_simple_structure()),
        [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 7027,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 1,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'value',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }])

    params = primitive.get_params()
    primitive.set_params(params=params)
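# The 'db8'/level-2 decomposition above matches PyWavelets' discrete wavelet
# API, which WaveletTransformer presumably wraps: wavedec returns one
# approximation array plus `level` detail arrays (hence the three output
# columns, each roughly half the input length), and waverec inverts them up
# to floating-point error. A standalone sketch:
import numpy as np
import pywt

signal = np.random.randn(7027)
coeffs = pywt.wavedec(signal, 'db8', level=2)  # [cA2, cD2, cD1]
reconstructed = pywt.waverec(coeffs, 'db8')
np.testing.assert_allclose(reconstructed[:len(signal)], signal, atol=1e-8)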
def produce(self, *, inputs: Inputs, timeout: float = None,
            iterations: int = None) -> CallResult[Outputs]:
    with stopit.ThreadingTimeout(timeout) as timer:
        num_data = len(inputs)
        outputs = np.empty((num_data, self.hyperparams['ivec_dim']), dtype=self._v.dtype)
        VtV = compute_VtV(self._v, self._gmm.n_components)
        I = np.eye(self.hyperparams['ivec_dim'], dtype=self._v.dtype)

        for idx in range(num_data):
            X = inputs[idx]
            if len(X.shape) != 2:
                outputs[idx] = np.zeros((self.hyperparams['ivec_dim']))
                continue
            gamma = self._gmm.predict_proba(X)
            N0 = gamma.T.sum(axis=1)
            F0 = gamma.T.dot(X)
            N0, F0 = normalize_stats(N0, F0, self._gmm.means_,
                                     self._gmm.precisions_cholesky_)
            ivec = estimate_i(row(N0.astype(self._v.dtype)),
                              row(F0.astype(self._v.dtype)),
                              self._v, VtV, I)
            outputs[idx] = ivec.flatten()

        # Optionally length-normalize the extracted i-vectors.
        if self.hyperparams['ivec_normalize']:
            outputs = preprocessing.normalize(outputs, norm='l2')

        outputs = d3m_dataframe(outputs, generate_metadata=False)
        metadata = inputs.metadata.clear(
            {
                'schema': metadata_module.CONTAINER_SCHEMA_VERSION,
                'structural_type': type(outputs),
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'length': outputs.shape[0],
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow']
                }
            },
            for_value=outputs
        ).update(
            (metadata_base.ALL_ELEMENTS,),
            {
                'dimension': {
                    'length': outputs.shape[1],
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn']
                }
            }
        ).update(
            (metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS),
            {
                # 'structural_type': self._v.dtype,
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
            }
        )

        # Set metadata attribute.
        outputs.metadata = metadata

    if timer.state == timer.EXECUTED:
        return CallResult(outputs)
    else:
        raise TimeoutError('IVectorExtractor exceeded time limit')
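# The zeroth- and first-order statistics above come straight from the GMM
# posteriors: N0[c] sums the responsibilities of component c over all frames,
# and F0[c] is the responsibility-weighted sum of the frames. A minimal
# sketch with scikit-learn's GaussianMixture (dimensions are made up):
import numpy as np
from sklearn.mixture import GaussianMixture

X = np.random.randn(500, 13)                # e.g. 500 frames of 13-dim features
gmm = GaussianMixture(n_components=8, random_state=0).fit(X)
gamma = gmm.predict_proba(X)                # (500, 8) responsibilities
N0 = gamma.T.sum(axis=1)                    # (8,)    zeroth-order stats
F0 = gamma.T.dot(X)                         # (8, 13) first-order stats
assert np.isclose(N0.sum(), X.shape[0])     # each row of gamma sums to 1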