def produce(self, *, inputs: Input, timeout: float = None, iterations: int = None) -> CallResult[Output]:
    """Fold the column groups listed under ``'foldable_columns'`` in ``self._mapping``.

    ``inputs`` is returned unchanged when no foldable groups are recorded or
    when it has more than 20000 rows. Otherwise each group is handed to
    ``self._fold_columns``; columns of ``inputs`` that are missing from the
    folded frame are removed, columns that exist only in the folded frame are
    appended and typed through ``self._update_type``, and the top-level
    metadata ``dimension.length`` is set to the resulting row count.

    Args:
        inputs: Frame to fold. Its column names are stored in
            ``self._column_names``.
        timeout: Not used by this method.
        iterations: Not used by this method.

    Returns:
        ``CallResult`` wrapping the folded frame, or ``inputs`` itself on the
        pass-through paths.
    """
    fold_groups = self._mapping.get('foldable_columns', [])

    # Pass through when there is nothing to fold or the frame has more than
    # 20000 rows.
    if len(fold_groups) == 0 or inputs.shape[0] > 20000:
        return CallResult(inputs, True, 1)

    if inputs is not None:
        self._column_names = list(inputs)
    else:
        self._column_names = []

    # Every group is folded from the same `inputs`; only the result of the
    # last group survives the loop.
    folded = None
    for group in fold_groups:
        folded = self._fold_columns(inputs, group)

    # Positions of input columns that no longer exist after folding.
    dropped_positions = [
        position
        for position, name in enumerate(inputs.columns)
        if name not in folded.columns
    ]
    inputs = utils.remove_columns(inputs, dropped_positions)

    # Start from a zero-row slice of the remaining columns and fill each one
    # from the folded frame.
    new_df = inputs[0:0]
    for name in new_df.columns:
        new_df.loc[:, name] = folded.loc[:, name]

    # Columns present only in the folded frame, as plain lists keyed by name.
    added_columns = {
        name: folded.loc[:, name].tolist()
        for name in folded.columns
        if name not in new_df.columns
    }

    if added_columns:
        added_df = d3m_DataFrame.from_dict(added_columns)
        added_df.index = new_df.index.copy()
        new_df = utils.append_columns(new_df, added_df)
        new_df = self._update_type(new_df, list(added_columns))

        # NOTE(review): block nesting was reconstructed from a
        # whitespace-collapsed source; confirm the metadata refresh below
        # belongs inside this branch.
        root_metadata = dict(new_df.metadata.query(()))
        dimension = dict(root_metadata["dimension"])
        dimension["length"] = new_df.shape[0]
        root_metadata["dimension"] = dimension
        new_df.metadata = new_df.metadata.update((), root_metadata)

    result = new_df if new_df is not None else inputs
    return CallResult(result, True, 1)
def update_type(extends, df_origin):
    """Append the ``extends`` columns to ``df_origin`` and set their semantic types.

    Each appended column is converted with ``pd.to_numeric(errors='coerce')``
    and classified from the result:

    * more than 90% of values fail the conversion: ``CategoricalData`` when
      ``HelperFunction.is_categorical`` accepts the column, otherwise ``Text``;
    * otherwise ``Integer`` when more than 90% of the values are whole
      numbers, else ``Float``.

    The ``Attribute`` semantic type is added to every appended column.

    Args:
        extends: Mapping of new column name to column values, passed to
            ``d3m_DataFrame.from_dict``.
        df_origin: Frame the new columns are appended to. Its index is copied
            onto the new columns when ``extends`` is not empty.

    Returns:
        The frame returned by ``utils.append_columns`` with the column
        metadata of the appended columns updated.
    """
    attribute_type = "https://metadata.datadrivendiscovery.org/types/Attribute"

    appended = d3m_DataFrame.from_dict(extends)
    if extends != {}:
        appended.index = df_origin.index.copy()
    new_df = utils.append_columns(df_origin, appended)

    # Resolve every new column name to its position in the combined frame
    # before any metadata is touched.
    positions = [new_df.columns.get_loc(name) for name in extends]

    for position in positions:
        selector = (mbase.ALL_ELEMENTS, position)
        column_metadata = dict(new_df.metadata.query(selector))

        column = new_df.iloc[:, position]
        numerics = pd.to_numeric(column, errors='coerce')
        length = numerics.shape[0]
        nan_count = numerics.isnull().sum()

        if nan_count / length > 0.9:
            # Almost nothing parsed as a number: categorical or free text.
            if HelperFunction.is_categorical(column):
                primary_type = "https://metadata.datadrivendiscovery.org/types/CategoricalData"
            else:
                primary_type = "http://schema.org/Text"
        else:
            # NaN % 1 is NaN, so values that failed to parse never count as
            # whole numbers here.
            is_whole = (numerics % 1) == 0
            if np.sum(is_whole) / length > 0.9:
                primary_type = "http://schema.org/Integer"
            else:
                primary_type = "http://schema.org/Float"

        column_metadata['semantic_types'] = (primary_type, attribute_type)
        new_df.metadata = new_df.metadata.update(selector, column_metadata)

    return new_df