Ejemplo n.º 1
0
    def produce(self,
                *,
                inputs: Input,
                timeout: float = None,
                iterations: int = None) -> CallResult[Output]:
        """Fold the configured column groups of ``inputs`` into a new frame.

        The column groups are read from ``self._mapping['foldable_columns']``.
        The input is returned unchanged when there is no group to fold, when
        ``inputs`` is ``None``, or when ``inputs`` has more than 20000 rows.

        Args:
            inputs: Frame to process; it must expose ``shape``, ``columns``
                and d3m-style ``metadata``.
            timeout: Accepted for interface compatibility; not used here.
            iterations: Accepted for interface compatibility; not used here.

        Returns:
            A ``CallResult`` wrapping either the untouched ``inputs`` (one of
            the pass-through cases above) or the folded frame, whose top-level
            ``dimension.length`` metadata is set to its row count.
        """
        columns_list_to_fold = self._mapping.get('foldable_columns', [])
        # The None test has to come before ``inputs.shape`` is touched;
        # checking it afterwards (as was done previously) can never trigger.
        if len(columns_list_to_fold) == 0 or inputs is None:
            return CallResult(inputs, True, 1)
        if inputs.shape[0] > 20000:
            return CallResult(inputs, True, 1)

        self._column_names = list(inputs)

        # NOTE(review): every pass folds the original ``inputs`` and
        # overwrites ``df``, so only the result for the last column group is
        # kept. Confirm whether the folds were meant to be chained.
        df = None
        for columns_to_fold in columns_list_to_fold:
            df = self._fold_columns(inputs, columns_to_fold)

        # Positions of the input columns that are absent from the folded frame.
        cols_to_drop = [
            col_idx for col_idx, col_name in enumerate(inputs.columns)
            if col_name not in df.columns
        ]
        inputs = utils.remove_columns(inputs, cols_to_drop)

        # Start from a zero-row slice of the trimmed input and fill every
        # remaining column from the folded frame.
        # NOTE(review): presumably done so the per-column metadata of
        # ``inputs`` carries over to the result; confirm.
        new_df = inputs[0:0]
        for col_name in new_df.columns:
            new_df.loc[:, col_name] = df.loc[:, col_name]

        # Columns that exist only in the folded frame are appended separately.
        extends = {
            col_name: df.loc[:, col_name].tolist()
            for col_name in df.columns
            if col_name not in new_df.columns
        }

        if extends:
            extends_df = d3m_DataFrame.from_dict(extends)
            extends_df.index = new_df.index.copy()
            new_df = utils.append_columns(new_df, extends_df)
            new_df = self._update_type(new_df, list(extends))

        # Refresh the top-level row count recorded in the metadata.
        old_metadata = dict(new_df.metadata.query(()))
        old_metadata["dimension"] = dict(old_metadata["dimension"])
        old_metadata["dimension"]["length"] = new_df.shape[0]
        new_df.metadata = new_df.metadata.update((), old_metadata)

        # ``new_df`` has already been dereferenced above, so it cannot be
        # None here; the former fallback to ``inputs`` was unreachable.
        return CallResult(new_df, True, 1)
Ejemplo n.º 2
0
def update_type(extends, df_origin):
    """Append the ``extends`` columns to ``df_origin`` and tag their types.

    Each appended column is coerced with ``pd.to_numeric(errors='coerce')``
    and its ``semantic_types`` metadata is replaced according to the result:

    * more than 90% of the values fail to parse: ``CategoricalData`` when
      ``HelperFunction.is_categorical`` accepts the column, otherwise ``Text``;
    * otherwise ``Integer`` when more than 90% of all rows hold a whole
      number, else ``Float``.

    The ``Attribute`` semantic type is added in every case.

    Args:
        extends: Mapping of column name to column values, passed to
            ``d3m_DataFrame.from_dict``.
        df_origin: Frame the new columns are appended to.

    Returns:
        The frame returned by ``utils.append_columns`` with updated metadata
        for each column named in ``extends``.
    """
    appended = d3m_DataFrame.from_dict(extends)
    if extends != {}:
        # Give the new columns the same index as the frame they join.
        appended.index = df_origin.index.copy()
    result = utils.append_columns(df_origin, appended)

    # Resolve every column position up front, before metadata is modified.
    positions = [result.columns.get_loc(name) for name in extends]

    for pos in positions:
        selector = (mbase.ALL_ELEMENTS, pos)
        column_meta = dict(result.metadata.query(selector))

        as_numbers = pd.to_numeric(result.iloc[:, pos], errors='coerce')
        row_count = as_numbers.shape[0]
        unparsed = as_numbers.isnull().sum()

        if unparsed / row_count > 0.9:
            # Mostly non-numeric: decide between categorical and free text.
            if HelperFunction.is_categorical(result.iloc[:, pos]):
                inferred = "https://metadata.datadrivendiscovery.org/types/CategoricalData"
            else:
                inferred = "http://schema.org/Text"
        else:
            # NaN never equals 0, so unparsed rows count as non-integers here.
            is_whole = (as_numbers % 1) == 0
            if np.sum(is_whole) / row_count > 0.9:
                inferred = "http://schema.org/Integer"
            else:
                inferred = "http://schema.org/Float"

        column_meta['semantic_types'] = (
            inferred,
            "https://metadata.datadrivendiscovery.org/types/Attribute",
        )
        result.metadata = result.metadata.update(selector, column_meta)

    return result