# Example #1
# 0
    def _gen_metadata(self):
        """Build Metadata for this datastream from its Spark schema.

        Returns an empty ``Metadata`` object when the underlying data is a
        ``GroupedData`` (grouped frames expose no usable schema); otherwise
        returns metadata with one data descriptor per schema field plus the
        DataStream module information.
        """
        from pyspark.sql.group import GroupedData

        # Grouped data has no accessible schema — return a bare container.
        if isinstance(self._data, GroupedData):
            return Metadata()

        metadata = Metadata()
        # One descriptor per column, carrying the column's name and Spark type.
        for column in self._data.schema.fields:
            descriptor = DataDescriptor().set_name(
                str(column.name)).set_type(str(column.dataType))
            metadata.add_dataDescriptor(descriptor)

        module_info = ModuleMetadata().set_name(
            "cerebralcortex.core.datatypes.datastream.DataStream"
        ).set_attribute("url", "https://md2k.org").set_author(
            "Nasir Ali", "*****@*****.**")
        metadata.add_module(module_info)
        return metadata
def get_metadata(stress_imputed_data, output_stream_name, input_stream_name):
    """
    Generate metadata for a stress-imputed datastream.

    Builds a Metadata object named after the output stream, records the
    input stream it was derived from, adds one data descriptor per field
    of the datastream's schema, and attaches module/author information.

    Args:
        stress_imputed_data (DataStream): datastream whose schema supplies
            the data descriptors.
        output_stream_name (str): name assigned to the resulting stream.
        input_stream_name (str): name of the stream this output derives from.

    Returns:
        Metadata: fully populated metadata for the output stream.
    """
    schema = stress_imputed_data.schema
    stream_metadata = Metadata()
    stream_metadata.set_name(output_stream_name).set_description("stress imputed")\
        .add_input_stream(input_stream_name)
    # One descriptor per column, carrying the column's name and Spark type.
    for field in schema.fields:
        stream_metadata.add_dataDescriptor(DataDescriptor().set_name(
            str(field.name)).set_type(str(field.dataType)))
    stream_metadata.add_module(
        ModuleMetadata().set_name("stress forward fill imputer") \
            .set_attribute("url", "https://md2k.org").set_author(  # fixed typo: was "hhtps://"
            "Md Azim Ullah", "*****@*****.**"))
    return stream_metadata