def generate_metadata_notif():
    """Build the metadata for the ``mcontain-md2k--user-notifications`` stream.

    Registers six columns, in this order: ``user``, ``timestamp``,
    ``localtime``, ``message``, ``day`` and ``version``, then attaches a
    single module entry (version 1).

    Returns:
        Metadata: the populated stream metadata object.
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("user", "string", "user id"),
        ("timestamp", "timestamp", "Unix timestamp when the message was generated"),
        ("localtime", "timestamp", "Local timestamp when the message was generated."),
        ("message", "string", "Generated notification message"),
        ("day", "timestamp", "day of the encounter"),
        ("version", "int", "version"),
    )

    stream_metadata = Metadata()
    builder = stream_metadata.set_name('mcontain-md2k--user-notifications').set_description(
        'Notification generated for the Covid-19 encountered users.')
    # Thread the fluent return value through every call, exactly as one long
    # chained expression would.
    for col_name, col_type, col_help in columns:
        builder = builder.add_dataDescriptor(
            DataDescriptor().set_name(col_name).set_type(col_type).set_attribute("description", col_help))

    module = ModuleMetadata().set_name(
        'Generated notification for a user encountered with Covid-19 participant'
    ).set_attribute("url", "https://mcontain.md2k.org").set_author(
        "Md Shiplu Hawlader", "*****@*****.**"
    ).set_version(1)
    stream_metadata.add_module(module)
    return stream_metadata
def generate_metadata_hourly():
    """Build the metadata for the ``mcontain-md2k--visualization-stats--time-window`` stream.

    Registers eight columns, in this order: ``start_time``, ``end_time``,
    ``latitude``, ``longitude``, ``n_users``, ``total_encounters``,
    ``normalized_total_encounters`` and ``avg_encounters``, then attaches a
    single module entry.

    Returns:
        Metadata: the populated stream metadata object.
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the time window localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("latitude", "double",
         "Latitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("longitude", "double",
         "Longitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("n_users", "integer", "Number of unique users in that cluster centroid"),
        ("total_encounters", "double",
         "Total encounters happening in the time window in this specific location"),
        # This description used to be broken across two physical lines inside
        # a single-quoted literal; it is kept as one logical string here.
        ("normalized_total_encounters", "double",
         ("Total encounters normalized by the centroid area. "
          "(encounters per 10 square meter)")),
        ("avg_encounters", "double",
         "average encounter per participant(participants who had at least one encounter)"),
    )

    stream_metadata = Metadata()
    builder = stream_metadata.set_name('mcontain-md2k--visualization-stats--time-window').set_description(
        'Computes visualization stats every time window defined by start time and end time')
    # Thread the fluent return value through every call, exactly as one long
    # chained expression would.
    for col_name, col_type, col_help in columns:
        builder = builder.add_dataDescriptor(
            DataDescriptor().set_name(col_name).set_type(col_type).set_attribute("description", col_help))

    module = ModuleMetadata().set_name(
        'Visualization stats computation in a time window between start time and end time'
    ).set_attribute("url", "https://mcontain.md2k.org").set_author(
        "Md Azim Ullah", "*****@*****.**"
    )
    stream_metadata.add_module(module)
    return stream_metadata
def generate_metadata_dailystats():
    """Build the metadata for the ``mcontain-md2k--daily-stats`` stream.

    Registers six columns, in this order: ``start_time``, ``end_time``,
    ``number_of_app_users``, ``encounter_per_user``,
    ``total_covid_encounters`` and ``maximum_concurrent_encounters``, then
    attaches a single module entry.

    Returns:
        Metadata: the populated stream metadata object.
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the day in localtime"),
        ("end_time", "timestamp", "End time of the day in localtime"),
        ("number_of_app_users", "double", "Total number of app users"),
        ("encounter_per_user", "double", "Average encounter per user"),
        ("total_covid_encounters", "double", "Total covid encounters on the day"),
        ("maximum_concurrent_encounters", "double", "Maximum concurrent encounters"),
    )

    stream_metadata = Metadata()
    builder = stream_metadata.set_name('mcontain-md2k--daily-stats').set_description(
        'Daily stats for website')
    # Thread the fluent return value through every call, exactly as one long
    # chained expression would.
    for col_name, col_type, col_help in columns:
        builder = builder.add_dataDescriptor(
            DataDescriptor().set_name(col_name).set_type(col_type).set_attribute("description", col_help))

    module = ModuleMetadata().set_name(
        'Daily encounter stats for all the users to be shown in website'
    ).set_attribute("url", "https://mcontain.md2k.org").set_author(
        "Md Azim Ullah", "*****@*****.**"
    )
    stream_metadata.add_module(module)
    return stream_metadata
def generate_metadata_encounter_daily():
    """Build the metadata for the ``mcontain-md2k-encounter-daily--bluetooth-gps`` stream.

    Registers eight columns, in this order: ``start_time``, ``end_time``,
    ``participant_identifier``, ``os``, ``latitude``, ``longitude``,
    ``durations`` and ``covid``, then attaches a single module entry.

    Returns:
        Metadata: the populated stream metadata object.
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the encounter in localtime"),
        ("end_time", "timestamp", "End time of the encounter in localtime"),
        ("participant_identifier", "string", "Participant with whom encounter happened"),
        ("os", "string", "Operating system of the phone belonging to user"),
        ("latitude", "double", "Latitude of encounter location"),
        ("longitude", "double", "Longitude of encounter location"),
        # NOTE(review): the description below talks about mean distance while
        # the column is named `durations` -- confirm the intended wording.
        ("durations", "array", "Mean distance between participants in encounter"),
        ("covid", "integer",
         "0, 1 or 2 indicating if this encounter contained a covid user -- 0 - no covid-19 affected, 1 - user is, 2 - participant identifier is"),
    )

    stream_metadata = Metadata()
    builder = stream_metadata.set_name('mcontain-md2k-encounter-daily--bluetooth-gps').set_description(
        'Contains each unique encounters between two persons along with the location of encounter')
    # Thread the fluent return value through every call, exactly as one long
    # chained expression would.
    for col_name, col_type, col_help in columns:
        builder = builder.add_dataDescriptor(
            DataDescriptor().set_name(col_name).set_type(col_type).set_attribute("description", col_help))

    module = ModuleMetadata().set_name(
        'Encounter computation after parsing raw bluetooth-gps data, clustering gps locations and removing double counting'
    ).set_attribute("url", "https://mcontain.md2k.org").set_author(
        "Md Azim Ullah", "*****@*****.**"
    )
    stream_metadata.add_module(module)
    return stream_metadata
def _gen_metadata(self):
    """Derive stream metadata from the schema of ``self._data``.

    Returns:
        Metadata: an empty ``Metadata`` when ``self._data`` is a pyspark
        ``GroupedData``; otherwise a ``Metadata`` carrying one data
        descriptor per schema field (field name and stringified data type)
        and a single module entry.
    """
    # Imported at call time rather than at module import.
    from pyspark.sql.group import GroupedData

    # Grouped data is handled by returning blank metadata straight away.
    if isinstance(self._data, GroupedData):
        return Metadata()

    inferred = Metadata()
    for column in self._data.schema.fields:
        descriptor = DataDescriptor().set_name(str(column.name)).set_type(str(column.dataType))
        inferred.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name(
        "cerebralcortex.core.datatypes.datastream.DataStream"
    ).set_attribute("url", "https://md2k.org").set_author(
        "Nasir Ali", "*****@*****.**"
    )
    inferred.add_module(module)
    return inferred
def generate_metadata_user_encounter_count():
    """Build the metadata for the ``mcontain-md2k--user--encounter-count`` stream.

    Registers three columns, in this order: ``start_time``, ``end_time`` and
    ``encounter_count``, then attaches a single module entry (version 1).

    Returns:
        Metadata: the populated stream metadata object.
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the time window in localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("encounter_count", "int", "Total number of encounter for the user in the given time window"),
    )

    stream_metadata = Metadata()
    builder = stream_metadata.set_name('mcontain-md2k--user--encounter-count').set_description(
        'Number of encounter in a given time window')
    # Thread the fluent return value through every call, exactly as one long
    # chained expression would.
    for col_name, col_type, col_help in columns:
        builder = builder.add_dataDescriptor(
            DataDescriptor().set_name(col_name).set_type(col_type).set_attribute("description", col_help))

    # NOTE(review): the second set_author argument looks like e-mail
    # obfuscation residue; it is kept verbatim because it is stored metadata.
    module = ModuleMetadata().set_name(
        'Total number of encounter for a user in a given time window'
    ).set_attribute("url", "https://mcontain.md2k.org").set_author(
        "Md Shiplu Hawlader, Md Azim Ullah", "[email protected], [email protected]"
    ).set_version(1)
    stream_metadata.add_module(module)
    return stream_metadata
def generate_metadata_notification_daily():
    """Build the metadata for the ``mcontain-md2k--crowd--notification--daily`` stream.

    Registers ten columns, in this order: ``start_time``, ``end_time``,
    ``centroid_latitude``, ``centroid_longitude``, ``centroid_area``,
    ``durations``, ``unique_users``, ``total_encounters``,
    ``normalized_total_encounters`` and ``avg_encounters``, then attaches a
    single module entry.

    Returns:
        Metadata: the populated stream metadata object.
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the time window localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("centroid_latitude", "double",
         "Latitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("centroid_longitude", "double",
         "Longitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("centroid_area", "double", "area of centroid"),
        ("durations", "double", "duration of stay in the centroid in hours"),
        ("unique_users", "integer", "Number of unique users in that cluster centroid"),
        ("total_encounters", "double",
         "Total encounters happening in the time window in this specific location"),
        # This description used to be broken across two physical lines inside
        # a single-quoted literal; it is kept as one logical string here.
        ("normalized_total_encounters", "double",
         ("Total encounters normalized by the centroid area. "
          "(encounters per 10 square meter)")),
        ("avg_encounters", "double",
         "average encounter per participant(participants who had at least one encounter)"),
    )

    stream_metadata = Metadata()
    builder = stream_metadata.set_name('mcontain-md2k--crowd--notification--daily').set_description(
        'Computes notifications for each user who dwelled in a crowded hotspot')
    # Thread the fluent return value through every call, exactly as one long
    # chained expression would.
    for col_name, col_type, col_help in columns:
        builder = builder.add_dataDescriptor(
            DataDescriptor().set_name(col_name).set_type(col_type).set_attribute("description", col_help))

    module = ModuleMetadata().set_name(
        'Notification messages to be shown to each user'
    ).set_attribute("url", "https://mcontain.md2k.org").set_author(
        "Md Azim Ullah", "*****@*****.**"
    )
    stream_metadata.add_module(module)
    return stream_metadata
def get_metadata(stress_imputed_data, output_stream_name, input_stream_name):
    """Generate metadata for a stress-imputed datastream.

    Args:
        stress_imputed_data: object whose ``schema.fields`` is iterated; each
            field's ``name`` and ``dataType`` become one data descriptor.
            (Documented upstream as a DataStream -- TODO confirm it exposes
            ``schema`` directly.)
        output_stream_name (str): name assigned to the generated metadata.
        input_stream_name (str): name recorded as the input stream.

    Returns:
        Metadata: metadata named ``output_stream_name`` with the description
        "stress imputed", the input stream, one data descriptor per schema
        field and a single module entry.
    """
    stream_metadata = Metadata()
    stream_metadata.set_name(output_stream_name).set_description(
        "stress imputed").add_input_stream(input_stream_name)

    # Mirror the data schema: one descriptor per field, types stringified.
    for field in stress_imputed_data.schema.fields:
        descriptor = DataDescriptor().set_name(str(field.name)).set_type(str(field.dataType))
        stream_metadata.add_dataDescriptor(descriptor)

    # The URL scheme was previously misspelled as "hhtps".
    module = ModuleMetadata().set_name(
        "stress forward fill imputer"
    ).set_attribute("url", "https://md2k.org").set_author(
        "Md Azim Ullah", "*****@*****.**"
    )
    stream_metadata.add_module(module)
    return stream_metadata