def process_save_stream(msg: dict, cc_config_path: str):
    """
    Process one kafka message: read the referenced parquet file, add gaussian
    noise to its data, and store the result as a new stream.

    Args:
        msg (dict): kafka message -
            {'filename': str, 'metadata_hash': str, "stream_name": str, "user_id": str}
        cc_config_path (str): path of cerebralcortex configs

    Raises:
        Exception: if the configured nosql storage is neither filesystem nor hdfs.

    Notes:
        This method creates the CC object again. This code is running on a
        worker node, so it won't have access to the CC object created in run().
        The CC object cannot be passed to worker nodes because it contains
        sockets, and sockets cannot be serialized in spark to pass as a parameter.
    """
    # Disable pandas warnings
    warnings.simplefilter(action='ignore', category=FutureWarning)

    # Re-create the kernel locally; spark stays disabled on worker nodes.
    CC = Kernel(cc_config_path, enable_spark=False)
    cc_config = CC.config
    stream_name = msg.get("stream_name")
    user_id = msg.get("user_id")

    # Resolve the file location from the configured storage backend.
    # NOTE(review): plain string concatenation — assumes the configured base
    # path ends with a separator; confirm against the yml configs.
    if cc_config["nosql_storage"] == "filesystem":
        file_name = str(cc_config["filesystem"]["filesystem_path"]) + msg.get("filename")
    elif cc_config["nosql_storage"] == "hdfs":
        file_name = str(cc_config["hdfs"]["raw_files_dir"]) + msg.get("filename")
    else:
        raise Exception(
            str(cc_config["nosql_storage"]) + " is not supported. Please use filesystem or hdfs.")

    if os.path.exists(file_name):
        data = pq.read_table(file_name)
        pdf = data.to_pandas()

        pdf = add_gaussian_noise(pdf)

        new_stream_name = stream_name + "_gaussian_noise"

        # BUG FIX: the module description below used to contain a raw line
        # break that terminated the string literal mid-sentence; it is now a
        # single well-formed string.
        metadata = Metadata().set_name(new_stream_name).set_description(
            "Gaussian noise added to the accel sensor stream.") \
            .add_dataDescriptor(
                DataDescriptor().set_attribute("description", "noisy accel x")) \
            .add_dataDescriptor(
                DataDescriptor().set_attribute("description", "noisy accel y")) \
            .add_dataDescriptor(
                DataDescriptor().set_attribute("description", "noisy accel z")) \
            .add_module(
                ModuleMetadata().set_name("cerebralcortex.streaming_operation.main")
                .set_version("0.0.1")
                .set_attribute(
                    "description",
                    "Spark streaming example using CerebralCortex. This example adds gaussian noise to a stream data.")
                .set_author("test_user", "test_user@test_email.com"))

        pdf["user"] = user_id
        ds = DataStream(data=pdf, metadata=metadata)

        # Persist the noisy stream under its new name.
        CC.save_stream(ds)
    else:
        print(file_name, "does not exist.")
def run():
    """
    This example:
     - Make call to CerebralCortex-APIServer to:
        - Authenticate a user
        - Register a new stream (`accelerometer--org.md2k.phonesensor--phone`)
        - Upload sample data
     - Create Pyspark-Kafka direct stream
     - Read parquet data and convert it into pandas dataframe
     - Add gaussian noise in sample data
     - Store noisy data as a new stream
     - Retrieve and print noisy/clean data streams
    """
    # upload sample data and publish messages on Kafka
    #rest_api_client("http://0.0.0.0:8089/")

    # create cerebralcortex object
    cc_config_path = "../../conf/"
    CC = Kernel(cc_config_path, enable_spark_ui=True)

    sample_stream_name = "accelerometer--org.md2k.phonesensor--phone"
    upload_stream_data(
        "http://localhost/", "demo", "demo", sample_stream_name,
        "../../resources/sample_data/msgpack_files/phone_accel.msgpack.gz")

    # raise Exception if the messaging service is disabled in the configs
    if CC.config["messaging_service"] == "none":
        raise Exception(
            "Messaging service is disabled (none) in cerebralcortex.yml. Please update configs."
        )

    # Kafka Consumer Configs
    # BUG FIX: removed a leftover debug print of `user_metadata` — that name is
    # never defined in this function and would raise NameError at runtime.
    spark_context = get_or_create_sc(type="sparkContext")

    # Batch interval comes from the kafka ping interval in the configs.
    ssc = StreamingContext(spark_context,
                           int(CC.config["kafka"]["ping_kafka"]))
    kafka_files_stream = CC.MessagingQueue.create_direct_kafka_stream(
        "filequeue", ssc)
    if kafka_files_stream is not None:
        # Each RDD of kafka messages is processed on the workers.
        kafka_files_stream.foreachRDD(
            lambda rdd: iterate_on_rdd(rdd, cc_config_path))

    ssc.start()
    ssc.awaitTermination(timeout=15)
    ssc.stop()

    # Re-create the kernel after the streaming context has stopped, then show
    # both the original and the noise-added streams for comparison.
    CC = Kernel(cc_config_path, enable_spark_ui=True)
    print("*" * 15, "CLEAN DATA", "*" * 15)
    ds_clean = CC.get_stream(stream_name=sample_stream_name)
    ds_clean.show(5, truncate=False)

    print("*" * 15, "NOISY DATA", "*" * 15)
    ds_noise = CC.get_stream(stream_name=sample_stream_name + "_gaussian_noise")
    ds_noise.show(5, truncate=False)
required=True)
# NOTE(review): this chunk begins inside an earlier parser.add_argument(...)
# call whose opening is not visible here; the closing `required=True)` above
# belongs to that call.
parser.add_argument(
    '-u',
    '--user_id',
    help='User ID. Optional if you want to process data for just one user',
    required=True)

# Parse CLI arguments; every value is normalized to a stripped string.
args = vars(parser.parse_args())
config_dir = str(args["config_dir"]).strip()
accel_stream_name = str(args["accel_stream_name"]).strip()
gyro_stream_name = str(args["gyro_stream_name"]).strip()
wrist = str(args["wrist"]).strip()
user_id = str(args["user_id"]).strip()

# Create the CerebralCortex kernel for the "moral" study.
CC = Kernel(config_dir, study_name="moral")

# Output stream names are derived from the selected wrist.
candidate_stream_name = "brushing-candidates--org.md2k.motionsense--motion_sense--" + wrist + "_wrist"
features_stream_name = "brushing-features--org.md2k.motionsense--motion_sense--" + wrist + "_wrist"

# Two-stage pipeline: generate brushing candidate windows from accel/gyro,
# then compute features over those candidates.
generate_candidates(CC, user_id=user_id,
                    accel_stream_name=accel_stream_name,
                    gyro_stream_name=gyro_stream_name,
                    output_stream_name=candidate_stream_name)
generate_features(CC, user_id=user_id,
                  candidate_stream_name=candidate_stream_name,
                  output_stream_name=features_stream_name)
'--sensor_name',
help='Sensor Type',
required=False,
default='respiban')
# NOTE(review): this chunk begins inside an earlier parser.add_argument(...)
# call whose opening is not visible here; the arguments above belong to it.

# parse arguments
args = vars(parser.parse_args())
config_dir = str(args["config_dir"]).strip()
ecg_stream_name = str(args["ecg_stream_name"]).strip()
study_name = str(args["study_name"]).strip()
# Sampling frequency in Hz — converted via str to tolerate int or str input.
Fs = int(str(args["frequency"]).strip())
model_path = str(args["path"]).strip()
sensor_name = str(args["sensor_name"]).strip()

# create CC object
CC = Kernel(config_dir, study_name=study_name)

# get stream data
ecg_data = CC.get_stream(ecg_stream_name)
# WESAD ground-truth labels are passed alongside the ECG stream here,
# unlike the autosense variant which calls stress_from_ecg without labels.
label = CC.get_stream("wesad.label")
stress_episodes = stress_from_ecg(ecg_data, label,
                                  sensor_name=sensor_name, Fs=Fs,
                                  model_path=model_path)

# show results
stress_episodes.show(60)

# Store results
parser.add_argument('-n', '--sensor_name', help='Sensor Type', required=False, default='autosense') # parse arguments args = vars(parser.parse_args()) config_dir = str(args["config_dir"]).strip() ecg_stream_name = str(args["ecg_stream_name"]).strip() study_name = str(args["study_name"]).strip() Fs = int(str(args["frequency"]).strip()) model_path = str(args["path"]).strip() sensor_name = str(args["sensor_name"]).strip() # create CC object CC = Kernel(config_dir, study_name=study_name) # get stream data ecg_data = CC.get_stream(ecg_stream_name) stress_episodes = stress_from_ecg(ecg_data, sensor_name=sensor_name, Fs=Fs, model_path=model_path) # show results stress_episodes.show(60) # Store results # CC.save_stream(clusterz)
def run():
    """
    Generate random sample data streams (battery, gps, semantic location,
    accelerometer, gyroscope) for one user of a study.

    Command line arguments:
        -uid / --user_id: UUID of the user.
        -sn / --study_name: name of the study (default "mguard").
        -duration / --duration: hours of data to generate (default 1).

    Raises:
        ValueError: if --duration cannot be parsed as an integer.
    """
    parser = argparse.ArgumentParser(
        description='CerebralCortex Random Data Generator.')
    parser.add_argument(
        "-uid",
        "--user_id",
        help="UUID of a user. Defaul UUID of a user is 00000000-e19c-3956-9db2-5459ccadd40c",
        default="00000000-e19c-3956-9db2-5459ccadd40c")
    parser.add_argument("-sn",
                        "--study_name",
                        help="Name of the study. Default is mguard.",
                        default="mguard")
    parser.add_argument(
        "-duration",
        "--duration",
        help="Hours of data to be generated. Acceptable parameters are integers. Default is 1 hour",
        default=1)

    args = vars(parser.parse_args())
    study_name = str(args["study_name"]).strip()
    user_id = str(args["user_id"]).strip()

    # BUG FIX: the original checked `isinstance(hours, int)` AFTER int() had
    # already succeeded, so that branch was unreachable. The intended
    # validation is made live by catching the conversion failure instead.
    try:
        hours = int(args["duration"])
    except (TypeError, ValueError):
        raise ValueError("Only integer values are allowed.") from None

    # new_study=True registers the study if it does not exist yet.
    CC = Kernel(cc_configs="default", study_name=study_name, new_study=True)

    # Stream names follow the md2k convention:
    # <org>--<study>--<user>--<sensor>--<platform>
    battery_stream_name = "org.md2k--{}--{}--battery--phone".format(
        study_name, user_id)
    location_stream_name = "org.md2k--{}--{}--gps--phone".format(
        study_name, user_id)
    semantic_location_stream_name = "org.md2k--{}--{}--data_analysis--gps_episodes_and_semantic_location".format(
        study_name, user_id)
    accel_stream_name = "org.md2k.phonesensor--{}--{}--accelerometer--phone".format(
        study_name, user_id)
    gyro_stream_name = "org.md2k.phonesensor--{}--{}--gyroscope--phone".format(
        study_name, user_id)

    gen_battery_data(CC, study_name=study_name, user_id=user_id,
                     stream_name=battery_stream_name, hours=hours)
    gen_location_datastream(CC, study_name=study_name, user_id=user_id,
                            stream_name=location_stream_name)
    gen_semantic_location_datastream(CC, study_name=study_name, user_id=user_id,
                                     stream_name=semantic_location_stream_name)
    gen_accel_gyro_data(CC, study_name=study_name, user_id=user_id,
                        stream_name=accel_stream_name, hours=hours)
    gen_accel_gyro_data(CC, study_name=study_name, user_id=user_id,
                        stream_name=gyro_stream_name, hours=hours)
def make_CC_object(config_dir="/home/jupyter/cc3_conf/", study_name='mcontain'):
    """Build and return a CerebralCortex Kernel for the given config dir and study."""
    return Kernel(config_dir, study_name=study_name)
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from cerebralcortex.algorithms.gps.clustering import cluster_gps
from cerebralcortex.kernel import Kernel
from cerebralcortex.test_suite.util.data_helper import gen_location_datastream

# BUG FIX: the config path was "/../../conf/" — an absolute path that
# normalizes to "/conf/". The sibling examples all use the relative path.
cc_config = "../../conf/"

# Create CC object
CC = Kernel(configs_dir_path=cc_config)

# get (synthetic) location data for a single test user
ds_gps = gen_location_datastream(
    user_id="bfb2ca0c-e19c-3956-9db2-5459ccadd40c",
    stream_name="gps--org.md2k.phonesensor--phone")

# window location data (60-second windows)
windowed_ds = ds_gps.window(windowDuration=60)

# Cluster GPS data
clusterz = cluster_gps(windowed_ds)

# show results
clusterz.show(truncate=False)

# Store results
# CC.save_stream(clusterz)