# NOTE: import paths below follow the CerebralCortex-kernel layout; the
# commented helper import is an assumption about this example's utility module.
import argparse
import json
import os
import warnings

import pyarrow.parquet as pq
from pyspark.streaming import StreamingContext

from cerebralcortex.core.datatypes import DataStream
from cerebralcortex.core.metadata_manager.stream.metadata import Metadata, DataDescriptor, ModuleMetadata
from cerebralcortex.core.util.spark_helper import get_or_create_sc
from cerebralcortex.kernel import Kernel
# from util.util import rest_api_client, upload_stream_data  # path assumed


def process_save_stream(msg: dict, cc_config_path: str):
    """
    Process one of kafka messages, add gaussian noise to data and store data as a new stream

    Args:
        msg (dict): kafka message - {'filename': str, 'metadata_hash': str, "stream_name": str, "user_id": str}
        cc_config_path (str): path of cerebralcortex configs

    Notes:
        This method creates CC object again. This code is running on worker node. Thus, it won't have access to CC object created in run()
        CC object cannot be passed to worker nodes because it contains sockets and sockets cannot be serialized in spark to pass as a parameter

    """

    # Disable pandas warnings
    warnings.simplefilter(action='ignore', category=FutureWarning)

    CC = Kernel(cc_config_path, enable_spark=False)
    cc_config = CC.config
    stream_name = msg.get("stream_name")
    user_id = msg.get("user_id")

    if cc_config["nosql_storage"] == "filesystem":
        file_name = str(
            cc_config["filesystem"]["filesystem_path"]) + msg.get("filename")
    elif cc_config["nosql_storage"] == "hdfs":
        file_name = str(
            cc_config["hdfs"]["raw_files_dir"]) + msg.get("filename")
    else:
        raise Exception(
            str(cc_config["nosql_storage"]) +
            " is not supported. Please use filesystem or hdfs.")

    if os.path.exists(file_name):
        data = pq.read_table(file_name)
        pdf = data.to_pandas()

        pdf = add_gaussian_noise(pdf)

        new_stream_name = stream_name + "_gaussian_noise"

        metadata = Metadata() \
            .set_name(new_stream_name) \
            .set_description("Gaussian noise added to the accel sensor stream.") \
            .add_dataDescriptor(DataDescriptor().set_attribute("description", "noisy accel x")) \
            .add_dataDescriptor(DataDescriptor().set_attribute("description", "noisy accel y")) \
            .add_dataDescriptor(DataDescriptor().set_attribute("description", "noisy accel z")) \
            .add_module(ModuleMetadata()
                        .set_name("cerebralcortex.streaming_operation.main")
                        .set_version("0.0.1")
                        .set_attribute("description",
                                       "Spark streaming example using CerebralCortex. "
                                       "This example adds Gaussian noise to stream data.")
                        .set_author("test_user", "test_user@test_email.com"))

        pdf["user"] = user_id
        ds = DataStream(data=pdf, metadata=metadata)
        CC.save_stream(ds)
    else:
        print(file_name, "does not exist.")
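

# `add_gaussian_noise` is called above but not defined in this excerpt. A
# minimal sketch of what it could look like, assuming the parquet file holds
# numeric sensor columns; the noise scale (0.01) is an illustrative choice.
import numpy as np


def add_gaussian_noise(pdf, scale=0.01):
    noisy = pdf.copy()
    # perturb every numeric column with zero-mean Gaussian noise
    for col in noisy.select_dtypes(include="number").columns:
        noisy[col] = noisy[col] + np.random.normal(0.0, scale, size=len(noisy))
    return noisy
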
def run():
    """
    This example:
     - Makes calls to the CerebralCortex-APIServer to:
        - authenticate a user
        - register a new stream (`accelerometer--org.md2k.phonesensor--phone`)
        - upload sample data
     - Creates a PySpark-Kafka direct stream
     - Reads the parquet data and converts it into a pandas dataframe
     - Adds Gaussian noise to the sample data
     - Stores the noisy data as a new stream
     - Retrieves and prints the noisy/clean data streams
    """

    # upload sample data and publish messages on Kafka
    #rest_api_client("http://0.0.0.0:8089/")

    # create cerebralcortex object
    cc_config_path = "../../conf/"
    CC = Kernel(cc_config_path, enable_spark_ui=True)
    sample_stream_name = "accelerometer--org.md2k.phonesensor--phone"

    upload_stream_data(
        "http://localhost/", "demo", "demo", sample_stream_name,
        "../../resources/sample_data/msgpack_files/phone_accel.msgpack.gz")

    # fail fast if the messaging service is disabled
    if CC.config["messaging_service"] == "none":
        raise Exception(
            "Messaging service is disabled (none) in cerebralcortex.yml. Please update configs."
        )

    # Kafka Consumer Configs
    spark_context = get_or_create_sc(type="sparkContext")

    ssc = StreamingContext(spark_context,
                           int(CC.config["kafka"]["ping_kafka"]))
    kafka_files_stream = CC.MessagingQueue.create_direct_kafka_stream(
        "filequeue", ssc)
    if kafka_files_stream is not None:
        kafka_files_stream.foreachRDD(
            lambda rdd: iterate_on_rdd(rdd, cc_config_path))

    ssc.start()
    ssc.awaitTermination(timeout=15)
    ssc.stop()

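    # recreate the CC object on the driver to read back the stored streams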
    CC = Kernel(cc_config_path, enable_spark_ui=True)
    print("*" * 15, "CLEAN DATA", "*" * 15)
    ds_clean = CC.get_stream(stream_name=sample_stream_name)
    ds_clean.show(5, truncate=False)

    print("*" * 15, "NOISY DATA", "*" * 15)
    ds_noise = CC.get_stream(stream_name=sample_stream_name +
                             "_gaussian_noise")
    ds_noise.show(5, truncate=False)
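

# `iterate_on_rdd` is referenced in run() but not defined in this excerpt. A
# plausible sketch, assuming each Kafka record arrives as a (key, value) pair
# whose value is the JSON-encoded message dict consumed by process_save_stream:
def iterate_on_rdd(rdd, cc_config_path):
    # decode each record into a message dict and process it on the workers
    messages = rdd.map(lambda record: json.loads(record[1]))
    messages.foreach(lambda msg: process_save_stream(msg, cc_config_path))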


def run():
    # The head of this example was truncated in the excerpt; the parser setup
    # below is reconstructed from the argument names used later, and the flags
    # are assumptions. generate_candidates / generate_features come from the
    # brushing-detection module of this example (imports not shown).
    parser = argparse.ArgumentParser(description='Brushing detection example.')
    parser.add_argument('-c',
                        '--config_dir',
                        help='CerebralCortex configuration directory path',
                        required=True)
    parser.add_argument('-a',
                        '--accel_stream_name',
                        help='Name of the accelerometer stream',
                        required=True)
    parser.add_argument('-g',
                        '--gyro_stream_name',
                        help='Name of the gyroscope stream',
                        required=True)
    parser.add_argument('-w',
                        '--wrist',
                        help='Wrist the sensor is worn on (left or right)',
                        required=True)
    parser.add_argument(
        '-u',
        '--user_id',
        help='User ID of the participant whose data will be processed',
        required=True)

    args = vars(parser.parse_args())

    config_dir = str(args["config_dir"]).strip()
    accel_stream_name = str(args["accel_stream_name"]).strip()
    gyro_stream_name = str(args["gyro_stream_name"]).strip()
    wrist = str(args["wrist"]).strip()
    user_id = str(args["user_id"]).strip()

    CC = Kernel(config_dir, study_name="moral")

    candidate_stream_name = "brushing-candidates--org.md2k.motionsense--motion_sense--" + wrist + "_wrist"
    features_stream_name = "brushing-features--org.md2k.motionsense--motion_sense--" + wrist + "_wrist"

    generate_candidates(CC,
                        user_id=user_id,
                        accel_stream_name=accel_stream_name,
                        gyro_stream_name=gyro_stream_name,
                        output_stream_name=candidate_stream_name)

    generate_features(CC,
                      user_id=user_id,
                      candidate_stream_name=candidate_stream_name,
                      output_stream_name=features_stream_name)
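

# Example invocation of this script (the flags mirror the reconstructed parser
# setup above and are assumptions, not the original CLI):
#   python brushing_example.py -c /path/to/conf -a <accel_stream> \
#       -g <gyro_stream> -w left -u <user_uuid>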


def run():
    # The head of this example was truncated in the excerpt; the parser setup
    # below is reconstructed from the argument names used later, and the flags
    # are assumptions. stress_from_ecg comes from the CerebralCortex algorithms
    # package (import not shown in the excerpt).
    parser = argparse.ArgumentParser(
        description='Stress detection from ECG (WESAD dataset).')
    parser.add_argument('-c',
                        '--config_dir',
                        help='CerebralCortex configuration directory path',
                        required=True)
    parser.add_argument('-e',
                        '--ecg_stream_name',
                        help='Name of the ECG stream',
                        required=True)
    parser.add_argument('-s',
                        '--study_name',
                        help='Name of the study',
                        required=True)
    parser.add_argument('-f',
                        '--frequency',
                        help='ECG sampling frequency in Hz',
                        required=True)
    parser.add_argument('-p',
                        '--path',
                        help='Path to the trained stress model',
                        required=True)
    parser.add_argument('-n',
                        '--sensor_name',
                        help='Sensor Type',
                        required=False,
                        default='respiban')

    # parse arguments
    args = vars(parser.parse_args())
    config_dir = str(args["config_dir"]).strip()
    ecg_stream_name = str(args["ecg_stream_name"]).strip()
    study_name = str(args["study_name"]).strip()
    Fs = int(str(args["frequency"]).strip())
    model_path = str(args["path"]).strip()
    sensor_name = str(args["sensor_name"]).strip()

    # create CC object
    CC = Kernel(config_dir, study_name=study_name)

    # get stream data
    ecg_data = CC.get_stream(ecg_stream_name)

    label = CC.get_stream("wesad.label")
    stress_episodes = stress_from_ecg(ecg_data,
                                      label,
                                      sensor_name=sensor_name,
                                      Fs=Fs,
                                      model_path=model_path)

    # show results
    stress_episodes.show(60)

    # Store results
    # CC.save_stream(stress_episodes)


def run():
    # Variant of the previous example for the AutoSense sensor; the truncated
    # parser setup is reconstructed the same way (flags are assumptions).
    parser = argparse.ArgumentParser(description='Stress detection from ECG.')
    parser.add_argument('-c',
                        '--config_dir',
                        help='CerebralCortex configuration directory path',
                        required=True)
    parser.add_argument('-e',
                        '--ecg_stream_name',
                        help='Name of the ECG stream',
                        required=True)
    parser.add_argument('-s',
                        '--study_name',
                        help='Name of the study',
                        required=True)
    parser.add_argument('-f',
                        '--frequency',
                        help='ECG sampling frequency in Hz',
                        required=True)
    parser.add_argument('-p',
                        '--path',
                        help='Path to the trained stress model',
                        required=True)
    parser.add_argument('-n',
                        '--sensor_name',
                        help='Sensor Type',
                        required=False,
                        default='autosense')

    # parse arguments
    args = vars(parser.parse_args())
    config_dir = str(args["config_dir"]).strip()
    ecg_stream_name = str(args["ecg_stream_name"]).strip()
    study_name = str(args["study_name"]).strip()
    Fs = int(str(args["frequency"]).strip())
    model_path = str(args["path"]).strip()
    sensor_name = str(args["sensor_name"]).strip()

    # create CC object
    CC = Kernel(config_dir, study_name=study_name)

    # get stream data
    ecg_data = CC.get_stream(ecg_stream_name)

    stress_episodes = stress_from_ecg(ecg_data,
                                      sensor_name=sensor_name,
                                      Fs=Fs,
                                      model_path=model_path)

    # show results
    stress_episodes.show(60)

    # Store results
    # CC.save_stream(stress_episodes)
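

# Example invocation (the flags mirror the reconstructed parser setup above
# and are assumptions, not the original CLI):
#   python stress_from_ecg_example.py -c /path/to/conf -e <ecg_stream> \
#       -s <study> -f <sampling_rate> -p /path/to/model -n autosense
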
def run():
    parser = argparse.ArgumentParser(
        description='CerebralCortex Random Data Generator.')
    parser.add_argument(
        "-uid",
        "--user_id",
        help="UUID of a user. Default is 00000000-e19c-3956-9db2-5459ccadd40c.",
        default="00000000-e19c-3956-9db2-5459ccadd40c")
    parser.add_argument("-sn",
                        "--study_name",
                        help="Name of the study. Default is mguard.",
                        default="mguard")
    parser.add_argument(
        "-duration",
        "--duration",
        help="Hours of data to generate. Must be an integer. Default is 1.",
        default=1)

    args = vars(parser.parse_args())

    study_name = str(args["study_name"]).strip()
    user_id = str(args["user_id"]).strip()
    # int() already rejects non-integer input, so validate via the conversion
    try:
        hours = int(args["duration"])
    except (TypeError, ValueError):
        raise ValueError("duration must be an integer number of hours.")

    CC = Kernel(cc_configs="default", study_name=study_name, new_study=True)

    battery_stream_name = "org.md2k--{}--{}--battery--phone".format(
        study_name, user_id)
    location_stream_name = "org.md2k--{}--{}--gps--phone".format(
        study_name, user_id)
    semantic_location_stream_name = "org.md2k--{}--{}--data_analysis--gps_episodes_and_semantic_location".format(
        study_name, user_id)
    accel_stream_name = "org.md2k.phonesensor--{}--{}--accelerometer--phone".format(
        study_name, user_id)
    gyro_stream_name = "org.md2k.phonesensor--{}--{}--gyroscope--phone".format(
        study_name, user_id)

    gen_battery_data(CC,
                     study_name=study_name,
                     user_id=user_id,
                     stream_name=battery_stream_name,
                     hours=hours)
    gen_location_datastream(CC,
                            study_name=study_name,
                            user_id=user_id,
                            stream_name=location_stream_name)
    gen_semantic_location_datastream(CC,
                                     study_name=study_name,
                                     user_id=user_id,
                                     stream_name=semantic_location_stream_name)
    gen_accel_gyro_data(CC,
                        study_name=study_name,
                        user_id=user_id,
                        stream_name=accel_stream_name,
                        hours=hours)
    gen_accel_gyro_data(CC,
                        study_name=study_name,
                        user_id=user_id,
                        stream_name=gyro_stream_name,
                        hours=hours)
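
    # A quick sanity check (sketch): read back one of the generated streams
    # and print a few rows, mirroring the get_stream usage in earlier examples.
    ds_battery = CC.get_stream(battery_stream_name)
    ds_battery.show(5, truncate=False)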


def make_CC_object(config_dir="/home/jupyter/cc3_conf/",
                   study_name='mcontain'):
    CC = Kernel(config_dir, study_name=study_name)
    return CC
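
# Example usage (the default paths above are environment-specific):
# CC = make_CC_object()
# ds = CC.get_stream("some-stream-name")  # hypothetical stream name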


from cerebralcortex.algorithms.gps.clustering import cluster_gps
from cerebralcortex.kernel import Kernel
from cerebralcortex.test_suite.util.data_helper import gen_location_datastream

cc_config = "../../conf/"

# Create CC object
CC = Kernel(configs_dir_path=cc_config)

# get location data
ds_gps = gen_location_datastream(user_id="bfb2ca0c-e19c-3956-9db2-5459ccadd40c", stream_name="gps--org.md2k.phonesensor--phone")

# window location data
windowed_ds = ds_gps.window(windowDuration=60)

# Cluster GPS data
clusterz = cluster_gps(windowed_ds)

# show results
clusterz.show(truncate=False)

# Store results
# CC.save_stream(clusterz)
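
# Before saving, the result would need its own stream name; a sketch, assuming
# the DataStream exposes its Metadata object as in the first example above:
# clusterz.metadata.set_name("gps--org.md2k.phonesensor--phone--clustered")
# CC.save_stream(clusterz)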