def weather_and_operating_features_extraction_one_user(user_id):
	out_path_prefix = "/home/dehajjik/workspace/logs/"
	data = DataExtractor.load_json_data(user_id)
	known_features = ["activityRecognitionResult", "androidActivityRecognitionResult", "appLaunch", "battery", "bluetooth", "event", "notifications", "headsetPlug", "location", "networkInfo", "sensor", "settingInfo",
						"telephony", "telephony2", "wifiAps", "wifiConnectedAp", "launcherLayouts", "predictors", "neighboringCellInfo2", "neighboringCellInfo"]
	
	# filter_notifications already adds the attributes createDateTimeZone and rcreateDateTimeZone to the records
	# TODO: check that the data is really ordered by date (see the sanity-check sketch after this function)
	data = DataOperations.filter_notifications(data)
	
	# the data is sorted, and the notifications are filtered and annotated with the timezone-aware date
	for record_id in data:
		record = data[record_id]
		for key, value in record.iteritems():
			if key not in known_features:
				if key not in other_features:
					other_features[key] = []
				if len(other_features[key])<100:
					other_features[key].append(value)
	
	
	t = time.strftime("%Y%m%d%H%M%S")
	JsonUtils.save_json_data(out_path_prefix+t+"extra_features_u"+str(user_id), other_features)
	return other_features
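# A minimal sanity-check sketch for the "ordered by date" question above, assuming the
# loaded data preserves insertion order (e.g. a collections.OrderedDict) and that each
# record exposes an epoch-millisecond timestamp under a "createDate" attribute -- both
# the attribute name and the ordering guarantee are assumptions, not confirmed behavior
# of DataExtractor.
def check_records_ordered_by_date(data, timestamp_key="createDate"):
	previous = None
	for record_id in data:
		current = data[record_id].get(timestamp_key)
		if current is None:
			# the record has no timestamp under the assumed key; skip it
			continue
		if previous is not None and current < previous:
			print "record "+str(record_id)+" breaks the chronological order"
			return False
		previous = current
	return True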
def filter_notifications_one_user(user_id):
    out_path_prefix = "/speech/dbwork/mul/students/dehajjik/notifications_filtered/"
    data = DataExtractor.load_json_data(user_id)
    data = DataOperations.filter_notifications(data)

    JsonUtils.save_json_data(
        out_path_prefix + str(DataExtractor.user_long_ids[user_id]) + "/all/all_in_one_validated_log", data
    )
    return data
def output_sample(user_id):
    specific_date_times = {
        1: [
            datetime.datetime(year=2014, month=8, day=19, hour=16),
            datetime.datetime(year=2014, month=8, day=27, hour=15),
            datetime.datetime(year=2014, month=9, day=5, hour=18),
            datetime.datetime(year=2014, month=10, day=12, hour=15),
            datetime.datetime(year=2014, month=9, day=1, hour=1)
        ],
        2: [
            datetime.datetime(year=2014, month=9, day=25, hour=7),
            datetime.datetime(year=2014, month=12, day=8, hour=6),
            datetime.datetime(year=2014, month=9, day=25, hour=1)
        ],
        3: [datetime.datetime(year=2014, month=9, day=25, hour=17)],
        4: [
            datetime.datetime(year=2014, month=9, day=5, hour=14),
            datetime.datetime(year=2015, month=1, day=8, hour=11),
            datetime.datetime(year=2014, month=9, day=2, hour=13)
        ],
        5: [
            datetime.datetime(year=2014, month=9, day=22, hour=18),
            datetime.datetime(year=2015, month=1, day=5, hour=13),
            datetime.datetime(year=2014, month=12, day=29, hour=13)
        ],
        6: [
            datetime.datetime(year=2014, month=10, day=26, hour=3),
            datetime.datetime(year=2014, month=11, day=4, hour=8)
        ],
        7: [datetime.datetime(year=2014, month=7, day=28, hour=10)]
    }

    out_path_prefix = "/home/dehajjik/workspace/resources/filtered_notifs/"

    data = DataExtractor.load_json_data(user_id)

    # for each specific date and hour, write the records that occurred in that hour to a file in JSON format
    for specific_dt in specific_date_times[user_id]:
        selected_data = DataExtractor.select_records_by_date_and_hour(
            data, specific_dt)
        selected_data = DataOperations.order_chronologically_and_annotate(
            selected_data)
        #selected_data = DataOperations.annotate(selected_data)
        JsonUtils.save_json_data(
            out_path_prefix + "u" + str(user_id) + " d" +
            specific_dt.strftime('%Y-%m-%d %H'), selected_data)
        print json.dumps(selected_data.keys(), indent=4)
def extract_specific_date_time_one_user(user_id):
    '''specific_date_times = {1: [datetime.datetime(year=2014, month=8, day=19, hour=16), datetime.datetime(year=2014, month=8, day=27, hour=15), datetime.datetime(year=2014, month=9, day=5, hour=18), datetime.datetime(year=2014, month=10, day=12, hour=15), datetime.datetime(year=2014, month=9, day=1, hour=1)],
						2: [datetime.datetime(year=2014, month=9, day=25, hour=7),datetime.datetime(year=2014, month=12, day=8, hour=6), datetime.datetime(year=2014, month=9, day=25, hour=1)],
						3: [datetime.datetime(year=2014, month=9, day=13, hour=0), datetime.datetime(year=2014, month=9, day=25, hour=17)],
						4: [datetime.datetime(year=2014, month=9, day=5, hour=14), datetime.datetime(year=2015, month=1, day=8, hour=11), datetime.datetime(year=2014, month=9, day=2, hour=13)],
						5: [datetime.datetime(year=2014, month=9, day=22, hour=18), datetime.datetime(year=2015, month=1, day=5, hour=13), datetime.datetime(year=2014, month=12, day=29, hour=13)],
						6: [datetime.datetime(year=2014, month=10, day=26, hour=3), datetime.datetime(year=2014, month=11, day=4, hour=8)],
						7: [datetime.datetime(year=2014, month=7, day=28, hour=10)]}'''

    specific_date_times = {
        1: [
            datetime.datetime(year=2014, month=10, day=12, hour=14),
            datetime.datetime(year=2014, month=10, day=12, hour=22)
        ],
        2: [],
        3: [],
        4: [datetime.datetime(year=2014, month=12, day=9, hour=15)],
        5: [
            datetime.datetime(year=2014, month=12, day=5, hour=12),
            datetime.datetime(year=2014, month=12, day=16, hour=9)
        ],
        6: [],
        7: []
    }

    out_path_prefix = "/home/dehajjik/workspace/resources/"

    data = DataExtractor.load_json_data(user_id)

    # for each specific date and hour, write the records that occurred in that hour to a file in JSON format
    for specific_dt in specific_date_times[user_id]:
        selected_data = DataExtractor.select_records_by_date_and_hour(
            data, specific_dt)

        #annotate the records with readable dates and sort the notifications chronologically
        for record in selected_data:
            record = DataOperations.annotate_with_readable_date_no_timezone(
                record)
            record = DataOperations.order_chronologically_notifications_and_annotate(
                record)

        #sort the records chronologically
        selected_data = DataOperations.order_chronologically_and_annotate(
            selected_data)
        JsonUtils.save_json_data(
            out_path_prefix + "u" + str(user_id) + " d" +
            specific_dt.strftime('%Y-%m-%d %H'), selected_data)
def transform_to_categorized_data_one_user(user_id):
	out_path_prefix = "/speech/dbwork/mul/students/dehajjik/categorized_data/"
	data_key = "data"
	metadata_key = "metadata"
	
	print "loading data for user "+str(user_id)
	nontransformed_data = DataExtractor.load_json_data(user_id)
	#nontransformed_data = JsonUtils.load_json_data("/home/dehajjik/workspace/resources/sample_data_for_location_categorization_test.json")
	
	
	# the transformers responsible for categorizing each feature of the data
	feature_transformers = {LocationTransformer.transformed_feature_name: LocationTransformer(nontransformed_data),
							NotificationTransformer.transformed_feature_name : NotificationTransformer(nontransformed_data),
							ApplaunchTransformer.transformed_feature_name : ApplaunchTransformer(nontransformed_data),
							BatteryTransformer.transformed_feature_name: BatteryTransformer(nontransformed_data),
							HeadsetTransformer.transformed_feature_name: HeadsetTransformer(nontransformed_data),
							BluetoothPairedTransformer.transformed_feature_name: BluetoothPairedTransformer(nontransformed_data),
							BluetoothSeenTransformer.transformed_feature_name: BluetoothSeenTransformer(nontransformed_data),
							ActivityTransformer.transformed_feature_name : ActivityTransformer(nontransformed_data)}
	
	#the features that we want to transform
	selected_features = [LocationTransformer.transformed_feature_name,
						NotificationTransformer.transformed_feature_name,
						ApplaunchTransformer.transformed_feature_name,
						#BatteryTransformer.transformed_feature_name,
						#HeadsetTransformer.transformed_feature_name,
						BluetoothPairedTransformer.transformed_feature_name,
						#BluetoothSeenTransformer.transformed_feature_name,
						ActivityTransformer.transformed_feature_name]
						
	#selected_features = [ActivityTransformer.transformed_feature_name]
	
	
	categorized_data = {}
	categorized_data[data_key]={}
	categorized_data[metadata_key]={}
	
	
	for feature in selected_features:
		feature_transformers[feature].transform()
		if feature_transformers[feature].transformed_feature_data != {None:None} and feature_transformers[feature].transformed_feature_metadata != {None:None}:
			categorized_data[data_key][feature]= feature_transformers[feature].transformed_feature_data
			categorized_data[metadata_key][feature] = feature_transformers[feature].transformed_feature_metadata
	
	JsonUtils.save_json_data(out_path_prefix+str(DataExtractor.user_long_ids[user_id])+"/all/all_in_one_validated_log", categorized_data)
	return categorized_data
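# The feature transformers used above share a small informal interface: a class-level
# transformed_feature_name, a transform() method, and transformed_feature_data /
# transformed_feature_metadata attributes that stay at {None: None} when nothing could be
# produced. A minimal illustrative sketch of that contract follows; DummyTransformer and
# its internals are hypothetical and not part of the real codebase.
class DummyTransformer(object):
	transformed_feature_name = "dummy"

	def __init__(self, nontransformed_data):
		self.nontransformed_data = nontransformed_data
		self.transformed_feature_data = {None: None}
		self.transformed_feature_metadata = {None: None}

	def transform(self):
		# a real transformer would categorize its feature here; this sketch only counts
		# the records so that the data/metadata attributes become non-trivial
		self.transformed_feature_data = {"record_count": len(self.nontransformed_data)}
		self.transformed_feature_metadata = {"description": "number of records in the raw data"}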
	def write(content, file_suffix):
		t = time.strftime("%Y%m%d%H%M%S")
		log_file_name = LogsFileWriter.log_dir+t+file_suffix
		JsonUtils.save_json_data(log_file_name, content)
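# write() above takes no self and reads LogsFileWriter.log_dir, so it most likely sits
# inside a LogsFileWriter class as a static method. A minimal sketch of that enclosing
# class under those assumptions; the log_dir value is illustrative, not confirmed.
class LogsFileWriter(object):
	log_dir = "/home/dehajjik/workspace/logs/"

	@staticmethod
	def write(content, file_suffix):
		t = time.strftime("%Y%m%d%H%M%S")
		log_file_name = LogsFileWriter.log_dir + t + file_suffix
		JsonUtils.save_json_data(log_file_name, content)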
import json
import collections
import sys
import time

sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from json_utils import JsonUtils
from data_utils import *
other_features = {}
def weather_and_operating_features_extraction_one_user(user_id):
	out_path_prefix = "/home/dehajjik/workspace/logs/"
	data = DataExtractor.load_json_data(user_id)
	known_features = ["activityRecognitionResult", "androidActivityRecognitionResult", "appLaunch", "battery", "bluetooth", "event", "notifications", "headsetPlug", "location", "networkInfo", "sensor", "settingInfo",
						"telephony", "telephony2", "wifiAps", "wifiConnectedAp", "launcherLayouts", "predictors"]
	
	# filter_notifications already adds the attributes createDateTimeZone and rcreateDateTimeZone to the records
	# TODO: check that the data is really ordered by date
	data = DataOperations.filter_notifications(data)
	
	# the data is sorted, and the notifications are filtered and annotated with the timezone-aware date
	for record_id in data:
		record = data[record_id]
		for key, value in record.iteritems():
			if key not in known_features:
				if key not in other_features:
					other_features[key]=[]
				other_features[key].append(record)
				print key+"\n"
	
	
	t = time.strftime("%Y%m%d%H%M%S")
	JsonUtils.save_json_data(out_path_prefix+str(DataExtractor.user_long_ids[user_id])+"/"+t+"extra_features_u"+str(user_id), other_features)
	return other_features
def clean_and_write_data_one_user(user_id):
	#empty the lists
	global clean_data
	clean_data = {}
	global last_realization_val
	last_realization_val = {}
	global last_realization_key
	last_realization_key = {}
	
	
	'''
	For each feature, we want to know the distribution of the time differences observed between two consecutive realizations.
	For that reason, time_variances stores, for each feature, the number of times the difference between two consecutive realizations was x minutes.
	Note that the maximum time variance allowed is timeout_in_minutes.
	(An illustrative update sketch is given after this function.)
	'''
	global time_variances
	time_variances ={}
	
	out_path_prefix = "/speech/dbwork/mul/students/dehajjik/clean_data/"
	data = DataExtractor.load_json_data(user_id)
	DataOperations.print_times_for_specific_locations(data)
	# filter_notifications already adds the attributes createDateTimeZone and rcreateDateTimeZone to the records
	# TODO: check that the data is really ordered by date
	data = DataOperations.filter_notifications(data)
	
	# the data is sorted, and the notifications are filtered and annotated with the timezone-aware date
	for record_id in data:
		record = data[record_id]
		event_type = record['event']['type']
		for key, value in record.iteritems():
			# check whether the realization contains a blacklisted value; if so, ignore it
			do_ignore = False
			is_array_attribute = False
			if key in blacklisted_values:
				for attribute, black_values in blacklisted_values[key].iteritems():
					try:
						if value[attribute] in black_values:
							# the current realization contains one blacklisted value, so we need to ignore it
							do_ignore = True
					except TypeError:
						# the feature we have is an array feature
						is_array_attribute = True
						break
						
			if is_array_attribute:
				# the feature is an array feature, so we go through all the elements and remove the blacklisted ones if they exist
				temp_value = []
				for one_value in value:
					# reset per element so one blacklisted element does not discard the following ones
					do_remove = False
					for attribute, black_values in blacklisted_values[key].iteritems():
						if one_value[attribute] in black_values:
							do_remove = True
					if not do_remove:
						temp_value.append(one_value)

				if len(temp_value) >= 1:
					value = temp_value
				else:
					# all the elements were blacklisted, so ignore this entry
					do_ignore = True
					
			
			if not do_ignore:
				if key == "activityRecognitionResult":
					activityRecognitionResult_update(value , event_type)
				elif key == "androidActivityRecognitionResult":
					androidActivityRecognitionResult_update(value, event_type)
				elif key == "appLaunch":
					appLaunch_update(value , event_type)
				elif key == "battery":
					battery_update(value, event_type)
				elif key == "bluetooth":
					bluetooth_update(value, event_type)
				elif key == "event":
					event_update(value, event_type)
				elif key == "notifications":
					notifications_update(value, event_type)
				elif key == "headsetPlug":
					headsetPlug_update(value, event_type)
				elif key == "location":
					location_update(value, event_type)
				elif key == "networkInfo":
					networkInfo_update(value, event_type)
				elif key == "sensor":
					sensor_update(value, event_type)
				elif key == "settingInfo":
					settingInfo_update(value, event_type)
				elif key == "telephony" or key=="telephony2":
					telephony_update(value, event_type)
				elif key == "wifiAps":
					wifiApps_update(value , event_type)
				elif key == "wifiConnectedAp":
					wifiConnectedApp_update(value, event_type)
				#elif key not in blacklisted_features:
				#	print key+"\n"
				
	
	
	
	order_data()
	#do some tests to see that the transformation went well
	do_sanity_check(data,clean_data)
	JsonUtils.save_json_data(out_path_prefix+str(DataExtractor.user_long_ids[user_id])+"/all/all_in_one_validated_log", clean_data)
		
	return clean_data
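# An illustrative sketch of how the time_variances histogram described in the docstring
# above could be maintained. It is not the project's real update logic: the helper name,
# the _last_seen_ms bookkeeping dict, the 60-minute default timeout and the assumption of
# epoch-millisecond timestamps are all hypothetical; time_variances is the module-level
# dict initialized by clean_and_write_data_one_user.
_last_seen_ms = {}

def update_time_variance(feature, timestamp_ms, timeout_in_minutes=60):
	# count, per feature, how often a gap of x whole minutes separates two realizations
	if feature not in time_variances:
		time_variances[feature] = {}
	if feature in _last_seen_ms:
		gap_minutes = int((timestamp_ms - _last_seen_ms[feature]) / 60000)
		if gap_minutes <= timeout_in_minutes:
			time_variances[feature][gap_minutes] = time_variances[feature].get(gap_minutes, 0) + 1
	_last_seen_ms[feature] = timestamp_ms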
																								   "seq":[23]
																								}
					},
		"wifiAps": {"2015-01-01 00:55:00 , 1420073700000->2015-01-01 01:10:01 , 1420074601000":[ {"ssid":"w1",
																								   "seq":[2,3,4]}
																								],
					"2015-01-01 01:10:01 , 1420074601000->2015-01-01 02:00:00 , 1420077600000":[ {"ssid":"w1",
																								  "seq":[5,6,7,8]},
																								{"ssid":"w2",
																								  "seq":[5,6,7,8]},
																								
																								],
					"2015-01-01 03:00:01 , 1420081201000->2015-01-01 03:05:00 , 1420081500000":[ {"ssid":"w3",
																								   "seq":[9]}
																								],
					"2015-01-01 04:05:00 , 1420085100000->2015-01-01 05:00:00 , 1420088400000":[ {"ssid":"w2",
																								   "seq":[10,11]},
																								  {"ssid":"w3",
																								  "seq":[10,11]}
																								],
					"2015-01-01 19:15:00 , 1420139700000->2015-01-01 21:00:00 , 1420146000000":[ {"ssid":"w4",
																								   "seq": [21,22]}
																								]
					
					}
					
}
					
					
JsonUtils.save_json_data( "/home/dehajjik/workspace/resources/sample_data_for_location_categorization_test", data)
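# A hedged round-trip check: the commented-out line in transform_to_categorized_data_one_user
# suggests this sample file can be loaded back with JsonUtils.load_json_data and fed to a
# transformer such as LocationTransformer for testing. The ".json" extension on the load path
# is taken from that commented-out line; treat this as a usage sketch, not an existing test.
sample = JsonUtils.load_json_data("/home/dehajjik/workspace/resources/sample_data_for_location_categorization_test.json")
location_transformer = LocationTransformer(sample)
location_transformer.transform()
print location_transformer.transformed_feature_data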