def get_percent_of_parallel_sensor_events(number_of_entire_rows, dataset_name, whole_common_time):
    '''
    Return the fraction of sensor events that fall inside the common
    (overlapping) time intervals of the performed activities.
    '''
    # TODO: fix later -- the data being passed to this function is wrong.
    address_to_read = r"E:\pgmpy\{}\sensor_data_each_row_one_features_is_one_on_and_off+time_ordered.csv".format(dataset_name)
    f = open(address_to_read, "r")
    counter = 0
    durations_index = 0
    for line in f:
        cells = line.split(',')
        # build the datetime from the date and time columns (the time column
        # carries a trailing '\n' that must be stripped)
        date_time = convert_string_to_datetime(cells[-2], cells[-1].split('\n')[0])
        if date_time < whole_common_time[durations_index]["start_time"]:
            continue
        if date_time <= whole_common_time[durations_index]["end_time"]:
            counter += 1
        if date_time > whole_common_time[durations_index]["end_time"]:
            durations_index += 1
            if durations_index >= len(whole_common_time):
                break  # past the last common interval; later events cannot overlap
            if (whole_common_time[durations_index]["start_time"] <= date_time
                    <= whole_common_time[durations_index]["end_time"]):
                counter += 1
    f.close()
    return counter / number_of_entire_rows

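# A minimal usage sketch for get_percent_of_parallel_sensor_events, with
# hypothetical values. It assumes whole_common_time is a time-ordered list of
# {"start_time", "end_time"} dictionaries holding datetime objects, which is
# what the comparisons inside the function require; "cairo" and 10000 are
# placeholders, not values used elsewhere in this file.
def _demo_parallel_event_ratio():
    from datetime import datetime
    example_common_time = [
        {"start_time": datetime(2010, 1, 1, 8, 0, 0),
         "end_time": datetime(2010, 1, 1, 8, 30, 0)},
        {"start_time": datetime(2010, 1, 1, 12, 0, 0),
         "end_time": datetime(2010, 1, 1, 12, 45, 0)},
    ]
    # number_of_entire_rows must be the total row count of the CSV so the
    # returned value is a fraction in [0, 1]
    ratio = get_percent_of_parallel_sensor_events(10000, "cairo", example_common_time)
    print("fraction of parallel sensor events:", ratio)
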
def get_list_of_duration_of_activities(file_address):
    '''Return the duration (in seconds) of every begin/end-annotated activity.'''
    f = open(file_address, "r")
    _, _, list_of_works = get_list_of_allowed_sensors_and_works_in_dataset(file_address)
    # list of open begin records; each entry is [person ID, work ID, begin date, begin time]
    list_of_beginned_activities = []
    number_of_proccessed_line = -1
    list_of_durations = []
    PersonNumber, WorkNumber = -1, -1
    for line in f:
        number_of_proccessed_line += 1
        cells = line.split()
        if len(cells) > 4:
            PersonNumber, WorkString = get_person_and_work(cells[4])
            WorkNumber = list_of_works.index(WorkString)
            if cells[5] == 'begin':
                list_of_beginned_activities.append(
                    [PersonNumber, WorkNumber, cells[0], cells[1]])
            elif cells[5] == 'end':
                # find the earliest open 'begin' record of this (person, work) pair
                indices = [
                    i for i, (first, second, *_) in enumerate(list_of_beginned_activities)
                    if (first, second) == (PersonNumber, WorkNumber)
                ]
                index = indices[0]
                begin_datetime = convert_string_to_datetime(
                    list_of_beginned_activities[index][2],
                    list_of_beginned_activities[index][3])
                end_datetime = convert_string_to_datetime(cells[0], cells[1])
                timedelta_in_sec = (end_datetime - begin_datetime).total_seconds()
                list_of_durations.append(timedelta_in_sec)
                del list_of_beginned_activities[index]
    f.close()
    return list_of_durations

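# A hedged usage sketch for get_list_of_duration_of_activities: the path below
# is hypothetical and stands for any CASAS-style annotated log whose sixth
# column carries the 'begin'/'end' markers the function expects.
def _demo_activity_duration_stats(annotated_file=r"E:\pgmpy\cairo\data.txt"):
    durations = get_list_of_duration_of_activities(annotated_file)
    if durations:
        print("number of activities:", len(durations))
        print("mean duration (s):", sum(durations) / len(durations))
        print("longest duration (s):", max(durations))
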
def convert_casas_dataset_to_pycasas_events_file(file_address_to_read, file_address_to_save_dir):
    '''
    Convert a CASAS annotated dataset file into a pyCASAS events.csv file with
    the columns: date time, sensor, message, residents, activities, sensor
    type. The rows are sorted by time before saving.
    '''
    f = open(file_address_to_read, "r")
    list_of_sensors, number_of_allowed_samples, list_of_works = get_list_of_allowed_sensors_and_works_in_dataset(file_address_to_read)
    number_of_columns = 7  # 6 + 1 (a datetime object used to sort the data by time)
    features = [0] * number_of_columns
    all_features = np.zeros((number_of_allowed_samples, number_of_columns), dtype=object)
    counter = -1
    active_activities = []
    active_residents = []
    for line in f:
        cells = line.split()
        try:
            feature_column = list_of_sensors.index(cells[2])
        except ValueError:  # i.e. the sensor is not in the allowed list
            feature_column = -1
        if len(cells) > 4:
            if cells[5] == 'begin':
                if cells[4][0] == 'R':
                    person = cells[4].split('_')[0]
                    activity = cells[4][len(person) + 1:]  # the rest of the string is the activity
                    active_activities.append(activity)
                    active_residents.append(person)
                else:
                    active_activities.append(cells[4])
        if feature_column != -1:
            counter += 1
            date_details = cells[0].split('-')
            date = date_details[1] + '/' + date_details[2] + '/' + date_details[0]  # month/day/year
            features[0] = date + " " + cells[1] + " " + "-08:00"  # date time
            features[1] = cells[2]  # sensor
            features[2] = cells[3]  # message
            features[3] = ';'.join(sorted(set(active_residents)))  # residents
            features[4] = ';'.join(sorted(set(active_activities)))  # activities
            features[5] = get_sensor_type(cells[2])  # sensor type
            features[6] = convert_string_to_datetime(cells[0], cells[1])
        if len(cells) > 4 and cells[5] == 'end':
            if cells[4][0] == 'R':
                person = cells[4].split('_')[0]
                activity = cells[4][len(person) + 1:]  # the rest of the string is the activity
                active_activities.remove(activity)
                active_residents.remove(person)
            else:
                active_activities.remove(cells[4])
        if feature_column != -1:
            if counter < number_of_allowed_samples:
                all_features[counter] = features
            else:
                all_features = np.vstack([all_features, features])
    f.close()
    file_address_to_save = os.path.join(file_address_to_save_dir, 'data', 'events.csv')
    all_features = all_features[all_features[:, -1].argsort()]  # sort rows by the datetime column
    np.savetxt(file_address_to_save, np.delete(all_features, -1, 1), delimiter=',', fmt='%s')

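# A hedged usage sketch for the converter above. np.savetxt cannot create
# directories, so the '<save_dir>/data' folder must exist before the call;
# both paths below are hypothetical.
def _demo_convert_to_pycasas(read_path=r"E:\pgmpy\cairo\data.txt",
                             save_dir=r"E:\pgmpy\cairo\pycasas_site"):
    os.makedirs(os.path.join(save_dir, 'data'), exist_ok=True)
    convert_casas_dataset_to_pycasas_events_file(read_path, save_dir)
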
def get_list_of_durations(number_of_entire_rows, dataset_name):
    '''
    Column order in the input file: sensor events (one 'on' and one 'off'
    feature per sensor), Person, Activity, Date, Time.
    Returns, for each resident, the list of activity intervals as
    {"start_time", "end_time"} dictionaries. Works for every multi-resident
    dataset.
    '''
    address_to_read = r"E:\pgmpy\{}\sensor_data_each_row_one_features_is_one_on_and_off+time_ordered.csv".format(dataset_name)
    f = open(address_to_read, "r")
    all_features = None
    counter = -1
    first = True
    for line in f:
        cells = line.split(',')
        if first:
            number_of_columns = 3  # Person + Activity + datetime
            all_features = np.zeros((number_of_entire_rows, number_of_columns), dtype=object)
            first = False
        converted_cells = []
        converted_cells.append(int(cells[-4]))  # person
        converted_cells.append(int(cells[-3]))  # activity
        # datetime; the time column ends with '\n', which must be stripped
        converted_cells.append(convert_string_to_datetime(cells[-2], cells[-1].split('\n')[0]))
        counter += 1
        if counter < number_of_entire_rows:
            all_features[counter] = converted_cells
        else:
            all_features = np.vstack([all_features, converted_cells])
    f.close()
    # separate each person's rows (column -3 of all_features is the person ID)
    person_IDs = list(set(all_features[:, -3]))
    number_of_residents = len(person_IDs)
    person_data = np.zeros(number_of_residents, dtype=np.ndarray)
    durations = np.zeros(number_of_residents, dtype=np.ndarray)
    for i in range(number_of_residents):
        person_data[i] = all_features[np.where(all_features[:, -3] == person_IDs[i])]
    # split each person's stream into activity intervals delimited by the
    # first and last timestamps of each run of identical activity labels
    for per in range(number_of_residents):
        start_time = person_data[per][0][-1]
        activity = person_data[per][0][-2]
        durations[per] = []
        for i in range(1, len(person_data[per])):
            if person_data[per][i][-2] != activity:
                durations[per].append({"start_time": start_time,
                                       "end_time": person_data[per][i - 1][-1]})
                start_time = person_data[per][i][-1]
                activity = person_data[per][i][-2]
        # close the last open interval so the final activity is not dropped
        durations[per].append({"start_time": start_time,
                               "end_time": person_data[per][-1][-1]})
    return durations

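# The whole_common_time argument of get_percent_of_parallel_sensor_events is
# not constructed anywhere in this file. The helper below is a hypothetical
# sketch of one way to derive it from the output of get_list_of_durations for
# two residents: a linear sweep that intersects the two interval lists,
# assuming each list is sorted by start time.
def _intersect_duration_lists(durations_a, durations_b):
    common = []
    i, j = 0, 0
    while i < len(durations_a) and j < len(durations_b):
        start = max(durations_a[i]["start_time"], durations_b[j]["start_time"])
        end = min(durations_a[i]["end_time"], durations_b[j]["end_time"])
        if start < end:  # the two intervals overlap
            common.append({"start_time": start, "end_time": end})
        # advance past the interval that ends first
        if durations_a[i]["end_time"] < durations_b[j]["end_time"]:
            i += 1
        else:
            j += 1
    return common
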
def return_list_of_activity_durations(number_of_entire_rows, address_to_read):
    '''
    Return, for each resident, an array of (activity ID, duration in minutes)
    pairs computed from consecutive runs of the same activity.
    '''
    f = open(address_to_read, "r")
    counter = -1
    first = True
    for line in f:
        cells = line.split(',')
        if first:
            number_of_columns = 3  # Person_ID, Activity_ID, plus a datetime column
            all_features = np.zeros((number_of_entire_rows, number_of_columns), dtype=object)
            first = False
        converted_cells = []
        converted_cells.append(int(cells[-4]))  # Person_ID
        converted_cells.append(int(cells[-3]))  # Activity_ID
        # DateTime; the time field ends with '\n' that should be removed
        converted_cells.append(convert_string_to_datetime(cells[-2], cells[-1].split('\n')[0]))
        counter += 1
        if counter < number_of_entire_rows:
            all_features[counter] = converted_cells
        else:
            all_features = np.vstack([all_features, converted_cells])
    f.close()
    # separate each person's rows (column 0 of all_features is the person ID)
    person_IDs = list(set(all_features[:, 0]))
    number_of_residents = len(person_IDs)
    person_data = np.zeros(number_of_residents, dtype=np.ndarray)
    for i in range(number_of_residents):
        person_data[i] = all_features[np.where(all_features[:, 0] == person_IDs[i])]
    # collapse each person's consecutive rows of the same activity into one
    # (activity ID, duration in minutes) record
    person_durations = np.zeros(number_of_residents, dtype=np.ndarray)
    for each_person in range(number_of_residents):
        new_counter = 0
        person_data_number_of_rows, _ = person_data[each_person].shape
        # allocate one row per input row; the unused rows are trimmed below
        person_durations[each_person] = np.ndarray(
            shape=(person_data_number_of_rows, 2), dtype=int)  # only activity ID and duration are saved
        last_activity = person_data[each_person][0][1]
        last_start_datetime = person_data[each_person][0][2]
        # range() excludes its end value, so the loop stops at the last row
        for offset in range(1, len(person_data[each_person])):
            if person_data[each_person][offset][1] == last_activity:
                continue
            # the activity changed: close the previous activity's record
            person_durations[each_person][new_counter][0] = last_activity
            person_durations[each_person][new_counter][1] = (
                person_data[each_person][offset - 1][2] - last_start_datetime
            ).total_seconds() / 60  # duration in minutes
            last_start_datetime = person_data[each_person][offset][2]
            last_activity = person_data[each_person][offset][1]
            new_counter += 1
        # record the last (still open) activity; indexing with -1 also covers
        # the case of a person with a single row
        person_durations[each_person][new_counter][0] = last_activity
        person_durations[each_person][new_counter][1] = (
            person_data[each_person][-1][2] - last_start_datetime
        ).total_seconds() / 60
        # remove the unused preallocated rows
        person_durations[each_person] = np.delete(
            person_durations[each_person],
            range(new_counter + 1, person_data_number_of_rows), axis=0)
    for i in range(number_of_residents):
        print("resident number:", person_IDs[i], "\n---------------------------\n")
        print(person_durations[i][:, 1])
        print("average activity duration:", np.average(person_durations[i][:, 1]))
    return person_durations

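# A minimal usage sketch for return_list_of_activity_durations; the path and
# row count are hypothetical. Each element of the returned object array is one
# resident's integer array of (activity ID, duration in minutes) rows.
def _demo_activity_durations_per_resident():
    path = r"E:\pgmpy\cairo\sensor_data_each_row_one_features_is_one_on_and_off+time_ordered.csv"
    per_person = return_list_of_activity_durations(10000, path)
    for resident_index, table in enumerate(per_person):
        print("resident", resident_index, "-> records:", len(table))
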
def convert_data_to_each_row_one_feature_is_on(in_file, out_file, person_number, hasHeader):
    '''
    Convert the data so that each output row records exactly one sensor change
    (the corresponding on/off feature set to 1) and append the person number
    to every row.
    Known issue: the first activity is ignored if it does not change any
    sensor status.
    '''
    f = open(in_file, "r")
    # read the header line to get the sensor names
    seprators_pattern = ',| |\n'
    first_line = re.split(seprators_pattern, f.readline())
    # the last element of first_line is ''; do not count it:
    # -3 for Activity, timestamp, ''
    # +5 for Person + work + date + time + a datetime column used to order the data
    number_of_columns = (len(first_line) - 3) * 2 + 5
    number_of_sensors_plus_activity = len(first_line) - 2
    list_of_sensor_names = first_line[0:(number_of_sensors_plus_activity - 1)]
    features = [0] * number_of_columns
    features[-5] = person_number
    number_of_samples = get_number_of_lines_in_file(in_file, hasHeader)
    all_features = np.zeros((number_of_samples, number_of_columns), dtype=object)
    counter = -1
    previous_cells = re.split(seprators_pattern, f.readline())  # first sample line
    for line in f:
        # search for the first change in the sensor states
        cells = re.split(seprators_pattern, line)
        if cells[0:number_of_sensors_plus_activity] != previous_cells[0:number_of_sensors_plus_activity]:
            counter += 1
            # find the sensor whose status changed (only the first change is recorded)
            for i in range(number_of_sensors_plus_activity - 1):
                if cells[i] != previous_cells[i]:
                    if cells[i] == '1':
                        changed_index = i * 2      # 'on' column of sensor i
                    elif cells[i] == '0':
                        changed_index = i * 2 + 1  # 'off' column of sensor i
                    break
            features[changed_index] = 1
            features[-4] = openshs_activities[cells[-4]]  # activity
            features[-3] = cells[-3]  # date
            features[-2] = cells[-2]  # time
            features[-1] = convert_string_to_datetime(cells[-3], cells[-2])
            previous_cells = cells
            if counter < number_of_samples:
                all_features[counter] = features
            else:
                all_features = np.vstack([all_features, features])
            # reset changed_index to 0 for the next row
            features[changed_index] = 0
    f.close()
    # drop the unused preallocated rows
    all_features = np.delete(all_features, range(counter + 1, number_of_samples), axis=0)
    all_features = all_features[all_features[:, -1].argsort()]  # sort rows by the datetime column
    rows, cols = all_features.shape
    print("rows:", rows, "cols:", cols)
    np.savetxt(out_file, np.delete(all_features, -1, 1), delimiter=',', fmt='%s')

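# A hedged usage sketch for the OpenSHS converter above; both file paths are
# hypothetical. hasHeader tells get_number_of_lines_in_file whether to skip
# the header row when counting samples.
def _demo_convert_openshs():
    convert_data_to_each_row_one_feature_is_on(
        in_file=r"E:\pgmpy\openshs\dataset.csv",
        out_file=r"E:\pgmpy\openshs\sensor_data_each_row_one_features_is_one_on_and_off.csv",
        person_number=1,
        hasHeader=True)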