def get_percent_of_parallel_sensor_events(number_of_entire_rows, dataset_name, whole_common_time):
    '''
    Count the fraction of sensor events whose timestamp falls inside one of
    the common (parallel-activity) time intervals.

    Parameters
    ----------
    number_of_entire_rows : int
        Total number of event rows in the CSV (denominator of the ratio).
    dataset_name : str
        Dataset directory name under E:\\pgmpy.
    whole_common_time : list of dict
        Time-ordered intervals, each {"start_time": datetime, "end_time": datetime}.

    Returns
    -------
    float
        Events inside the intervals divided by number_of_entire_rows.
    '''
    # NOTE(review): original author's comment (translated from Persian):
    # "Fix later -- the data passed to this function is wrong."
    address_to_read = r"E:\pgmpy\{}\sensor_data_each_row_one_features_is_one_on_and_off+time_ordered.csv".format(dataset_name)
    counter = 0
    print(whole_common_time)
    durations_index = 0

    with open(address_to_read, "r") as f:  # close the file even on error
        for line in f:
            cells = line.split(',')

            # last two cells are date and time; strip the trailing newline from time
            date_time = convert_string_to_datetime(cells[-2], cells[-1].split('\n')[0])
            if date_time < whole_common_time[durations_index]["start_time"]:
                continue

            if date_time <= whole_common_time[durations_index]["end_time"]:
                counter += 1

            if date_time > whole_common_time[durations_index]["end_time"]:
                durations_index += 1
                # Guard against running past the last interval; the original
                # code raised IndexError here once all intervals were consumed.
                if durations_index >= len(whole_common_time):
                    break
                if whole_common_time[durations_index]["start_time"] <= date_time <= whole_common_time[durations_index]["end_time"]:
                    counter += 1

    return counter / number_of_entire_rows
def get_list_of_duration_of_activities(file_address):
    '''
    Compute the duration in seconds of every completed activity in the file.

    Lines with more than 4 whitespace-separated cells carry an activity
    marker in cell 4 and 'begin'/'end' in cell 5; a 'begin' is matched with
    the earliest unmatched 'end' of the same (person, work) pair.

    Parameters
    ----------
    file_address : str
        Path of the dataset file to scan.

    Returns
    -------
    list of float
        One duration (seconds) per completed begin/end pair, in end order.
    '''
    _, _, list_of_works = get_list_of_allowed_sensors_and_works_in_dataset(
        file_address)

    # Each entry: [person_id, work_id, begin_date, begin_time]
    begun_activities = []
    list_of_durations = []

    with open(file_address, "r") as f:  # close the file even on error
        for line in f:
            cells = line.split()
            if len(cells) <= 4:
                continue  # no activity annotation on this line

            person_number, work_string = get_person_and_work(cells[4])
            work_number = list_of_works.index(work_string)

            if cells[5] == 'begin':
                begun_activities.append(
                    [person_number, work_number, cells[0], cells[1]])
            elif cells[5] == 'end':
                # Earliest unmatched 'begin' for the same (person, work).
                # IndexError on a lone 'end' is preserved from the original.
                indices = [
                    i for i, (first, second, *_) in enumerate(begun_activities)
                    if (first, second) == (person_number, work_number)
                ]
                index = indices[0]
                begin_datetime = convert_string_to_datetime(
                    begun_activities[index][2], begun_activities[index][3])
                end_datetime = convert_string_to_datetime(cells[0], cells[1])
                list_of_durations.append(
                    (end_datetime - begin_datetime).total_seconds())
                del begun_activities[index]

    return list_of_durations
def convert_casas_dataset_to_pycasas_events_file(file_address_to_read, file_address_to_save_dir):
    """
    Convert a CASAS-format dataset file into a pyCASAS ``events.csv`` file.

    Output columns per event: datetime string (month/day/year time -08:00),
    sensor name, message, active residents, active activities, sensor type.
    Rows are sorted by timestamp before saving.

    Parameters
    ----------
    file_address_to_read : str
        Path of the CASAS dataset file.
    file_address_to_save_dir : str
        Directory under which ``data/events.csv`` is written.
    """
    list_of_sensors, number_of_allowed_samples, list_of_works = \
        get_list_of_allowed_sensors_and_works_in_dataset(file_address_to_read)

    # 6 output columns + 1 datetime object used only to sort rows by time
    number_of_columns = 7
    features = [0] * number_of_columns
    all_features = np.zeros((number_of_allowed_samples, number_of_columns),
                            dtype=object)

    counter = -1
    active_activities = []  # activities currently in progress
    active_residents = []   # residents currently performing an activity

    with open(file_address_to_read, "r") as f:  # close the file even on error
        for line in f:
            cells = line.split()

            try:
                feature_column = list_of_sensors.index(cells[2])
            except (ValueError, IndexError):
                # sensor not in the allowed list, or a short/blank line
                feature_column = -1

            # Register 'begin' markers BEFORE emitting the event row so the
            # starting activity is included in this row's activity set.
            if len(cells) > 4 and cells[5] == 'begin':
                if cells[4][0] == 'R':
                    person = cells[4].split('_')[0]
                    # remainder of the string is the activity name
                    activity = cells[4][len(person) + 1:]
                    active_activities.append(activity)
                    active_residents.append(person)
                else:
                    active_activities.append(cells[4])

            if feature_column != -1:
                counter += 1
                date_details = cells[0].split('-')
                # reorder the ISO date to month/day/year as pyCASAS expects
                date = date_details[1] + '/' + date_details[2] + '/' + date_details[0]
                features[0] = date + " " + cells[1] + " " + "-08:00"  # date time, fixed UTC offset
                features[1] = cells[2]  # sensor
                features[2] = cells[3]  # message
                features[3] = ';'.join(sorted(set(active_residents)))   # residents
                features[4] = ';'.join(sorted(set(active_activities)))  # activities
                features[5] = get_sensor_type(cells[2])                 # sensor type
                features[6] = convert_string_to_datetime(cells[0], cells[1])  # sort key

            # Deregister 'end' markers AFTER emitting, so the ending activity
            # is still listed on the row that closes it.
            if len(cells) > 4 and cells[5] == 'end':
                if cells[4][0] == 'R':
                    person = cells[4].split('_')[0]
                    activity = cells[4][len(person) + 1:]
                    # NOTE(review): raises ValueError on an 'end' without a
                    # matching 'begin' -- same as the original behavior.
                    active_activities.remove(activity)
                    active_residents.remove(person)
                else:
                    active_activities.remove(cells[4])

            if feature_column != -1:
                if counter < number_of_allowed_samples:
                    all_features[counter] = features
                else:
                    # more events than pre-allocated rows: grow the array
                    all_features = np.vstack([all_features, features])

    file_address_to_save = os.path.join(file_address_to_save_dir, 'data', 'events.csv')
    # sort all rows by the trailing datetime column, then drop it when saving
    all_features = all_features[all_features[:, -1].argsort()]
    np.savetxt(file_address_to_save, np.delete(all_features, -1, 1),
               delimiter=',', fmt='%s')
def get_list_of_durations(number_of_entire_rows, dataset_name):
    '''
    Build, per resident, the list of activity time intervals from the
    time-ordered sensor-event CSV of ``dataset_name``.

    The CSV's last four columns are: person ID, activity ID, date, time.
    This function works for every multi-resident dataset.

    Parameters
    ----------
    number_of_entire_rows : int
        Expected number of rows in the file (used to pre-allocate storage).
    dataset_name : str
        Dataset directory name under E:\\pgmpy.

    Returns
    -------
    numpy.ndarray of list
        durations[p] is a list of {"start_time": datetime,
        "end_time": datetime} dicts, one per contiguous run of the same
        activity for person p.
    '''
    address_to_read = r"E:\pgmpy\{}\sensor_data_each_row_one_features_is_one_on_and_off+time_ordered.csv".format(dataset_name)

    number_of_columns = 3  # person ID + activity ID + datetime
    all_features = np.zeros((number_of_entire_rows, number_of_columns),
                            dtype=object)
    counter = -1

    with open(address_to_read, "r") as f:  # close the file even on error
        for line in f:
            cells = line.split(',')
            converted_cells = [
                int(cells[-4]),  # person ID
                int(cells[-3]),  # activity ID
                # the time cell ends with '\n' that must be stripped
                convert_string_to_datetime(cells[-2], cells[-1].split('\n')[0]),
            ]
            counter += 1
            if counter < number_of_entire_rows:
                all_features[counter] = converted_cells
            else:
                # more rows than expected: grow the array
                all_features = np.vstack([all_features, converted_cells])

    # separate each person's rows; column 0 (== -3 of 3) is the person ID
    person_IDs = list(set(all_features[:, -3]))
    number_of_residents = len(person_IDs)
    person_data = np.zeros(number_of_residents, dtype=np.ndarray)
    durations = np.zeros(number_of_residents, dtype=np.ndarray)
    for i in range(number_of_residents):
        person_data[i] = all_features[np.where(all_features[:, -3] == person_IDs[i])]

    # split each person's rows into contiguous same-activity intervals,
    # using the row datetimes (-1) and activity IDs (-2)
    for per in range(number_of_residents):
        start_time = person_data[per][0][-1]
        activity = person_data[per][0][-2]
        durations[per] = []
        for i in range(1, len(person_data[per])):
            if person_data[per][i][-2] != activity:
                durations[per].append({"start_time": start_time,
                                       "end_time": person_data[per][i - 1][-1]})
                start_time = person_data[per][i][-1]
                activity = person_data[per][i][-2]
        # The original code dropped the final activity segment (a segment was
        # only recorded when the activity changed); record the last run too.
        durations[per].append({"start_time": start_time,
                               "end_time": person_data[per][-1][-1]})

    return durations
# Esempio n. 5
# 0
def return_list_of_activity_durations(number_of_entire_rows, address_to_read):
    '''
    Read the time-ordered event CSV and return, per resident, an array of
    (activity ID, duration in minutes) pairs, one per contiguous run of the
    same activity.  Also prints a per-resident duration summary.

    Parameters
    ----------
    number_of_entire_rows : int
        Expected number of rows in the file (used to pre-allocate storage).
    address_to_read : str
        Path of the CSV whose last four columns are person ID, activity ID,
        date, time.

    Returns
    -------
    numpy.ndarray of numpy.ndarray
        person_durations[p] is a (k, 2) int array: column 0 the activity ID,
        column 1 its duration in whole minutes (truncated to int).
    '''
    number_of_columns = 3  # person ID + activity ID + datetime
    all_features = np.zeros((number_of_entire_rows, number_of_columns),
                            dtype=object)
    counter = -1

    with open(address_to_read, "r") as f:  # close the file even on error
        for line in f:
            cells = line.split(',')
            converted_cells = [
                int(cells[-4]),  # person ID
                int(cells[-3]),  # activity ID
                # the time cell ends with '\n' that must be stripped
                convert_string_to_datetime(cells[-2], cells[-1].split('\n')[0]),
            ]
            counter += 1
            if counter < number_of_entire_rows:
                all_features[counter] = converted_cells
            else:
                # more rows than expected: grow the array
                all_features = np.vstack([all_features, converted_cells])

    # separate each person's rows (column 0 is the person ID)
    person_IDs = list(set(all_features[:, 0]))
    number_of_residents = len(person_IDs)
    person_data = np.zeros(number_of_residents, dtype=np.ndarray)
    for i in range(number_of_residents):
        person_data[i] = all_features[np.where(all_features[:, 0] == person_IDs[i])]

    # accumulate (activity, duration) pairs for each person
    person_durations = np.zeros(number_of_residents, dtype=np.ndarray)

    for each_person in range(number_of_residents):
        new_counter = 0
        person_data_number_of_rows, _ = person_data[each_person].shape
        # over-allocate to the person's full row count; trimmed below
        person_durations[each_person] = np.ndarray(
            shape=(person_data_number_of_rows, 2), dtype=int)

        last_activity = person_data[each_person][0][1]
        last_start_datetime = person_data[each_person][0][2]

        for offset in range(1, len(person_data[each_person])):
            if person_data[each_person][offset][1] == last_activity:
                continue
            # activity changed: close out the previous run (minutes)
            person_durations[each_person][new_counter][0] = last_activity
            person_durations[each_person][new_counter][1] = \
                (person_data[each_person][offset - 1][2] - last_start_datetime).total_seconds() / 60
            last_start_datetime = person_data[each_person][offset][2]
            last_activity = person_data[each_person][offset][1]
            new_counter += 1

        # Close out the final run.  Indexing the last row with -1 (instead of
        # the loop variable, as the original did) also fixes a NameError when
        # a person has only one row and the loop above never runs.
        person_durations[each_person][new_counter][0] = last_activity
        person_durations[each_person][new_counter][1] = \
            (person_data[each_person][-1][2] - last_start_datetime).total_seconds() / 60

        # trim the unused pre-allocated rows
        person_durations[each_person] = np.delete(
            person_durations[each_person],
            range(new_counter + 1, person_data_number_of_rows), axis=0)

    for i in range(number_of_residents):
        print("resident number:", person_IDs[i], "\n---------------------------\n")
        print(person_durations[i][:, 1])
        print("average activity duration:", np.average(person_durations[i][:, 1]))

    return person_durations
def convert_data_to_each_row_one_feature_is_on(in_file, out_file,
                                               person_number, hasHeader):
    '''
    Convert OpenSHS-style sensor data so that each output row marks exactly
    one sensor transition: every sensor owns two feature columns (one for
    switching ON, one for switching OFF) and exactly one of them is 1 per
    row.  The person number is appended to every row as well.

    Output row layout: [sensor on/off features..., person, activity, date,
    time, datetime], where the trailing datetime object is used only to sort
    the rows and is dropped before saving to ``out_file``.

    NOTE (original author, translated): known mistake -- this code ignores
    the first activity if that activity does not change any sensor status.
    '''
    f = open(in_file, "r")
    # read the first line to get the sensor names
    seprators_pattern = ',| |\n'
    first_line = re.split(
        seprators_pattern,
        f.readline())  # the last element should be ''; do not count it

    # -3 for Activity, timestamp, '' trailing empty split element
    # +5 for Person + work + 1 date + 1 time + 1 datetime for ordering data
    number_of_columns = (len(first_line) - 3) * 2 + 5
    number_of_sensors_plus_activity = len(first_line) - 2
    # NOTE(review): list_of_sensor_names is computed but never used below
    list_of_sensor_names = first_line[0:(number_of_sensors_plus_activity - 1)]
    features = [0] * number_of_columns
    features[-5] = person_number  # person column is constant for the whole file

    number_of_samples = get_number_of_lines_in_file(in_file, hasHeader)
    all_features = np.zeros((number_of_samples, number_of_columns),
                            dtype=object)

    counter = -1
    previous_cells = re.split(seprators_pattern,
                              f.readline())  # first line of samples

    for line in f:
        # search for the first change in sensor states
        cells = re.split(seprators_pattern, line)
        if cells[0:number_of_sensors_plus_activity] != previous_cells[
                0:number_of_sensors_plus_activity]:
            counter += 1
            # find the sensor whose status changed; each sensor owns two
            # feature columns: 2*i marks ON, 2*i+1 marks OFF
            for i in range(number_of_sensors_plus_activity - 1):
                if cells[i] != previous_cells[i]:
                    if cells[i] == '1':
                        changed_index = i * 2
                    elif cells[i] == '0':
                        changed_index = i * 2 + 1
                    break
            # NOTE(review): if only the activity column changed (no sensor in
            # the scanned range differs), changed_index keeps its value from a
            # previous iteration -- or is unbound on the very first change.
            # Confirm such rows cannot occur in the input.

            features[changed_index] = 1
            features[-4] = openshs_activities[cells[-4]]  # activity -> numeric ID
            features[-3] = cells[-3]  # date
            features[-2] = cells[-2]  # time
            features[-1] = convert_string_to_datetime(cells[-3], cells[-2])  # sort key

            previous_cells = cells

            if counter < number_of_samples:
                all_features[counter] = features
            else:
                # more transitions than pre-counted lines: grow the array
                all_features = np.vstack([all_features, features])

            # reset the one-hot transition flag for the next row
            features[changed_index] = 0

    # drop the unused pre-allocated tail rows
    all_features = np.delete(all_features,
                             range(counter + 1, number_of_samples),
                             axis=0)
    all_features = all_features[all_features[:, -1].argsort(
    )]  # sort all_features based on datetime column

    rows, cols = all_features.shape
    print("rows:", rows, "cols:", cols)

    # drop the datetime sort column before saving
    np.savetxt(out_file,
               np.delete(all_features, -1, 1),
               delimiter=',',
               fmt='%s')