Esempio n. 1
0
class Labels(PreprocessData):
    def __init__(self, bus_stop_distance, traffic_light_distance, array_stops,
                 array_trfl):
        """Keep the labelling thresholds and the reference point arrays.

        Args:
            bus_stop_distance: threshold compared with the value returned by
                ``distance_in_meters`` when labelling bus stops.
            traffic_light_distance: same threshold for traffic lights.
            array_stops: iterable of bus-stop records; indices 1 and 2 are
                read as the coordinates of each stop.
            array_trfl: iterable of traffic-light records, read the same way.
        """
        super().__init__()
        # Reference points first, thresholds second; the helper last.
        self.array_stops, self.array_trfl = array_stops, array_trfl
        self.bst_dist, self.trfl_dist = bus_stop_distance, traffic_light_distance
        self.prepro = PreProcess()

    def add_bus_stop_label(self, data):
        """Append a ``'bus_stop'`` label to slow points that are near a bus stop.

        Intended to be run on one chunk of the data per worker process.

        Args:
            data: iterable of trajectories; each trajectory is an iterable of
                mutable point lists where ``item[0]``/``item[1]`` are the
                coordinates handed to ``distance_in_meters`` and ``item[4]``
                is the velocity.

        Returns:
            A list with one list per trajectory containing the very same
            point objects; matching points get ``'bus_stop'`` appended in
            place, all other points are left untouched.
        """
        chunck = []
        for items in tqdm(data):
            final_item = []
            for item in items:
                # The speed test does not depend on the stop: evaluate it once
                # so moving points skip every distance computation.
                if item[4] < 5:
                    position = [item[0], item[1]]
                    for stop in self.array_stops:
                        dist = self.prepro.distance_in_meters(
                            position, [stop[1], stop[2]])
                        if dist < self.bst_dist:
                            item.append('bus_stop')
                            # One label per point: stop at the first match.
                            break
                final_item.append(item)
            chunck.append(final_item)
        return chunck

    def add_traffic_light_label(self, data):
        """Label slow points near a traffic light, unless already a bus stop.

        Args:
            data: iterable of trajectories; each trajectory is an iterable of
                mutable point lists where ``item[0]``/``item[1]`` are the
                coordinates handed to ``distance_in_meters``, ``item[4]`` is
                the velocity and ``item[10]`` holds the current label.

        Returns:
            A list with one list per trajectory containing the same point
            objects; matching points have ``item[10]`` overwritten with
            ``'traffic_light'`` in place.
        """
        chunck = []
        for items in tqdm(data):
            final_item = []
            for item in items:
                # The speed test is independent of the traffic light, so it is
                # checked once instead of once per light.
                if item[4] < 5:
                    position = [item[0], item[1]]
                    for stop in self.array_trfl:
                        dist = self.prepro.distance_in_meters(
                            position, [stop[1], stop[2]])
                        if dist < self.trfl_dist:
                            # A bus-stop label takes precedence. Either way no
                            # later light can change the outcome, so stop here.
                            if item[10] != 'bus_stop':
                                item[10] = 'traffic_light'
                            break
                final_item.append(item)
            chunck.append(final_item)
        return chunck

    def add_other_stop_label(self, data):
        """Relabel slow ``'in_route'`` points as ``'other_stop'`` in place.

        Args:
            data: iterable of trajectories of mutable point lists; index 4 is
                compared against 5 and index 10 holds the label.
        """
        for trajectory in tqdm(data):
            for point in trajectory:
                if not point[4] < 5:
                    continue
                if point[10] == 'in_route':
                    point[10] = 'other_stop'

    def get_false_labels(self, data, label, min_dist):
        """Mark ``label`` points squeezed between two stop points with ``-1``.

        An interior point (never the first or last of a trajectory) has
        column 16 overwritten with ``-1`` when all of the following hold:

        * its column 16 equals ``label``;
        * column 16 of both neighbours is ``0.0`` or ``3.0``
          (presumably the encoded bus-stop / traffic-light classes -
          TODO confirm against the encoder used upstream);
        * it is closer than ``min_dist`` to at least one neighbour.

        Every relabelled point is reported on stdout.

        Args:
            data: iterable of trajectories (indexable sequences of mutable
                point lists; indices 0 and 1 are the coordinates).
            label: value of column 16 to look for.
            min_dist: distance threshold compared with ``distance_in_meters``.
        """
        stop_labels = (0.0, 3.0)
        for items in tqdm(data):
            # Only interior points have both a predecessor and a successor.
            for idx in range(1, len(items) - 1):
                previous, current, following = (items[idx - 1], items[idx],
                                                items[idx + 1])
                if current[16] != label:
                    continue
                if (previous[16] not in stop_labels
                        or following[16] not in stop_labels):
                    continue

                # Each distance is computed once and reused for the test and
                # for the report below.
                lat_lng_c = [current[0], current[1]]
                dist_before = self.prepro.distance_in_meters(
                    lat_lng_c, [previous[0], previous[1]])
                dist_after = self.prepro.distance_in_meters(
                    lat_lng_c, [following[0], following[1]])
                if dist_before < min_dist or dist_after < min_dist:
                    print(
                        f'before:{previous[16]}----current:{current[16]}----after:{following[16]}'
                    )
                    print(
                        f'before:{dist_before}----after:{dist_after}'
                    )
                    current[16] = -1
Esempio n. 2
0
class Sentences(PreprocessData):
    def __init__(self, list_of_features):
        """Remember which columns make up a point and create the helper.

        Args:
            list_of_features: column names selected from the data frame by
                ``get_element_by_element``.
        """
        super().__init__()
        self.prepro = PreProcess()
        self.features = list_of_features

    def _has_min_quantity_of_points(self,items):
        return len(items) > 10

    def is_window(self, delta_time):
        """Tell whether ``delta_time`` is strictly below the 5-unit gap limit."""
        max_gap = 5
        return max_gap > delta_time
    
    def delta_time(self, t1, t2)->float:
        """Return the absolute time between ``t1`` and ``t2`` in seconds.

        Args:
            t1: first instant, anything ``pd.to_datetime`` accepts as a scalar.
            t2: second instant, same kind of value.

        Returns:
            The elapsed time as a float, including whole days and fractions
            of a second. The argument order does not matter.
        """
        elapsed = abs(pd.to_datetime(t2) - pd.to_datetime(t1))
        # ``Timedelta.seconds`` is only the sub-day component (0..86399), so
        # two instants exactly one day apart would report 0 seconds;
        # ``total_seconds()`` covers the whole span.
        return elapsed.total_seconds()
    
    def get_element_by_element(self, _id, data):
        row = data[data['id']==_id]
        row = row[self.features].values[0]
        return row.tolist()
    
    def create_sentences(self, data)->list:
        """Split the rows of ``data`` into sentences (runs of related points).

        Rows are visited in index order. A row continues the current sentence
        while ``is_valid_point`` accepts it against the previous row;
        otherwise the current sentence is closed and a new one starts with
        that row. Sentences rejected by ``_has_min_quantity_of_points`` are
        dropped. The number of visited rows is printed at the end.

        Args:
            data: data frame with the columns ``matricula_id``, ``viaje``,
                ``instante``, ``rota`` and ``id`` plus ``self.features``.

        Returns:
            A list of sentences, each a list of per-point feature lists.
        """
        first_row = data.iloc[0]
        prev_matricula, prev_viaje = first_row.matricula_id, first_row.viaje
        prev_time, prev_rota = first_row.instante, first_row.rota

        sentences, current = [], []
        visited = 0

        for label in tqdm(data.index):
            continues = self.is_valid_point(
                data, prev_matricula, prev_viaje, prev_time, prev_rota, label)
            if not continues:
                # Close the running sentence and keep it only if long enough.
                if self._has_min_quantity_of_points(current):
                    sentences.append(current)
                current = []
            current.append(
                self.get_element_by_element(data.at[label, 'id'], data))

            # The row just handled becomes the reference for the next one.
            prev_matricula = data.at[label, 'matricula_id']
            prev_viaje = data.at[label, 'viaje']
            prev_time = data.at[label, 'instante']
            prev_rota = data.at[label, 'rota']
            visited += 1

        # Flush the sentence that was still open when the rows ran out.
        if self._has_min_quantity_of_points(current):
            sentences.append(current)

        print(visited)
        return sentences

    def is_valid_point(self, data, old_matricula, old_viaje, old_time, old_rota, idx):
        return (data.at[idx,'matricula_id'] == old_matricula) and (data.at[idx,'viaje'] == old_viaje) and (self.is_window(self.delta_time(old_time,data.at[idx,'instante']))) and (data.at[idx,'rota'] == old_rota)
    
    def label_encoder(self, data):
        """Replace the label at index 10 of every point by a float code.

        ``'bus_stop'`` -> 0.0, ``'in_route'`` -> 1.0, ``'other_stop'`` -> 2.0
        and any other value -> 3.0. Points are modified in place.
        """
        codes = {'bus_stop': 0.0, 'in_route': 1.0, 'other_stop': 2.0}
        fallback = 3.0
        for trajectory in tqdm(data):
            for point in trajectory:
                point[10] = codes.get(point[10], fallback)
    
    def bearing(self, point1, point2):
        """Return the initial bearing from ``point1`` to ``point2``.

        Args:
            point1: ``(latitude, longitude)`` in decimal degrees.
            point2: ``(latitude, longitude)`` in decimal degrees.

        Returns:
            The forward azimuth in degrees, normalised to ``[0, 360)``
            (0 = north, 90 = east).
        """
        lat1 = math.radians(point1[0])
        lat2 = math.radians(point2[0])
        # The longitude difference appears in both components; convert once.
        delta_lon = math.radians(point2[1] - point1[1])

        y = math.sin(delta_lon) * math.cos(lat2)
        x = (math.cos(lat1) * math.sin(lat2)
             - math.sin(lat1) * math.cos(lat2) * math.cos(delta_lon))

        # atan2 yields (-180, 180]; shift into the compass range.
        return (math.degrees(math.atan2(y, x)) + 360) % 360

    def acceleration(self, v1,v2,deltaT)->float:
        """Return the magnitude of the speed change divided by ``deltaT``.

        ``v1`` and ``v2`` must be expressed in m/s.
        """
        speed_change = np.abs(v1 - v2)
        return speed_change / deltaT
    
    def velocity(self, deltaT,deltaS)->float:
        """Return distance ``deltaS`` divided by elapsed time ``deltaT``."""
        speed = deltaS / deltaT
        return speed
    
    def delta_space(self, s1,s2)->float:
        return self.prepro.distance_in_meters(s1,s2)

    def get_frmt(self, date):
        """Pick the ``strptime`` format that matches the timestamp string.

        Strings longer than 19 characters are treated as carrying a
        fractional-seconds part.
        """
        if len(date) > 19:
            return '%Y-%m-%d %H:%M:%S.%f'
        return '%Y-%m-%d %H:%M:%S'

    def days_of_week(self, t1):
        f1 = self.get_frmt(t1)
        t1 = datetime.datetime.strptime(t1,f1)
        return float(t1.weekday())
    
    def hours_of_day(self, t1):
        f1 = self.get_frmt(t1)
        t1 = datetime.datetime.strptime(t1,f1)
        return float(t1.hour)
    
    def complete_trajectory(self, item, pad):
        """Truncate or extend ``item`` so that it holds exactly ``pad`` points.

        Args:
            item: sequence of points.
            pad: target length.

        Returns:
            * the first ``pad`` points when ``item`` is longer;
            * a new list made of ``item`` followed by copies of its last
              point when it is shorter (the caller's list is not modified);
            * ``item`` itself when the length already matches.

        Raises:
            IndexError: if ``item`` is empty and ``pad`` is positive, since
                there is no last point to repeat.
        """
        missing = pad - len(item)
        if missing < 0:
            return item[:pad]
        if missing > 0:
            # Build a fresh list: extending ``item`` directly would silently
            # grow the caller's trajectory as a side effect.
            return list(item) + [item[-1]] * missing
        return item
    
    def get_time_in_seconds(self, data):
        """Convert the timestamp string at index 2 of each point to seconds.

        Values that are not strings (for instance already converted ones) are
        left alone, so the method can be applied more than once.

        Args:
            data: iterable of trajectories of mutable point lists.

        Returns:
            The same ``data`` object, modified in place.
        """
        for items in tqdm(data):
            for item in items:
                raw = item[2]
                if isinstance(raw, str):
                    parsed = datetime.datetime.strptime(raw, self.get_frmt(raw))
                    # The parsed datetime is naive, so ``timestamp()``
                    # interprets it in the machine's local time zone.
                    item[2] = parsed.timestamp()
        return data

    def put_statistics_metrics(self, data, window=16):
        """Build windowed samples and summary statistics around each point.

        A point ``i`` is used when it has ``window`` points on each side and
        its column 14 is neither ``-1`` nor ``1.0``. Each trajectory must
        support NumPy-style 2-D indexing.

        Args:
            data: iterable of 2-D arrays (points x columns).
            window: number of points taken before and after the centre point.

        Returns:
            Seven parallel lists, in this order: the ``window`` points before
            (selected columns), the ``window`` points after, the centre point
            (its own column selection), the statistics of the points before,
            the statistics of the points after, column 14 of the centre
            point, and column 14 over the whole ``2 * window + 1`` span.
            Statistics are mean, std, min, max and median of columns
            4, 5, 6, 7 and 11, concatenated in that order.
        """
        stat_cols = [4, 5, 6, 7, 11]
        window_cols = [0, 1, 2, 4, 5, 6, 7, 9, 10, 11]
        centre_cols = [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 3, 12, 13, 15]
        reducers = (np.mean, np.std, np.min, np.max, np.median)

        x_before, x_after, x_centre = [], [], []
        stats_before, stats_after = [], []
        y_centre, y_span = [], []

        for trajectory in tqdm(data):
            last_index = len(trajectory) - 1
            for i in range(len(trajectory)):
                # Skip points without a full window on both sides.
                if i < window or i + window > last_index:
                    continue
                target = trajectory[i][14]
                if target == -1 or target == 1.0:
                    continue

                past = trajectory[i - window:i]
                future = trajectory[i + 1:i + window + 1]
                past_stats = past[:, stat_cols]
                future_stats = future[:, stat_cols]

                x_before.append(past[:, window_cols])
                x_after.append(future[:, window_cols])
                x_centre.append(trajectory[i, centre_cols])
                stats_before.append(np.concatenate(
                    [reduce(past_stats, axis=0) for reduce in reducers]).tolist())
                stats_after.append(np.concatenate(
                    [reduce(future_stats, axis=0) for reduce in reducers]).tolist())

                y_centre.append(target)
                y_span.append(np.array(
                    trajectory[i - window:i, 14].tolist()
                    + [trajectory[i][14]]
                    + trajectory[i + 1:i + window + 1, 14].tolist()))

        return x_before, x_after, x_centre, stats_before, stats_after, y_centre, y_span
    
    def put_statistics_metrics_with_padding(self, data, window=16):
        """Build windowed samples and summary statistics around each point.

        A point ``i`` is used when it has ``window`` points on each side and
        its column 14 is not ``-1``. Each trajectory must support NumPy-style
        2-D indexing. Column 16 is the flag that indicates noise.

        Args:
            data: iterable of 2-D arrays (points x columns).
            window: number of points taken before and after the centre point.

        Returns:
            Seven parallel lists, in this order: the ``window`` points before
            (selected columns), the ``window`` points after, the centre point
            (its own, wider column selection), the statistics of the points
            before, the statistics of the points after, column 14 of the
            centre point, and column 14 over the whole ``2 * window + 1``
            span. Statistics are mean, std, min, max and median of columns
            4, 5, 6, 7 and 11, concatenated in that order.
        """
        stat_cols = [4, 5, 6, 7, 11]
        window_cols = [0, 1, 2, 4, 5, 6, 7, 9, 10, 11]
        centre_cols = [0, 1, 2, 4, 5, 6, 7,
                       8, 9, 10, 11, 3, 12, 13, 15, 16, 17]
        reducers = (np.mean, np.std, np.min, np.max, np.median)

        x_before, x_after, x_centre = [], [], []
        stats_before, stats_after = [], []
        y_centre, y_span = [], []

        for trajectory in tqdm(data):
            last_index = len(trajectory) - 1
            for i in range(len(trajectory)):
                # Skip points without a full window on both sides.
                if i < window or i + window > last_index:
                    continue
                target = trajectory[i][14]
                if target == -1:
                    continue

                past = trajectory[i - window:i]
                future = trajectory[i + 1:i + window + 1]
                past_stats = past[:, stat_cols]
                future_stats = future[:, stat_cols]

                x_before.append(past[:, window_cols])
                x_after.append(future[:, window_cols])
                x_centre.append(trajectory[i, centre_cols])
                stats_before.append(np.concatenate(
                    [reduce(past_stats, axis=0) for reduce in reducers]).tolist())
                stats_after.append(np.concatenate(
                    [reduce(future_stats, axis=0) for reduce in reducers]).tolist())

                y_centre.append(target)
                y_span.append(np.array(
                    trajectory[i - window:i, 14].tolist()
                    + [trajectory[i][14]]
                    + trajectory[i + 1:i + window + 1, 14].tolist()))

        return x_before, x_after, x_centre, stats_before, stats_after, y_centre, y_span
    
    def add_features(self, data):
        """Derive motion and calendar features for every point, in place.

        For each point that still has at most 11 elements, index 4 (speed) is
        rewritten and six values are inserted at indices 5..10: acceleration,
        distance to the previous point, absolute bearing change, elapsed
        time, day of week and hour of day. The first point of a trajectory
        gets zeros for the four motion values.

        Args:
            data: iterable of trajectories of mutable point lists; indices
                0-1 are the coordinates, 2 the timestamp string, 4 the speed
                and the last element the label.
        """
        for items in tqdm(data):
            for idx in range(len(items)):
                # Rows longer than 11 elements are skipped - presumably they
                # were already expanded by an earlier call; TODO confirm.
                if len(items[idx]) <=11:
                    if idx == 0:
                        # No predecessor: only rescale the speed (presumably
                        # km/h -> m/s) and zero the motion features.
                        items[idx][4] = items[idx][4]/3.6
                        items[idx].insert(5,0.0)
                        items[idx].insert(6,0.0)
                        items[idx].insert(7,0.0)
                        items[idx].insert(8,0.0)
                        items[idx].insert(9,self.days_of_week(items[idx][2]))
                        items[idx].insert(10,self.hours_of_day(items[idx][2]))
                    else:
                        # The previous row was already processed, so its
                        # index 4 holds the recomputed speed.
                        v1 = items[idx-1][4]
                        # NOTE(review): v2 is never used below.
                        v2 = items[idx][4]/3.6
                        p1 = items[idx-1][:2]
                        p2 = items[idx][:2]
                        t1 = items[idx-1][2]
                        t2 = items[idx][2]
                        time = self.delta_time(t1,t2)
                        space = self.delta_space(p1,p2)
                        # Avoid dividing by zero for identical timestamps.
                        if time == 0:
                            time = 0.00000001
                        # Recompute speed and acceleration from the positions
                        # because of the spatial noise.
                        vel = self.velocity(time,space)
                        acc = self.acceleration(v1,vel,time)
        #                 acc = sentences.acceleration(v1,v2,time)
                        bear = self.bearing(p1,p2)
                        # Store vel instead of v2 because of the spatial noise.
                        items[idx][4] = vel
                        items[idx].insert(5, acc)
                        items[idx].insert(6, space)
                        # NOTE(review): index 7 of the previous row is the
                        # value inserted at 7 for that row (a bearing change,
                        # not a bearing) - confirm this is intended.
                        items[idx].insert(7, np.abs(bear-items[idx-1][7]))
                        items[idx].insert(8, time)
                        items[idx].insert(9,self.days_of_week(t2))
                        items[idx].insert(10,self.hours_of_day(t2))
                        # Reconcile the label (last element) with the new
                        # speed: above 5 after the x3.6 rescale forces 1.0,
                        # below 5 turns a 1.0 label into 2.0.
                        if items[idx][4]*3.6 > 5 and items[idx][-1] != 1.0:
                            items[idx][-1] = 1.0
                        if items[idx][4]*3.6 < 5 and items[idx][-1] == 1.0:
                            items[idx][-1] = 2.0

    def select_features(self,data):
        """Keep the relevant columns of every point and tag it with two ids.

        Each output point holds columns 0-11 of the input point, followed by
        its columns 14, 15 and 16 (columns 12 and 13 are dropped), then a
        running point id that is unique across all trajectories, and finally
        the index of the trajectory. Both ids are useful to rebuild the
        trajectories later. Input points are not modified.

        Args:
            data: iterable of trajectories of point lists.

        Returns:
            A new list of trajectories made of new point lists.
        """
        final_list = []
        point_id = 0
        for trajectory_id, items in tqdm(enumerate(data)):
            rebuilt = []
            for item in items:
                row = list(item[:12])
                # Kept columns first, then the point id and the trajectory id.
                row += [item[14], item[15], item[16], point_id, trajectory_id]
                point_id += 1
                rebuilt.append(row)
            final_list.append(rebuilt)
        return final_list
    
    def add_id_noise(self, data, data_with_noise):
        """Append a noise flag to every point of every trajectory, in place.

        Points of a trajectory whose position in ``data`` is contained in
        ``data_with_noise`` get ``1`` appended, all others get ``0``.

        Args:
            data: iterable of trajectories of mutable point lists.
            data_with_noise: container of trajectory indices, e.g.
                ``set(np.load('models/id_point_trajectory_without_noise_dublin_clean.npy'))``.
        """
        for position, items in tqdm(enumerate(data)):
            flag = 1 if position in data_with_noise else 0
            for item in items:
                item.append(flag)
                

    def padding(self,pad,data):
        """Pad every trajectory with ``pad`` rows at both ends.

        The leading rows are all-zero rows shaped like the first point; the
        trailing rows repeat the last point.

        Ex: padding(16,np.array(final_list_with_time))

        Args:
            pad: number of rows added before and after each trajectory.
            data: iterable of trajectories whose points expose ``tolist()``
                (e.g. NumPy arrays).

        Returns:
            A list of trajectories as nested Python lists. Every row is an
            independent list object.

        Raises:
            IndexError: if a trajectory is empty and ``pad`` is positive.
        """
        final_list = []
        for items in data:
            # Build each zero row separately: ``[row] * pad`` would repeat one
            # list object, so changing a padded row would change all of them.
            item_list = [np.zeros_like(items[0]).tolist() for _ in range(pad)]
            item_list.extend(item.tolist() for item in items)
            item_list.extend(items[-1].tolist() for _ in range(pad))
            final_list.append(item_list)
        return final_list
        return final_list