Exemple #1
0
    def segmentation(self, folder_dataset, folder_segments, min_points = 10):
        """ Splits the trajectories of every labelled user into
        segments and saves each segment to the chosen folder.

        Every "labels.txt" found by ``get_files`` under <folder_dataset>
        identifies one user: the folder holding that file is taken as
        the user's folder and its base name as the user's name.

        Parameters
        ----------
            folder_dataset : str
                absolute path where the dataset is

            folder_segments : str
                absolute path where to save the segments

            min_points : int
                forwarded to ``_get_segments``; segments with less than
                <min_points> points are expected to be ignored there

        Returns
        -------
            no value
        """

        # only the users that carry transportation mode information
        # (i.e., a "labels.txt") are processed
        labelled_users = get_files(folder_dataset, "labels.txt", True)
        total_users    = len(labelled_users)

        for position, label_path in enumerate(labelled_users, start=1):

            user_folder = dirname(label_path)

            print("{:02d} of {} users -- processing user {}".format(position, total_users, user_folder))

            # label information of the current user
            df_label  = self._read_label(label_path)
            user_name = basename(user_folder)

            # one ".plt" file per recorded trajectory
            for plt_path in get_files(user_folder, ".plt", True):
                df_user  = self._read_trajectory(plt_path)
                segments = self._get_segments(df_user, df_label, min_points)

                # each item is a (transportation mode, segment) pair
                for transport, trajectory in segments:
                    self._save_segments(transport, trajectory, folder_segments, user_name)
    def _get_data(self, transportation):
        """ Reads the motion files and organizes them in a
        single dataset: X for features and y for labels.

        For every transportation mode, the files matching
        "<transport>*.csv" under ``self.folder_features`` are handed to
        ``MotionDataset.build_dataset`` together with
        ``self.motion_features``; at most ``self.n_samples`` rows of the
        result are kept per mode.

        Parameters
        ----------
            transportation : list of str
                list of transportation mode names used for classification

        Returns
        -------
            X : pandas dataframe
                dataframe of features, rows of all modes stacked in the
                order given by <transportation>

            y : pandas dataframe
                class labels, one row per row of X (the mode name)
        """

        motion = MotionDataset()

        # Collect the per-mode pieces and concatenate once at the end:
        # growing a DataFrame with pd.concat inside the loop re-copies
        # every previously accumulated row on each iteration.
        feature_frames = []
        label_frames = []

        for transport in transportation:

            file_name = transport + "*" + ".csv"
            path_transport = get_files(self.folder_features, file_name, True)

            # build_dataset is expected to return a pandas dataframe
            feature_df = motion.build_dataset(self.motion_features,
                                              path_transport)

            # keep at most n_samples rows of this mode
            feature_df = feature_df[:self.n_samples]
            feature_frames.append(feature_df)

            # labels: the mode name repeated once per kept row
            label_frames.append(pd.DataFrame([transport] * len(feature_df)))

        if feature_frames:
            X = pd.concat(feature_frames, axis=0, ignore_index=True)
            y = pd.concat(label_frames, axis=0, ignore_index=True)
        else:
            # no transportation mode requested: pd.concat rejects an
            # empty list, so return empty frames explicitly
            X = pd.DataFrame()
            y = pd.DataFrame()

        print("#### Motion features size: {}".format(len(X.columns)))

        print("size", len(X), len(y))

        return X, y
    def get_features(self, transportation, folder_segments, folder_features,
                     motion_features):
        """ Receives the data about the segments,
        organizes them in a list and extracts the features from them,
        saving them to the chosen folder.

        Parameters
        ----------
            transportation : list of str
                transportation modes from which we want to extract features

            folder_segments : str
                absolute path where the segments to extract features are

            folder_features : str
                the folder where to save the features

            motion_features : list of str
                forwarded unchanged to
                ``GeoLifeFeaturesExtraction.get_features`` for every
                segment (presumably the names of the motion features
                to compute -- confirm against that class)

        Returns
        -------
            no value
        """

        ## Get the path to the transportation mode files (all of them)
        # extend() appends in place; rebuilding the list from a chain on
        # every iteration would re-copy everything gathered so far
        segment_files = []
        for transport in transportation:
            query = transport + "*.csv"
            segment_files.extend(get_files(folder_segments, query, True))

        total = len(segment_files)
        print("Processing {} segments...".format(total))

        for position, segment in enumerate(segment_files, start=1):
            print("{} out of {} - {}".format(position, total, segment))

            df_features = GeoLifeFeaturesExtraction.get_features(
                segment, motion_features)

            ## save data
            self._save_features(df_features, folder_features, segment)
Exemple #4
0
 def _save_segments(self, transport_name, segment, folder_segments, user_name):
     """ Writes one segment extracted from a trajectory to a CSV file.

     The file goes to <folder_segments>/<user_name>/<transport_name>/
     and is named "<transport_name>_<k>.csv", where k (three digits,
     zero padded) is one more than the number of entries ``get_files``
     reports for <transport_name> inside the user's folder.

     Parameters
     ----------
         transport_name : str
             the transportation used in the trajectory

         segment : pandas dataframe
             the segment to save

         folder_segments : str
             absolute path of the root folder where segments are saved

         user_name : str
             the user's name

     Returns
     -------
         no value
     """

     user_folder = join(folder_segments, user_name)

     ## running number of the new file
     # NOTE(review): the lookup runs on the user's folder with the last
     # argument False, while files are written one level below it --
     # confirm get_files counts what is intended here
     next_number = len(get_files(user_folder, transport_name, False)) + 1

     ## destination folder (the trailing "" keeps a final separator)
     target_folder = join(user_folder, transport_name, "")
     create_folder(target_folder)

     ## destination file
     target_file = join(target_folder,
                        "{}_{:03d}.csv".format(transport_name, next_number))

     segment.to_csv(target_file, sep = ",", header = True, index = None)
Exemple #5
0
    def _get_data(self, transportation, parameter):
        """ Reads the OP Transformation files and organizes them in a
        single dataset: X for features and y for labels.

        For every transportation mode the columns ``self.op_features``
        of each "op_<transport>_<motion feature>_D<D>_t<tau>.csv" file
        are placed side by side, rows containing NaN are dropped, the
        motion features built by ``MotionDataset.build_dataset`` are
        appended as further columns and at most ``self.n_samples`` rows
        are kept.

        Parameters
        ----------
            transportation : list of str
                list of transportation mode names used for classification

            parameter : list of int
                the pair of OP parameters: D and tau

        Returns
        -------
            X : pandas dataframe
                dataframe of features (columns are renumbered, not named)

            y : pandas dataframe
                class labels, one row per row of X (the mode name)
        """

        D, tau = parameter
        op_values = "_D" + str(D) + "_t" + str(tau) + ".csv"
        features_name = [m + op_values for m in self.motion_features]
        # ex: 'distance_D3_t1.csv'

        # Collect the per-mode pieces and concatenate once at the end:
        # growing X and y with pd.concat inside the loop re-copies every
        # previously accumulated row on each iteration.
        feature_frames = []
        label_frames = []

        print("### OP features: {}".format(self.op_features))

        for transport in transportation:

            # path to op transformation files
            # ex: query    = 'op_bus_distance_D3_t1.csv'
            # ex: op_files = 'db/GeoLife/op_features/op_bus_distance_D3_t1.csv'
            file_prefix = "op_" + transport + "_"
            query = [file_prefix + f for f in features_name]
            # NOTE(review): plain string concatenation -- this only
            # yields a valid path when self.folder_op already ends with
            # a path separator
            op_files = [self.folder_op + q for q in query]

            df_transport_op = pd.DataFrame()

            for op_path in op_files:
                op_csv = pd.read_csv(op_path, usecols=self.op_features)
                op_csv = op_csv[self.op_features]  # to assure order

                # axis = 1 is by column, axis = 0 is by rows
                df_transport_op = pd.concat([df_transport_op, op_csv],
                                            axis=1, ignore_index=True)
                # rows missing in any of the files end up with NaN
                df_transport_op = df_transport_op.dropna()

            ### motion_features
            motion = MotionDataset()

            file_name = transport + "*" + ".csv"
            path_transport = get_files(self.folder_features, file_name, True)

            feature_df = motion.build_dataset(self.motion_features,
                                              path_transport)

            # OP columns first, motion features after them
            df_transport_op = pd.concat([df_transport_op, feature_df],
                                        axis=1, ignore_index=True)

            # keep at most n_samples rows of this mode
            df_transport_op = df_transport_op[:self.n_samples]

            # features
            feature_frames.append(df_transport_op)

            # labels: the mode name repeated once per kept row
            label_frames.append(
                pd.DataFrame([transport] * len(df_transport_op)))

        if feature_frames:
            X = pd.concat(feature_frames, axis=0, ignore_index=True)
            y = pd.concat(label_frames, axis=0, ignore_index=True)
        else:
            # no transportation mode requested: pd.concat rejects an
            # empty list, so return empty frames explicitly
            X = pd.DataFrame()
            y = pd.DataFrame()

        n = len(X.columns)

        print("#### OP features size: {}".format(n))
        print("size", len(X), len(y))

        return X, y
Exemple #6
0
    def get_transformation(self, parameters, motion_features, op_features,
                           transportation, folder_features, folder_op):
        """ Calculates the OP transformation of each motion feature and
        saves the result to the chosen folder.

        One file is produced per combination of (D, tau), motion feature
        and transportation mode, named
        "op_<transport>_<motion feature>_D<D>_t<tau>.csv"; it holds one
        row per input file and one column per entry of <op_features>.
        A feature is stored as a column in the input files, while the
        OP routine works on rows, so each feature is transposed first.

        Parameters
        ----------
            parameters : list of lists (int, int)
                OP parameters (D, tau) we want to extract

            motion_features : list of str
                list of motion features we want to transform

            op_features : list of str
                which features to extract from OP and OPTN transformation

            transportation : list of str
                list of transportation modes which will be transformed

            folder_features : str
                absolute path where the motion features that will be
                transformed are

            folder_op : str
                the folder where to save the OP transformations

        Returns
        -------
            no value
        """

        for D, tau in parameters:

            print('\n for D = {} and tau = {} \n'.format(D, tau))

            # tail shared by every file written for this (D, tau)
            suffix = "_D" + str(D) + "_t" + str(tau) + ".csv"

            for motion_feature in motion_features:

                for transport in transportation:

                    user_files = get_files(folder_features,
                                           transport + "*.csv", True)

                    # one list of values per requested OP feature
                    collected = {name: [] for name in op_features}

                    print("Motion Feature: {} and transportation: {}".format(
                        motion_feature, transport))

                    for csv_path in user_files:
                        column = pd.read_csv(csv_path,
                                             sep=",",
                                             header=0,
                                             usecols=[motion_feature]).dropna()

                        # the feature is a column, the OP routine takes
                        # a row; [0] unwraps the single result, e.g.
                        # [(2.31, 0.23, 0.045)] -> (2.31, 0.23, 0.045)
                        values = self._op_multithread(
                            column.T, D, tau, op_features)[0]

                        for name, value in zip(op_features, values):
                            collected[name].append(value)

                    ## save data
                    out_name = "op_" + transport + "_" + motion_feature + suffix

                    self._save_op_transformation(
                        pd.DataFrame.from_dict(collected), folder_op, out_name)