Python Day.data Beispiele

Programmiersprache: Python

Namespace / Paketname: utils

Klasse / Typ: Day

Methode / Funktion: data

Beispiele auf hotexamples.com: 2

Python Day.data – 2 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für utils.Day.data, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um deren Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Day(8)

data(2)

strftime(1)

today(1)

Häufig verwendete Methoden

Day (8)

data (2)

strftime (1)

today (1)

Beispiel #1

Datei anzeigen

Datei: predictor.py Projekt: lukoucky/swimming_pool_attendance_prediction

    def predict(self, predictor, columns, data_dict, time_steps_back, lines_reservations=None):
        """
        Prepare the feature table for one day and generate a prediction from it.

        One feature row is built per prediction step (``self.prediction_steps`` rows,
        5 minutes apart). Each row starts from the defaults in ``data_dict`` and is
        then extended with reservation counts for the 15-minute slot the row falls in.

        :param predictor: sklearn predictor or other object with a similar interface
        :param columns: List with names of columns that must be in the feature vector
        :param data_dict: Dictionary with time data mapping column names to default values.
            Must contain 'minute', 'minute_of_day' and 'hour'. NOTE: these three entries
            are advanced in place, so after the call the dictionary points to the time
            step following the last generated row.
        :param time_steps_back: Number of time steps for one input to the prediction algorithm
        :param lines_reservations: List with line reservations for the predicted day generated
            by the get_lines_usage_for_day method: 64 strings, one per 15-minute slot starting
            at minute 360 of the day, each expected to be a comma-terminated list of
            organisation names. ``None`` means no reservations.
        :return: Vector with prediction for the day (288 items)
        """
        # Column position of every 'reserved_*' feature and of the total counter.
        lines_reserved_id = -1
        org_ids = {}
        for org_id, column in enumerate(columns):
            if column.startswith('reserved_'):
                org_ids[column] = org_id
            if column == 'lines_reserved':
                lines_reserved_id = org_id

        if lines_reservations is None:
            lines_reservations = [''] * 64

        data = []
        for _ in range(self.prediction_steps):
            # Columns missing from data_dict default to 0.
            row = [data_dict.get(column, 0) for column in columns]

            slot_id = (data_dict['minute_of_day'] - 360) // 15
            if 0 <= slot_id < 64:
                # The text after the last comma is dropped (trailing separator).
                for name in lines_reservations[slot_id].split(',')[:-1]:
                    feature_name = 'reserved_' + name
                    # org_ids holds exactly the 'reserved_*' columns, so a dict
                    # lookup replaces the linear scan of `columns`.
                    if feature_name in org_ids:
                        row[org_ids[feature_name]] += 1
                    elif 'reserved_other' in org_ids:
                        row[org_ids['reserved_other']] += 1

                    if lines_reserved_id >= 0:
                        row[lines_reserved_id] += 1

            data.append(row)

            # Advance the clock by one 5-minute step. `>=` (instead of `== 60`)
            # also carries into the hour when the start minute is not a
            # multiple of 5.
            data_dict['minute'] += 5
            data_dict['minute_of_day'] += 5
            if data_dict['minute'] >= 60:
                data_dict['minute'] -= 60
                data_dict['hour'] += 1

        df = pd.DataFrame(data, columns=columns)
        day = Day('ts')
        day.data = df
        x, _ = self.dh.get_feature_vectors_from_days([day], [], time_steps_back, 1, True)
        return self.dh.predict_day_from_features(x, predictor, time_steps_back)

Beispiel #2

Datei anzeigen

Datei: data_helper.py Projekt: lukoucky/swimming_pool_attendance_prediction

    def prepare_days_data(self):
        """
        Load the train/test/validation lists of Days, building the pickle first if needed.

        If ``self.days_data_path`` exists, the three lists are unpickled from it.
        Otherwise ``self.csv_path`` is read and cut into days (consecutive rows
        sharing the first 10 characters of the 'time' column). Only days with
        exactly 288 rows are kept; they are shuffled with ``RANDOM_SEED``, split
        40 % train / 20 % validation / remaining test, and pickled to
        ``self.days_data_path``.

        In both cases ``self.days_train``, ``self.days_test``, ``self.days_valid``
        and ``self.columns`` are set.

        :raises FileNotFoundError: if neither the pickle nor the CSV file exists
        """
        if os.path.isfile(self.days_data_path):
            # NOTE(review): pickle.load can execute arbitrary code from the file;
            # days_data_path must only point at a file generated by this project.
            with open(self.days_data_path, 'rb') as input_file:
                self.days_train, self.days_test, self.days_valid = pickle.load(
                    input_file)
            self.columns = self.days_train[0].data.columns
            return

        if not os.path.isfile(self.csv_path):
            raise FileNotFoundError(
                'Missing days.pickle and dataset.csv.\nGenerate dataset.csv in preprocess_data.py first.'
            )

        print('Preparing days.pickle')
        data_frame = pd.read_csv(self.csv_path)

        # Zero the pool value after minute 1320 of the day. A single .loc
        # assignment replaces the per-row chained `df['pool'].iloc[i] = 0`,
        # which is not guaranteed to write through to the frame.
        data_frame.loc[data_frame['minute_of_day'] > 1320, 'pool'] = 0

        # Row positions where the date part of 'time' changes, i.e. where a
        # new day starts; the last day ends at the end of the frame.
        day_labels = [stamp[:10] for stamp in data_frame['time']]
        n_rows = len(day_labels)
        starts = [pos for pos in range(n_rows)
                  if pos == 0 or day_labels[pos] != day_labels[pos - 1]]
        stops = starts[1:] + [n_rows]

        days_stats = [0, 0, 0, 0, 0, 0, 0]
        days_list = []
        n_bad_days = 0
        for start, stop in zip(starts, stops):
            day_date = day_labels[start]
            day_length = stop - start

            if day_length == 288:
                new_day = Day(day_date)
                new_day.data = data_frame.iloc[start:stop]
                days_list.append(new_day)
                # Weekday of the day itself (its first row), not of the
                # row that follows it.
                days_stats[data_frame['day_of_week'].iloc[start]] += 1
                continue

            # Every incomplete day is dropped and counted; detailed gap
            # diagnostics are printed only for nearly complete days.
            n_bad_days += 1
            if abs(day_length - 288) < 15:
                print('Error in day %s, length of day is %d'
                      % (day_date, day_length))
                expected = 0
                for value in data_frame['minute_of_day'].iloc[start:stop]:
                    if value != expected:
                        print('Should be %d is %d' % (expected, value))
                        expected = value
                    expected += 5
                print('\n\n')

            # TODO: Most of them have less than 10 missing values.
            # If the missing values are outside of opening hours - fill with zeros and use
            # Many other missing values can be filled in
            # Also change of time from summer to winter makes 1 hour gap or duplicate hour
            # Move this function to data preprocessing

        Random(RANDOM_SEED).shuffle(days_list)
        train_portion = 0.4
        validation_portion = 0.2
        n_days = len(days_list)
        print('Generated %d days. (%d days removed)' % (n_days, n_bad_days))
        print('Number of days from Monday to Sunday', days_stats)

        n_train_days = int(n_days * train_portion)
        n_validation_days = int(n_days * validation_portion)
        split_at = n_train_days + n_validation_days
        train_days = days_list[:n_train_days]
        validation_days = days_list[n_train_days:split_at]
        test_days = days_list[split_at:]

        # Stored order is train, test, validation - the same order the
        # loading branch above unpacks.
        with open(self.days_data_path, 'wb') as output_file:
            pickle.dump([train_days, test_days, validation_days], output_file)

        # Leave the instance in the same state as after loading the pickle.
        self.days_train = train_days
        self.days_test = test_days
        self.days_valid = validation_days
        self.columns = data_frame.columns