    def _init_reference(self) -> None:
        reference_initialized = False

        while not reference_initialized:
            if len(self._maxtimes) < 1:
                raise ValueError("Not enough data to work on")

            self._reference_id = self._maxtimes.index[0]

            # create reference dataframe
            self._reference_df = self._create_person_dataframe(
                self._reference_id)

            self._reference_df = self._remove_duplicate_dates(
                self._reference_df)
            # ignore too small dataframes
            if len(self._reference_df.index) < 2:
                self._maxtimes = self._maxtimes.drop(self._reference_id)
                self._mintimes = self._mintimes.drop(self._reference_id)
                continue

            self._reference_df = utils.remove_not_progressing_solves(
                self._reference_df)
            # ignore too small dataframes
            if len(self._reference_df.index) < 2:
                self._maxtimes = self._maxtimes.drop(self._reference_id)
                self._mintimes = self._mintimes.drop(self._reference_id)
                continue

            self._reference_df = utils.interpolate_dates(self._reference_df)
            self._set_reference_values(self._reference_df)

            reference_initialized = True

    def _get_date_for_new_time(self, dataframe: DataFrame, column_id: str,
                               time: float) -> Tuple[datetime, float]:
        # use data from the group (i.e. more spaced data) for a more precise value
        person_df = self._create_person_dataframe(column_id)
        person_df = self._remove_duplicate_dates(person_df)
        person_df = utils.remove_not_progressing_solves(person_df)

        next_to_last_date = person_df.index[-2]
        next_to_last_value = person_df.iloc[-2, 0]
        last_date = person_df.index[-1]
        last_value = person_df.iloc[-1, 0]
        days_delta = (last_date - next_to_last_date).days

        # number of days to add to next_to_last_date
        number_of_days_to_add = ((next_to_last_value - time) * days_delta) / (
            next_to_last_value - last_value)
        # number of days to add to last_date
        number_of_days_to_add = number_of_days_to_add - days_delta
        # upper round to make sure date encloses time
        number_of_days_to_add = math.ceil(number_of_days_to_add)

        new_date = self._find_date_for_value(
            dataframe, column_id,
            last_value) + timedelta(days=number_of_days_to_add)
        # recompute corresponding time to match the ceiled date
        new_time = last_value - ((
            (next_to_last_value - last_value) * number_of_days_to_add) /
                                 days_delta)

        return new_date, new_time
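
The days-to-add arithmetic above is plain linear extrapolation through the last two (date, best time) points of the person's progression. A self-contained sketch of the same idea, with hypothetical names and detached from the class internals:

import math
from datetime import date, timedelta


def extrapolate_date_for_time(next_to_last, last, target_time):
    # Hypothetical helper, not part of the original class: estimate when
    # target_time would be reached by extending the line through the last
    # two (date, value) points, rounding the date up so it encloses the time.
    (d0, v0), (d1, v1) = next_to_last, last
    days_delta = (d1 - d0).days
    # days from d0 at which the line reaches target_time
    days_from_d0 = ((v0 - target_time) * days_delta) / (v0 - v1)
    # express as days to add to the last date, rounded up
    days_to_add = math.ceil(days_from_d0 - days_delta)
    new_date = d1 + timedelta(days=days_to_add)
    # recompute the time that matches the ceiled date
    new_time = v1 - ((v0 - v1) * days_to_add) / days_delta
    return new_date, new_time


# 50s on Jan 1 and 40s on Jan 11 put a 35s solve around Jan 16
print(extrapolate_date_for_time((date(2020, 1, 1), 50.0),
                                (date(2020, 1, 11), 40.0), 35.0))
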
Example #3
def test_remove_not_progressing_solves_not_default_column() -> None:
    df_before = pd.DataFrame(
        {
            'event': ['333', '333', '333'],
            'best': [50, 60, 30]
        },
        index=[0, 1, 2])
    df_expected = pd.DataFrame(
        {
            'event': ['333', '333'],
            'best': [50, 30]
        },
        index=[0, 2])
    df_after = utils.remove_not_progressing_solves(df_before, column_number=1)
    assert df_expected.equals(df_after)
Example #4
    def _launch_main_process(self,
                             log_progression: bool = False,
                             log_debug: bool = False) -> None:
        if log_progression:
            # prepare process progression indication
            total_loops = len(self._maxtimes[1:])
            print_every_percent = 0.05
            loops_percent = round(total_loops * print_every_percent)
            if loops_percent == 0:
                loops_percent = 1
            start_time = time.time()
            previous_time = start_time

        for i, row in enumerate(self._maxtimes[1:].itertuples()):

            if log_progression:
                current_time = time.time()
                current_running_time = current_time - previous_time
                previous_time = current_time
                total_running_time = current_time - start_time
                estimated_running_time = (total_loops *
                                          total_running_time) / (i + 1)
                # don't print every iteration
                if i == 0 or i == total_loops - 1 or (i + 1) % loops_percent == 0:
                    print(f'{i + 1}/{total_loops} loops, '
                          f'total elapsed/remaining/estimated: '
                          f'{round(total_running_time, 0)}/'
                          f'{round(estimated_running_time - total_running_time, 0)}/'
                          f'{round(estimated_running_time, 0)} seconds')

            person_df = self._create_person_dataframe(row.Index)

            person_df = self._remove_duplicate_dates(person_df)
            # ignore too small dataframes
            if len(person_df.index) < 2:
                continue

            person_df = utils.remove_not_progressing_solves(person_df)
            # ignore too small dataframes
            if len(person_df.index) < 2:
                continue

            # search matching date
            matching_date = self._find_closest_date(row[1], log_debug)
            # align dates
            delta = matching_date - person_df.index[0]
            person_df = self._shift_date(person_df, delta)

            # interpolate
            person_df = utils.interpolate_dates(person_df)

            # add current df to final df
            self._df_to_concat.append(person_df)

        if log_progression:
            print('Final concatenation...')

        self._processed_results = pd.concat(self._df_to_concat,
                                            axis=1,
                                            sort=False)
        self._df_to_concat = [self._processed_results]

        if log_progression:
            print('Done')
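
The progression logging in this method follows a common pattern: take the elapsed time, scale it by the total number of loops to estimate the full run time, and only print at roughly every 5% of iterations. A stripped-down, standalone version of that pattern (hypothetical helper, illustration only):

import time


def run_with_progress(items, work, print_every_percent=0.05):
    # Hypothetical helper: apply `work` to each item and periodically log
    # elapsed/remaining/estimated seconds, mirroring the logging above.
    total_loops = len(items)
    loops_percent = max(1, round(total_loops * print_every_percent))
    start_time = time.time()
    for i, item in enumerate(items):
        work(item)
        total_running_time = time.time() - start_time
        # assume remaining iterations take as long, on average, as past ones
        estimated_running_time = (total_loops * total_running_time) / (i + 1)
        if i == 0 or i == total_loops - 1 or (i + 1) % loops_percent == 0:
            remaining = estimated_running_time - total_running_time
            print(f'{i + 1}/{total_loops} loops, '
                  f'total elapsed/remaining/estimated: '
                  f'{round(total_running_time)}/{round(remaining)}/'
                  f'{round(estimated_running_time)} seconds')


# e.g. run_with_progress(list(range(100)), lambda x: time.sleep(0.01))
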
Example #5
def test_remove_not_progressing_solves_nothing_to_remove() -> None:
    df_before = pd.DataFrame({'best': [50, 40, 30]}, index=[0, 1, 2])
    df_expected = df_before
    df_after = utils.remove_not_progressing_solves(df_before)
    assert df_expected.equals(df_after)
Example #6
def test_remove_not_progressing_solves_superior_in_the_middle() -> None:
    df_before = pd.DataFrame({'best': [50, 40, 45, 35]}, index=[0, 1, 2, 3])
    df_expected = pd.DataFrame({'best': [50, 40, 35]}, index=[0, 1, 3])
    df_after = utils.remove_not_progressing_solves(df_before)
    assert df_expected.equals(df_after)
Example #7
def test_remove_not_progressing_solves_mixed() -> None:
    df_before = pd.DataFrame({'best': [50, 60, 60, 50, 45, 45, 70, 45]},
                             index=[0, 1, 2, 3, 4, 5, 6, 7])
    df_expected = pd.DataFrame({'best': [50, 45]}, index=[0, 4])
    df_after = utils.remove_not_progressing_solves(df_before)
    assert df_expected.equals(df_after)
Example #8
def test_remove_not_progressing_solves_equals_starting() -> None:
    df_before = pd.DataFrame({'best': [50, 50, 40, 35]}, index=[0, 1, 2, 3])
    df_expected = pd.DataFrame({'best': [50, 40, 35]}, index=[0, 2, 3])
    df_after = utils.remove_not_progressing_solves(df_before)
    assert df_expected.equals(df_after)
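
Judging from these tests, utils.remove_not_progressing_solves keeps a row only when its value strictly improves on (is lower than) the best value kept so far in the chosen column. A minimal sketch consistent with the tests above; the actual utils implementation may differ:

import pandas as pd


def remove_not_progressing_solves(df: pd.DataFrame,
                                  column_number: int = 0) -> pd.DataFrame:
    # Sketch inferred from the tests, not necessarily the original utils code:
    # keep only rows whose value in the chosen column is a new best (strictly
    # lower than every previously kept value).
    kept_labels = []
    best_so_far = None
    for label, value in df.iloc[:, column_number].items():
        if best_so_far is None or value < best_so_far:
            kept_labels.append(label)
            best_so_far = value
    return df.loc[kept_labels]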