Example #1
0
    def test_submodule(self):
        """Version checks apply to the parent package when importing a submodule."""
        parent_name = 'fakemodule'
        parent = types.ModuleType(parent_name)
        parent.__version__ = '0.9.0'
        sys.modules[parent_name] = parent

        child_name = 'submodule'
        child = types.ModuleType(child_name)
        setattr(parent, child_name, child)
        sys.modules[f'{parent_name}.{child_name}'] = child

        with mock.patch.dict(VERSIONS, {parent_name: '1.0.0'}):
            # Too-old parent version raises by default.
            match = 'Heartandsole requires .*1.0.0.* of .fakemodule.*"0.9.0"'
            with self.assertRaisesRegex(ImportError, match):
                import_optional_dependency('fakemodule.submodule')

            # With errors='warn', a warning is issued and None is returned.
            with self.assertWarns(UserWarning):
                result = import_optional_dependency('fakemodule.submodule',
                                                    errors='warn')
            self.assertIsNone(result)

            # Exactly the required version is accepted, and the submodule
            # itself is returned.
            parent.__version__ = '1.0.0'
            self.assertIs(import_optional_dependency('fakemodule.submodule'),
                          child)
Example #2
0
    def test_bad_version(self):
        """An installed-but-outdated dependency is handled per the errors mode."""
        name = 'fakemodule'
        fake = types.ModuleType(name)
        fake.__version__ = '0.9.0'
        sys.modules[name] = fake

        with mock.patch.dict(VERSIONS, {name: '1.0.0'}):
            # Default behavior: raise with a descriptive message.
            match = 'Heartandsole requires .*1.0.0.* of .fakemodule.*"0.9.0"'
            with self.assertRaisesRegex(ImportError, match):
                import_optional_dependency('fakemodule')

            # A satisfied min_version argument overrides the registry value.
            self.assertIs(
                import_optional_dependency('fakemodule', min_version='0.8'),
                fake)

            # errors='warn' downgrades the failure to a UserWarning.
            with self.assertWarns(UserWarning):
                result = import_optional_dependency('fakemodule',
                                                    errors='warn')
            self.assertIsNone(result)

            # Exactly the required version passes and returns the module.
            fake.__version__ = '1.0.0'
            self.assertIs(import_optional_dependency('fakemodule'), fake)
Example #3
0
    def test_import_optional(self):
        """A missing package raises ImportError unless errors='ignore'."""
        match = 'Missing .*notapackage.* pip .* notapackage'
        with self.assertRaisesRegex(ImportError, match):
            import_optional_dependency('notapackage')

        self.assertIsNone(
            import_optional_dependency('notapackage', errors='ignore'))
Example #4
0
    def test_no_version_raises(self):
        """A module lacking __version__ cannot satisfy a version requirement."""
        name = 'fakemodule'
        # Note: no __version__ attribute is set on this fake module.
        sys.modules[name] = types.ModuleType(name)

        with mock.patch.dict(VERSIONS, {name: '1.0.0'}):
            with self.assertRaisesRegex(ImportError,
                                        'Can\'t determine .* fakemodule'):
                import_optional_dependency(name)
Example #5
0
    def gain(self, source='records'):
        """Return total elevation gain in meters.

        Args:
          source (str): Source from which to obtain elevation gain.

            - ``records`` (default): result when the records DataFrame is
              passed to :meth:`pandas.DataFrame.xyz.z_gain_threshold`.
            - ``summary``: ``gain`` value in the summary Series.
            - ``laps``: sum of ``gain`` column in the laps DataFrame.

        Returns:
          float or None: Total elevation gain according to the requested
          source. If the Activity does not possess the requested data
          source, return None.

        Raises:
          ValueError: If source is not a valid option.

        See also:
          :meth:`pandas.DataFrame.xyz.z_gain_threshold`
            A 5-meter threshold elevation gain algorithm. From the
            ``pandas-xyz`` package.
        """
        if source == 'records':
            if self.stream is not None:
                # pandas-xyz registers the ``xyz`` DataFrame accessor on
                # import; this raises a helpful error if it is missing.
                import_optional_dependency('pandas_xyz')

                # Uses the accessor's default 5-unit threshold.
                return self.activity.records.xyz.z_gain_threshold()
        elif source == 'summary':
            if 'gain' in self.summary.index:
                return self.summary['gain']
        elif source == 'laps':
            if 'gain' in self.laps.columns:
                return self.laps['gain'].sum()
        else:
            raise ValueError('source must be one of: {records, summary, laps}')
Example #6
0
    def from_gpx(cls, file_obj):
        """Construct an Activity from a .gpx file.

        Args:
          file_obj: Any file-like object accepted by
            :class:`activereader.Gpx`.

        Returns:
          Activity

        Examples:
          Provide a file path:

          >>> act = Activity.from_gpx('my_activity.gpx')

          Provide a raw string of bytes:

          >>> with open('my_activity.gpx', 'rb') as fb:
          ...   raw_data = fb.read()
          >>> act = Activity.from_gpx(raw_data)

          Provide a string (no encoding info):

          >>> with open('my_activity.gpx', 'r') as f:
          ...   xml_string = f.read()
          >>> act = Activity.from_gpx(xml_string)
        """
        # activereader is an optional dependency; raise a helpful error
        # if it is not installed.
        activereader = import_optional_dependency('activereader')

        reader = activereader.Gpx.from_file(file_obj)

        summary = pd.Series({
            f'{TIMESTAMP}_start': reader.start_time,
        })

        # One row per <trk> element; only single-track files are supported.
        activities = pd.DataFrame.from_records([{
            'title': trk.name,
            'sport': trk.activity_type
        } for trk in reader.tracks])

        if len(activities) > 1:
            raise ValueError('multi-activity files not supported')

        summary = pd.concat([summary, activities.iloc[0]])

        # One row per trackpoint, keyed by the package's canonical
        # record-stream labels.
        records = pd.DataFrame.from_records([{
            TIMESTAMP: tp.time,
            LAT: tp.lat,
            LON: tp.lon,
            ELEVATION: tp.altitude_m,
            CADENCE: tp.cadence_rpm,
            HEARTRATE: tp.hr,
        } for tp in reader.trackpoints])

        # TODO: Figure out how laps are represented in gpx files, if at all.

        activity = cls(records, summary=summary)

        # Convert cadence from RPM to strides per minute.
        activity.cadence._convert_units()

        return activity
Example #7
0
    def loss(self, source='records'):
        """Return total elevation loss in meters.

        Args:
          source (str): Source from which to obtain elevation loss.

            - ``records`` (default): result when the records DataFrame is
              passed `in reverse` to
              :meth:`pandas.DataFrame.xyz.z_gain_threshold`.
            - ``summary``: ``loss`` value in the summary Series.
            - ``laps``: sum of ``loss`` column in the laps DataFrame.

        Returns:
          float or None: Total elevation loss according to the requested
          source. If the Activity does not possess the requested data
          source, return None.

        Raises:
          ValueError: If source is not a valid option.

        See also:
          :meth:`pandas.DataFrame.xyz.z_gain_threshold`
            A 5-meter threshold elevation gain algorithm. From the
            ``pandas-xyz`` package.
        """
        if source not in ('records', 'summary', 'laps'):
            raise ValueError('source must be one of: {records, summary, laps}')

        if source == 'records' and self.stream is not None:
            import_optional_dependency('pandas_xyz')
            # Loss is gain computed over the reversed elevation profile.
            return self.activity.records[::-1].xyz.z_gain_threshold()
        if source == 'summary' and 'loss' in self.summary.index:
            return self.summary['loss']
        if source == 'laps' and 'loss' in self.laps.columns:
            return self.laps['loss'].sum()
Example #8
0
    def records_from_position(self, inplace=False):
        """Cumulative distance records calculated from GPS coordinate records.

        Args:
          inplace (bool): Whether to add the Series result as a column to
            the records DataFrame. Default False.

        Returns:
          pandas.Series or None: The Series result or None if
          ``inplace=True`` or if the records DataFrame does not contain
          ``lat`` and ``lon`` columns.

        Examples:
          When called with ``inplace=False``, this method returns a Series:

          >>> records = pd.DataFrame({
          ...   'lat': [40.0, 40.0001, 40.0002],
          ...   'lon': [-105.2, -105.2, -105.2]
          ... })
          >>> act = Activity(records)
          >>> act.distance.records_from_position()
          0     0.000000
          1    11.119493
          2    22.238985
          dtype: float64

          When called with ``inplace=True``, this method updates the
          records DataFrame:

          >>> act.distance.records_from_position(inplace=True)
          >>> act.records
                 lat     lon   distance
          0  40.0000  -105.2   0.000000
          1  40.0001  -105.2  11.119493
          2  40.0002  -105.2  22.238985

        See also:
          :meth:`pandas.DataFrame.xyz.s_from_xy`
            Custom DataFrame accessor method for calculating cumulative
            distance from GPS coordinates. From the ``pandas-xyz`` package.
        """
        if not self.activity.has_position:
            return None

        # pandas-xyz registers the ``xyz`` DataFrame accessor on import;
        # this raises a helpful error if it is not installed.
        import_optional_dependency('pandas_xyz')

        # Without kwargs the accessor assumes stream names 'lat' and 'lon';
        # pass the Activity's actual record-stream labels explicitly.
        distance_stream = self.activity.records.xyz.s_from_xy(
            lat=self.activity.lat.record_stream_label,
            lon=self.activity.lon.record_stream_label,
        )

        if not inplace:
            return distance_stream

        self.activity.records[self.record_stream_label] = distance_stream
Example #9
0
    def from_tcx(cls, file_obj):
        """Construct an Activity from a .tcx file.

        Args:
          file_obj: Any file-like object accepted by
            :class:`activereader.Tcx`.

        Returns:
          Activity

        Examples:
          Provide a file path:

          >>> act = Activity.from_tcx('my_activity.tcx')

          Provide a raw string of bytes:

          >>> with open('my_activity.tcx', 'rb') as fb:
          ...   raw_data = fb.read()
          >>> act = Activity.from_tcx(raw_data)

          Provide a string (no encoding info):

          >>> with open('my_activity.tcx', 'r') as f:
          ...   xml_string = f.read()
          >>> act = Activity.from_tcx(xml_string)
        """
        # activereader is an optional dependency; raise a helpful error
        # if it is not installed.
        activereader = import_optional_dependency('activereader')

        reader = activereader.Tcx.from_file(file_obj)

        # One row per activity element; only single-activity files are
        # supported.
        activities = pd.DataFrame.from_records([{
            'sport': act.sport,
            'device': act.device,
            'unit_id': act.device_id,
            'product_id': act.product_id,
        } for act in reader.activities])

        if len(activities) > 1:
            raise ValueError('multi-activity files not supported')

        summary = activities.iloc[0]

        # One row per lap, keyed by the package's canonical field names.
        laps = pd.DataFrame.from_records([
            {
                f'{TIMESTAMP}_start': lap.start_time,
                f'{TIME}_timer': lap.total_time_s,
                f'{DISTANCE}_total': lap.distance_m,
                f'{SPEED}_max': lap.max_speed_ms,
                f'{SPEED}_avg': lap.avg_speed_ms,
                'calories': lap.calories,
                f'{HEARTRATE}_avg': lap.hr_avg,
                f'{HEARTRATE}_max': lap.hr_max,
                f'{CADENCE}_avg': lap.cadence_avg,
                f'{CADENCE}_max': lap.cadence_max,
                'intensity': lap.intensity,
                'trigger_method': lap.trigger_method,
            } for lap in reader.laps
        ])

        # Build a DataFrame using only trackpoints (as records).
        records = pd.DataFrame.from_records([{
            TIMESTAMP: tp.time,
            LAT: tp.lat,
            LON: tp.lon,
            DISTANCE: tp.distance_m,
            ELEVATION: tp.altitude_m,
            HEARTRATE: tp.hr,
            SPEED: tp.speed_ms,
            CADENCE: tp.cadence_rpm,
        } for tp in reader.trackpoints])

        # TODO: Rethink how I want to use this lap column.
        # records['lap'] = [
        #   i for i, l in enumerate(reader.laps) for t in l.trackpoints
        # ]

        # Make the lap column into an additional index level.
        # TODO: Consider if 'time' or 'timestamp' might make a good
        # additional index. Or whether we need these as indexes at all.
        # records.index.name = 'record'
        # records = records.set_index('lap', append=True)

        activity = cls(records, laps, summary)

        # Convert cadence from RPM to strides per minute.
        activity.cadence._convert_units()

        return activity
Example #10
0
    def from_fit(cls, file_obj):
        """Construct an Activity from a .fit file.

    Args:
      file_obj: Any file-like object accepted by :class:`fitparse.FitFile`.
    
    Returns:
      Activity

    Examples:
      Provide a file path:

      >>> act = Activity.from_fit('my_activity.fit')

      Provide a file-like object:
      
      >>> file_obj = open('my_activity.fit', 'rb')
      >>> act = Activity.from_fit(file_obj)

      Provide a raw string of bytes:
      
      >>> file_obj = open('my_activity.fit', 'rb')
      >>> raw_fit_data = file_obj.read()
      >>> act = Activity.from_fit(raw_fit_data)

    """
        # fitparse is an optional dependency; raise a helpful error if it
        # is not installed.
        fitparse = import_optional_dependency('fitparse')

        reader = fitparse.FitFile(file_obj)

        # Helper: one DataFrame row per .fit message of the given type.
        def _build_dataframe_from_msg(msg_type):

            return pd.DataFrame.from_records(
                [msg.get_values() for msg in reader.get_messages(msg_type)])

        # msg_names = set(msg.name for msg in self.reader.get_messages())
        # print(msg_names)

        # 'file_id' messages (worthwhile maybe)
        # 'serial_number': 3888752595, 'time_created': (timestamp),
        # 'manufacturer': 'garmin', 'garmin_product': 'fr220',
        # 'number': None, 'type': 'activity'
        # msg_type = 'file_id'

        # Nothing worthwhile
        # msg_type = 'device_info'
        # msg_type = 'file_creator'
        # msg_type = 'unknown_22'
        # msg_type = 'unknown_79'
        # msg_type = 'unknown_141'
        # print(self._build_dataframe_from_msg(msg_type))

        # 'activity' messages. Typically only be one row. Exception if not.
        # No relevant data that doesn't also appear in 'session' messages.
        # Fields:
        # ['timestamp', 'total_timer_time', 'local_timestamp', 'num_sessions',
        #  'type', 'event', 'event_type', 'event_group']
        activities = _build_dataframe_from_msg('activity')
        if len(activities) > 1:
            raise ValueError('multi-activity files not supported')

        # Rename .fit field names to the package's canonical labels.
        activities = activities.rename(columns=dict(
            timestamp=f'{TIMESTAMP}_end',
            total_timer_time=f'{TIME}_timer',
        ))

        activity_series = activities.iloc[0]

        # 'session' messages. Typically only one row. Exception if not.
        # Fields:
        # ['timestamp', 'start_time', 'start_position_lat', 'start_position_long',
        #  'total_elapsed_time', 'total_timer_time', 'total_distance',
        #  'total_strides', 'nec_lat', 'nec_long', 'swc_lat', 'swc_long',
        #  'message_index', 'total_calories', 'enhanced_avg_speed', 'avg_speed',
        #  'enhanced_max_speed', 'max_speed', 'total_ascent', 'total_descent',
        #  'first_lap_index', 'num_laps', 'avg_vertical_oscillation',
        #  'avg_stance_time_percent', 'avg_stance_time', 'event', 'event_type',
        #  'sport', 'sub_sport', 'avg_heart_rate', 'max_heart_rate',
        #  'avg_running_cadence', 'max_running_cadence', 'total_training_effect',
        #  'event_group', 'trigger', 'unknown_81', 'avg_fractional_cadence',
        #  'max_fractional_cadence', 'total_fractional_cycles']
        sessions = _build_dataframe_from_msg('session')
        if len(sessions) > 1:
            raise ValueError('multi-session files not supported')

        sessions = sessions.rename(columns=dict(
            start_time=f'{TIMESTAMP}_start',
            timestamp=f'{TIMESTAMP}_end',
            start_position_lat=f'{LAT}_start',
            start_position_long=f'{LON}_start',
            total_elapsed_time=f'{TIME}_elapsed',
            total_timer_time=f'{TIME}_timer',
            total_distance=f'{DISTANCE}_total',
            total_calories='calories',
            avg_speed=f'{SPEED}_avg',
            max_speed=f'{SPEED}_max',
            total_ascent=f'{ELEVATION}_gain',
            total_descent=f'{ELEVATION}_loss',
            avg_heart_rate=f'{HEARTRATE}_avg',
            max_heart_rate=f'{HEARTRATE}_max',
            avg_running_cadence=f'{CADENCE}_avg',
            max_running_cadence=f'{CADENCE}_max',
        ))

        session = sessions.iloc[0]

        # Verify that the session and activity data is the same.
        # Disagreement is surfaced as a warning (not an error); session
        # values win because the summary below is built from the session.
        for field in [f'{TIMESTAMP}_end', f'{TIME}_timer']:
            if activity_series[field] != session[field]:
                # raise ValueError(f'Activity and session data disagree for {field}')
                warnings.warn(
                    f'Activity and session data disagree for {field}: '
                    f'(Activity = {activity_series[field]}; Session = {session[field]}). '
                    f'Session values are used by default.')

        summary = session

        # 'lap' message fields:
        # ['timestamp', 'start_time', 'start_position_lat', 'start_position_long',
        #  'end_position_lat', 'end_position_long', 'total_elapsed_time',
        #  'total_timer_time', 'total_distance', 'total_strides', 'unknown_27',
        #  'unknown_28', 'unknown_29', 'unknown_30', 'message_index',
        #  'total_calories', 'enhanced_avg_speed', 'avg_speed',
        #  'enhanced_max_speed', 'max_speed', 'total_ascent', 'total_descent',
        #  'wkt_step_index', 'avg_vertical_oscillation', 'avg_stance_time_percent',
        #  'avg_stance_time', 'event', 'event_type', 'avg_heart_rate',
        #  'max_heart_rate', 'avg_running_cadence', 'max_running_cadence',
        #  'intensity', 'lap_trigger', 'sport', 'event_group', 'sub_sport',
        #  'unknown_72', 'avg_fractional_cadence', 'max_fractional_cadence',
        #  'total_fractional_cycles']
        laps = _build_dataframe_from_msg('lap')

        laps = laps.rename(columns=dict(
            start_time=f'{TIMESTAMP}_start',
            timestamp=f'{TIMESTAMP}_end',
            start_position_lat=f'{LAT}_start',
            start_position_long=f'{LON}_start',
            end_position_lat=f'{LAT}_end',
            end_position_long=f'{LON}_end',
            total_elapsed_time=f'{TIME}_elapsed',
            total_timer_time=f'{TIME}_timer',
            total_distance=f'{DISTANCE}_total',
            total_calories='calories',
            avg_speed=f'{SPEED}_avg',
            max_speed=f'{SPEED}_max',
            total_ascent=f'{ELEVATION}_gain',
            total_descent=f'{ELEVATION}_loss',
            avg_heart_rate=f'{HEARTRATE}_avg',
            max_heart_rate=f'{HEARTRATE}_max',
            avg_running_cadence=f'{CADENCE}_avg',
            max_running_cadence=f'{CADENCE}_max',
            lap_trigger='trigger_method',
        ))

        records = _build_dataframe_from_msg('record')

        # TODO: Move this check to base file reader?
        # Non-monotonic or duplicated timestamps indicate a malformed file.
        if not records[TIMESTAMP].is_monotonic_increasing or records[
                TIMESTAMP].duplicated().any():
            warnings.warn('Something funky is going on with timestamps.',
                          UserWarning)

        records = records.rename(columns=dict(position_lat=LAT,
                                              position_long=LON,
                                              altitude=ELEVATION,
                                              heart_rate=HEARTRATE))

        # Drop BS cols if they are there
        # self.records = self.records.drop(
        #   columns=[
        #       'enhanced_speed',
        #       'enhanced_altitude',
        #       # 'timestamp',
        #       # Garmin
        #       'unknown_88',
        #       # Wahoo
        #       'battery_soc',
        #   ],
        #   errors='ignore',
        # )

        # TODO: Consider if records should be duplicated if they belong to
        # two bouts or laps...
        # Just noticed TCX files duplicate Trackpoints...

        # Create a row multiindex for records.
        # self.records.index.name = 'record'
        # self.records = self.records.set_index(['lap', 'bout'], append=True)

        # print(
        #   f'End time:\n'
        #   f'  Session: {sessions.iloc[0][f"{TIMESTAMP}_end"]}\n'
        #   f'  Activity: {self.activity.loc[f"{TIMESTAMP}_end"]}\n'
        #   f'  Last lap: {self.laps.iloc[-1][f"{TIMESTAMP}_end"]}\n'
        #   f'  Last record: {self.records.iloc[-1][f"{TIMESTAMP}"]}\n'
        #   f'  Last pause: {self.events[self.events["event_type"]=="stop_all"].iloc[-1][TIMESTAMP]}\n'
        #   f'  Full stop: {self.events[self.events["event_type"]=="stop_disable_all"].iloc[-1][TIMESTAMP]}\n'
        # )

        # return cls(records, laps, summary)

        activity = cls(records, laps, summary)

        # Convert semicircles to degrees
        activity.lat._convert_record_units(inplace=True)
        activity.lon._convert_record_units(inplace=True)

        # Convert cadence from RPM to strides per minute.
        activity.cadence._convert_units()

        # activity.elevation._set_record_stream('altitude')
        # activity.cadence._convert_record_units(orig='rpm')
        # activity.lat._set_record_stream('position_lat')
        # activity.lon._set_record_stream('position_long')
        # activity.heartrate._set_record_stream('heart_rate')

        # ------------------------------------------------------------------
        # Add 'bout' and 'lap' columns to record DF.
        # TODO: Figure out how to make this not take so long. It ruins the
        # read-in process for large files. In general, I'll leave off the
        # lap/bout feature for all files for now.

        # If the record timestamp straddles two laps, put it into the
        # earlier lap.
        # activity.records['lap'] = [
        #   activity.laps.index[
        #     activity.timestamp.laps['start'].le(timestamp_rec)
        #     & activity.timestamp.laps['end'].ge(timestamp_rec)
        #     # laps[f'{TIMESTAMP}_start'].le(timestamp_rec)
        #     # & laps[f'{TIMESTAMP}_end'].ge(timestamp_rec)
        #   ][0]
        #   for timestamp_rec in activity.timestamp.stream
        # ]

        # events = _build_dataframe_from_msg('event')
        # start_events = events[events['event_type'] == 'start'].reset_index()
        # pause_events = events[events['event_type'] == 'stop_all'].reset_index()

        # # If the record timestamp straddles two bouts, put it into the
        # # earlier bout. (That should be impossible, but JIC)
        # activity.records['bout'] = [
        #   start_events.index[
        #     start_events['timestamp'].le(timestamp_rec)
        #     & pause_events['timestamp'].ge(timestamp_rec)
        #   ][0]
        #   for timestamp_rec in activity.timestamp.stream
        # ]

        # ------------------------------------------------------------------

        # Naive timestamps represent UTC in .fit files, which is the
        # default timezone assigned to naive timestamps by this method, which
        # affects the record DF column, summary Series elements, and
        # lap DF columns.
        activity.timestamp.ensure_aware()

        return activity