Exemple #1
    def cluster(self, gaze_points_list, inplace=False):
        """
        Find clusters of input gaze data and label clustered points as smooth pursuit.

        Labels (sets the 'EYE_MOVEMENT_TYPE' field) the clusters of data points as 'SP',
        other samples as 'NOISE_CLUSTER'.

        New column 'CLUSTER_ID' is added into the @DATA section of each arff object in @gaze_points_list,
        indicating cluster group ID (-1 for samples that do not belong to any cluster).

        :param gaze_points_list: a list of arff objects (dictionary with fields such as 'data' and 'metadata')
        :param inplace: whether to modify the original input gaze data with gaze data after clustering or use a copy
        :return: gaze data after clustering in the same form as the input data.
        """
        if not inplace:
            gaze_points_list = copy.deepcopy(gaze_points_list)

        # add global indexing to be able to reference the particular sample even after clustering all in one structure
        for arff_object in gaze_points_list:
            ArffHelper.add_column(arff_object, name='global_index', dtype='INTEGER', default_value=-1)
            arff_object['data']['global_index'] = np.arange(arff_object['data'].shape[0])

        self._data_set = self._aggregate_data(gaze_points_list)
        # has to be a copy, so that it is placed contiguously in memory
        self._timestamps = self._data_set['time'].copy()

        current_cluster_id = 0

        # The loop is index-based on purpose: the rows of self._data_set are modified while iterating
        # (here and, presumably, inside self._expand_cluster), so every access goes through the array itself.
        for i in range(len(self._data_set)):
            if self._data_set[i]['visited_flag'] == 1:
                # this sample has already been processed, nothing to do
                continue

            self._data_set[i]['visited_flag'] = 1
            neighbourhood = self._get_neighbourhood(i)
            # if the neighbourhood is not valid, the point keeps its default CLUSTER_ID and ends up as noise below
            if self._validate_neighbourhood(neighbourhood):
                self._expand_cluster(i, neighbourhood, current_cluster_id)
                current_cluster_id += 1

        # create a new column in gaze_points_list for CLUSTER_ID
        for arff_object in gaze_points_list:
            ArffHelper.add_column(arff_object, 'CLUSTER_ID', 'NUMERIC', -1)

        # label data in gaze_points_list as SP or NOISE_CLUSTER according to CLUSTER_ID
        for i in range(len(self._data_set)):
            observer_id = int(self._data_set[i]['observer_id'])
            global_index = self._data_set[i]['global_index']
            cluster_id = self._data_set[i]['CLUSTER_ID']
            observer_data = gaze_points_list[observer_id]['data']

            if cluster_id != -1:
                observer_data['EYE_MOVEMENT_TYPE'][global_index] = 'SP'
                observer_data['CLUSTER_ID'][global_index] = cluster_id
            else:
                observer_data['EYE_MOVEMENT_TYPE'][global_index] = 'NOISE_CLUSTER'

        # can now remove the global_index column
        for arff_object in gaze_points_list:
            ArffHelper.remove_column(arff_object, name='global_index')

        return gaze_points_list
Exemple #2
def add_eye_movement_attribute(arff_object):
    """
    Add the EYE_MOVEMENT_TYPE attribute to the @arff_object. If already present, do nothing.

    :param arff_object: arff object (a dictionary whose 'data' entry is a structured array)
    :return: the same arff object, with a column for the eye movement type
    """
    # NOTE(review): the presence check uses the literal column name while the column is added under
    # EM_TYPE_ATTRIBUTE_NAME; this presumably holds the same string -- confirm against its definition.
    if 'EYE_MOVEMENT_TYPE' not in arff_object['data'].dtype.names:
        ArffHelper.add_column(arff_object, EM_TYPE_ATTRIBUTE_NAME,
                              EM_TYPE_ARFF_DATA_TYPE, EM_TYPE_DEFAULT_VALUE)
    return arff_object
Exemple #3
 def test_add_column(self):
     """Check that ArffHelper.add_column registers the new attribute in the 'attributes' list."""
     name = 'EM_type'
     dtype = ['UNKNOWN', 'FIX', 'SACCADE', 'SP', 'NOISE']
     # close the fixture file deterministically instead of leaking the handle
     with open('test_data/arff_data_example.arff') as arff_file:
         a = ArffHelper.load(arff_file)
     b = ArffHelper.add_column(a, name, dtype, 'UNKNOWN')
     # NOTE(review): this comparison is only meaningful if add_column returns a new object;
     # if it modifies @a in place and returns it, both sides are the same list -- TODO confirm.
     a['attributes'].append((name, dtype))
     self.assertEqual(a['attributes'], b['attributes'])
Exemple #4
    def detect(self, gaze_points, inplace=False):
        # NOTE(review): this listing appears to have lost lines during extraction: the docstring text
        # below has no delimiters and several statements stop mid-expression or miss their opening /
        # branching line (flagged inline). The code is left untouched; restore the missing lines
        # from the original source before running it.
        This method labels saccades (also noise) in the provided gaze_points, which should be an arff object
        :param gaze_points: gaze recording data, an arff object (i.e. a dictionary with 'data', 'metadata'
                            and etc. keys)
        :param inplace: whether to replace the data inside @gaze_points or create a new structure
        :return: gaze points with added labels SACCADE, NOISE
        if not inplace:
            gaze_points = copy.deepcopy(gaze_points)

        # also keep track of saccadic and intersaccadic intervals
        detected_saccades_count = 0
        if 'SACC_INTERVAL_INDEX' not in gaze_points['data'].dtype.names:
            ArffHelper.add_column(gaze_points, 'SACC_INTERVAL_INDEX',
                                  'INTEGER', -1)

        # a virtual saccade that finished before the recording for uniform processing
        last_saccade_end = -1
        intersaccadic_intervals_count = 0
        if 'INTERSACC_INTERVAL_INDEX' not in gaze_points['data'].dtype.names:
            ArffHelper.add_column(gaze_points, 'INTERSACC_INTERVAL_INDEX',
                                  'INTEGER', -1)

        # verify that the timestamps are sorted!
        # NOTE(review): this check is an `assert`, so it is skipped when Python runs with -O
        times = gaze_points['data']['time']
        assert all(times[i] <= times[i + 1] for i in xrange(len(times) - 1)), \
            'Timestamps are not sorted in {}'.format(gaze_points['metadata']['filename'])
        # -1 so that the exact value ends up on the right of the searched timestamp
        searchable_timestamps = times - self.VELOCITY_INTEGRAL_INTERVAL_MICROSEC - 1
        # find the indices of the first
        # NOTE(review): the searchsorted call below is cut off (remaining arguments and the closing
        # parenthesis are missing); presumably it looks up @searchable_timestamps in @times -- TODO confirm
        prev_indices = np.searchsorted(times,
        cur_indices = np.arange(len(prev_indices))
        # if the index after search points towards this very data point, take the previous one
        prev_indices[prev_indices == cur_indices] -= 1
        # except for the very first sample
        prev_indices[0] = 0

        # computing velocities
        # NOTE(review): the three shift computations below are cut off after the opening bracket of the
        # subtrahend; presumably the same field indexed by @prev_indices is subtracted -- TODO confirm
        x_shifts = gaze_points['data']['x'][cur_indices] - gaze_points['data'][
        y_shifts = gaze_points['data']['y'][cur_indices] - gaze_points['data'][
        shifts = np.linalg.norm(np.vstack([x_shifts, y_shifts]), axis=0)
        time_shifts = gaze_points['data']['time'][cur_indices] - gaze_points[
        # keep it above 0, the shifts are 0 there anyway
        time_shifts[time_shifts == 0] += 1

        velocities = shifts / time_shifts  # pixels per microsecond
        ppd = util.calculate_ppd(gaze_points)
        velocities /= ppd  # degree per microsecond
        velocities *= 1e6  # degree per second

        # How many samples back is it reasonable to go?
        time_step = np.diff(times).mean()
        # a big margin of error, 10 times as many samples as would normally need
        extra_samples_count = int(
            np.round((self.MAX_DURATION_MICROSEC * 10) / time_step))
        # Glitch detection: glitches are defined by one of several features.
        # (1) Coordinates far outside the calibrated region (what constitutes far is defined
        # by the tolerance parameter) are assumed to be erroneous.
        # NOTE(review): `np.bool` is a deprecated alias that newer NumPy releases no longer provide;
        # the builtin `bool` is the replacement when porting.
        # NOTE(review): only the first of the four boundary checks is complete; the other three have
        # lost their opening line (presumably `is_glitch[`) -- TODO confirm
        is_glitch = np.zeros(gaze_points['data'].shape[0], dtype=np.bool)
        is_glitch[gaze_points['data']['x'] <
                  -gaze_points['metadata']['width_px'] * self.TOLERANCE] = True
            gaze_points['data']['y'] < -gaze_points['metadata']['height_px'] *
            self.TOLERANCE] = True
            gaze_points['data']['x'] > gaze_points['metadata']['width_px'] *
            (1 + self.TOLERANCE)] = True
            gaze_points['data']['y'] > gaze_points['metadata']['height_px'] *
            (1 + self.TOLERANCE)] = True

        # (2) If the @gaze_points supports the estimate of a confidence
        # measure for samples, a confidence lower than 0.1 also indicates
        # a glitch here.
        if 'confidence' in gaze_points['data'].dtype.names:
            is_glitch[gaze_points['data']['confidence'] < 0.1] = True

        # (3) Finally, velocities that exceed \a maxSpeed (default currently
        # set to ~1000 degrees/s) are regarded as glitches as well and labelled as noise
        # NOTE(review): the second statement below has lost its opening line (the array that receives
        # the 'NOISE' label)
        is_glitch[velocities > self.MAX_SPEED_DEGREE_PER_SEC] = True
            velocities > self.MAX_SPEED_DEGREE_PER_SEC] = 'NOISE'

        # Remember first sample after glitch:
        # to prevent saccade detection at the first non-glitch sample
        # that follows, saccade detection is inhibited for that first sample.
        post_glitch = np.diff(is_glitch.astype(int)) == -1
        post_glitch = np.hstack(([False], post_glitch))
        # Remember last sample before glitch:
        # since we normally would suspend the other criteria (incl. speed) if we are inside glitch, we try to avoid
        # border effects in both next-after and last-before glitch samples
        pre_glitch = np.diff(is_glitch.astype(int)) == 1
        pre_glitch = np.hstack((pre_glitch, [False]))
        # all three operands are boolean arrays, so `+` acts as an element-wise OR here
        all_glitch = is_glitch + post_glitch + pre_glitch
        # we will assign glitch samples' labels to NOISE after the saccades have been detected

        # recompute speeds for post-glitch samples
        pre_glitch_indices = np.nonzero(pre_glitch)[0]
        for i in np.nonzero(post_glitch)[0]:
            # find the corresponding start of the glitch
            corresponding_pre_glitch = np.searchsorted(pre_glitch_indices,
                                                       i) - 1
            # NOTE(review): the alternative branch (presumably `else:`) and the subtrahends of the
            # displacement computation are missing from the listing below
            if corresponding_pre_glitch < 0:
                # no correspondence found, it's the glitch from the beginning of recording ==> set velocity to 0
                velocities[i] = 0
                # found a completed glitch
                velocities[i] = np.linalg.norm([
                    gaze_points['data']['x'][i] -
                    gaze_points['data']['y'][i] -
                ]) / (times[i] - times[corresponding_pre_glitch]
                      )  # pixels per microsecond
                velocities[i] /= ppd  # degrees per microsecond
                velocities[i] *= 1e6  # degrees per second

        # Looking for saccade seed points
        # saccade seed point should
        # (1) exceed the fast threshold
        # (2) be biologically plausible
        # (3) not be inside a glitch
        # the element-wise product of the three masks acts as a logical AND
        saccade_seeds = (velocities > self.THRESHOLD_ONSET_FAST_DEGREE_PER_SEC) * \
                        (velocities < self.MAX_SPEED_DEGREE_PER_SEC) * \
                        (1 - all_glitch)
        saccade_seed_indices = np.nonzero(saccade_seeds)[0]
        for potential_seed_index in saccade_seed_indices:
            # NOTE(review): the body of this `if` is missing (only its comment survived); presumably
            # the seed is skipped -- TODO confirm
            if gaze_points['data']['EYE_MOVEMENT_TYPE'][
                    potential_seed_index] != 'UNKNOWN':
                # already labelled this before, ex. as a saccade that started from another seed point
            if self.verbose == 2:
                print >> sys.stderr, 'potential seed index', potential_seed_index
            # Looking for onset:
            # (1) should be above slow threshold speed
            # (2) should not be a glitch
            # (3) does not yet have a label
            # NOTE(review): the opening line of the third factor (the label array being sliced) is missing
            onset_candidates_check = (velocities[max(0, potential_seed_index - extra_samples_count):potential_seed_index] >=
                                      self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC) * \
                                     (1 - is_glitch[max(0, potential_seed_index - extra_samples_count):potential_seed_index]) * \
                                          max(0, potential_seed_index - extra_samples_count):potential_seed_index
                                      ] == 'UNKNOWN')

            # find the last zero (the next sample after it is the beginning of the last uninterrupted 1-sequence,
            # i.e. the saccade onset
            # NOTE(review): the `try:` line and the body of the `except` clause are missing here
                last_zero_index = np.nonzero(1 - onset_candidates_check)[0][-1]
            except IndexError:
                # not found
            saccade_onset_index = last_zero_index + 1 + max(
                0, potential_seed_index -
                extra_samples_count)  # shift accordingly
            # also this should not be the glitch or post/pre-glitch sample
            while all_glitch[saccade_onset_index]:
                saccade_onset_index += 1

            # looking for offset
            # (1) should be above offset speed threshold
            # (2) should not exceed biologically plausible duration threshold
            # (3) should not yet have a label (i.e. not NOISE labelled above)
            offset_candidates_check = (velocities[potential_seed_index:potential_seed_index + extra_samples_count] >=
                                       self.THRESHOLD_OFFSET_DEGREE_PER_SEC) * \
                                      (times[potential_seed_index:potential_seed_index + extra_samples_count] -
                                       times[saccade_onset_index] <= self.MAX_DURATION_MICROSEC)
            # we ignore the criterion around the glitch
            # NOTE(review): both slices below are cut off before their upper bound and closing bracket
            offset_candidates_check += is_glitch[
                potential_seed_index:potential_seed_index +
            offset_candidates_check += post_glitch[
                potential_seed_index:potential_seed_index +

            # but there should not yet be a label present, i.e. it's not the NOISE labelled above
            # NOTE(review): the array being sliced is missing from the expression below
            offset_candidates_check *= (
                [potential_seed_index:potential_seed_index +
                 extra_samples_count] == 'UNKNOWN')

            # find the first zero (this is the first sample with speed below the threshold, i.e. the saccade offset
            # NOTE(review): the `try:` line and the body of the `except` clause are missing here
                saccade_offset_index = np.nonzero(
                    1 - offset_candidates_check)[0][0]
            except IndexError:
                # no offset found
            # the index was starting at potential_seed_index
            saccade_offset_index += potential_seed_index

            # if we are finished inside the glitch, we have reached a biological limit of some sorts ==> discard
            # NOTE(review): the body of this `if` is missing
            if is_glitch[saccade_offset_index]:

            if self.verbose == 2:
                print >> sys.stderr, 'Found onset/offset indices', saccade_onset_index, saccade_offset_index

            # now validate the saccade parameters
            # (1) it spans at least the minimal necessary interval
            # NOTE(review): the subtrahend of @saccade_time and the target of the 'NOISE' assignment
            # are cut off; how the discarded saccade leaves this iteration is not visible either
            saccade_time = times[saccade_offset_index] - times[
            if saccade_time < self.MIN_DURATION_MICROSEC:
                # If the resulting saccade is shorter than
                # a minDuration, we assume that we have only encountered
                # some noise impulse and discard this saccade.
                    saccade_onset_index:saccade_offset_index + 1] = 'NOISE'

                if self.verbose == 2:
                    print >> sys.stderr, 'Discarding due to low duration: needed {}, had {}'.\
                        format(self.MIN_DURATION_MICROSEC, saccade_time)

            # (2) mean velocity is not below the slow onset threshold
            # NOTE(review): the displacement expression is cut off (subtrahends and closing brackets missing)
            saccade_displacement = np.linalg.norm([
                gaze_points['data']['x'][saccade_offset_index] -
                gaze_points['data']['y'][saccade_offset_index] -
            mean_speed = saccade_displacement / saccade_time  # pixels per microsecond
            mean_speed /= ppd  # degrees per microsecond
            mean_speed *= 1e6  # degrees per second
            if mean_speed < self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC:
                # Saccades where the average velocity drops below the offset threshold
                # are also discarded (those are often due to some high-velocity samples
                # going in one direction, then jumping back - which is unbiological).
                if self.verbose == 2:
                    print >> sys.stderr, 'Discarding due to low average speed: needed {}, had {}'.format(
                        self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC, mean_speed)

            # If all is okay, we detected a whole saccade
            # NOTE(review): the target of the 'SACCADE' assignment and the right-hand sides of the two
            # interval-index assignments below (each ends in a line continuation) are missing
                saccade_onset_index:saccade_offset_index + 1] = 'SACCADE'
            # write the saccade index into the appropriate field and update the global count
            gaze_points['data']['SACC_INTERVAL_INDEX'][saccade_onset_index:saccade_offset_index + 1] = \
            detected_saccades_count += 1
            # from the end of last saccade till the beginning of this one, put appropriate intersaccadic interval index
            # also update the global count of intersaccadic intervals
            gaze_points['data']['INTERSACC_INTERVAL_INDEX'][last_saccade_end + 1:saccade_onset_index] = \
            intersaccadic_intervals_count += 1
            last_saccade_end = saccade_offset_index

            # NOTE(review): the arguments of the format call below are missing
            if self.verbose:
                print >> sys.stderr, '{0} {1:0.1f} {2:0.1f} {3} {4:0.1f} {5:0.1f}'.format(
        # final intersaccadic interval, if there is one
        # NOTE(review): the right-hand side of the assignment below is missing
        gaze_points['data']['INTERSACC_INTERVAL_INDEX'][last_saccade_end + 1:] = \
        intersaccadic_intervals_count += 1

        # Override erroneous samples' labels
        gaze_points['data']['EYE_MOVEMENT_TYPE'][is_glitch] = 'NOISE'
        return gaze_points
Exemple #5
    def detect(self, gaze_points, inplace=False):
        # NOTE(review): this listing appears to have lost lines during extraction: the docstring text
        # below has no delimiters and several statements stop mid-expression or miss their opening /
        # branching line (flagged inline). The code is left untouched; restore the missing lines
        # from the original source before running it.
        Identify and label fixation intervals as 'FIX' and some others as 'NOISE'.

        Fixation identification includes the following steps:
        - First, all inter-saccadic intervals with a dispersion of less than
          a certain spread threshold (@self.PREFILTERING_INTERVAL_SPREAD_THRESHOLD_DEGREES) are marked as fixations.
        - Then, a temporal window (@self.SLIDING_WINDOW_WIDTH_MICROSEC ms) is shifted across the
          remaining data and a non-fixation onset (offset) is marked every
          time speed rises above (fell below) threshold (@self.SPEED_THRESHOLD_DEGREES_PER_SEC).
        - There are two ways for speed calculation: spread and speed.
            -'speed': speed from start point to end point is larger than
            -'spread': maximum moving speed of either x or y is larger than
          Data with speed below threshold are labeled as 'FIX'.
        - Finally, non-fixation episodes longer than @self.MINIMAL_SP_DURATION_MICROSEC are kept as 'UNKNOWN',
          the shorter ones are labeled as 'NOISE' (these are fairly dynamic episodes that however should not be SP).

        :param gaze_points: arff object with saccades detected (and intersaccadic intervals labelled)
        :param inplace: whether to replace the data inside @gaze_points or create a new structure
        :return: arff object with data labeled as 'FIX' and 'NOISE'. Some 'UNKNOWN' labels are kept for the next stage.

        if not inplace:
            gaze_points = copy.deepcopy(gaze_points)
        # add a global index column (to keep track of where we are even if working within an intersaccadic interval)
        # NOTE(review): both statements below are cut off after their first line (arguments missing)
        gaze_points = ArffHelper.add_column(gaze_points,
        gaze_points['data']['global_index'] = np.arange(

        # I. First step of fixation removal: rough prefiltering
        # Convert constants to pixels per second
        ppd = util.calculate_ppd(gaze_points)
        speed_thd = ppd * self.SPEED_THRESHOLD_DEGREES_PER_SEC
        prefiltering_spread_thd = ppd * self.PREFILTERING_INTERVAL_SPREAD_THRESHOLD_DEGREES

        # record intersaccadic interval indices of those intervals that are not labelled as FIX by the prefiltering
        unknown_interval_index = []
        unknown_interval_masks = []
        for i in xrange(
                max(gaze_points['data']['INTERSACC_INTERVAL_INDEX']) + 1):
            mask = gaze_points['data']['INTERSACC_INTERVAL_INDEX'] == i
            intersacc_interval = gaze_points['data'][mask]
            # NOTE(review): the body of this `if` is missing; presumably empty intervals are skipped -- TODO confirm
            if len(intersacc_interval) == 0:

            # extent of the interval along each axis (x, y); the closing bracket of the list is missing
            dispersion = [
                max(intersacc_interval['x']) - min(intersacc_interval['x']),
                max(intersacc_interval['y']) - min(intersacc_interval['y'])

            # NOTE(review): the call that caches the mask has lost its opening line, and the branch
            # separating the 'keep unknown' case from the 'FIX' assignment is missing
            if any(thd >= prefiltering_spread_thd for thd in dispersion):
                unknown_interval_index.append(i)  # keep unknown
                    mask.copy())  # cache the indexing
                gaze_points['data']['EYE_MOVEMENT_TYPE'][mask] = 'FIX'

        # II. Second step of fixation removal: finer prefiltering

        # NOTE(review): the loop header below is cut off (second argument of zip and the colon missing)
        for i, interval_mask in zip(unknown_interval_index,
            # We record the borders of the non-FIX episodes to validate their duration. If the non-FIX episode is very
            # short, we mark it as NOISE (not enough duration for a candidate for smooth pursuit)
            onset_timestamp = None
            onset_index = None

            intersacc_interval = gaze_points['data'][interval_mask]
            # NOTE(review): the arguments of the moving-average call are missing
            intersacc_interval = util.get_xy_moving_average(

            # for intervals shorter than @self.INTERSACCADIC_INTERVAL_DURATION_THRESHOLD_MICROSEC:
            # cannot do further filtering. The label remains 'UNKNOWN'
            # NOTE(review): the condition below ends in a line continuation with nothing after it; its
            # right-hand side, its body and the following branch line are missing
            if intersacc_interval['time'][-1] - intersacc_interval['time'][0] < \

            # for intervals that are longer than self.SLIDING_WINDOW_WIDTH_MICROSEC: do further pre-filtering.
            # Label data as 'FIX' or 'NOISE', or keep 'UNKNOWN'
                # window is shifted by 1 sample every time
                for index, item in enumerate(intersacc_interval):
                    x_start = item['x']
                    y_start = item['y']
                    # samples from the current one up to SLIDING_WINDOW_WIDTH_MICROSEC later (both ends inclusive)
                    shift_window_interval = intersacc_interval[
                        (intersacc_interval['time'] >= item['time']) *
                        (intersacc_interval['time'] <=
                         item['time'] + self.SLIDING_WINDOW_WIDTH_MICROSEC)]

                    # if distance between current data and the end of interval is shorter than
                    # self.SLIDING_WINDOW_WIDTH_MICROSEC (i.e. if the end of the window matches the end of the
                    # intersaccadic interval), we keep the previous label if it was FIX, otherwise keep UNKNOWN
                    # NOTE(review): the right-hand side of the comparison, the target of the 'FIX'
                    # assignment and the branch lines of this section are missing
                    if shift_window_interval['time'][-1] == intersacc_interval[
                        if intersacc_interval['EYE_MOVEMENT_TYPE'][index -
                                                                   1] == 'FIX':
                                gaze_points['data']['time'] == item['time']
                            )] = 'FIX'

                            # we do not keep track of the non-fixation interval anymore since it will be all fixation
                            # until the end of the intersaccadic interval
                            onset_timestamp = None
                            onset_index = None
                            # new non-fixation interval is starting
                            onset_timestamp = item['time']
                            onset_index = item['global_index']

                    # if distance between current data and the end of interval is larger than window size, continue
                    # with the process
                        # get window duration in seconds
                        period = (shift_window_interval['time'][-1] -
                                  shift_window_interval['time'][0]) * 1e-6

                        # is the fixation criterion satisfied?
                        fixation_flag = True
                        if self.SLIDING_WINDOW_CRITERION == 'speed':
                            # if the current speed is larger than speed threshold --
                            # mark as onset(UNKNOWN, NOISE). else -- mark as offset(FIX)
                            x_end = shift_window_interval['x'][-1]
                            y_end = shift_window_interval['y'][-1]

                            # compare the start-to-end displacement with the distance allowed by the threshold
                            if math.sqrt(
                                (x_start - x_end)**2 +
                                (y_start - y_end)**2) >= speed_thd * period:
                                # will not be a fixation
                                fixation_flag = False
                        else:  # spread
                            # if either x_max - x_min or y_max - y_min is larger than speed threshold * time --
                            # mark as onset. else -- mark as offset
                            x_max = max(shift_window_interval['x'])
                            x_min = min(shift_window_interval['x'])
                            y_max = max(shift_window_interval['y'])
                            y_min = min(shift_window_interval['y'])

                            if max(x_max - x_min,
                                   y_max - y_min) >= speed_thd * period:
                                # will not be a fixation
                                fixation_flag = False

                        # NOTE(review): the target of the 'FIX' assignment has lost its opening line
                        if fixation_flag:
                                item['global_index']] = 'FIX'

                        # either a fixation start or the whole interval end
                        if fixation_flag or index == len(
                                intersacc_interval) - 1:
                            # if we had a non-fixation interval going on before, check its duration
                            if onset_index is not None:
                                # onset episode larger than 50ms: UNKNOWN. else: NOISE
                                if item['time'] - onset_timestamp < self.MINIMAL_SP_DURATION_MICROSEC:
                                    offset_timestamp = item['time'] - 1
                                    offset_index = item['global_index'] - 1
                                    # if this is not the beginning of fixation,
                                    # the last item also should be labelled as NOISE
                                    if not fixation_flag:
                                        offset_timestamp += 1
                                        offset_index += 1

                                    # NOTE(review): the start of the 'NOISE' assignment (the indexed
                                    # array and the lower bound of the range) is missing
                                        offset_index +
                                        1)]['EYE_MOVEMENT_TYPE'] = 'NOISE'

                                # episode is finished
                                onset_timestamp = None
                                onset_index = None
                            # if new non-fixation interval started
                            # NOTE(review): the branch line that separates this case from the one above is missing
                            if onset_timestamp is None:
                                onset_timestamp = item['time']
                                onset_index = item['global_index']
                            # otherwise it just continues, don't have to do anything
        # can now remove the global_index column
        gaze_points = ArffHelper.remove_column(gaze_points, 'global_index')
        return gaze_points