Example #1
from typing import List

from sortedcontainers import SortedList


class Solution:
    def containsNearbyAlmostDuplicate(self, nums: List[int], k: int,
                                      t: int) -> bool:
        if t < 0:
            return False
        s = SortedList()  # sorted window of the previous (at most) k values
        for i in range(len(nums)):
            # keep only the last k elements in the window
            if i > k:
                s.remove(nums[i - k - 1])
            # an exact duplicate inside the window always satisfies |diff| <= t (t >= 0 here)
            if nums[i] in s:
                return True
            s.add(nums[i])
            pos = s.index(nums[i])
            # only the sorted neighbours of nums[i] can be within distance t
            if pos > 0 and abs(nums[i] - s[pos - 1]) <= t:
                return True
            if pos < len(s) - 1 and abs(nums[i] - s[pos + 1]) <= t:
                return True

        return False
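
A minimal usage sketch for the solution above. The Solution wrapper and the expected outputs below follow the usual LeetCode setup and are illustrative assumptions, not part of the original snippet.

# Usage sketch -- assumes the Solution class defined above.
solver = Solution()
print(solver.containsNearbyAlmostDuplicate([1, 2, 3, 1], k=3, t=0))         # True: nums[0] == nums[3], index distance 3 <= k
print(solver.containsNearbyAlmostDuplicate([1, 5, 9, 1, 5, 9], k=2, t=3))   # False: no pair within value distance 3 and index distance 2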
Example #2
from collections import Counter
from itertools import accumulate, groupby
from operator import attrgetter

from sortedcontainers import SortedList
from timecode import Timecode  # assumed source of the Timecode class used below

# Event (and the Offsets object passed to render()) are defined elsewhere in this project.


class Events:
    def __init__(self, events=None):
        self.events = SortedList(events)
        self.removed_offset = 0

    def add_event(self, event):
        self.events.add(event)

    def delete_event(self, index):
        self.events.pop(index)

    def change_trial(self, index, delta):
        self.events[index].trial += delta

    def render(self, offsets, timecode):
        """
        Return data suitable for display in LogTable
        :param offsets: Offsets object which contains frame offsets used to generate timecodes that match video
        :param timecode: Timecode object (with predefined framerate, drop_frame) used to generate timecode strings
        """
        data = []
        for event in self.events:
            timecode.frames = event.frame + 1 + offsets.get_offset(event.frame)
            data.append([event.trial, event.status, event.response, str(timecode)])
        return data

    def to_plist(self):
        data = {}
        for n, event in enumerate(self.events):
            data['Response {}'.format(n)] = {
                'Trial': event.trial,
                'Trial Status': event.status,
                'Type': event.response,
                'Frame': event.frame
            }
        return data

    @staticmethod
    def from_plist(data, framerate_string='29.97'):

        timecode = Timecode(framerate_string)
        timecode.drop_frame = False

        events = []
        for e in data.values():
            if 'Timecode' in e:
                # convert iCoder-style timecode to frame number
                timecode.set_timecode('{}:{}:{}:{}'.format(
                    e['Timecode']['Hour'],
                    e['Timecode']['Minute'],
                    e['Timecode']['Second'],
                    e['Timecode']['Frame'])
                )
                e['Frame'] = timecode.frames - 1
                has_offset = True  # by assumption; we don't know if the timecode of the first frame is 00:00:00:00
            else:
                has_offset = False

            events.append(
                Event(trial=e['Trial'],
                      status=e['Trial Status'] in ('on', True),
                      response=e['Type'],
                      frame=e['Frame'],
                      has_offset=has_offset)
            )
        return Events(events)

    def remove_offset(self, offset):
        """Remove offset from any events having has_offset == True"""
        self.removed_offset = offset  # to allow undo
        for event in self.events:
            if event.has_offset:
                event.frame -= offset
                event.has_offset = False

    def reset_offset(self):
        """Mark events as having offset to allow recovery from state where the initial timecode was entered incorrectly"""
        for event in self.events:
            event.has_offset = True
            event.frame += self.removed_offset
        self.removed_offset = 0

    def error_items(self, unused_trials, max_trial):
        """ Check for errors and return a list of row numbers (which should be highlighted) and
        corresponding error messages"""
        all_error_rows = []
        msg = []
        # 1. Check for coding entries with a trial number in unused
        error_rows = [i for i, e in enumerate(self.events) if e.trial in unused_trials]
        if error_rows:
            all_error_rows += error_rows
            msg.append('Code entry for unused trial')

        # 2. Check for duplicate entries (same timestamp)
        frame_counter = Counter([e.frame for e in self.events])
        duplicate_frames = [k for k, v in frame_counter.items() if v > 1]
        error_rows = [i for i, e in enumerate(self.events) if e.frame in duplicate_frames]
        if error_rows:
            all_error_rows += error_rows
            msg.append('Entries have the same timestamp')

        # 3. Check for trial numbers that don't increase with increasing frame number
        error_rows = [i for i in range(1, len(self.events)) if self.events[i].trial < self.events[i-1].trial]
        if error_rows:
            all_error_rows += error_rows
            msg.append('Trial numbers are not increasing with increasing timestamp')

        # 4. Check for invalid sequences within trials.
        # a. must not have 2 consecutive events with a response in ('left', 'right') within a trial
        error_rows = []
        for i in range(1, len(self.events)):
            if self.events[i-1].trial == self.events[i].trial and \
                    self.events[i-1].status == self.events[i].status and \
                    self.events[i-1].response in ('left', 'right') and \
                    self.events[i].response in ('left', 'right'):
                error_rows.append(i)
        if error_rows:
            all_error_rows += error_rows
            msg.append('Cannot have consecutive "right" and/or "left" events in a trial')

        # b. must not have 2 consecutive events with the same response
        error_rows = []
        for i in range(1, len(self.events)):
            if self.events[i-1].trial == self.events[i].trial and \
                    self.events[i-1].status == self.events[i].status and \
                    self.events[i-1].response == self.events[i].response:
                error_rows.append(i)
        if error_rows:
            all_error_rows += error_rows
            msg.append('Cannot have consecutive events with the same response')

        # 5. last event in a trial should have status 'off'
        last_rows = accumulate([len(events) for t, events in self.trials().items()])
        error_rows = [r - 1 for r in last_rows if self.events[r-1].status == 'on']
        error_trials = [self.events[r].trial for r in error_rows]
        if error_rows:
            all_error_rows += error_rows
            msg.append('The last event in trial(s) {} should have status "off"'.format(error_trials))

        # 6. must not have 2 consecutive events with trial status 'off'
        error_rows = []
        for i in range(1, len(self.events)):
            if self.events[i-1].status == self.events[i].status == 'off':
                error_rows.append(i)
        if error_rows:
            all_error_rows += error_rows
            msg.append('Cannot have consecutive events with status "off"')

        # 7. Trial number should not exceed maximum trial number in trial order
        error_rows = [i for i, e in enumerate(self.events) if e.trial > max_trial]
        if error_rows:
            all_error_rows += error_rows
            msg.append('The maximum trial number in the trial order is {}'.format(max_trial))

        return all_error_rows, msg

    def __getitem__(self, item):
        return self.events[item]

    def __getattr__(self, item):
        return getattr(self.events, item)

    def absolute_index(self, item):
        # find index of event matching on all fields
        for i, event in enumerate(self.events):
            if item.trial == event.trial and \
                    item._status == event._status and \
                    item.response == event.response and \
                    item.frame == event.frame:
                return i
        return None

    def __len__(self):
        return len(self.events)

    def trials(self):
        """ Compute trials from the list of events"""
        return {k: list(g) for k, g in groupby(self.events, attrgetter('trial'))}

    def frames(self):
        """ Compute frames with responses from events
        Include all frames from start of first trial to end of last trial.
        """
        responses = {}
        for i in range(len(self.events) - 1):
            for f in range(self.events[i].frame, self.events[i + 1].frame):
                responses[f] = self.events[i].response
        # include the last frame
        responses[self.events[-1].frame] = self.events[-1].response
        return responses
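
The Event class used by Events is not part of this example. A minimal stand-in consistent with how it is used above (mutable trial/frame/has_offset fields, an internal _status flag exposed as 'on'/'off' through a status property, and ordering by frame so the SortedList keeps events in time order) might look like the following sketch; treat it purely as an illustration of the expected interface.

# Hypothetical Event stand-in -- the real class is defined elsewhere in the project.
class Event:
    def __init__(self, trial, status, response, frame, has_offset=False):
        self.trial = trial
        self._status = status        # stored internally as a bool
        self.response = response
        self.frame = frame
        self.has_offset = has_offset

    @property
    def status(self):
        # render() and error_items() treat status as the strings 'on' / 'off'
        return 'on' if self._status else 'off'

    def __lt__(self, other):
        # SortedList keeps events ordered by frame (i.e. by time)
        return self.frame < other.frame


# Usage sketch
events = Events([
    Event(trial=1, status=True, response='left', frame=10),
    Event(trial=1, status=False, response='off', frame=40),
])
events.add_event(Event(trial=2, status=True, response='right', frame=70))
print(events.trials())    # {1: [Event, Event], 2: [Event]}
print(events.frames())    # a response for every frame from 10 through 70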
Example #3
from typing import List, Tuple

import numpy as np
from scipy.linalg import cholesky, solve_triangular
from scipy.optimize import minimize
from sortedcontainers import SortedList

# RBF, WhiteKernel and Point are project-local classes (a custom kernel implementation
# and a comparable (x, y) container), not the scikit-learn kernels of the same name.


class GaussianProcessRegression:
    def __init__(self, noisy):
        if noisy:
            self.kernel = RBF(sigma=1, length_scale=1) + WhiteKernel(sigma_noise=0.1)
        else:
            self.kernel = RBF(sigma=1, length_scale=1)
        self.X_train = None
        self.y_train = None
        self.sortedData = SortedList()

    def fit(self, X: np.ndarray, y: np.ndarray):
        assert len(X.shape) == 2, 'X must be two-dimensional array'
        assert len(y.shape) == 1, 'y must be one-dimensional array'

        if self.X_train is None:
            self.X_train = X
        else:
            self.X_train = np.concatenate((self.X_train, X), axis=0)
        if self.y_train is None:
            self.y_train = y
        else:
            self.y_train = np.concatenate((self.y_train, y), axis=0)

        for x_point, y_value in zip(X, y):
            self.sortedData.add(Point(x_point, y_value))

        # build the kernel matrix over all training data so far (jitter added for numerical stability)
        self.K = self.kernel.apply(self.X_train) + 1e-7 * np.eye(self.X_train.shape[0])
        self.K_cholesky = cholesky(self.K, lower=True)
        self.K_inv = np.linalg.inv(self.K)
        self.optimize()

    def predict(self, X: np.ndarray, return_cov=False):
        X_dim = len(X.shape)
        X = X.copy()
        X = np.atleast_2d(X)

        Kx = self.kernel.apply(self.X_train, X)

        mean = np.dot(np.dot(Kx.T, self.K_inv), self.y_train).flatten()

        if return_cov:
            Kxx = self.kernel.apply(X)
            cov = Kxx - np.dot(np.dot(Kx.T, self.K_inv), Kx)
            if X_dim == 2:
                return mean, cov
            else:
                return mean[0], cov[0]
        else:
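            # note: despite the name, this is the predictive variance (no square root is taken)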
            std = self.kernel.apply(X, diag=True) - np.einsum('ij,ji->i', np.dot(Kx.T, self.K_inv), Kx)
            if X_dim == 2:
                return mean, std
            else:
                return mean[0], std[0]

    def log_likelihood(self, theta: dict):
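        # Note: despite the name, this computes the *negative* log marginal likelihood
        # (via the Cholesky factor), which is why optimize() minimizes it directly.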
        old_theta = self.kernel.get_params()
        self._set_kernel_params(theta)
        S1 = solve_triangular(self.K_cholesky, self.y_train, lower=True)
        S2 = solve_triangular(self.K_cholesky.T, S1, lower=False)

        res = np.sum(np.log(np.diagonal(self.K_cholesky))) + \
              0.5 * self.y_train.dot(S2) + \
              0.5 * len(self.X_train) * np.log(2 * np.pi)
        self._set_kernel_params(old_theta)
        return res

    def optimize(self, num_run=5):

        def to_dict(x):
            return dict(zip(self.kernel.get_params_names(), x))

        def to_array(x):
            return np.array(list(x.values()))

        bounds_to_sample_from = {}
        initial_values = self.kernel.get_initial_values()
        bounds = self.kernel.get_bounds()
        param_names = self.kernel.get_params_names()
        for key in param_names:
            bounds_to_sample_from[key] = (max(0, bounds[key][0]) if bounds[key][0] is not None else 0,
                                          min(2*initial_values[key], bounds[key][1])
                                          if bounds[key][1] is not None else 2 * initial_values[key])

        best_log_likelihood = None
        best_theta = None
        for i in range(num_run):
            start_point = np.array([np.random.uniform(bounds_to_sample_from[key][0], bounds_to_sample_from[key][1])
                                    for key in param_names])
            res = minimize(lambda x: self.log_likelihood(to_dict(x)),
                           start_point,
                           bounds=to_array(self.kernel.get_bounds()),
                           method='L-BFGS-B')

            if best_log_likelihood is None or self.log_likelihood(to_dict(res.x)) < best_log_likelihood:
                best_log_likelihood = self.log_likelihood(to_dict(res.x))
                best_theta = to_dict(res.x)

        self._set_kernel_params(best_theta)

    def _set_kernel_params(self, theta: dict):
        self.kernel.set_params(theta)
        self.K = self.kernel.apply(self.X_train) + 1e-7 * np.eye(self.X_train.shape[0])
        self.K_cholesky = cholesky(self.K, lower=True)
        self.K_inv = np.linalg.inv(self.K)

    def get_data(self) -> Tuple[np.ndarray, np.ndarray]:
        return self.X_train, self.y_train

    def get_sorted_data(self):
        return self.sortedData

    def get_best_points(self, num: int) -> List[Point]:
        assert num <= len(self.sortedData), 'There are not enough points to return'
        return [self.sortedData[i] for i in range(num)]

    @property
    def x_opt(self) -> np.ndarray:
        return self.sortedData[0].x

    @property
    def f_opt(self) -> float:
        return self.sortedData[0].y

    @property
    def size(self) -> int:
        return self.X_train.shape[0]
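
A usage sketch for the regressor above, on invented toy data. It assumes the project-local RBF, WhiteKernel and Point classes are importable with the constructor signatures used in __init__; nothing here is part of the original snippet.

# Usage sketch -- toy data only.
import numpy as np

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(20, 1))                       # 20 one-dimensional inputs
y = np.sin(X).ravel() + 0.05 * rng.standard_normal(20)     # noisy observations

gp = GaussianProcessRegression(noisy=True)
gp.fit(X, y)                                               # fit() also runs hyperparameter optimization

X_test = np.linspace(-3, 3, 50).reshape(-1, 1)
mean, var = gp.predict(X_test)                             # predictive mean and variance diagonal
mean, cov = gp.predict(X_test, return_cov=True)            # full predictive covariance

print(gp.x_opt, gp.f_opt)                                  # best stored point, per the SortedList ordering
print(gp.size)                                             # number of training points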