예제 #1
0
    def set_alreadyreached(self, population, order, sampling_days,
                           is_same_day_as_prev):
        '''
        Resolve samples that fall on the same day as the previous sample.

        Column `order` of `is_same_day_as_prev` flags the affected persons.
        A warning is emitted when anybody is flagged, then `self.if_reached`
        decides what happens to column `order` of `sampling_days` (in place):
            'same'  -> flagged entries take the value of column `order - 1`
            'NaN'   -> flagged entries take the value _ALREADYREACHED
            'raise' -> ValueError if anybody is flagged
        Unless an error was raised, (number flagged, policy) is appended to
        population.sampling_summary['if_reached'].
        '''
        policy = self.if_reached
        reached_mask = is_same_day_as_prev[:, order]
        anyone_reached = np.any(reached_mask)

        if anyone_reached:
            warn(
                f"There are {reached_mask.sum()} who had already reached their milestone"
            )

        if policy == 'same':
            #fast forward the new sample to the previous sample
            previous_days = sampling_days[:, order - 1]
            sampling_days[:, order] = np.where(reached_mask, previous_days,
                                               sampling_days[:, order])
        elif policy == 'NaN':
            #will be masked with fill_value = NaN
            sampling_days[:, order] = np.where(reached_mask, _ALREADYREACHED,
                                               sampling_days[:, order])
        elif policy == 'raise' and anyone_reached:
            raise ValueError(
                "Patient was already here when he arrived for his prev sample"
            )

        #remember how many triggered if_reached
        summary_entry = (np.sum(reached_mask), policy)
        population.sampling_summary['if_reached'].append(summary_entry)
예제 #2
0
    def __getitem__(
            self,
            subscript):  #for slicing like a numpy 2d array of (persons, days)
        '''
        Build a new population from a numpy-style slice of this one.

        The copy returned by self.copy() gets its parameters, scores and days
        passed through helper.twodarray, indexed with `subscript`, and stored
        as plain ndarrays. Parameters with ndim == 0 are carried over unsliced.
        `persons` is indexed with the first element of `subscript` only
        (when `subscript` is a tuple), because it only has rows to index.
        '''
        if isinstance(self.days, np.ma.masked_array):
            warn('slicing converts masks to arrays')  #TODO

        def cut(values, index):
            #slice as twodarray but keep the result as an ndarray
            return np.array(helper.twodarray(values)[index])

        sliced = self.copy()

        new_parameters = {}
        for paramname, paramval in sliced.parameters.items():
            if paramval.ndim > 0:
                new_parameters[paramname] = cut(paramval, subscript)
            else:
                new_parameters[paramname] = paramval
        sliced.parameters = new_parameters

        new_scores = {}
        for scorename, scoreval in sliced.scores.items():
            new_scores[scorename] = cut(scoreval, subscript)
        sliced.scores = new_scores

        sliced.days = cut(sliced.days, subscript)

        #self.persons can only have its rows indexed
        row_index = subscript[0] if isinstance(subscript, tuple) else subscript
        sliced.persons = cut(sliced.persons, row_index)

        return sliced
예제 #3
0
 def regress_persons(self, x='visual', y='symptom'):
     '''
     Deprecated: independent simple linear regression for every person.

     Converts self with to_populationlist() and returns whatever its
     regress_linear(y=y, x=x) returns.
     '''
     warn('Deprecated')
     #regress each person
     #TODO return as Result not Resultslist
     return self.to_populationlist().regress_linear(y=y, x=x)
예제 #4
0
    def generate(self, generate_parameters=True):
        '''
        Fill self.scores by running the score generator of every score name.

        generate_parameters: when truthy, self.generate_parameters() runs first.
        Each generated array is cast to `scoretype`. A warning is emitted when
        the 'visual' minimum is below VMIN or the 'symptom' minimum is below
        SMIN. Finally every score array is brought to self.data_shape.
        '''
        if generate_parameters:
            self.generate_parameters()

        #kept as an explicit loop: later scores may depend on earlier ones
        for name in self.scores:
            generated = self.generate_from_score_generator(name)
            self.scores[name] = generated.astype(scoretype)

        #initial=... keeps np.min from raising on an empty array
        lowest_visual = np.min(self.scores['visual'], initial=VMIN)
        if lowest_visual < VMIN:
            warn(
                "visual score in {} has min={}, which is below VMIN={}".format(
                    self.title, lowest_visual, VMIN))

        lowest_symptom = np.min(self.scores['symptom'], initial=SMIN)
        if lowest_symptom < SMIN:
            warn("symptom score in {} has  min={}, which is below SMIN={}".
                 format(self.title, lowest_symptom, SMIN))

        #if all parameters are 'population', the generation process will only have created a single row
        #so, repeat that row to create the full matrix
        for name in self.scores:
            current = self.scores[name]
            if current.size == 0:
                #empty array: change shape by adding empty axes
                self.scores[name] = current.reshape(self.data_shape)
            elif current.shape != self.data_shape:
                #nonempty array of the wrong shape: change shape by broadcasting
                self.scores[name] = np.broadcast_to(current, self.data_shape)
예제 #5
0
    def sample(self, population, order, sampling_days):
        '''
        Find, for each person, the first day whose score satisfies the
        magnitude condition and store it in column `order` of `sampling_days`.

        population: read for .scores[self.scorename] and .npersons
        order: column of sampling_days to fill; lower columns are read as
            the previous samples
        sampling_days: 2d array indexed as [person, order], modified in place

        Persons who never satisfy the condition get _UNREACHED_MAGNITUDE and
        a warning reports how many there are. The method ends by handing over
        to finish_sampling of the parent class.
        '''
        smilescores = population.scores[
            self.scorename]  #scores which the method value refers to
        #NOTE(review): assigned but not used anywhere else in this method
        smilescore_lowerbound = get_MIN(self.scorename)

        # Boolean array of where the milestone condition holds
        # (<= when triggered_by_equal, otherwise strict <)
        comparison_array = (
            smilescores <= self.value) if self.triggered_by_equal else (
                smilescores < self.value)
        # Require the condition to hold min_triggered times in a row along axis 1
        if self.min_triggered == 1:
            pass  #don't change comparison_array
        elif self.min_triggered > 1:
            triggered_in_a_row = np.ones_like(
                comparison_array[:, self.min_triggered - 1:])  #initial: all ones
            for start in range(self.min_triggered):
                #shifted window of the same width as triggered_in_a_row
                end = start + 1 - self.min_triggered
                if end == 0: end = None  #a stop of 0 would give an empty slice
                triggered_in_a_row = triggered_in_a_row * comparison_array[:,
                                                                           start:
                                                                           end]  # accumulate: product of all shifted windows
            comparison_array[:, self.min_triggered -
                             1:] = triggered_in_a_row  #we only checked when enough days have passed
            comparison_array[:, :self.min_triggered -
                             1] = False  #the rest can't have had enough days in a row

        #only check on or after previous (valid) sample day by
        #setting the comparison values from days 0 to prev sample day (excluding end) to False
        if order > 0:
            for i in range(population.npersons):
                #walk back through earlier samples until one is below NDAYS;
                #if none is, the value of sample 0 is what remains
                for prev_order in range(order - 1, 0 - 1, -1):
                    prev_sample_day = sampling_days[i, prev_order]
                    if prev_sample_day < NDAYS: break
                #for setting days until then as don't consider
                comparison_array[i, :prev_sample_day] = False
        #if it is True on the same day as the previous sample day, the finish_sampling will consider it already_reached

        #index of the first True per person (np.argmax returns the first maximum)
        sampling_days_temp = np.argmax(comparison_array, axis=1)
        #the day at which the milestone is reached for each person, inc. 0 for 'never reached'
        sampling_days[:, order] = sampling_days_temp
        #look up the value at that index to tell a real hit from an all-False row
        persons_reached_milestone = np.take_along_axis(
            comparison_array, helper.to_vertical(sampling_days_temp), axis=1)
        #give invalid day to those who didn't reach
        sampling_days[~persons_reached_milestone.flatten(),
                      order] = _UNREACHED_MAGNITUDE
        if not np.all(persons_reached_milestone):
            warn(
                f"There are {(~persons_reached_milestone.flatten()).sum()} who didn't reach their milestone"
            )

        super().finish_sampling(population, order, sampling_days)
예제 #6
0
    def regress_mixed(self, x='visual', y='symptom', random_effect='both'):
        '''
        Fit a mixed effects linear regression of `y` on `x`, grouped by person.

        x: independent variable, currently only 'visual'
        y: dependent variable, currently only 'symptom'
        random_effect: 'intercept', 'slope', or 'both'
        Rows with missing values are dropped by the model (missing='drop');
        a warning reports the NaN count of the dataframe beforehand.
        Raises ValueError for an unrecognized x, y, or random_effect.
        Returns RegressionResult(fitted result, self).
        '''
        # Argument parsing # TODO make into helper function for clutter reduction
        allowed_y = {'symptom'}  #TODO add more possibilities
        allowed_x = {'visual'}  #TODO add more possibilities
        if y not in allowed_y:
            raise ValueError(
                'Dependent variable {} not recognized. Use one of {} instead.'.
                format(y, allowed_y))
        if x not in allowed_x:
            raise ValueError(
                'Independent variable {} not recognized. Use one of {} instead.'
                .format(x, allowed_x))

        df = self.to_dataframe()
        #check for NaN
        #TODO fix: reports 3 times the NaN since counts for each scoretype
        nan_total = df.isnull().sum().sum()
        if nan_total > 0:
            warn('Population {} has {} NaN values'.format(
                self.title, nan_total))

        #the random effects structure is the only thing that differs between the models
        if random_effect == 'intercept':
            random_part = {}
        elif random_effect == 'slope':
            random_part = {'re_formula': f' ~{x}+0'}
        elif random_effect == 'both':
            random_part = {'re_formula': f' ~{x}'}
        else:
            raise ValueError(
                f"random_effect of {random_effect} not understood")

        #TODO check notes of https://www.statsmodels.org/stable/generated/statsmodels.formula.api.mixedlm
        model = smf.mixedlm(f' {y}~{x} ',
                            df,
                            groups=df['person'],
                            missing='drop',
                            **random_part)
        fitted = model.fit()  #fit model

        return RegressionResult(fitted, self)
예제 #7
0
    def __init__(self,
                 value=None,
                 scorename='symptom',
                 triggered_by_equal=True,
                 min_triggered=1,
                 **kwargs):
        '''
        value: what value triggers this milestone (None means minimum possible given the scorename)
        scorename: which score the value refers to ('symptom', 'visual', or 'symptom_noerror')
        triggered_by_equal: if True, use <= for trigger, if False, use < for trigger
        min_triggered: the number of days in a row to fulfill the condition when sampling
        kwargs: passed to parent class

        Raises ValueError for an unknown scorename and TypeError when
        triggered_by_equal is not a bool or min_triggered is not an int of at
        least 1. A value that lies below (or, for the strict trigger, at) the
        minimum of the score only produces a warning.
        '''
        super().__init__(name='magnitude', **kwargs)

        #check and set scorename
        if scorename not in {'symptom', 'visual', 'symptom_noerror'}:
            raise ValueError(f"scorename of {scorename} not understood")
        self.scorename = scorename

        #check and set triggered_by_equal
        if not isinstance(triggered_by_equal, bool):
            raise TypeError(
                f"triggered_by_equal of {triggered_by_equal} should be a boolean"
            )
        self.triggered_by_equal = triggered_by_equal

        #check and set value
        score_min = get_MIN(self.scorename)  #looked up once, used for default and checks
        if value is None:
            value = score_min
        if self.triggered_by_equal:
            #trigger is score <= value, so value must be at least the minimum
            if value < score_min:
                warn(
                    f"value of {value} may be unobtainable since it is smaller than "
                    f"{self.scorename}'s MIN of {score_min}")
        else:
            #trigger is score < value, so value must be above the minimum
            if value <= score_min:
                warn(
                    f"value of {value} may be unobtainable since it is smaller or equal to "
                    f"{self.scorename}'s MIN of {score_min}")
        self.value = value

        #check and set min_triggered
        #TypeError is kept for the range check as well so existing callers keep working
        if not isinstance(min_triggered, int) or min_triggered < 1:
            raise TypeError(
                f"min_triggered of {min_triggered} should be an int of at least 1"
            )
        self.min_triggered = min_triggered
예제 #8
0
 def generate_from_score_generator(self, scorename):
     '''
     Call the generator function registered for `scorename` and return its result.

     The generator's positional parameter names decide what it is called with:
         't' or 'day'              -> self.days
         'v' or 'visual'           -> self.scores['visual']
         's' or 'symptom_noerror'  -> self.scores['symptom_noerror']
         anything else             -> self.parameters[<name>]
     If no generator is registered for `scorename`, a warning is emitted and
     an empty array is returned.
     Raises KeyError when a custom parameter name is not in self.parameters.
     '''
     try:
         func = self.function_generators[scorename]
     except KeyError:
         warn(
             "There is no generator for '{}' score attached to this Population."
             .format(scorename))
         return np.array([])

     #co_varnames lists the arguments first and the local variables after them,
     #so cut it at co_argcount to avoid treating locals as parameters
     code = func.__code__
     paramnames = code.co_varnames[:code.co_argcount]

     paramvals = []
     for paramname in paramnames:
         #reserved parameters
         if paramname in ('t', 'day'):
             paramvals.append(self.days)
         elif paramname in ('v', 'visual'):
             paramvals.append(self.scores['visual'])
         elif paramname in ('s', 'symptom_noerror'):
             paramvals.append(self.scores['symptom_noerror'])
         #custom parameters
         else:
             paramvals.append(self.parameters[paramname])
     return func(*paramvals)
예제 #9
0
    def sample(self, population, order, sampling_days):
        '''
        Find, for each person, the first day whose score has dropped to the
        configured ratio of the score on the index day, and store it in
        column `order` of `sampling_days`.

        population: read for .scores[self.scorename] and .npersons
        order: column of sampling_days to fill; lower columns are read as
            the previous samples
        sampling_days: 2d array indexed as [person, order], modified in place

        The threshold per person is
            (score at index day - lower bound) * ratio + lower bound.
        Persons who never satisfy the condition get _UNREACHED_SMILE and a
        warning reports how many there are. The method ends by handing over
        to finish_sampling of the parent class.
        '''
        smilescores = population.scores[
            self.scorename]  #scores which the method ratio refers to
        smilescore_lowerbound = get_MIN(self.scorename)

        #get and check index days
        if isinstance(self.index, int):
            index_days = np.full((population.npersons, ), self.index)
        elif isinstance(self.index, tuple):
            #self.index is ('sample', k): the index day is the day of a previous sample
            prev_sampling_days = sampling_days[:, :order]
            index_days = prev_sampling_days[:, self.index[1]]
        elif callable(self.index):
            prev_sampling_days = sampling_days[:, :order]
            #TODO check if int not outside NDAYS, FIRSTVISIT, LASTVISIT
            index_days = self.index((population.npersons, ),
                                    prev_sampling_days)

        # Compute the scores which will trigger milestones
        smilescores_at_index = np.take_along_axis(
            smilescores, helper.to_vertical(index_days), axis=1)  #column array
        smile_vals = (smilescores_at_index - smilescore_lowerbound
                      ) * self.ratio + smilescore_lowerbound  #column array

        # Compute the days where the milestones are triggered
        # (<= when triggered_by_equal, otherwise strict <)
        comparison_array = (
            smilescores <= smile_vals) if self.triggered_by_equal else (
                smilescores < smile_vals)
        # Compute the days where the milestones are triggered consecutively
        if self.min_triggered == 1:
            pass  #don't change comparison_array
        elif self.min_triggered > 1:
            triggered_in_a_row = np.ones_like(
                comparison_array[:, self.min_triggered - 1:])  #initial
            for start in range(self.min_triggered):
                #shifted window of the same width as triggered_in_a_row
                end = start + 1 - self.min_triggered
                if end == 0: end = None  #a stop of 0 would give an empty slice
                triggered_in_a_row = triggered_in_a_row * comparison_array[:, start:end]  # accumulate
            comparison_array[:, self.min_triggered -
                             1:] = triggered_in_a_row  #we only checked when enough days have passed
            comparison_array[:, :self.min_triggered -
                             1] = False  #the rest can't have had enough days in a row

        #only check on or after previous sample day by
        #setting the comparison values from days 0 to prev sample day (excluding end) to False
        #the first sample has no previous one: without the guard, column order - 1
        #would wrap around to the last column of sampling_days
        if order > 0:
            for i in range(population.npersons):
                comparison_array[i, :sampling_days[i, order - 1]] = False
        #if it is True on the same day as the previous sample day, the finish_sampling will consider it already_reached

        #index of the first True per person (np.argmax returns the first maximum)
        sampling_days_temp = np.argmax(comparison_array, axis=1)
        #the day at which the milestone is reached for each person, inc. 0 for 'never reached'
        sampling_days[:, order] = sampling_days_temp

        #look up the value at that index to tell a real hit from an all-False row
        persons_reached_milestone = np.take_along_axis(
            comparison_array, helper.to_vertical(sampling_days_temp), axis=1)
        #give invalid day to those who didn't reach
        sampling_days[~persons_reached_milestone.flatten(),
                      order] = _UNREACHED_SMILE
        if not np.all(persons_reached_milestone):
            warn(
                f"There are {(~persons_reached_milestone.flatten()).sum()} who didn't reach their milestone"
            )

        super().finish_sampling(population, order, sampling_days)
예제 #10
0
    def __init__(self,
                 index=FIRSTVISIT,
                 ratio=0.5,
                 scorename='symptom',
                 triggered_by_equal=True,
                 min_triggered=1,
                 **kwargs):
        '''
        index: one of
            - int of the day
            - 2-tuple where the first entry is the string 'sample'
              and the second entry determines which previous sample to reference (int)
            - callable whose arguments are exactly (shape, prev_sampling_days)
        ratio: what ratio triggers this smile milestone, between 0 and 1 for useful results
        scorename: which score the ratio refers to ('symptom', 'visual', or 'symptom_noerror')
        triggered_by_equal: if True, use <= for trigger, if False, use < for trigger
        min_triggered: the number of days in a row to fulfill the condition when sampling
        kwargs: passed to parent class

        Raises TypeError for an index of an unsupported type, a non-int sample
        reference, a non-bool triggered_by_equal, or a min_triggered that is
        not an int of at least 1. Raises ValueError for a malformed index
        tuple, a callable with the wrong arguments, or an unknown scorename.
        A ratio outside (0, 1) only produces a warning.
        '''
        super().__init__(name='smile', **kwargs)

        #check parameters

        #check index
        if isinstance(index, int):
            pass
        elif isinstance(index, tuple):  #check if refers to previous sample
            if len(index) == 2 and index[0] == 'sample':
                if not isinstance(index[1], int):
                    raise TypeError(
                        f"index reference has value {index[1]} which is not an int"
                    )
            else:
                raise ValueError(
                    f"index tuple of {index} is defined wrong. "
                    "It should have length 2 and its first value should be the string 'sample'"
                )
        elif callable(index):
            #co_varnames holds the arguments followed by the local variables,
            #so only its first co_argcount entries are the argument names
            code = index.__code__
            if code.co_varnames[:code.co_argcount] != ('shape',
                                                       'prev_sampling_days'):
                raise ValueError(
                    "The function for index day generation should only have 'shape' and 'prev_sampling_days' as an argument."
                )
        else:
            raise TypeError(
                f"index of {index} is of type {type(index)}, which is not int, tuple, or a callable"
            )
        self.index = index

        #check and set ratio
        if not (0 < ratio < 1):
            warn(f"ratio of {ratio} may be unobtainable.")
        self.ratio = ratio

        #check and set scorename
        if scorename not in {'symptom', 'visual', 'symptom_noerror'}:
            raise ValueError(f"scorename of {scorename} not understood")
        self.scorename = scorename

        #check and set triggered_by_equal
        if not isinstance(triggered_by_equal, bool):
            raise TypeError(
                f"triggered_by_equal of {triggered_by_equal} should be a boolean"
            )
        self.triggered_by_equal = triggered_by_equal

        #check and set min_triggered
        #TypeError is kept for the range check as well so existing callers keep working
        if not isinstance(min_triggered, int) or min_triggered < 1:
            raise TypeError(
                f"min_triggered of {min_triggered} should be an int of at least 1"
            )
        self.min_triggered = min_triggered
예제 #11
0
    def plot(self,
             ax,
             ndays=None,
             npersons=None,
             x='day',
             y='symptom',
             viztype='lines',
             vizcolor='person'):
        '''
        Draw the first `npersons` persons and first `ndays` days onto `ax`.

        ax: matplotlib axes to draw on
        ndays, npersons: how much of the data to draw (None means self.ndays / self.npersons)
        x, y: either 'day' or a key of self.scores
        viztype: 'lines', 'points', or 'both'
        vizcolor: 'person' (each person is a color) or 'day' (each day is a color);
            'day' can only be applied to points, a warning is emitted for lines
        Raises ValueError for an unknown x, y, viztype, or vizcolor.
        '''
        if ndays is None: ndays = self.ndays
        if npersons is None: npersons = self.npersons

        if (viztype == 'lines' or viztype == 'both') and vizcolor == 'day':
            warn('vizcolor of "day" can only be applied to points, not lines')
        if vizcolor not in {'person', 'day'}:
            raise ValueError("vizcolor of '{}' unknown".format(vizcolor))
        if viztype not in {'lines', 'points', 'both'}:
            raise ValueError("viztype of '{}' unknown".format(viztype))

        #abscissas
        if x == 'day':
            xlabel = 'days since concussion'
            ax.xaxis.set_minor_locator(MultipleLocator(10))
            xdata = self.days[:npersons, :ndays]
        elif x in self.scores:
            xlabel = x + ' scores'
            ax.xaxis.set_minor_locator(MultipleLocator(1))
            xdata = self.scores[x][:npersons, :ndays]
        else:
            raise ValueError("x of '{}' unknown".format(x))

        #ordinates
        if y == 'day':
            #the days go on the y axis here; the x data chosen above stays untouched
            ydata = self.days[:npersons, :ndays]
            ylabel = 'days since concussion'
        elif y in self.scores:
            ylabel = y + ' scores'
            ydata = self.scores[y][:npersons, :ndays]
        else:
            raise ValueError("y of '{}' unknown".format(y))

        #titles and labels
        ax.set_title(self.title, wrap=True)
        ax.set(xlabel=xlabel, ylabel=ylabel)

        #plotting
        #lines
        if viztype == 'lines' or viztype == 'both':
            #one polyline per person: shape (persons, days, 2)
            points = np.stack([xdata, ydata], axis=2)
            colors = mpl.cm.get_cmap(
                lines_cmap_name
            ).colors  # https://matplotlib.org/2.0.1/users/colormaps.html
            ax.add_collection(LineCollection(points, colors=colors))
        #points
        if viztype == 'points' or viztype == 'both':
            if vizcolor == 'person':
                colors = np.array(mpl.cm.get_cmap(
                    lines_cmap_name).colors)  # not the right shape
                colors = helper.rgblist_to_rgbapop(colors, npersons, ndays)
            else:  #vizcolor == 'day', anything else was rejected above
                cmap = mpl.cm.get_cmap(
                    points_cmap_name
                )  # https://matplotlib.org/2.0.1/users/colormaps.html
                colors = cmap(helper.normalize(
                    self.days[:npersons, :ndays]))  # converts scalars to rgba
            #scatter converts the 2d arrays x and y to flat arrays, and colors should respect that flatness
            colors = colors.reshape(npersons * ndays, 4)
            ax.scatter(xdata, ydata, facecolors='none', edgecolors=colors)

        ax.autoscale()
예제 #12
0
 def set_score_generator(self, scorename, func):
     '''
     Register `func` as the generator function for `scorename`.

     A scorename that is not a key of self.scores only triggers a warning;
     the function is stored in self.function_generators either way.
     '''
     is_known = scorename in self.scores
     if not is_known:
         warn(
             f"Scorename '{scorename}' not known. Known options are: {self.scores.keys()}"
         )
     self.function_generators[scorename] = func