def set_alreadyreached(self, population, order, sampling_days,
                       is_same_day_as_prev):
    '''
    Handle persons whose sample number `order` falls on the same day as
    their previous sample.

    Depending on self.if_reached:
        'same':  the sampling day is replaced by the day in column order - 1
        'NaN':   the sampling day is replaced by _ALREADYREACHED
        'raise': a ValueError is raised if anybody is affected
    sampling_days is modified in place, and the number of affected persons
    is appended to population.sampling_summary['if_reached'].
    '''
    repeated = is_same_day_as_prev[:, order]
    anyone_repeated = np.any(repeated)
    if anyone_repeated:
        warn(
            f"There are {repeated.sum()} who had already reached their milestone"
        )

    policy = self.if_reached
    current_days = sampling_days[:, order]
    if policy == 'same':
        #fast forwards new sample to previous sample
        sampling_days[:, order] = np.where(repeated,
                                           sampling_days[:, order - 1],
                                           current_days)
    elif policy == 'NaN':
        #will be masked with fill_value = NaN
        sampling_days[:, order] = np.where(repeated, _ALREADYREACHED,
                                           current_days)
    elif policy == 'raise' and anyone_repeated:
        raise ValueError(
            "Patient was already here when he arrived for his prev sample")

    #remember how many triggered if_reached
    population.sampling_summary['if_reached'].append(
        (np.sum(repeated), policy))
def __getitem__(self, subscript):
    #for slicing like a numpy 2d array of (persons, days)
    '''
    Build a new population from a copy of self, with days, scores and
    parameters sliced by `subscript` (numpy-like) and kept two-dimensional.

    Parameters with ndim == 0 are carried over unsliced. persons is only
    indexed along its rows, so for a tuple subscript only its first entry
    is applied to it.
    '''
    if isinstance(self.days, np.ma.masked_array):
        warn('slicing converts masks to arrays')  #TODO

    def _sliced(values):
        #slice as twodarray but keep as ndarray
        return np.array(helper.twodarray(values)[subscript])

    newpop = self.copy()

    sliced_parameters = {}
    for paramname, paramval in newpop.parameters.items():
        if paramval.ndim > 0:
            sliced_parameters[paramname] = _sliced(paramval)
        else:
            sliced_parameters[paramname] = paramval
    newpop.parameters = sliced_parameters

    sliced_scores = {}
    for scorename, scoreval in newpop.scores.items():
        sliced_scores[scorename] = _sliced(scoreval)
    newpop.scores = sliced_scores

    newpop.days = _sliced(newpop.days)

    #since self.persons can only have its rows indexed
    row_subscript = subscript[0] if isinstance(subscript,
                                               tuple) else subscript
    newpop.persons = np.array(
        helper.twodarray(newpop.persons)[row_subscript])
    return newpop
def regress_persons(self, x='visual', y='symptom'):
    '''
    Deprecated: linear regression of y on x for every person separately.

    Converts self with to_populationlist() and returns whatever its
    regress_linear(y=y, x=x) returns.
    '''
    warn('Deprecated')
    #regress each person
    #TODO return as Result not Resultslist
    return self.to_populationlist().regress_linear(y=y, x=x)
def generate(self, generate_parameters=True):
    '''
    Fill self.scores by running the score generator of every score name.

    generate_parameters: if True, self.generate_parameters() is called first.

    Warns when the visual or symptom scores go below VMIN or SMIN, and
    finally brings every score array to self.data_shape.
    '''
    if generate_parameters:
        self.generate_parameters()

    #cannot be done by dict comprehension since later dict values depend on previous ones
    for scorename in self.scores:
        generated = self.generate_from_score_generator(scorename)
        self.scores[scorename] = generated.astype(scoretype)

    #initial arg to avoid error of min on empty array
    lowest_visual = np.min(self.scores['visual'], initial=VMIN)
    if lowest_visual < VMIN:
        warn("visual score in {} has min={}, which is below VMIN={}".format(
            self.title, lowest_visual, VMIN))

    #initial arg to avoid error of min on empty array
    lowest_symptom = np.min(self.scores['symptom'], initial=SMIN)
    if lowest_symptom < SMIN:
        warn("symptom score in {} has min={}, which is below SMIN={}".format(
            self.title, lowest_symptom, SMIN))

    #if all parameters are 'population', the generation process will only have created a single row
    #so, repeat that row 'self.npersons' times to create the full matrix
    target_shape = self.data_shape
    for scorename in self.scores:
        values = self.scores[scorename]
        if values.size == 0:
            #empty score array: change shape by adding empty axes
            self.scores[scorename] = values.reshape(target_shape)
        elif values.shape != target_shape:
            #nonempty but wrong shape: change shape by broadcasting
            self.scores[scorename] = np.broadcast_to(values, target_shape)
def sample(self, population, order, sampling_days):
    '''
    Find, for each person, the first day on which the score self.scorename
    is at or below (triggered_by_equal) or strictly below self.value for
    self.min_triggered days in a row, and write it to sampling_days[:, order].

    Days before the most recent previous sample day that is below NDAYS are
    not considered. Persons who never trigger get _UNREACHED_MAGNITUDE.
    Ends by calling the parent's finish_sampling.
    '''
    scores = population.scores[self.scorename]  #scores which the method value refers to
    score_lowerbound = get_MIN(self.scorename)  #not used below

    # Compute the days where the milestones are triggered
    if self.triggered_by_equal:
        triggered = scores <= self.value
    else:
        triggered = scores < self.value

    # Compute the days where the milestones are triggered consecutively
    run_length = self.min_triggered
    if run_length > 1:
        lead = run_length - 1
        streak = np.ones_like(triggered[:, lead:])  #initial
        for offset in range(run_length):
            stop = offset - lead
            if stop == 0:
                window = triggered[:, offset:]
            else:
                window = triggered[:, offset:stop]
            streak = streak * window  # accumulate
        #we only checked when enough days have passed
        triggered[:, lead:] = streak
        #the rest can't have had enough days in a row
        triggered[:, :lead] = False

    #only check on or after previous (valid) sample day by
    #setting the comparison values from days 0 to prev sample day (excluding end) to False
    if order > 0:
        for person in range(population.npersons):
            #walk backwards through earlier samples for a valid prev day
            for earlier_order in reversed(range(order)):
                earlier_day = sampling_days[person, earlier_order]
                if earlier_day < NDAYS:
                    break
            #if it is True on the same day as the previous sample day,
            #the finish_sampling will consider it already_reached
            triggered[person, :earlier_day] = False

    #the day at which the milestone is reached for each person, inc. 0 for 'never reached'
    first_days = np.argmax(triggered, axis=1)
    sampling_days[:, order] = first_days

    #record of which persons reached the milestones
    reached = np.take_along_axis(triggered,
                                 helper.to_vertical(first_days),
                                 axis=1)
    not_reached = ~reached.flatten()

    #give invalid day to those who didn't reach
    sampling_days[not_reached, order] = _UNREACHED_MAGNITUDE
    if not np.all(reached):
        warn(f"There are {not_reached.sum()} who didn't reach their milestone")

    super().finish_sampling(population, order, sampling_days)
def regress_mixed(self, x='visual', y='symptom', random_effect='both'):
    '''
    Mixed effects linear regression of y on x, grouped by person.

    x: independent variable, currently only 'visual'
    y: dependent variable, currently only 'symptom'
    random_effect: 'intercept', 'slope', or 'both'

    Rows with missing values are dropped by the model (missing='drop').
    Returns a RegressionResult wrapping the fitted model and self.
    Raises ValueError for an unrecognized x, y or random_effect.
    '''
    # Argument parsing
    # TODO make into helper function for clutter reduction
    y_possibilities = {'symptom'}  #TODO add more possibilities
    x_possibilities = {'visual'}  #TODO add more possibilities
    if y not in y_possibilities:
        raise ValueError(
            'Dependent variable {} not recognized. Use one of {} instead.'.
            format(y, y_possibilities))
    if x not in x_possibilities:
        raise ValueError(
            'Independent variable {} not recognized. Use one of {} instead.'.
            format(x, x_possibilities))

    df = self.to_dataframe()

    #check for NaN, they are dropped when specifying the model
    #TODO fix: reports 3 times the NaN since counts for each scoretype
    null_count = df.isnull().sum().sum()
    if null_count > 0:
        warn('Population {} has {} NaN values'.format(self.title, null_count))

    #pick the random effects structure
    if random_effect == 'intercept':
        model_kwargs = {}
    elif random_effect == 'slope':
        model_kwargs = {'re_formula': f' ~{x}+0'}
    elif random_effect == 'both':
        model_kwargs = {'re_formula': f' ~{x}'}
    else:
        raise ValueError(f"random_effect of {random_effect} not understood")

    #regress
    #TODO check notes of https://www.statsmodels.org/stable/generated/statsmodels.formula.api.mixedlm
    model = smf.mixedlm(f' {y}~{x} ',
                        df,
                        groups=df['person'],
                        missing='drop',
                        **model_kwargs)
    return RegressionResult(model.fit(), self)
def __init__(self,
             value=None,
             scorename='symptom',
             triggered_by_equal=True,
             min_triggered=1,
             **kwargs):
    '''
    value: what value triggers this milestone (None means minimum possible given the scorename)
    scorename: which score the value refers to ('symptom', 'visual', or 'symptom_noerror')
    triggered_by_equal: if True, use <= for trigger, if False, use < for trigger
    min_triggered: the number of days in a row to fulfill the condition when sampling
    kwargs: passed to parent class

    Raises ValueError for an unknown scorename, and TypeError for a
    non-bool triggered_by_equal or a min_triggered that is not an int >= 1.
    '''
    super().__init__(name='magnitude', **kwargs)

    #check and set scorename
    allowed_scorenames = {'symptom', 'visual', 'symptom_noerror'}
    if scorename not in allowed_scorenames:
        raise ValueError(f"scorename of {scorename} not understood")
    self.scorename = scorename

    #check and set triggered_by_equal
    if not isinstance(triggered_by_equal, bool):
        raise TypeError(
            f"triggred_by_equal of {triggered_by_equal} should be a boolean")
    self.triggered_by_equal = triggered_by_equal

    #check and set value
    if value is None:
        value = get_MIN(self.scorename)
    if self.triggered_by_equal and value < get_MIN(self.scorename):
        warn(f"value of {value} may be unobtainable since it is smaller than "
             f"{self.scorename}'s min of MIN of {get_MIN(self.scorename)}")
    elif not self.triggered_by_equal and value <= get_MIN(self.scorename):
        warn(
            f"value of {value} may be unobtainable since it is smaller or equal to "
            f"{self.scorename}'s MIN of {get_MIN(self.scorename)}")
    self.value = value

    #check and set min_triggered
    valid_min_triggered = isinstance(min_triggered,
                                     int) and min_triggered >= 1
    if not valid_min_triggered:
        raise TypeError(
            f"min_triggered of {min_triggered} should be an int of at least 1")
    self.min_triggered = min_triggered
def generate_from_score_generator(self, scorename):
    '''
    Call the generator function registered for `scorename` and return its result.

    scorename is either visual, symptom_noerror, or error

    The arguments of the generator are filled in by name:
        't' or 'day':             self.days
        'v' or 'visual':          self.scores['visual']
        's' or 'symptom_noerror': self.scores['symptom_noerror']
        anything else:            self.parameters[name]

    If no generator is registered for `scorename`, a warning is issued and
    an empty array is returned.
    Raises KeyError if a custom argument name is not in self.parameters.
    '''
    try:
        func = self.function_generators[scorename]
    except KeyError:
        warn(
            "There is no generator for '{}' score attached to this Population."
            .format(scorename))
        return np.array([])

    #co_varnames lists the arguments first and then every local variable of
    #the function, so only the first co_argcount names are real arguments
    code = func.__code__
    paramnames = code.co_varnames[:code.co_argcount]

    paramvals = []
    for paramname in paramnames:
        #reserved parameters
        if paramname in ('t', 'day'):
            paramvals.append(self.days)
        elif paramname in ('v', 'visual'):
            paramvals.append(self.scores['visual'])
        elif paramname in ('s', 'symptom_noerror'):
            paramvals.append(self.scores['symptom_noerror'])
        #custom parameters
        else:
            paramvals.append(self.parameters[paramname])
    return func(*paramvals)
def sample(self, population, order, sampling_days):
    '''
    Find, for each person, the first day on which the score self.scorename
    has dropped to self.ratio of its value on the index day (measured from
    the score's minimum), for self.min_triggered days in a row, and write
    it to sampling_days[:, order].

    The index day comes from self.index:
        int:      the same day for every person
        tuple:    ('sample', k) uses column k of the previous sampling days
        callable: called with the shape (npersons,) and the previous sampling days

    Days before the previous sample day are not considered.
    Persons who never trigger get _UNREACHED_SMILE.
    Ends by calling the parent's finish_sampling.
    '''
    smilescores = population.scores[
        self.scorename]  #scores which the method ratio refers to
    smilescore_lowerbound = get_MIN(self.scorename)

    #get and check index days
    if isinstance(self.index, int):
        index_days = np.full((population.npersons, ), self.index)
    elif isinstance(self.index, tuple):
        prev_sampling_days = sampling_days[:, :order]
        #the second entry of the tuple says which previous sample to use
        index_days = prev_sampling_days[:, self.index[1]]
    elif callable(self.index):
        prev_sampling_days = sampling_days[:, :order]
        #TODO check if int not outside NDAYS, FIRSTVISIT, LASTVISIT
        index_days = self.index((population.npersons, ), prev_sampling_days)
    else:
        raise TypeError(
            f"index of {self.index} is of type {type(self.index)}, "
            "which is not int, tuple, or a callable")

    # Compute the scores which will trigger milestones
    smilescores_at_index = np.take_along_axis(
        smilescores, helper.to_vertical(index_days), axis=1)  #column array
    smile_vals = (smilescores_at_index - smilescore_lowerbound
                  ) * self.ratio + smilescore_lowerbound  #column array

    # Compute the days where the milestones are triggered
    if self.triggered_by_equal:
        comparison_array = smilescores <= smile_vals
    else:
        comparison_array = smilescores < smile_vals

    # Compute the days where the milestones are triggered consecutively
    if self.min_triggered > 1:
        triggered_in_a_row = np.ones_like(
            comparison_array[:, self.min_triggered - 1:])  #initial
        for start in range(self.min_triggered):
            end = start + 1 - self.min_triggered
            if end == 0:
                end = None
            triggered_in_a_row = triggered_in_a_row * comparison_array[:, start:
                                                                       end]  # accumulate
        #we only checked when enough days have passed
        comparison_array[:, self.min_triggered - 1:] = triggered_in_a_row
        #the rest can't have had enough days in a row
        comparison_array[:, :self.min_triggered - 1] = False

    #only check on or after previous sample day by
    #setting the comparison values from days 0 to prev sample day (excluding end) to False
    #the first sample has no previous sample: order - 1 would wrap around
    #to the last column, so nothing is masked in that case
    if order > 0:
        for i in range(population.npersons):
            #if it is True on the same day as the previous sample day,
            #the finish_sampling will consider it already_reached
            comparison_array[i, :sampling_days[i, order - 1]] = False

    #the day at which the milestone is reached for each person, inc. 0 for 'never reached'
    sampling_days_temp = np.argmax(comparison_array, axis=1)
    sampling_days[:, order] = sampling_days_temp

    #record of which persons actually reached the milestones
    persons_reached_milestone = np.take_along_axis(
        comparison_array, helper.to_vertical(sampling_days_temp), axis=1)
    persons_unreached = ~persons_reached_milestone.flatten()

    #give invalid day to those who didn't reach
    sampling_days[persons_unreached, order] = _UNREACHED_SMILE
    if not np.all(persons_reached_milestone):
        warn(
            f"There are {persons_unreached.sum()} who didn't reach their milestone"
        )

    super().finish_sampling(population, order, sampling_days)
def __init__(self,
             index=FIRSTVISIT,
             ratio=0.5,
             scorename='symptom',
             triggered_by_equal=True,
             min_triggered=1,
             **kwargs):
    '''
    index: int of the day, or 2-tuple where the first entry is the string 'sample' and the second entry
        determines which previous sample to reference (nonzero int),
        or a function taking exactly the arguments 'shape' and 'prev_sampling_days'
    ratio: what ratio triggers this smile milestone, between 0 and 1 for useful results
    scorename: which score the ratio refers to ('symptom', 'visual', or 'symptom_noerror')
    triggered_by_equal: if True, use <= for trigger, if False, use < for trigger
    min_triggered: the number of days in a row to fulfill the condition when sampling
    kwargs: passed to parent class

    Raises TypeError for an index of the wrong type, a non-int tuple
    reference, a non-bool triggered_by_equal, or a min_triggered that is
    not an int >= 1. Raises ValueError for a malformed index tuple, an
    index function with the wrong arguments, or an unknown scorename.
    '''
    super().__init__(name='smile', **kwargs)

    #check parameters
    #check index
    if isinstance(index, int):
        pass
    elif isinstance(index, tuple):
        #check if refers to previous sample
        if len(index) == 2 and index[0] == 'sample':
            if not isinstance(index[1], int):
                raise TypeError(
                    f"index reference has value {index[1]} which is not an int"
                )
        else:
            raise ValueError(
                f"index tuple of {index} is defined wrong. "
                "It should have length 2 and it's first value should be the string 'sample'"
            )
    elif callable(index):
        #co_varnames also contains the local variables of the function,
        #so only its first co_argcount entries are the argument names
        code = index.__code__
        argnames = code.co_varnames[:code.co_argcount]
        if argnames != ('shape', 'prev_sampling_days'):
            raise ValueError(
                "The function for index day generation should only have 'shape' and 'prev_sampling_days' as an argument."
            )
    else:
        raise TypeError(
            f"index of {index} is of type {type(index)}, which is not int, tuple, or a callable"
        )
    self.index = index

    #check and set ratio
    if not (0 < ratio < 1):
        warn(f"ratio of {ratio} may be unobtainable.")
    self.ratio = ratio

    #check and set scorename
    if scorename not in {'symptom', 'visual', 'symptom_noerror'}:
        raise ValueError(f"scorename of {scorename} not understood")
    self.scorename = scorename

    #check and set triggered_by_equal
    if not isinstance(triggered_by_equal, bool):
        raise TypeError(
            f"triggred_by_equal of {triggered_by_equal} should be a boolean")
    self.triggered_by_equal = triggered_by_equal

    #check and set min_triggered
    if not isinstance(min_triggered, int) or min_triggered < 1:
        raise TypeError(
            f"min_triggered of {min_triggered} should be an int of at least 1")
    self.min_triggered = min_triggered
def plot(self,
         ax,
         ndays=None,
         npersons=None,
         x='day',
         y='symptom',
         viztype='lines',
         vizcolor='person'):
    '''
    Draw the first `npersons` persons and first `ndays` days of self on the axes `ax`.

    ndays, npersons: how much to draw (None means self.ndays / self.npersons)
    x, y: either 'day' or a scorename (a key of self.scores)
    viztype: either 'lines', 'points', or 'both'
    vizcolor: either 'person' (each person is a color) or 'day' (each day is a color);
        'day' only affects points, a warning is issued when lines are requested with it

    Raises ValueError for an unknown x, y, viztype or vizcolor.
    '''
    if ndays is None:
        ndays = self.ndays
    if npersons is None:
        npersons = self.npersons

    draw_lines = viztype in ('lines', 'both')
    draw_points = viztype in ('points', 'both')
    if draw_lines and vizcolor == 'day':
        warn('vizcolor of "day" can only be applied to points, not lines')
    if vizcolor not in {'person', 'day'}:
        raise ValueError("vizcolor of '{}' unknown".format(vizcolor))
    if viztype not in {'lines', 'points', 'both'}:
        raise ValueError("viztype of '{}' unknown".format(viztype))

    #abscissas
    if x == 'day':
        xlabel = 'days since concussion'
        ax.xaxis.set_minor_locator(MultipleLocator(10))
        x = self.days[:npersons, :ndays]
    elif x in self.scores:
        xlabel = x + ' scores'
        ax.xaxis.set_minor_locator(MultipleLocator(1))
        x = self.scores[x][:npersons, :ndays]
    else:
        raise ValueError("x of '{}' unknown".format(x))

    #ordinates
    if y == 'day':
        ylabel = 'days since concussion'
        #the days go on the ordinate here, x must be left untouched
        y = self.days[:npersons, :ndays]
    elif y in self.scores:
        ylabel = y + ' scores'
        y = self.scores[y][:npersons, :ndays]
    else:
        raise ValueError("y of '{}' unknown".format(y))

    #titles and labels
    ax.set_title(self.title, wrap=True)
    ax.set(xlabel=xlabel, ylabel=ylabel)

    #plotting
    #lines
    if draw_lines:
        points = np.stack([x, y], axis=2)
        colors = mpl.cm.get_cmap(
            lines_cmap_name
        ).colors  # https://matplotlib.org/2.0.1/users/colormaps.html
        ax.add_collection(LineCollection(points, colors=colors))

    #points
    if draw_points:
        if vizcolor == 'person':
            colors = np.array(
                mpl.cm.get_cmap(lines_cmap_name).colors)  # not the right shape
            colors = helper.rgblist_to_rgbapop(colors, npersons, ndays)
        else:  #vizcolor == 'day', already validated above
            cmap = mpl.cm.get_cmap(
                points_cmap_name
            )  # https://matplotlib.org/2.0.1/users/colormaps.html
            colors = cmap(helper.normalize(
                self.days[:npersons, :ndays]))  # converts scalars to rgba
        #scatter converts the 2d arrays x and y to flat arrays, and colors should respect that flatness
        colors = colors.reshape(npersons * ndays, 4)
        ax.scatter(x, y, facecolors='none', edgecolors=colors)

    ax.autoscale()
def set_score_generator(self, scorename, func):
    '''
    Register `func` as the generator function of the score `scorename`.

    scorename is either visual, symptom_noerror, or symptom
    A scorename that is not in self.scores only triggers a warning,
    the function is stored in self.function_generators either way.
    '''
    is_known = scorename in self.scores
    if not is_known:
        message = "Scorename '{}' not known. Known options are: {}".format(
            scorename, self.scores.keys())
        warn(message)
    self.function_generators[scorename] = func