def retractOnWorld(cogstate,worldname,proposition):
    """
    INPUT: a triple (cognitive state, worldname, proposition)
    OUTPUT: list of situations (each one yielded by sitgen for the named world)
    WHAT IT DOES: Definition 4 (i)

    A situation s is collected when s does not force the proposition and,
    for at least one basis s' of the world with s a subset of s', no
    strictly larger sub-situation t of s' that also does not force the
    proposition contains s (i.e. s is a maximal non-forcing subset of s').
    """
    result = []
    world = getWorldByName(worldname, cogstate)
    for situation in sitgen(world):  # s
        if Forceable(situation, proposition, cogstate):
            continue  # s may not force P
        for basis in getAllBases(world, cogstate):  # s'
            if not subset(situation, basis):
                continue  # s has to be a subset of s'
            maximal = True
            for t in subsitgen(basis):
                # The candidate superset t is what must be tested here;
                # testing `situation` would be a no-op because s is already
                # known not to force P at this point.
                if Forceable(t, proposition, cogstate):
                    continue  # t may not force P
                if subset(situation, t) and situation != t:
                    # a strictly larger non-forcing subset of s' exists
                    maximal = False
                    break
            if maximal:
                # one witnessing basis is enough; s is added exactly once
                result.append(situation)
                break
    return result
def Forceable(situation, proposition, cogstate):
    """
    INPUT: a triple (situation, proposition, cognitive state)
    OUTPUT: a Boolean
    WHAT IT DOES: Decides whether the given situation forces
    the proposition in the cognitive state.

    Returns False as soon as some world flagged "US" in the cognitive
    state contains the situation (per subset) while its name is not a
    member of the proposition; returns True otherwise, including when
    the cognitive state is empty.
    """
    for world in cogstate:
        # String keys, matching how the same world dicts are read
        # elsewhere in this file (world["meta"]["US"], ...["name"]).
        info = world["meta"]
        if not info["US"]:
            continue
        if subset(situation, world) and info["name"] not in proposition:
            return False
    return True
def retractOnState(cogstate,proposition):
    """
    INPUT: a pair (cognitive state, proposition)
    OUTPUT: cognitive state
    WHAT IT DOES: Definition 4 (ii)

    Returns a new list of worlds; the "values" and "meta" dicts of every
    world are copied, so the input state is not modified.  In the result a
    world has "FS" set to True only when it is flagged "US" and contains
    (per subset) at least one situation from the retraction of some world
    that is flagged "FS" in the input state; every other world gets
    "FS" set to False.
    """
    result = []
    # The retractions of the FS worlds do not depend on the world being
    # processed, so they are computed once, and only if a US world exists.
    retracted = None
    for world in cogstate:
        newworld = {  # do not shoot ourselves in the foot
            "values": dict(world["values"]),
            "meta": dict(world["meta"]),
        }
        addingToFS = False
        if world["meta"]["US"]:
            if retracted is None:
                retracted = [
                    s
                    for biworld in cogstate
                    if biworld["meta"]["FS"]
                    for s in retractOnWorld(cogstate, biworld["meta"]["name"], proposition)
                ]
            addingToFS = any(subset(s, world) for s in retracted)
        newworld["meta"]["FS"] = addingToFS
        result.append(newworld)
    return result
Ejemplo n.º 4
0
def knock_out_leaving_surveys(data, y, se, key, year, survey_span, num_surveys, prop, survey_date):
    """
    Knock out values of y and se in data so that the remaining data simulates a set
    of surveys. More specifically, in each selected level of key multiple surveys are
    generated by randomly drawing a row position to conduct each survey and then marking
    the survey_span row positions immediately before it to leave in the dataset.

    Parameters
    ----------
    data : ndarray
        A structured NumPy array. Should probably not have any missing values
        or else strange behavior will ensue
    y : string
        The label for the response variable in data
    se : string
        The label for the standard error variable in data
    key : string or list of strings
        The labels for the variables in data that will define separate levels
        for the knock out scheme.
    year : string
        The label for the year in data
    survey_span : int
        The number of years that each survey covers
    num_surveys : int
        The number of surveys in each country
    prop : float
        Proportion of countries to apply the knock out design. 0 <= prop <= 1
    survey_date : string
        The name of a variable to be added to data that contains the year each survey
        was conducted so that surveys can be distinguished within a given level of the key.
        This comes in handy if you want to specify correlated noise among surveys in your noiser.

    Returns
    -------
    ko_data : ndarray
        The same as data except with values of y and se knocked out and
        with the survey_date field appended.

    Notes
    -----
    In this framework, multiple observations of the same data points cannot be generated:
    when surveys overlap, a year is attributed to the first survey that covered it.

    Levels that are not selected by prop keep all of their y and se values and
    get NaN in the survey_date field.

    The survey window is taken over row positions within a level, so it only
    corresponds to consecutive years if the rows of each level are ordered by
    year -- TODO confirm with callers.  The drawn position itself is not part
    of the window (only the survey_span positions before it are kept).
    """

    ko_data = copy.copy(data)

    ko_data = utilities.add_unique_id(ko_data, key, 'unique_id_for_ko')

    unique_ids = np.unique(ko_data['unique_id_for_ko'])

    # Boolean vector with the first floor(len * prop) entries True; shuffled
    # below so that the selected levels are random.
    r = np.where(np.arange(1., len(unique_ids) + 1.) <= len(unique_ids) * prop, True, False)

    if not isinstance(r.tolist(), bool):
        random.shuffle(r)
    else:
        r = [r]

    # For each level: list of (kept year, year of the survey that covers it).
    kept_years = {}
    for i, level_id in enumerate(unique_ids):
        ko_data_i = utilities.subset(ko_data, 'unique_id_for_ko', level_id)
        years = ko_data_i[year]
        n_years = len(years)

        kept = []
        if r[i]:
            for _ in range(num_surveys):
                survey_year_index = random.choice(range(0, n_years))
                # positions before the start of the level are simply skipped
                first = max(survey_year_index - survey_span, 0)
                for j in range(first, survey_year_index):
                    kept.append((years[j], years[survey_year_index]))
        else:
            # design not applied to this level: keep everything, no survey date
            for j in range(n_years):
                kept.append((years[j], np.nan))
        kept_years[level_id] = kept

    survey_date_list = [np.nan] * len(ko_data[y])
    for i in range(len(ko_data[y])):
        level_id = ko_data['unique_id_for_ko'][i]
        yr = ko_data[year][i]

        # Decide on "was this year kept", not on "is the survey date NaN":
        # unselected levels are kept with a NaN survey date and must not be
        # knocked out.
        is_kept = False
        for kept_yr, conducted in kept_years[level_id]:
            if kept_yr == yr:
                survey_date_list[i] = conducted
                is_kept = True
                break

        if not is_kept:
            ko_data[y][i] = np.nan
            ko_data[se][i] = np.nan

    ko_data = numpy.lib.recfunctions.append_fields(ko_data, survey_date, np.array(survey_date_list))

    ko_data = numpy.lib.recfunctions.drop_fields(ko_data, 'unique_id_for_ko')

    return ko_data
Ejemplo n.º 5
0
def knock_out_cluster_unit(data, y, se, cluster='iso3', unit='year', prop=.2, design='random'):
    """
    Within levels defined by the cluster variable, knock out a proportion of
    units in data by replacing values of the variables y and se with NaN.

    Parameters
    ----------
    data : ndarray
        A structured array.
    y : string
        A label of variable in data that corresponds to the response variable to be knocked out
    se : string
        A label of variable in data that corresponds to the standard error variable to be knocked out
    cluster : string or list of strings
        A field or list of fields in data (e.g. 'iso3' or ['iso3', 'age']). The knock out scheme
        is applied separately to levels defined by cluster. Pass '' to treat all of data as a
        single cluster.
    unit : string
        A field in data. The unit of the data to knock out. Unit should not have multiple values
        within levels of cluster.
    prop : float
        The proportion of data to knock out.
    design : string
        If 'random', then a proportion of data is knocked out randomly.
        If 'first', then the first proportion of data is knocked out and
        analogously for 'last'. Any other value knocks nothing out.

    Returns
    -------
    data : ndarray
        A copy of data with the selected values of y and se set to NaN.

    Notes
    -----
    Only rows whose y is not already NaN are candidates, and the proportion
    (and the 'first'/'last' ranking) is taken over those candidates.

    Examples
    --------
    >>> dtype = [('iso3','|S4'),('year','<i4'),('y','<f4'),('se','<f4')]
    >>> data = np.array([('USA',1990,1,.1),('USA',1991,2,.2),('CAN',1990,3,.3),('CAN',1991,4,.4)], dtype=dtype)
    >>> ko_data = knock_out_cluster_unit(data,'y','se','iso3','year',.5,'first')
    >>> utilities.is_nan(ko_data['y'][0])
    True
    >>> utilities.is_nan(ko_data['y'][1])
    False
    >>> utilities.is_nan(ko_data['y'][2])
    True
    >>> utilities.is_nan(ko_data['y'][3])
    False

    >>> utilities.is_nan(ko_data['se'][0])
    True
    >>> utilities.is_nan(ko_data['se'][1])
    False
    >>> utilities.is_nan(ko_data['se'][2])
    True
    >>> utilities.is_nan(ko_data['se'][3])
    False

    # Check to see that original data has not been changed
    >>> utilities.is_nan(data['y'][0])
    False
    """

    data = copy.copy(data)

    data_cluster = {}
    if cluster == '':
        data_cluster[''] = data
    else:
        # A list of field names is recognised by its first element being a
        # multi-character string (for a plain string, cluster[0] is a single
        # character); such a list is collapsed into one temporary id field.
        if len(cluster[0]) >= 2:
            data = utilities.add_unique_id(data, cluster, 'knockerouters_unique_cluster_id')
            cluster = 'knockerouters_unique_cluster_id'

        for level in np.unique(data[cluster]):
            data_cluster[level] = utilities.subset(data, cluster, level)

    for key, cluster_data in data_cluster.items():

        # Row positions (within this cluster) that still hold a value of y.
        candidates = [i for i, val in enumerate(cluster_data[y])
                      if not utilities.is_nan(val)]
        n_candidates = len(candidates)

        # Boolean vector with floor(n * prop) True entries, shuffled so the
        # 'random' design picks units at random.
        r = np.where(np.arange(1., n_candidates + 1.) <= n_candidates * prop, True, False)

        if not isinstance(r.tolist(), bool):
            random.shuffle(r)
        else:
            r = [r]

        should_be_knocked_out = {}
        for index, i in enumerate(candidates):
            level = cluster_data[unit][i]

            if design == 'random':
                should_be_knocked_out[level] = r[index]
            elif design == 'first':
                # rank among the candidates, not the raw row position, so
                # rows that are already NaN do not shift the cut-off
                should_be_knocked_out[level] = float(index + 1) / n_candidates <= prop
            elif design == 'last':
                # exact mirror of 'first': rank counted from the end
                should_be_knocked_out[level] = float(n_candidates - index) / n_candidates <= prop

        for i, level in enumerate(data[unit]):
            if level in should_be_knocked_out and should_be_knocked_out[level]:
                # with real clusters, only touch rows of the current cluster
                if cluster == '' or data[cluster][i] == key:
                    data[y][i] = np.nan
                    data[se][i] = np.nan

    if cluster == 'knockerouters_unique_cluster_id':
        data = numpy.lib.recfunctions.drop_fields(data, 'knockerouters_unique_cluster_id')

    return data