def setUp(self):
    """Load the Example Data (A) fixture and build the shared test stack."""
    self.path = './tests/'
    fixture = 'Example Data (A)'
    # Case data (CSV) and its metadata (JSON) live side by side in self.path.
    self.data = pd.read_csv('{}{}.csv'.format(self.path, fixture))
    self.meta = load_json('{}{}.json'.format(self.path, fixture))
    # Stack addressing keys: data key and filter key.
    self.dk = 'Example Data (A)'
    self.fk = 'no_filter'
    # Variables by type for Example Data A.
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    self.q5 = ['q5_1', 'q5_2', 'q5_3', 'q5_4', 'q5_5', 'q5_6']
    # Cross-tab axes, views and weight iterations shared by the tests.
    self.x_vars = self.q5
    self.y_vars = ['@', 'gender', 'locality', 'q2', 'q3']
    self.views = ['cbase', 'counts']
    self.weights = [None, 'weight_a']
    self.text_key = 'en-GB'
    self.stack = get_stack(self, self.meta, self.data,
                           self.x_vars, self.y_vars,
                           self.views, self.weights)
def setUp(self):
    """Load the Example Data (A) fixture and prepare shared test state."""
    self.path = './tests/'
    fixture_name = 'Example Data (A)'
    # Read the case data (CSV) and normalise its string columns.
    csv_path = '{}{}.csv'.format(self.path, fixture_name)
    raw_frame = pd.read_csv(csv_path)
    self.example_data_A_data = dataframe_fix_string_types(raw_frame)
    # Read the matching metadata document (JSON).
    json_path = '{}{}.json'.format(self.path, fixture_name)
    self.example_data_A_meta = load_json(json_path)
    # Variables by type for Example Data A.
    self.int = ['record_number', 'unique_id', 'age', 'birth_day',
                'birth_month']
    self.float = ['weight', 'weight_a', 'weight_b']
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    self.string = ['q8a', 'q9a']
    self.date = ['start_time', 'end_time']
    self.time = ['duration']
    self.array = ['q5', 'q6', 'q7']
    # Smallest variable list that still covers every single*delimited-set
    # combination when populating a stack.
    self.minimum = ['Wave', 'ethnicity', 'q2', 'gender']
    # Weight iterations the tests expect to see.
    self.weights = [None, 'weight_a']
    # Net views ViewMapper template shared by the tests.
    self.net_views = ViewMapper(
        template={
            'method': QuantipyViews().frequency,
            'kwargs': {
                'axis': 'x',
                'groups': ['Nets'],
                'iterators': {
                    'rel_to': [None, 'y'],
                    'weights': self.weights
                }
            }
        })
def setUp(self):
    """Load Example Data (A) data/meta and list the variables by type."""
    self.path = './tests/'
    project_name = 'Example Data (A)'
    # Load Example Data (A) data and meta into self
    name_data = '%s.csv' % (project_name)
    path_data = '%s%s' % (self.path, name_data)
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    self.example_data_A_data = pd.read_csv(path_data, index_col=0,
                                           parse_dates=True)
    name_meta = '%s.json' % (project_name)
    path_meta = '%s%s' % (self.path, name_meta)
    self.example_data_A_meta = load_json(path_meta)
    # Variables by type for Example Data A
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
def setUp(self):
    """Build a small 'Jan' stack from the engine_B fixture files."""
    self.path = './tests/'
    self.filepath = '%sengine_B_data.csv' % (self.path)
    self.metapath = '%sengine_B_meta.json' % (self.path)
    self.stack = Stack("StackName")
    # NOTE(review): 'seperator' [sic] is preserved as written — presumably
    # it is the attribute name Stack reads; verify before renaming.
    self.stack.seperator = ','
    self.stack.decoding = "UTF-8"
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    self.data = pd.read_csv(self.filepath, index_col=0, parse_dates=True)
    self.meta = load_json(self.metapath)
    self.stack.add_data(data_key="Jan", meta=self.meta, data=self.data)
    # Variables used as x axes in the tests (date/string ones left out).
    self.x_names = ['age', 'cost_breakfast', 'age_group', 'q4']
    self.x_types = ['int', 'float', 'single', 'delimited set']
    self.y_names = ['profile_gender']
def setUp(self):
    """Load Example Data (A) and build the example stack."""
    self.path = './tests/'
    project_name = 'Example Data (A)'
    # Load Example Data (A) data and meta into self
    name_data = '%s.csv' % (project_name)
    path_data = '%s%s' % (self.path, name_data)
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    self.example_data_A_data = pd.read_csv(path_data, index_col=0,
                                           parse_dates=True)
    name_meta = '%s.json' % (project_name)
    path_meta = '%s%s' % (self.path, name_meta)
    self.example_data_A_meta = load_json(path_meta)
    # The minimum list of variables required to populate a stack with
    # all single*delimited set variations
    self.minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']
    self.setup_stack_Example_Data_A()
def setUp(self):
    """Load Example Data (A), list variables by type and build the
    net-views ViewMapper."""
    self.path = './tests/'
    project_name = 'Example Data (A)'
    # Load Example Data (A) data and meta into self
    name_data = '%s.csv' % (project_name)
    path_data = '%s%s' % (self.path, name_data)
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    self.example_data_A_data = pd.read_csv(path_data, index_col=0,
                                           parse_dates=True)
    name_meta = '%s.json' % (project_name)
    path_meta = '%s%s' % (self.path, name_meta)
    self.example_data_A_meta = load_json(path_meta)
    # Variables by type for Example Data A
    self.int = ['record_number', 'unique_id', 'age', 'birth_day',
                'birth_month']
    self.float = ['weight', 'weight_a', 'weight_b']
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    self.string = ['q8a', 'q9a']
    self.date = ['start_time', 'end_time']
    self.time = ['duration']
    self.array = ['q5', 'q6', 'q7']
    # The minimum list of variables required to populate a stack with
    # all single*delimited set variations
    self.minimum = ['Wave', 'ethnicity', 'q2', 'gender']
    # Set up the expected weight iterations
    self.weights = [None, 'weight_a']
    # Set up the net views ViewMapper
    self.net_views = ViewMapper(
        template={
            'method': QuantipyViews().frequency,
            'kwargs': {
                'axis': 'x',
                'groups': ['Nets'],
                'iterators': {
                    'rel_to': [None, 'y'],
                    'weights': self.weights
                }
            }
        })
def setUp(self):
    """Load Example Data (A), build four example stacks and remove any
    stale cluster file from a previous run."""
    self.path = './tests/'
    self.project_name = 'Example Data (A)'
    # Load Example Data (A) data and meta into self
    name_data = '%s.csv' % (self.project_name)
    path_data = '%s%s' % (self.path, name_data)
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    self.example_data_A_data = pd.read_csv(path_data, index_col=0,
                                           parse_dates=True)
    name_meta = '%s.json' % (self.project_name)
    path_meta = '%s%s' % (self.path, name_meta)
    self.example_data_A_meta = load_json(path_meta)
    # Variables by type for Example Data A
    self.int = ['record_number', 'unique_id', 'age', 'birth_day',
                'birth_month']
    self.float = ['weight', 'weight_a', 'weight_b']
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    self.string = ['q8a', 'q9a']
    self.date = ['start_time', 'end_time']
    self.time = ['duration']
    self.array = ['q5', 'q6', 'q7']
    # The minimum list of variables required to populate a stack with
    # all single*delimited set variations
    self.minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']
    self.one_of_each = ['record_number', 'weight', 'gender', 'q2', 'q8a',
                        'start_time', 'duration']
    # Set up example stacks
    self.stack0 = self.setup_stack_Example_Data_A(name='Jan')
    self.stack1 = self.setup_stack_Example_Data_A(name='Feb')
    self.stack2 = self.setup_stack_Example_Data_A(name='Mar')
    self.stack3 = self.setup_stack_Example_Data_A(name='Apr')
    # Remove any cluster file left over from an earlier run
    self.path_cluster = '%sClusterName.cluster' % (self.path)
    if os.path.exists(self.path_cluster):
        os.remove(self.path_cluster)
def setUp(self):
    """Load the Example Data (A) fixture and record the variable lists
    the tests rely on."""
    self.path = './tests/'
    fixture = 'Example Data (A)'
    # Case data (CSV), with string columns normalised straight away.
    self.example_data_A_data = dataframe_fix_string_types(
        pd.read_csv('{}{}.csv'.format(self.path, fixture)))
    # Matching metadata document (JSON).
    self.example_data_A_meta = load_json(
        '{}{}.json'.format(self.path, fixture))
    # Stack addressing keys: data key and filter key.
    self.dk = 'Example Data (A)'
    self.fk = 'no_filter'
    # Variables by type for Example Data A.
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    self.q5 = ['q5_1', 'q5_2', 'q5_3']
def setUp(self):
    """Load Example Data (A), build four example stacks and remove any
    stale cluster file from a previous run."""
    self.path = './tests/'
    self.project_name = 'Example Data (A)'
    # Load Example Data (A) data and meta into self
    name_data = '%s.csv' % (self.project_name)
    path_data = '%s%s' % (self.path, name_data)
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    self.example_data_A_data = pd.read_csv(path_data, index_col=0,
                                           parse_dates=True)
    name_meta = '%s.json' % (self.project_name)
    path_meta = '%s%s' % (self.path, name_meta)
    self.example_data_A_meta = load_json(path_meta)
    # Variables by type for Example Data A
    self.int = ['record_number', 'unique_id', 'age', 'birth_day',
                'birth_month']
    self.float = ['weight', 'weight_a', 'weight_b']
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    self.string = ['q8a', 'q9a']
    self.date = ['start_time', 'end_time']
    self.time = ['duration']
    self.array = ['q5', 'q6', 'q7']
    # The minimum list of variables required to populate a stack with
    # all single*delimited set variations
    self.minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']
    self.one_of_each = ['record_number', 'weight', 'gender', 'q2', 'q8a',
                        'start_time', 'duration']
    # Set up example stacks
    self.stack0 = self.setup_stack_Example_Data_A(name='Jan')
    self.stack1 = self.setup_stack_Example_Data_A(name='Feb')
    self.stack2 = self.setup_stack_Example_Data_A(name='Mar')
    self.stack3 = self.setup_stack_Example_Data_A(name='Apr')
    # Remove any cluster file left over from an earlier run
    self.path_cluster = '%sClusterName.cluster' % (self.path)
    if os.path.exists(self.path_cluster):
        os.remove(self.path_cluster)
def quantipy_from_decipher(decipher_meta, decipher_data, text_key='main'):
    """
    Converts the given Decipher data (which must have been exported in
    tab-delimited format) to Quantipy-ready meta and data.

    Parameters
    ----------
    decipher_meta : str or dict
        Either the path to the Decipher meta document saved as JSON or
        said document read into memory
    decipher_data : str or pandas.DataFrame
        Either the path to the Decipher data saved as tab-delimited text
        said file read into memory

    Returns
    -------
    meta : dict
        The Quantipy meta document
    data : pandas.DataFrame
        The converted data
    """
    # If they're not already in memory, read in the Decipher meta and
    # data files
    if isinstance(decipher_meta, str):
        dmeta = load_json(decipher_meta)
    if isinstance(decipher_data, str):
        # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv
        # with index_col=0 and parse_dates=True replicates its defaults.
        data = pd.read_csv(decipher_data, sep='\t', index_col=0,
                           parse_dates=True)
        # Keep the index available as an ordinary column as well
        data[data.index.name] = data.index

    meta = start_meta(text_key=text_key)
    quotas = {'vqtable': {}, 'voqtable': {}}
    # Decipher type -> Quantipy type
    types_map = {
        'text': 'string',
        'number': 'int',
        'float': 'float',
        'single': 'single',
        'multiple': 'delimited set'}

    # Questions spanning more than one variable need compound handling
    compound_questions = [
        question for question in dmeta['questions']
        if len(question['variables']) > 1]

    # Get basic variables
    for var in dmeta['variables']:
        # Collect quota variables; they are constructed later by
        # manage_decipher_quota_variables().
        # BUG FIX: the original 'continue' sat inside the qtable loop and
        # never skipped the rest of this iteration, so quota variables
        # also received ordinary column meta.
        is_quota = False
        for qtable in ['vqtable', 'voqtable']:
            if qtable in var['vgroup']:
                if not var['vgroup'] in quotas[qtable]:
                    quotas[qtable][var['vgroup']] = []
                quotas[qtable][var['vgroup']].append(var)
                is_quota = True
                break
        if is_quota:
            continue
        # Start the column meta for the current variable
        var_name = var['label']
        column = meta['columns'][var_name] = {
            'type': types_map[var['type']],
            'text': {text_key: var['title']}}
        # Add meta-mapped path for current column to the 'data file' set
        # object so that the original order of the variables is known
        set_item = 'columns@%s' % (var_name)
        if set_item not in meta['sets']['data file']['items']:
            meta['sets']['data file']['items'].append(set_item)
        if var['type'] == 'single':
            # Get the response values
            column['values'] = get_decipher_values(var['values'], text_key)

    # Manage compound variables (delimited sets, arrays, mixed-type
    # sets)
    for question in compound_questions:
        if question['type'] == 'multiple':
            # Construct delimited set
            meta, data, vgroups, vgroup_variables = make_delimited_set(
                meta, data, question)
            # If there's only 1 vgroup then this is a basic multiple-
            # choice question and doesn't require construction as an
            # array or set
            if len(vgroups) == 1:
                continue
        else:
            # Find the variable groups in the question...
            vgroups = get_vgroups(question['variables'])
            # ...and, for each group, its members
            vgroup_variables = get_vgroup_variables(
                vgroups, question['variables'])

        # vgroup_types tracks the types used per variable group; it
        # identifies mixed-type question groups, which are not arrays
        vgroup_types = get_vgroup_types(vgroups, question['variables'])
        unique_vgroup_types = set(vgroup_types.values())
        mixed_types = len(unique_vgroup_types) > 1
        if mixed_types:
            # A set binds mixed-type variables together
            vgroup = vgroups[0]
            mask = meta['sets'][vgroup] = {
                'item type': 'mixed',
                'text': {text_key: question['qtitle']},
                'items': [
                    'columns@%s' % (var['label'])
                    for var in question['variables']]}

        if 'multiple' in list(vgroup_types.values()):
            # Multiple grid: rewrite vgroups/vgroup_variables to match
            # the structure of multiple response variables in Decipher
            multiple_vgroups = [
                vgroup for vgroup in vgroups
                if vgroup_types[vgroup] == 'multiple']
            vgroup_variables = [copy.copy(vgroups)]
            # BUG FIX: '[c|r]' also matched a literal '|'; '[cr]' is the
            # intended column/row marker class
            new_vgroup_match = re.match(r'(^.+)(?=[cr][0-9]+)', vgroups[0])
            if new_vgroup_match is None:
                continue
            else:
                vgroups = [new_vgroup_match.group(0)]
            vgroup_types[vgroups[0]] = 'multiple'

        # Extract only the vgroups that contain multiple variables
        # so that an array mask can be created for each of them
        array_vgroups = [
            (vgroup, vars)
            for vgroup, vars in zip(vgroups, vgroup_variables)
            if len(vars) > 1]

        # If there are any array-like groups of variables inside the
        # question, add an array mask/s accordingly
        for vgroup, vars in array_vgroups:
            if vgroup in meta['masks']:
                # This was a multiple-choice grid and has already been
                # converted
                continue
            # It's possible the vgroup is in the 'data file' set and
            # needs to be replaced with the name of the group's
            # component vars. This happens with compound questions that
            # are arrays with added open-ends variables
            mapped_vgroup = 'columns@%s' % (vgroup)
            df_items = meta['sets']['data file']['items']
            if mapped_vgroup in df_items:
                mapped_vars = [('columns@%s' % v['label']) for v in vars]
                idx = meta['sets']['data file']['items'].index(mapped_vgroup)
                df_items = df_items[:idx] + mapped_vars + df_items[idx+1:]
                meta['sets']['data file']['items'] = df_items
            # Create the array mask
            mask = meta['masks'][vgroup] = {
                'type': 'array',
                'item type': types_map[vgroup_types[vgroup]],
                'text': {text_key: (
                    '{} - {}'.format(
                        vars[0]['rowTitle'],
                        question['qtitle'])
                    if vgroup_types[vgroup] in ['number', 'float', 'text']
                    else question['qtitle'])},
                'items': [{
                    'source': 'columns@{}'.format(var['label']),
                    'text': {text_key: var['rowTitle']}}
                    for var in vars]}
            if vgroup_types[vgroup] in ['single', 'multiple']:
                # Create lib values entry
                values_mapper = 'lib@values@%s' % (vgroup)
                meta['masks'][vgroup]['values'] = values_mapper
                if vgroup_types[vgroup] == 'single':
                    values = get_decipher_values(question['values'],
                                                 text_key)
                elif vgroup_types[vgroup] == 'multiple':
                    values = copy.deepcopy(
                        meta['columns'][vars[0]]['values'])
                meta['lib']['values'][vgroup] = values
                # Use meta-mapped values reference for single or
                # multiple array variables
                for item in mask['items']:
                    col = item['source'].split('@')[-1]
                    if col in meta['columns']:
                        if 'values' in meta['columns'][col]:
                            meta['columns'][col]['values'] = values_mapper

    # Construct quota columns (meta+data)
    meta, data = manage_decipher_quota_variables(meta, data, quotas)

    # Confirm that all meta columns exist in the data
    for col in list(meta['columns'].keys()):
        if col not in data.columns:
            print((
                "Unpaired data warning: {} found in meta['columns']"
                " but not in data.columns. Removing it.".format(col)))
            del meta['columns'][col]
            set_item = 'columns@{}'.format(col)
            if set_item in meta['sets']['data file']['items']:
                # list.remove() returns None; the original bound that to
                # an unused name
                meta['sets']['data file']['items'].remove(set_item)

    # Confirm that all data columns exist in the meta.
    # BUG FIX: iterate a snapshot — dropping in place while iterating
    # the live column Index can skip columns.
    for col in list(data.columns):
        if col not in meta['columns']:
            print((
                "Unpaired meta warning: {} found in data.columns"
                " but not in meta['columns']. Removing it.".format(col)))
            data.drop(col, axis=1, inplace=True)

    return meta, data
def quantipy_from_decipher(decipher_meta, decipher_data, text_key='main'):
    """
    Converts the given Decipher data (which must have been exported in
    tab-delimited format) to Quantipy-ready meta and data.

    Parameters
    ----------
    decipher_meta : str or dict
        Either the path to the Decipher meta document saved as JSON or
        said document read into memory
    decipher_data : str or pandas.DataFrame
        Either the path to the Decipher data saved as tab-delimited text
        said file read into memory

    Returns
    -------
    meta : dict
        The Quantipy meta document
    data : pandas.DataFrame
        The converted data
    """
    # If they're not already in memory, read in the Decipher meta and
    # data files.
    # BUG FIX: 'unicode' does not exist in Python 3 (str covers it), and
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    if isinstance(decipher_meta, str):
        dmeta = load_json(decipher_meta)
    if isinstance(decipher_data, str):
        data = pd.read_csv(decipher_data, sep='\t', index_col=0,
                           parse_dates=True)

    meta = start_meta(text_key=text_key)
    quotas = {'vqtable': {}, 'voqtable': {}}
    # Decipher type -> Quantipy type
    types_map = {
        'text': 'string',
        'number': 'int',
        'float': 'float',
        'single': 'single',
        'multiple': 'delimited set'}

    # Get basic variables
    for var in dmeta['variables']:
        # Collect quota variables; they are constructed later by
        # manage_decipher_quota_variables().
        # BUG FIX: the original 'continue' sat inside the qtable loop and
        # never skipped the rest of this iteration, so quota variables
        # also received ordinary column meta.
        is_quota = False
        for qtable in ['vqtable', 'voqtable']:
            if qtable in var['vgroup']:
                if not var['vgroup'] in quotas[qtable]:
                    quotas[qtable][var['vgroup']] = []
                quotas[qtable][var['vgroup']].append(var)
                is_quota = True
                break
        if is_quota:
            continue
        # Add meta-mapped path for current column to the 'data file' set
        # object so that the original order of the variables is known
        set_item = 'columns@%s' % (var['vgroup'])
        if set_item not in meta['sets']['data file']['items']:
            meta['sets']['data file']['items'].append(set_item)
        # Start the column meta for the current variable
        var_name = var['label']
        column = meta['columns'][var_name] = {
            'type': types_map[var['type']],
            'text': {text_key: var['title']}}
        if var['type'] == 'single':
            # Get the response values
            column['values'] = get_decipher_values(var['values'], text_key)

    # Create generator for compound questions
    compound_questions = (
        question for question in dmeta['questions']
        if len(question['variables']) > 1)

    # Manage compound variables (delimited sets, arrays, mixed-type
    # sets)
    for question in compound_questions:
        if question['type'] == 'multiple':
            # Construct delimited set
            meta, data, vgroups, vgroup_variables = make_delimited_set(
                meta, data, question)
            # If there's only 1 vgroup then this is a basic multiple-
            # choice question and doesn't require construction as an
            # array or set
            if len(vgroups) == 1:
                continue
        else:
            # Find the variable groups in the question...
            vgroups = get_vgroups(question['variables'])
            # ...and, for each group, its members
            vgroup_variables = get_vgroup_variables(
                vgroups, question['variables'])

        # vgroup_types tracks the types used per variable group; it
        # identifies mixed-type question groups, which are not arrays
        vgroup_types = get_vgroup_types(vgroups, question['variables'])
        unique_vgroup_types = set(vgroup_types.values())
        mixed_types = len(unique_vgroup_types) > 1
        if mixed_types:
            # A set binds mixed-type variables together
            vgroup = vgroups[0]
            mask = meta['sets'][vgroup] = {
                'item type': 'mixed',
                'text': {text_key: question['qtitle']},
                'items': [
                    'columns@%s' % (var['label'])
                    for var in question['variables']]}

        if 'multiple' in vgroup_types.values():
            # Multiple grid: rewrite vgroups/vgroup_variables to match
            # the structure of multiple response variables in Decipher
            multiple_vgroups = [
                vgroup for vgroup in vgroups
                if vgroup_types[vgroup] == 'multiple']
            vgroup_variables = [copy.copy(vgroups)]
            # BUG FIX: '[c|r]' also matched a literal '|'; '[cr]' is the
            # intended column/row marker class
            new_vgroup_match = re.match(r'(^.+)(?=[cr][0-9]+)', vgroups[0])
            if new_vgroup_match is None:
                continue
            else:
                vgroups = [new_vgroup_match.group(0)]
            vgroup_types[vgroups[0]] = 'multiple'

        # Extract only the vgroups that contain multiple variables
        # so that an array mask can be created for each of them
        array_vgroups = [
            (vgroup, vars)
            for vgroup, vars in zip(vgroups, vgroup_variables)
            if len(vars) > 1]

        # If there are any array-like groups of variables inside the
        # question, add an array mask/s accordingly
        for vgroup, vars in array_vgroups:
            # It's possible the vgroup is in the 'data file' set and
            # needs to be replaced with the name of the group's
            # component vars. This happens with compound questions that
            # are arrays with added open-ends variables
            mapped_vgroup = 'columns@%s' % (vgroup)
            df__items = meta['sets']['data file']['items']
            if mapped_vgroup in df__items:
                mapped_vars = [('columns@%s' % v['label']) for v in vars]
                idx = meta['sets']['data file']['items'].index(mapped_vgroup)
                df__items = df__items[:idx] + mapped_vars + df__items[idx+1:]
                meta['sets']['data file']['items'] = df__items
            # Create the array mask; items are plain source strings here
            mask = meta['masks'][vgroup] = {
                'type': 'array',
                'item type': types_map[vgroup_types[vgroup]],
                'text': {text_key: (
                    '%s - %s' % (
                        vars[0]['rowTitle'],
                        question['qtitle'])
                    if vgroup_types[vgroup] in ['number', 'float', 'text']
                    else question['qtitle'])},
                'items': [
                    'columns@%s' % (
                        var if vgroup_types[vgroup] == 'multiple'
                        else var['label'])
                    for var in vars]}
            if vgroup_types[vgroup] in ['single', 'multiple']:
                # Create lib values entry.
                # NOTE(review): the mask itself is never pointed at
                # values_mapping here — only the item columns are;
                # confirm that is intended.
                values_mapping = 'lib@values@%s' % (vgroup)
                if vgroup_types[vgroup] == 'single':
                    values = get_decipher_values(question['values'],
                                                 text_key)
                elif vgroup_types[vgroup] == 'multiple':
                    values = copy.deepcopy(
                        meta['columns'][vars[0]]['values'])
                meta['lib']['values'][vgroup] = values
                # Use meta-mapped values reference for single or
                # multiple array variables
                for item in mask['items']:
                    col = item.split('@')[-1]
                    meta['columns'][col]['values'] = values_mapping

    # Construct quota columns (meta+data)
    meta, data = manage_decipher_quota_variables(meta, data, quotas)

    return meta, data
def test_iterations_object(self):
    """Verify ViewMapper iterator objects: every rel_to/weights
    combination is generated, and stack.add_link(weights=...) overrides
    the iterator's own weights."""
    # Set up path to example files
    path_tests = self.path
    project_name = 'Example Data (A)'
    # Load Example Data (A) data and meta
    name_data = '%s.csv' % (project_name)
    path_data = '%s%s' % (path_tests, name_data)
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    example_data_A_data = pd.read_csv(path_data, index_col=0,
                                      parse_dates=True)
    name_meta = '%s.json' % (project_name)
    path_meta = '%s%s' % (path_tests, name_meta)
    example_data_A_meta = load_json(path_meta)
    # Variables by type for Example Data A (only eda_minimum is used
    # below; the rest document the fixture)
    eda_int = ['record_number', 'unique_id', 'age', 'birth_day',
               'birth_month']
    eda_float = ['weight', 'weight_a', 'weight_b']
    eda_single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    eda_delimited_set = ['q2', 'q3', 'q8', 'q9']
    eda_string = ['q8a', 'q9a']
    eda_date = ['start_time', 'end_time']
    eda_time = ['duration']
    eda_array = ['q5', 'q6', 'q7']
    eda_minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']
    # Create basic stack
    stack = Stack(name=project_name)
    stack.add_data(project_name, example_data_A_data, example_data_A_meta)
    stack.add_link(
        data_keys=project_name,
        filters=['no_filter'],
        x=eda_minimum,
        y=['@'],
        views=QuantipyViews(['default', 'cbase', 'counts', 'c%']),
        weights=[None, 'weight_a', 'weight_b']
    )
    # Get list of views created
    views_present = stack.describe(index=['view'])
    # Test that weighted an unweighted versions of all basic views
    # were created
    self.assertIn('x|default|x:y|||default', views_present)
    self.assertIn('x|default|x:y||weight_a|default', views_present)
    self.assertIn('x|default|x:y||weight_b|default', views_present)
    self.assertIn('x|frequency|x:y|||cbase', views_present)
    self.assertIn('x|frequency|x:y||weight_a|cbase', views_present)
    self.assertIn('x|frequency|x:y||weight_b|cbase', views_present)
    self.assertIn('x|frequency||y||c%', views_present)
    self.assertIn('x|frequency||y|weight_a|c%', views_present)
    self.assertIn('x|frequency||y|weight_b|c%', views_present)
    self.assertIn('x|frequency||||counts', views_present)
    self.assertIn('x|frequency|||weight_a|counts', views_present)
    self.assertIn('x|frequency|||weight_b|counts', views_present)
    # Create a ViewMapper using the iterator object in a template
    xnets = ViewMapper(
        template={
            'method': QuantipyViews().frequency,
            'kwargs': {
                'axis': 'x',
                'groups': ['Nets'],
                'iterators': {
                    'rel_to': [None, 'y'],
                    'weights': [None, 'weight_a']
                }
            }
        })
    # Add a method to the xnets ViewMapper, then use it to generate
    # additional views which include N/c% and unweighted/weighted
    xnets.add_method(name='ever', kwargs={'text': 'Ever',
                                          'logic': [1, 2]})
    stack.add_link(x='q2b', y=['@'], views=xnets.subset(['ever']))
    # Get list of views created
    views_present = stack.describe(index=['view'])
    # Test that the expected views were all created
    self.assertIn('x|frequency|x[(1,2)]:y|||ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y||ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y||weight_a|ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_a|ever', views_present)
    # Override the weights in the iterator object using the
    # stack.add_link(weights) parameter
    stack.add_link(x='q2b', y=['@'], views=xnets.subset(['ever']),
                   weights='weight_b')
    views_present = stack.describe(index=['view'])
    self.assertIn('x|frequency|x[(1,2)]:y||weight_b|ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_b|ever', views_present)
    # Add two methods and apply them at the same time; all expected
    # iterations of both should be created
    xnets.add_method(name='ever (multi test)',
                     kwargs={'text': 'Ever', 'logic': [1, 2]})
    xnets.add_method(name='never (multi test)',
                     kwargs={'text': 'Never', 'logic': [2, 3]})
    stack.add_link(x='q2b', y=['@'],
                   views=xnets.subset(['ever (multi test)',
                                       'never (multi test)']))
    views_present = stack.describe(index=['view'])
    self.assertIn('x|frequency|x[(1,2)]:y|||ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y||ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y||weight_a|ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_a|ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|||never (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|y||never (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y||weight_a|never (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|y|weight_a|never (multi test)',
                  views_present)
    # The weights arg for stack.add_link() must override what the
    # iterator object is asking for
    xnets.add_method(name='ever (weights test)',
                     kwargs={'text': 'Ever', 'logic': [1, 2]})
    xnets.add_method(name='never (weights test)',
                     kwargs={'text': 'Never', 'logic': [2, 3]})
    stack.add_link(
        x='q2b', y=['@'],
        views=xnets.subset(['ever (weights test)',
                            'never (weights test)']),
        weights=['weight_b']
    )
    views_present = stack.describe(index=['view'])
    self.assertNotIn('x|frequency|x[(1,2)]:y|||ever (weights test)',
                     views_present)
    self.assertNotIn('x|frequency|x[(1,2)]:y|y||ever (weights test)',
                     views_present)
    self.assertIn('x|frequency|x[(1,2)]:y||weight_b|ever (weights test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_b|ever (weights test)',
                  views_present)
    self.assertNotIn('x|frequency|x[(2,3)]:y|||never (weights test)',
                     views_present)
    self.assertNotIn('x|frequency|x[(2,3)]:y|y||never (weights test)',
                     views_present)
    self.assertIn('x|frequency|x[(2,3)]:y||weight_b|never (weights test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|y|weight_b|never (weights test)',
                  views_present)
def test_iterations_object(self):
    """Verify ViewMapper iterator objects: every rel_to/weights
    combination is generated, and stack.add_link(weights=...) overrides
    the iterator's own weights."""
    # Set up path to example files
    path_tests = self.path
    project_name = 'Example Data (A)'
    # Load Example Data (A) data and meta
    name_data = '%s.csv' % (project_name)
    path_data = '%s%s' % (path_tests, name_data)
    # pd.DataFrame.from_csv() was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True replicates its defaults.
    example_data_A_data = pd.read_csv(path_data, index_col=0,
                                      parse_dates=True)
    name_meta = '%s.json' % (project_name)
    path_meta = '%s%s' % (path_tests, name_meta)
    example_data_A_meta = load_json(path_meta)
    # Variables by type for Example Data A (only eda_minimum is used
    # below; the rest document the fixture)
    eda_int = ['record_number', 'unique_id', 'age', 'birth_day',
               'birth_month']
    eda_float = ['weight', 'weight_a', 'weight_b']
    eda_single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    eda_delimited_set = ['q2', 'q3', 'q8', 'q9']
    eda_string = ['q8a', 'q9a']
    eda_date = ['start_time', 'end_time']
    eda_time = ['duration']
    eda_array = ['q5', 'q6', 'q7']
    eda_minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']
    # Create basic stack
    stack = Stack(name=project_name)
    stack.add_data(project_name, example_data_A_data, example_data_A_meta)
    stack.add_link(data_keys=project_name,
                   filters=['no_filter'],
                   x=eda_minimum,
                   y=['@'],
                   views=QuantipyViews(
                       ['default', 'cbase', 'counts', 'c%']),
                   weights=[None, 'weight_a', 'weight_b'])
    # Get list of views created
    views_present = stack.describe(index=['view'])
    # Test that weighted an unweighted versions of all basic views
    # were created
    self.assertIn('x|default|x:y|||default', views_present)
    self.assertIn('x|default|x:y||weight_a|default', views_present)
    self.assertIn('x|default|x:y||weight_b|default', views_present)
    self.assertIn('x|frequency|x:y|||cbase', views_present)
    self.assertIn('x|frequency|x:y||weight_a|cbase', views_present)
    self.assertIn('x|frequency|x:y||weight_b|cbase', views_present)
    self.assertIn('x|frequency||y||c%', views_present)
    self.assertIn('x|frequency||y|weight_a|c%', views_present)
    self.assertIn('x|frequency||y|weight_b|c%', views_present)
    self.assertIn('x|frequency||||counts', views_present)
    self.assertIn('x|frequency|||weight_a|counts', views_present)
    self.assertIn('x|frequency|||weight_b|counts', views_present)
    # Create a ViewMapper using the iterator object in a template
    xnets = ViewMapper(
        template={
            'method': QuantipyViews().frequency,
            'kwargs': {
                'axis': 'x',
                'groups': ['Nets'],
                'iterators': {
                    'rel_to': [None, 'y'],
                    'weights': [None, 'weight_a']
                }
            }
        })
    # Add a method to the xnets ViewMapper, then use it to generate
    # additional views which include N/c% and unweighted/weighted
    xnets.add_method(name='ever', kwargs={'text': 'Ever',
                                          'logic': [1, 2]})
    stack.add_link(x='q2b', y=['@'], views=xnets.subset(['ever']))
    # Get list of views created
    views_present = stack.describe(index=['view'])
    # Test that the expected views were all created
    self.assertIn('x|frequency|x[(1,2)]:y|||ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y||ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y||weight_a|ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_a|ever', views_present)
    # Override the weights in the iterator object using the
    # stack.add_link(weights) parameter
    stack.add_link(x='q2b', y=['@'], views=xnets.subset(['ever']),
                   weights='weight_b')
    views_present = stack.describe(index=['view'])
    self.assertIn('x|frequency|x[(1,2)]:y||weight_b|ever', views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_b|ever', views_present)
    # Add two methods and apply them at the same time; all expected
    # iterations of both should be created
    xnets.add_method(name='ever (multi test)', kwargs={
        'text': 'Ever',
        'logic': [1, 2]
    })
    xnets.add_method(name='never (multi test)', kwargs={
        'text': 'Never',
        'logic': [2, 3]
    })
    stack.add_link(x='q2b', y=['@'], views=xnets.subset(
        ['ever (multi test)', 'never (multi test)']))
    views_present = stack.describe(index=['view'])
    self.assertIn('x|frequency|x[(1,2)]:y|||ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y||ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y||weight_a|ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_a|ever (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|||never (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|y||never (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y||weight_a|never (multi test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|y|weight_a|never (multi test)',
                  views_present)
    # The weights arg for stack.add_link() must override what the
    # iterator object is asking for
    xnets.add_method(name='ever (weights test)', kwargs={
        'text': 'Ever',
        'logic': [1, 2]
    })
    xnets.add_method(name='never (weights test)', kwargs={
        'text': 'Never',
        'logic': [2, 3]
    })
    stack.add_link(x='q2b', y=['@'], views=xnets.subset(
        ['ever (weights test)', 'never (weights test)']),
        weights=['weight_b'])
    views_present = stack.describe(index=['view'])
    self.assertNotIn('x|frequency|x[(1,2)]:y|||ever (weights test)',
                     views_present)
    self.assertNotIn('x|frequency|x[(1,2)]:y|y||ever (weights test)',
                     views_present)
    self.assertIn('x|frequency|x[(1,2)]:y||weight_b|ever (weights test)',
                  views_present)
    self.assertIn('x|frequency|x[(1,2)]:y|y|weight_b|ever (weights test)',
                  views_present)
    self.assertNotIn('x|frequency|x[(2,3)]:y|||never (weights test)',
                     views_present)
    self.assertNotIn('x|frequency|x[(2,3)]:y|y||never (weights test)',
                     views_present)
    self.assertIn('x|frequency|x[(2,3)]:y||weight_b|never (weights test)',
                  views_present)
    self.assertIn('x|frequency|x[(2,3)]:y|y|weight_b|never (weights test)',
                  views_present)