def setUp(self):
    """Load the 'Example Data (A)' fixtures and build the test stack.

    Reads the CSV case data and the JSON meta document found under
    ``self.path``, records the keys and variable lists for the data set,
    and stores the result of ``get_stack`` on ``self.stack``.
    """
    # Directory prefix under which the fixture files are looked up
    self.path = './tests/'
    project_name = 'Example Data (A)'

    # Case data: <path><project>.csv
    csv_name = '%s.csv' % (project_name)
    self.data = pd.read_csv('%s%s' % (self.path, csv_name))

    # Meta document: <path><project>.json
    json_name = '%s.json' % (project_name)
    self.meta = load_json('%s%s' % (self.path, json_name))

    # Keys and variable lists for Example Data A
    self.dk = 'Example Data (A)'
    self.fk = 'no_filter'
    self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
    self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    self.q5 = ['q5_1', 'q5_2', 'q5_3', 'q5_4', 'q5_5', 'q5_6']

    # x/y variables, views and weights handed to get_stack below;
    # x_vars is the same list object as q5
    self.x_vars = self.q5
    self.y_vars = ['@', 'gender', 'locality', 'q2', 'q3']
    self.views = ['cbase', 'counts']
    self.weights = [None, 'weight_a']
    self.text_key = 'en-GB'

    self.stack = get_stack(
        self,
        self.meta,
        self.data,
        self.x_vars,
        self.y_vars,
        self.views,
        self.weights,
    )
Beispiel #2
0
    def setUp(self):
        """Load the 'Example Data (A)' fixtures and prepare the net views.

        Reads the CSV case data (passed through
        ``dataframe_fix_string_types``) and the JSON meta document found
        under ``self.path``, records the variable names grouped by type,
        and builds a ``ViewMapper`` stored on ``self.net_views``.
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        project_name = 'Example Data (A)'

        # Case data: read the CSV, then run it through the string-type fixer
        csv_path = '%s%s' % (self.path, '%s.csv' % (project_name))
        self.example_data_A_data = pd.read_csv(csv_path)
        self.example_data_A_data = dataframe_fix_string_types(
            self.example_data_A_data)

        # Meta document
        json_path = '%s%s' % (self.path, '%s.json' % (project_name))
        self.example_data_A_meta = load_json(json_path)

        # Variables by type for Example Data A
        self.int = [
            'record_number',
            'unique_id',
            'age',
            'birth_day',
            'birth_month',
        ]
        self.float = ['weight', 'weight_a', 'weight_b']
        self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
        self.delimited_set = ['q2', 'q3', 'q8', 'q9']
        self.string = ['q8a', 'q9a']
        self.date = ['start_time', 'end_time']
        self.time = ['duration']
        self.array = ['q5', 'q6', 'q7']

        # The minimum list of variables required to populate a stack with
        # all single*delimited set variations
        self.minimum = ['Wave', 'ethnicity', 'q2', 'gender']

        # Expected weight iterations (unweighted plus 'weight_a')
        self.weights = [None, 'weight_a']

        # Net views ViewMapper: the template is assembled piece by piece,
        # method first, then its keyword arguments
        frequency_method = QuantipyViews().frequency
        view_iterators = {
            'rel_to': [None, 'y'],
            'weights': self.weights,
        }
        frequency_kwargs = {
            'axis': 'x',
            'groups': ['Nets'],
            'iterators': view_iterators,
        }
        self.net_views = ViewMapper(
            template={
                'method': frequency_method,
                'kwargs': frequency_kwargs,
            })
Beispiel #3
0
    def setUp(self):
        """Load the 'Example Data (A)' data and meta fixtures.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta into self
        name_data = '%s.csv' % (project_name)
        path_data = '%s%s' % (self.path, name_data)
        self.example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)
        name_meta = '%s.json' % (project_name)
        path_meta = '%s%s' % (self.path, name_meta)
        self.example_data_A_meta = load_json(path_meta)

        # Variables by type for Example Data A
        self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
        self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    def setUp(self):
        """Load the 'Example Data (A)' data and meta fixtures.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta into self
        name_data = '%s.csv' % (project_name)
        path_data = '%s%s' % (self.path, name_data)
        self.example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)
        name_meta = '%s.json' % (project_name)
        path_meta = '%s%s' % (self.path, name_meta)
        self.example_data_A_meta = load_json(path_meta)

        # Variables by type for Example Data A
        self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
        self.delimited_set = ['q2', 'q3', 'q8', 'q9']
    def setUp(self):
        """Build a one-dataset stack from the 'engine_B' fixtures.

        Reads the CSV data and JSON meta found under ``self.path``, adds
        them to a new ``Stack`` under the data key "Jan", and records the
        x/y variable names together with a list of x variable types.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        self.filepath = '%sengine_B_data.csv' % (self.path)
        self.metapath = '%sengine_B_meta.json' % (self.path)

        self.stack = Stack("StackName")
        # NOTE(review): the attribute is spelled 'seperator' here; confirm
        # that this matches the name Stack actually reads
        self.stack.seperator = ','
        self.stack.decoding = "UTF-8"

        self.data = pd.read_csv(self.filepath, index_col=0, parse_dates=True)
        self.meta = load_json(self.metapath)
        self.stack.add_data(data_key="Jan", meta=self.meta, data=self.data)

        # Only int, float, single and delimited set variables are listed;
        # the date ('endtime') and string ('name') variables are left out.
        # x_types presumably lines up with x_names by position - TODO confirm
        self.x_names = ['age', 'cost_breakfast', 'age_group', 'q4']
        self.x_types = ['int', 'float', 'single', 'delimited set']
        self.y_names = ['profile_gender']
Beispiel #6
0
 def setUp(self):
     """Build a one-dataset stack from the 'engine_B' fixtures.

     Reads the CSV data and JSON meta found under ``self.path``, adds
     them to a new ``Stack`` under the data key "Jan", and records the
     x/y variable names together with a list of x variable types.

     The CSV is read with ``pd.read_csv(..., index_col=0,
     parse_dates=True)``, which reproduces the defaults of the removed
     ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
     """
     # Directory prefix under which the fixture files are looked up
     self.path = './tests/'
     self.filepath = '%sengine_B_data.csv' % (self.path)
     self.metapath = '%sengine_B_meta.json' % (self.path)

     self.stack = Stack("StackName")
     # NOTE(review): the attribute is spelled 'seperator' here; confirm
     # that this matches the name Stack actually reads
     self.stack.seperator = ','
     self.stack.decoding = "UTF-8"

     self.data = pd.read_csv(self.filepath, index_col=0, parse_dates=True)
     self.meta = load_json(self.metapath)
     self.stack.add_data(data_key="Jan", meta=self.meta, data=self.data)

     # Only int, float, single and delimited set variables are listed;
     # the date ('endtime') and string ('name') variables are left out.
     # x_types presumably lines up with x_names by position - TODO confirm
     self.x_names = ['age', 'cost_breakfast', 'age_group', 'q4']
     self.x_types = ['int', 'float', 'single', 'delimited set']
     self.y_names = ['profile_gender']
Beispiel #7
0
    def setUp(self):
        """Load the 'Example Data (A)' fixtures and set up the example stack.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        Stack construction is delegated to
        ``self.setup_stack_Example_Data_A()``.
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta into self
        name_data = '%s.csv' % (project_name)
        path_data = '%s%s' % (self.path, name_data)
        self.example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)
        name_meta = '%s.json' % (project_name)
        path_meta = '%s%s' % (self.path, name_meta)
        self.example_data_A_meta = load_json(path_meta)

        # The minimum list of variables required to populate a stack with
        # all single*delimited set variations
        self.minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']

        self.setup_stack_Example_Data_A()
Beispiel #8
0
    def setUp(self):
        """Load the 'Example Data (A)' fixtures and set up the example stack.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        Stack construction is delegated to
        ``self.setup_stack_Example_Data_A()``.
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta into self
        name_data = '%s.csv' % (project_name)
        path_data = '%s%s' % (self.path, name_data)
        self.example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)
        name_meta = '%s.json' % (project_name)
        path_meta = '%s%s' % (self.path, name_meta)
        self.example_data_A_meta = load_json(path_meta)

        # The minimum list of variables required to populate a stack with
        # all single*delimited set variations
        self.minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']

        self.setup_stack_Example_Data_A()
    def setUp(self):
        """Load the 'Example Data (A)' fixtures and prepare the net views.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        Also records the variable names grouped by type and builds the
        ``ViewMapper`` stored on ``self.net_views``.
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta into self
        name_data = '%s.csv' % (project_name)
        path_data = '%s%s' % (self.path, name_data)
        self.example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)
        name_meta = '%s.json' % (project_name)
        path_meta = '%s%s' % (self.path, name_meta)
        self.example_data_A_meta = load_json(path_meta)

        # Variables by type for Example Data A
        self.int = ['record_number', 'unique_id', 'age', 'birth_day', 'birth_month']
        self.float = ['weight', 'weight_a', 'weight_b']
        self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
        self.delimited_set = ['q2', 'q3', 'q8', 'q9']
        self.string = ['q8a', 'q9a']
        self.date = ['start_time', 'end_time']
        self.time = ['duration']
        self.array = ['q5', 'q6', 'q7']

        # The minimum list of variables required to populate a stack with
        # all single*delimited set variations
        self.minimum = ['Wave', 'ethnicity', 'q2', 'gender']

        # Set up the expected weight iterations
        self.weights = [None, 'weight_a']

        # Set up the net views ViewMapper
        self.net_views = ViewMapper(
            template={
                'method': QuantipyViews().frequency,
                'kwargs': {
                    'axis': 'x',
                    'groups': ['Nets'],
                    'iterators': {
                        'rel_to': [None, 'y'],
                        'weights': self.weights
                    }
                }
            })
Beispiel #10
0
    def setUp(self):
        """Load the 'Example Data (A)' fixtures and create four named stacks.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        Stacks named 'Jan', 'Feb', 'Mar' and 'Apr' are obtained from
        ``self.setup_stack_Example_Data_A`` and any file already present at
        ``self.path_cluster`` is removed.
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        self.project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta into self
        name_data = '%s.csv' % (self.project_name)
        path_data = '%s%s' % (self.path, name_data)
        self.example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)
        name_meta = '%s.json' % (self.project_name)
        path_meta = '%s%s' % (self.path, name_meta)
        self.example_data_A_meta = load_json(path_meta)

        # Variables by type for Example Data A
        self.int = [
            'record_number', 'unique_id', 'age', 'birth_day', 'birth_month'
        ]
        self.float = ['weight', 'weight_a', 'weight_b']
        self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
        self.delimited_set = ['q2', 'q3', 'q8', 'q9']
        self.string = ['q8a', 'q9a']
        self.date = ['start_time', 'end_time']
        self.time = ['duration']
        self.array = ['q5', 'q6', 'q7']

        # The minimum list of variables required to populate a stack with
        # all single*delimited set variations
        self.minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']
        # One variable name taken from each of the type lists above
        # (arrays excepted)
        self.one_of_each = [
            'record_number', 'weight', 'gender', 'q2', 'q8a', 'start_time',
            'duration'
        ]

        # Set up example stacks
        self.stack0 = self.setup_stack_Example_Data_A(name='Jan')
        self.stack1 = self.setup_stack_Example_Data_A(name='Feb')
        self.stack2 = self.setup_stack_Example_Data_A(name='Mar')
        self.stack3 = self.setup_stack_Example_Data_A(name='Apr')

        self.path_cluster = '%sClusterName.cluster' % (self.path)

        # Remove any cluster file already present at that path
        if os.path.exists(self.path_cluster):
            os.remove(self.path_cluster)
Beispiel #11
0
    def setUp(self):
        """Load the 'Example Data (A)' fixtures and record its key variables.

        Reads the CSV case data (passed through
        ``dataframe_fix_string_types``) and the JSON meta document found
        under ``self.path``, then stores the keys and variable lists for
        the data set.
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        project_name = 'Example Data (A)'

        # Case data: read the CSV, then run it through the string-type fixer
        csv_path = '%s%s' % (self.path, '%s.csv' % (project_name))
        self.example_data_A_data = pd.read_csv(csv_path)
        self.example_data_A_data = dataframe_fix_string_types(
            self.example_data_A_data)

        # Meta document
        json_path = '%s%s' % (self.path, '%s.json' % (project_name))
        self.example_data_A_meta = load_json(json_path)

        # Keys and variable lists for Example Data A
        self.dk = 'Example Data (A)'
        self.fk = 'no_filter'
        self.single = [
            'gender',
            'locality',
            'ethnicity',
            'religion',
            'q1',
        ]
        self.delimited_set = ['q2', 'q3', 'q8', 'q9']
        self.q5 = ['q5_1', 'q5_2', 'q5_3']
Beispiel #12
0
    def setUp(self):
        """Load the 'Example Data (A)' fixtures and create four named stacks.

        The CSV is read with ``pd.read_csv(..., index_col=0,
        parse_dates=True)``, which reproduces the defaults of the removed
        ``pd.DataFrame.from_csv`` (first column as index, dates parsed).
        Stacks named 'Jan', 'Feb', 'Mar' and 'Apr' are obtained from
        ``self.setup_stack_Example_Data_A`` and any file already present at
        ``self.path_cluster`` is removed.
        """
        # Directory prefix under which the fixture files are looked up
        self.path = './tests/'
        self.project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta into self
        name_data = '%s.csv' % (self.project_name)
        path_data = '%s%s' % (self.path, name_data)
        self.example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)
        name_meta = '%s.json' % (self.project_name)
        path_meta = '%s%s' % (self.path, name_meta)
        self.example_data_A_meta = load_json(path_meta)

        # Variables by type for Example Data A
        self.int = ['record_number', 'unique_id', 'age', 'birth_day', 'birth_month']
        self.float = ['weight', 'weight_a', 'weight_b']
        self.single = ['gender', 'locality', 'ethnicity', 'religion', 'q1']
        self.delimited_set = ['q2', 'q3', 'q8', 'q9']
        self.string = ['q8a', 'q9a']
        self.date = ['start_time', 'end_time']
        self.time = ['duration']
        self.array = ['q5', 'q6', 'q7']

        # The minimum list of variables required to populate a stack with
        # all single*delimited set variations
        self.minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']
        # One variable name taken from each of the type lists above
        # (arrays excepted)
        self.one_of_each = ['record_number', 'weight', 'gender', 'q2', 'q8a', 'start_time', 'duration']

        # Set up example stacks
        self.stack0 = self.setup_stack_Example_Data_A(name='Jan')
        self.stack1 = self.setup_stack_Example_Data_A(name='Feb')
        self.stack2 = self.setup_stack_Example_Data_A(name='Mar')
        self.stack3 = self.setup_stack_Example_Data_A(name='Apr')

        self.path_cluster = '%sClusterName.cluster' % (self.path)

        # Remove any cluster file already present at that path
        if os.path.exists(self.path_cluster):
            os.remove(self.path_cluster)
Beispiel #13
0
def quantipy_from_decipher(decipher_meta, decipher_data, text_key='main'):
    """ Converts the given Decipher data (which must have been exported
    in tab-delimited format) to Quantipy-ready meta and data.

    Parameters
    ----------
    decipher_meta : str or dict
        Either the path to the Decipher meta document saved as JSON or
        said document read into memory

    decipher_data : str or pandas.DataFrame
        Either the path to the Decipher data saved as tab-delimited text
        or said file read into memory. A DataFrame passed in is copied,
        so the caller's object is not modified.

    text_key : str, default 'main'
        The key under which every text label is stored in the generated
        meta document

    Returns
    -------
    meta : dict
        The Quantipy meta document

    data : pandas.DataFrame
        The converted data
    """

    # If they're not already in memory, read in the Decipher meta and
    # data files; otherwise use the objects that were passed in
    if isinstance(decipher_meta, str):
        dmeta = load_json(decipher_meta)
    else:
        dmeta = decipher_meta
    if isinstance(decipher_data, str):
        # index_col=0 / parse_dates=True reproduce the defaults of the
        # removed pd.DataFrame.from_csv
        data = pd.read_csv(
            decipher_data, sep='\t', index_col=0, parse_dates=True)
        # Keep the index values available as a regular column as well
        data[data.index.name] = data.index
    else:
        # Work on a copy: columns are dropped in place further down
        data = decipher_data.copy()

    meta = start_meta(text_key=text_key)

    # Quota variables collected per quota table, keyed by vgroup
    quotas = {
        'vqtable': {},
        'voqtable': {}
    }

    # Decipher variable type -> Quantipy column type
    types_map = {
        'text': 'string',
        'number': 'int',
        'float': 'float',
        'single': 'single',
        'multiple': 'delimited set'
    }

    # Collect the compound questions (those with more than one variable)
    compound_questions = [
        question
        for question in dmeta['questions']
        if len(question['variables']) > 1]

    # Get basic variables
    for var in dmeta['variables']:

        # Collect quota variables
        # These will be dealt with later
        # NOTE(review): quota variables are not skipped here; they still
        # receive column meta below - confirm this is intended
        for qtable in ['vqtable', 'voqtable']:
            if qtable in var['vgroup']:
                quotas[qtable].setdefault(var['vgroup'], []).append(var)

        # Start the column meta for the current variable
        var_name = var['label']
        column = meta['columns'][var_name] = {
            'type': types_map[var['type']],
            'text': {text_key: var['title']}
        }

        # Add meta-mapped path for current column to the 'data file' set
        # object so that the original order of the variables is known
        set_item = 'columns@%s' % (var_name)
        if set_item not in meta['sets']['data file']['items']:
            meta['sets']['data file']['items'].append(set_item)

        if var['type'] == 'single':
            # Get the response values
            column['values'] = get_decipher_values(var['values'], text_key)

    # Manage compound variables (delimited sets, arrays, mixed-type
    # sets)
    for question in compound_questions:

        if question['type'] == 'multiple':

            # Construct delimited set
            meta, data, vgroups, vgroup_variables = make_delimited_set(
                meta, data, question
            )

            # If there's only 1 vgroup then this is a basic multiple-
            # choice question and doesn't require construction as an
            # array or set
            if len(vgroups) == 1:
                continue

        else:
            # vgroups indicate how many groups of discrete variables sit
            # in the question

            # Find the number of variable groups in the set
            vgroups = get_vgroups(question['variables'])

            # For each variable group, get its members
            vgroup_variables = get_vgroup_variables(
                vgroups, question['variables']
            )

        # vgroup_types is used to keep track of the types used in the
        # variable group. This will help us identify mixed-type
        # question groups which are not arrays.
        vgroup_types = get_vgroup_types(vgroups, question['variables'])
        unique_vgroup_types = set(vgroup_types.values())

        # Note if the vgroups use more than one variable type
        mixed_types = len(unique_vgroup_types) > 1

        if mixed_types:
            # A set should be created to bind mixed-type variables
            # together, named after the first vgroup
            vgroup = vgroups[0]

            # Create the set
            meta['sets'][vgroup] = {
                'item type': 'mixed',
                'text': {text_key: question['qtitle']},
                'items': [
                    'columns@%s' % (var['label'])
                    for var in question['variables']
                ]
            }

        if 'multiple' in list(vgroup_types.values()):
            # This is a multiple grid
            # vgroup and vgroup_variables needs to be
            # edited to make them useable in the next step
            # This is related to the structure of multiple
            # response variables in Decipher
            vgroup_variables = [copy.copy(vgroups)]
            new_vgroup_match = re.match('(^.+)(?=[c|r][0-9]+)', vgroups[0])
            if new_vgroup_match is None:
                continue
            else:
                vgroups = [new_vgroup_match.group(0)]
                vgroup_types[vgroups[0]] = 'multiple'

        # Extract only the vgroups that contain multiple variables
        # so that an array mask can be created for each of them
        array_vgroups = [
            (vgroup, group_vars)
            for vgroup, group_vars in zip(vgroups, vgroup_variables)
            if len(group_vars) > 1
        ]

        # If there are any array-like groups of variables inside the
        # question, add an array mask/s accordingly
        for vgroup, group_vars in array_vgroups:

            if vgroup in meta['masks']:
                # This was a multiple-choice grid and has
                # already been converted
                continue

            # It's possible the vgroup is in the 'data file' set
            # and needs to be replaced with the name of the group's
            # component vars. This happens with compound questions
            # that are arrays with added open-ends variables
            mapped_vgroup = 'columns@%s' % (vgroup)
            df_items = meta['sets']['data file']['items']
            if mapped_vgroup in df_items:
                mapped_vars = [
                    ('columns@%s' % v['label']) for v in group_vars]
                idx = meta['sets']['data file']['items'].index(mapped_vgroup)
                df_items = df_items[:idx] + mapped_vars + df_items[idx+1:]
                meta['sets']['data file']['items'] = df_items

            # Create the array mask. Number/float/text arrays get the
            # first item's row title prefixed to the question title.
            mask = meta['masks'][vgroup] = {
                'type': 'array',
                'item type': types_map[vgroup_types[vgroup]],
                'text': {text_key: (
                    '{} - {}'.format(
                        group_vars[0]['rowTitle'],
                        question['qtitle']
                    )
                    if vgroup_types[vgroup] in ['number', 'float', 'text']
                    else question['qtitle']
                )},
                'items': [{
                    'source': 'columns@{}'.format(var['label']),
                    'text': {text_key: var['rowTitle']}}
                    for var in group_vars
                ]}

            if vgroup_types[vgroup] in ['single', 'multiple']:
                # Create lib values entry
                values_mapper = 'lib@values@%s' % (vgroup)
                meta['masks'][vgroup]['values'] = values_mapper
                if vgroup_types[vgroup] == 'single':
                    values = get_decipher_values(question['values'], text_key)
                elif vgroup_types[vgroup] == 'multiple':
                    values = copy.deepcopy(
                        meta['columns'][group_vars[0]]['values'])
                meta['lib']['values'][vgroup] = values

                # Use meta-mapped values reference for single or
                # multiple array variables
                for item in mask['items']:
                    col = item['source'].split('@')[-1]
                    if col in meta['columns']:
                        if 'values' in meta['columns'][col]:
                            meta['columns'][col]['values'] = values_mapper

    # Construct quota columns (meta+data)
    meta, data = manage_decipher_quota_variables(meta, data, quotas)

    # Confirm that all meta columns exist in the data
    # (iterate over a snapshot of the keys because entries are deleted)
    for col in list(meta['columns'].keys()):
        if col not in data.columns:
            print(
                "Unpaired data warning: {} found in meta['columns']"
                " but not in data.columns. Removing it.".format(col))
            del meta['columns'][col]
            set_item = 'columns@{}'.format(col)
            if set_item in meta['sets']['data file']['items']:
                meta['sets']['data file']['items'].remove(set_item)

    # Confirm that all data columns exist in the meta
    # (iterate over a snapshot of the columns because they are dropped)
    for col in list(data.columns):
        if col not in meta['columns']:
            print(
                "Unpaired meta warning: {} found in data.columns"
                " but not in meta['columns']. Removing it.".format(col))
            data.drop(col, axis=1, inplace=True)

    return meta, data
Beispiel #14
0
def quantipy_from_decipher(decipher_meta, decipher_data, text_key='main'):
    """ Converts the given Decipher data (which must have been exported
    in tab-delimited format) to Quantipy-ready meta and data.

    Parameters
    ----------
    decipher_meta : str or dict
        Either the path to the Decipher meta document saved as JSON or
        said document read into memory

    decipher_data : str or pandas.DataFrame
        Either the path to the Decipher data saved as tab-delimited text
        or said file read into memory. A DataFrame passed in is copied
        before it is handed to the conversion helpers.

    text_key : str, default 'main'
        The key under which every text label is stored in the generated
        meta document

    Returns
    -------
    meta : dict
        The Quantipy meta document

    data : pandas.DataFrame
        The converted data
    """

    # 'unicode' only exists on Python 2; fall back to plain str elsewhere
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    # If they're not already in memory, read in the Decipher meta and
    # data files; otherwise use the objects that were passed in
    if isinstance(decipher_meta, string_types):
        dmeta = load_json(decipher_meta)
    else:
        dmeta = decipher_meta
    if isinstance(decipher_data, string_types):
        # index_col=0 / parse_dates=True reproduce the defaults of the
        # removed pd.DataFrame.from_csv
        data = pd.read_csv(
            decipher_data, sep='\t', index_col=0, parse_dates=True)
    else:
        # Work on a copy so the caller's frame is left untouched
        data = decipher_data.copy()

    meta = start_meta(text_key=text_key)

    # Quota variables collected per quota table, keyed by vgroup
    quotas = {
        'vqtable': {},
        'voqtable': {}
    }

    # Decipher variable type -> Quantipy column type
    types_map = {
        'text': 'string',
        'number': 'int',
        'float': 'float',
        'single': 'single',
        'multiple': 'delimited set'
    }

    # Get basic variables
    for var in dmeta['variables']:

        # Collect quota variables
        # These will be dealt with later
        # NOTE(review): quota variables are not skipped here; they still
        # receive a 'data file' entry and column meta below - confirm
        # this is intended
        for qtable in ['vqtable', 'voqtable']:
            if qtable in var['vgroup']:
                quotas[qtable].setdefault(var['vgroup'], []).append(var)

        # Add meta-mapped path for current column to the 'data file' set
        # object so that the original order of the variables is known
        # (the entry is keyed by the variable's vgroup)
        set_item = 'columns@%s' % (var['vgroup'])
        if set_item not in meta['sets']['data file']['items']:
            meta['sets']['data file']['items'].append(set_item)

        # Start the column meta for the current variable
        var_name = var['label']
        column = meta['columns'][var_name] = {
            'type': types_map[var['type']],
            'text': {text_key: var['title']}
        }

        if var['type'] == 'single':
            # Get the response values
            column['values'] = get_decipher_values(var['values'], text_key)

    # Create generator for compound questions
    compound_questions = (
        question
        for question in dmeta['questions']
        if len(question['variables']) > 1
    )

    # Manage compound variables (delimited sets, arrays, mixed-type
    # sets)
    for question in compound_questions:

        if question['type'] == 'multiple':

            # Construct delimited set
            meta, data, vgroups, vgroup_variables = make_delimited_set(
                meta, data, question
            )

            # If there's only 1 vgroup then this is a basic multiple-
            # choice question and doesn't require construction as an
            # array or set
            if len(vgroups) == 1:
                continue

        else:
            # vgroups indicate how many groups of discrete variables sit
            # in the question

            # Find the number of variable groups in the set
            vgroups = get_vgroups(question['variables'])

            # For each variable group, get its members
            vgroup_variables = get_vgroup_variables(
                vgroups, question['variables']
            )

        # vgroup_types is used to keep track of the types used in the
        # variable group. This will help us identify mixed-type
        # question groups which are not arrays.
        vgroup_types = get_vgroup_types(vgroups, question['variables'])
        unique_vgroup_types = set(vgroup_types.values())

        # Note if the vgroups use more than one variable type
        mixed_types = len(unique_vgroup_types) > 1

        if mixed_types:
            # A set should be created to bind mixed-type variables
            # together, named after the first vgroup
            vgroup = vgroups[0]

            # Create the set
            meta['sets'][vgroup] = {
                'item type': 'mixed',
                'text': {text_key: question['qtitle']},
                'items': [
                    'columns@%s' % (var['label'])
                    for var in question['variables']
                ]
            }

        if 'multiple' in vgroup_types.values():
            # This is a multiple grid
            # vgroup and vgroup_variables needs to be
            # edited to make them useable in the next step
            # This is related to the structure of multiple
            # response variables in Decipher
            vgroup_variables = [copy.copy(vgroups)]
            new_vgroup_match = re.match('(^.+)(?=[c|r][0-9]+)', vgroups[0])
            if new_vgroup_match is None:
                continue
            else:
                vgroups = [new_vgroup_match.group(0)]
                vgroup_types[vgroups[0]] = 'multiple'

        # Extract only the vgroups that contain multiple variables
        # so that an array mask can be created for each of them
        array_vgroups = [
            (vgroup, group_vars)
            for vgroup, group_vars in zip(vgroups, vgroup_variables)
            if len(group_vars) > 1
        ]

        # If there are any array-like groups of variables inside the
        # question, add an array mask/s accordingly
        for vgroup, group_vars in array_vgroups:

            # It's possible the vgroup is in the 'data file' set
            # and needs to be replaced with the name of the group's
            # component vars. This happens with compound questions
            # that are arrays with added open-ends variables
            mapped_vgroup = 'columns@%s' % (vgroup)
            df_items = meta['sets']['data file']['items']
            if mapped_vgroup in df_items:
                mapped_vars = [
                    ('columns@%s' % v['label']) for v in group_vars]
                idx = meta['sets']['data file']['items'].index(mapped_vgroup)
                df_items = df_items[:idx] + mapped_vars + df_items[idx+1:]
                meta['sets']['data file']['items'] = df_items

            # Create the array mask. Number/float/text arrays get the
            # first item's row title prefixed to the question title.
            # For 'multiple' groups the members are used as column names
            # directly; otherwise each member's 'label' is used.
            mask = meta['masks'][vgroup] = {
                'type': 'array',
                'item type': types_map[vgroup_types[vgroup]],
                'text': {text_key: (
                    '%s - %s' % (
                        group_vars[0]['rowTitle'],
                        question['qtitle']
                    )
                    if vgroup_types[vgroup] in ['number', 'float', 'text']
                    else question['qtitle']
                )},
                'items': [
                    'columns@%s' % (
                        var
                        if vgroup_types[vgroup] == 'multiple'
                        else var['label']
                    )
                    for var in group_vars
                ]
            }

            if vgroup_types[vgroup] in ['single', 'multiple']:
                # Create lib values entry
                values_mapping = 'lib@values@%s' % (vgroup)
                if vgroup_types[vgroup] == 'single':
                    values = get_decipher_values(question['values'], text_key)
                elif vgroup_types[vgroup] == 'multiple':
                    values = copy.deepcopy(
                        meta['columns'][group_vars[0]]['values'])
                meta['lib']['values'][vgroup] = values

                # Use meta-mapped values reference for single or
                # multiple array variables
                for item in mask['items']:
                    col = item.split('@')[-1]
                    meta['columns'][col]['values'] = values_mapping

    # Construct quota columns (meta+data)
    meta, data = manage_decipher_quota_variables(meta, data, quotas)

    return meta, data
    def test_iterations_object(self):
        """Check the views generated from a ViewMapper 'iterators' template.

        A basic stack is built from Example Data (A), then net views are
        added through a ViewMapper whose template iterates over 'rel_to'
        and 'weights'. The test asserts that one view key exists per
        iterator combination, and that passing ``weights`` to
        ``stack.add_link()`` takes precedence over the iterator weights.
        """
        project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta.
        # DataFrame.from_csv was removed in pandas 1.0; read_csv with
        # index_col=0 and parse_dates=True reproduces its defaults.
        path_data = '%s%s.csv' % (self.path, project_name)
        example_data_A_data = pd.read_csv(
            path_data, index_col=0, parse_dates=True)

        path_meta = '%s%s.json' % (self.path, project_name)
        example_data_A_meta = load_json(path_meta)

        # Minimal set of x variables used to build the basic stack
        eda_minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']

        # Create basic stack
        stack = Stack(name=project_name)
        stack.add_data(project_name, example_data_A_data, example_data_A_meta)
        stack.add_link(
            data_keys=project_name,
            filters=['no_filter'],
            x=eda_minimum,
            y=['@'],
            views=QuantipyViews(['default', 'cbase', 'counts', 'c%']),
            weights=[None, 'weight_a', 'weight_b']
        )

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that weighted and unweighted versions of all basic views
        # were created
        for view_key in [
            'x|default|x:y|||default',
            'x|default|x:y||weight_a|default',
            'x|default|x:y||weight_b|default',
            'x|frequency|x:y|||cbase',
            'x|frequency|x:y||weight_a|cbase',
            'x|frequency|x:y||weight_b|cbase',
            'x|frequency||y||c%',
            'x|frequency||y|weight_a|c%',
            'x|frequency||y|weight_b|c%',
            'x|frequency||||counts',
            'x|frequency|||weight_a|counts',
            'x|frequency|||weight_b|counts',
        ]:
            self.assertIn(view_key, views_present)

        # Create a ViewMapper using the iterator object in a template
        xnets = ViewMapper(
            template={
                'method': QuantipyViews().frequency,
                'kwargs': {
                    'axis': 'x',
                    'groups': ['Nets'],
                    'iterators': {
                        'rel_to': [None, 'y'],
                        'weights': [None, 'weight_a']
                    }
                }
            })

        # Add a method to the xnets ViewMapper, then use it to generate
        # additional views which include N/c% and unweighted/weighted
        xnets.add_method(name='ever', kwargs={'text': 'Ever', 'logic': [1, 2]})
        stack.add_link(x='q2b', y=['@'], views=xnets.subset(['ever']))

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that the expected views were all created
        for view_key in [
            'x|frequency|x[(1,2)]:y|||ever',
            'x|frequency|x[(1,2)]:y|y||ever',
            'x|frequency|x[(1,2)]:y||weight_a|ever',
            'x|frequency|x[(1,2)]:y|y|weight_a|ever',
        ]:
            self.assertIn(view_key, views_present)

        # Re-apply the 'ever' method, but this time override the weights
        # in the iterator object using the stack.add_link(weights) parameter
        stack.add_link(
            x='q2b', y=['@'],
            views=xnets.subset(['ever']),
            weights='weight_b'
        )

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that the expected views were all created
        for view_key in [
            'x|frequency|x[(1,2)]:y||weight_b|ever',
            'x|frequency|x[(1,2)]:y|y|weight_b|ever',
        ]:
            self.assertIn(view_key, views_present)

        # Add two methods and apply them at the same time, make sure all
        # expected iterations of both were created
        xnets.add_method(
            name='ever (multi test)',
            kwargs={'text': 'Ever', 'logic': [1, 2]})
        xnets.add_method(
            name='never (multi test)',
            kwargs={'text': 'Never', 'logic': [2, 3]})
        stack.add_link(
            x='q2b', y=['@'],
            views=xnets.subset(['ever (multi test)', 'never (multi test)'])
        )

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that the expected views were all created
        for view_key in [
            'x|frequency|x[(1,2)]:y|||ever (multi test)',
            'x|frequency|x[(1,2)]:y|y||ever (multi test)',
            'x|frequency|x[(1,2)]:y||weight_a|ever (multi test)',
            'x|frequency|x[(1,2)]:y|y|weight_a|ever (multi test)',
            'x|frequency|x[(2,3)]:y|||never (multi test)',
            'x|frequency|x[(2,3)]:y|y||never (multi test)',
            'x|frequency|x[(2,3)]:y||weight_a|never (multi test)',
            'x|frequency|x[(2,3)]:y|y|weight_a|never (multi test)',
        ]:
            self.assertIn(view_key, views_present)

        # Add two methods and apply them at the same time, in this case
        # checking that the weights arg for stack.add_link() overrides
        # what the iterator object is asking for
        xnets.add_method(
            name='ever (weights test)',
            kwargs={'text': 'Ever', 'logic': [1, 2]})
        xnets.add_method(
            name='never (weights test)',
            kwargs={'text': 'Never', 'logic': [2, 3]})
        stack.add_link(
            x='q2b', y=['@'],
            views=xnets.subset(['ever (weights test)', 'never (weights test)']),
            weights=['weight_b']
        )

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # The unweighted views requested by the iterator must be absent...
        for view_key in [
            'x|frequency|x[(1,2)]:y|||ever (weights test)',
            'x|frequency|x[(1,2)]:y|y||ever (weights test)',
            'x|frequency|x[(2,3)]:y|||never (weights test)',
            'x|frequency|x[(2,3)]:y|y||never (weights test)',
        ]:
            self.assertNotIn(view_key, views_present)

        # ...while the weight_b views requested via add_link must exist
        for view_key in [
            'x|frequency|x[(1,2)]:y||weight_b|ever (weights test)',
            'x|frequency|x[(1,2)]:y|y|weight_b|ever (weights test)',
            'x|frequency|x[(2,3)]:y||weight_b|never (weights test)',
            'x|frequency|x[(2,3)]:y|y|weight_b|never (weights test)',
        ]:
            self.assertIn(view_key, views_present)
Beispiel #16
0
    def test_iterations_object(self):
        """Check the views generated from a ViewMapper 'iterators' template.

        A basic stack is built from Example Data (A), then net views are
        added through a ViewMapper whose template iterates over 'rel_to'
        and 'weights'. The test asserts that one view key exists per
        iterator combination, and that passing ``weights`` to
        ``stack.add_link()`` takes precedence over the iterator weights.
        """
        project_name = 'Example Data (A)'

        # Load Example Data (A) data and meta.
        # DataFrame.from_csv was removed in pandas 1.0; read_csv with
        # index_col=0 and parse_dates=True reproduces its defaults.
        path_data = '%s%s.csv' % (self.path, project_name)
        example_data_A_data = pd.read_csv(path_data,
                                          index_col=0,
                                          parse_dates=True)

        path_meta = '%s%s.json' % (self.path, project_name)
        example_data_A_meta = load_json(path_meta)

        # Minimal set of x variables used to build the basic stack
        eda_minimum = ['q2b', 'Wave', 'q2', 'q3', 'q5_1']

        # Create basic stack
        stack = Stack(name=project_name)
        stack.add_data(project_name, example_data_A_data, example_data_A_meta)
        stack.add_link(data_keys=project_name,
                       filters=['no_filter'],
                       x=eda_minimum,
                       y=['@'],
                       views=QuantipyViews(
                           ['default', 'cbase', 'counts', 'c%']),
                       weights=[None, 'weight_a', 'weight_b'])

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that weighted and unweighted versions of all basic views
        # were created
        expected_basic = [
            'x|default|x:y|||default',
            'x|default|x:y||weight_a|default',
            'x|default|x:y||weight_b|default',
            'x|frequency|x:y|||cbase',
            'x|frequency|x:y||weight_a|cbase',
            'x|frequency|x:y||weight_b|cbase',
            'x|frequency||y||c%',
            'x|frequency||y|weight_a|c%',
            'x|frequency||y|weight_b|c%',
            'x|frequency||||counts',
            'x|frequency|||weight_a|counts',
            'x|frequency|||weight_b|counts',
        ]
        for view_key in expected_basic:
            self.assertIn(view_key, views_present)

        # Create a ViewMapper using the iterator object in a template
        xnets = ViewMapper(
            template={
                'method': QuantipyViews().frequency,
                'kwargs': {
                    'axis': 'x',
                    'groups': ['Nets'],
                    'iterators': {
                        'rel_to': [None, 'y'],
                        'weights': [None, 'weight_a']
                    }
                }
            })

        # Add a method to the xnets ViewMapper, then use it to generate
        # additional views which include N/c% and unweighted/weighted
        xnets.add_method(name='ever', kwargs={'text': 'Ever', 'logic': [1, 2]})
        stack.add_link(x='q2b', y=['@'], views=xnets.subset(['ever']))

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that the expected views were all created
        expected_ever = [
            'x|frequency|x[(1,2)]:y|||ever',
            'x|frequency|x[(1,2)]:y|y||ever',
            'x|frequency|x[(1,2)]:y||weight_a|ever',
            'x|frequency|x[(1,2)]:y|y|weight_a|ever',
        ]
        for view_key in expected_ever:
            self.assertIn(view_key, views_present)

        # Re-apply the 'ever' method, but this time override the weights
        # in the iterator object using the stack.add_link(weights) parameter
        stack.add_link(x='q2b',
                       y=['@'],
                       views=xnets.subset(['ever']),
                       weights='weight_b')

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that the expected views were all created
        expected_ever_weight_b = [
            'x|frequency|x[(1,2)]:y||weight_b|ever',
            'x|frequency|x[(1,2)]:y|y|weight_b|ever',
        ]
        for view_key in expected_ever_weight_b:
            self.assertIn(view_key, views_present)

        # Add two methods and apply them at the same time, make sure all
        # expected iterations of both were created
        xnets.add_method(name='ever (multi test)',
                         kwargs={
                             'text': 'Ever',
                             'logic': [1, 2]
                         })
        xnets.add_method(name='never (multi test)',
                         kwargs={
                             'text': 'Never',
                             'logic': [2, 3]
                         })
        stack.add_link(x='q2b',
                       y=['@'],
                       views=xnets.subset(
                           ['ever (multi test)', 'never (multi test)']))

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # Test that the expected views were all created
        expected_multi = [
            'x|frequency|x[(1,2)]:y|||ever (multi test)',
            'x|frequency|x[(1,2)]:y|y||ever (multi test)',
            'x|frequency|x[(1,2)]:y||weight_a|ever (multi test)',
            'x|frequency|x[(1,2)]:y|y|weight_a|ever (multi test)',
            'x|frequency|x[(2,3)]:y|||never (multi test)',
            'x|frequency|x[(2,3)]:y|y||never (multi test)',
            'x|frequency|x[(2,3)]:y||weight_a|never (multi test)',
            'x|frequency|x[(2,3)]:y|y|weight_a|never (multi test)',
        ]
        for view_key in expected_multi:
            self.assertIn(view_key, views_present)

        # Add two methods and apply them at the same time, in this case
        # checking that the weights arg for stack.add_link() overrides
        # what the iterator object is asking for
        xnets.add_method(name='ever (weights test)',
                         kwargs={
                             'text': 'Ever',
                             'logic': [1, 2]
                         })
        xnets.add_method(name='never (weights test)',
                         kwargs={
                             'text': 'Never',
                             'logic': [2, 3]
                         })
        stack.add_link(x='q2b',
                       y=['@'],
                       views=xnets.subset(
                           ['ever (weights test)', 'never (weights test)']),
                       weights=['weight_b'])

        # Get list of views created
        views_present = stack.describe(index=['view'])

        # The unweighted views requested by the iterator must be absent...
        unexpected_unweighted = [
            'x|frequency|x[(1,2)]:y|||ever (weights test)',
            'x|frequency|x[(1,2)]:y|y||ever (weights test)',
            'x|frequency|x[(2,3)]:y|||never (weights test)',
            'x|frequency|x[(2,3)]:y|y||never (weights test)',
        ]
        for view_key in unexpected_unweighted:
            self.assertNotIn(view_key, views_present)

        # ...while the weight_b views requested via add_link must exist
        expected_weight_b = [
            'x|frequency|x[(1,2)]:y||weight_b|ever (weights test)',
            'x|frequency|x[(1,2)]:y|y|weight_b|ever (weights test)',
            'x|frequency|x[(2,3)]:y||weight_b|never (weights test)',
            'x|frequency|x[(2,3)]:y|y|weight_b|never (weights test)',
        ]
        for view_key in expected_weight_b:
            self.assertIn(view_key, views_present)