Example #1
 def _add_oe(self, oe, break_by, title, drop_empty, incl_nan, replacements, filter_by):
     columns = break_by + oe
     oe_data = self._data.copy()
     if self.filter != 'no_filter':
         ds = qp.DataSet('open_ends')
         ds.from_components(oe_data, self._meta)
         slicer = ds.take(list(self.filter.values())[0])
         oe_data = oe_data.loc[slicer, :]
     if filter_by:
         ds = qp.DataSet('open_ends')
         ds.from_components(oe_data, self._meta)
         slicer = ds.take(filter_by)
         oe_data = oe_data.loc[slicer, :]
     oe_data = oe_data[columns]
     oe_data.replace('__NA__', np.NaN, inplace=True)
     if replacements:
         for target, repl in replacements.items():
             oe_data.replace(target, repl, inplace=True)
     if drop_empty:
         oe_data.dropna(subset=oe, how='all', inplace=True)
     if not incl_nan:
         for col in oe:
             oe_data[col].replace(np.NaN, '', inplace=True)
     self.verbatims[title] = oe_data
     self.verbatim_names.extend(oe)
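
The method above only collects the cleaned verbatim DataFrames in `self.verbatims`. As a minimal follow-up sketch (not part of the original snippet; the `verbatims` argument and the output path are placeholders), the collected frames could be written to a single Excel workbook with pandas:

import pandas as pd

def export_verbatims(verbatims, path='open_ends.xlsx'):
    # Write each stored verbatim DataFrame to its own sheet.
    with pd.ExcelWriter(path) as writer:
        for title, frame in verbatims.items():
            # Excel limits sheet names to 31 characters.
            frame.to_excel(writer, sheet_name=str(title)[:31], index=False)
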
Example #2
 def test_read_spss_readstat(self):
     dataset_v1 = qp.DataSet('spss')
     dataset_v1.read_spss(
         'tests/Example Data (A) - with multi choice q2.sav')
     dataset = qp.DataSet('spss')
     dataset.read_spss('tests/Example Data (A) - with multi choice q2.sav',
                       engine='readstat')
     # the label of the set is lost as this engine doesn't support delimited sets
     dataset.to_delimited_set('q2', dataset_v1.text('q2'),
                              dataset.find('q2_'))
     self.assertTrue(dataset.meta('q2').shape == (8, 3))
     assert dataset.crosstab('q2').equals(dataset_v1.crosstab('q2'))
     assert dataset.crosstab('q2b').equals(dataset_v1.crosstab('q2b'))
Example #3
 def _load_ds(self, name):
     path_json = '{}/{}.json'.format(self.path, name)
     path_csv = '{}/{}.csv'.format(self.path, name)
     dataset = qp.DataSet(name, self._dimensions_comp)
     dataset.set_verbose_infomsg(False)
     dataset.read_quantipy(path_json, path_csv)
     return dataset
Example #4
def test_read_from_api():
    dataset_from_api = qp.DataSet("confirmit")
    dataset_from_api.read_confirmit_api(projectid="p913481003361",
                                        public_url="https://ws.euro.confirmit.com/",
                                        idp_url="https://idp.euro.confirmit.com/",
                                        client_id="71a15e5d-b52d-4534-b54b-fa6e2a9da8a7",
                                        client_secret="2a943d4d-58ab-42b8-a276-53d07ad34064",
                                        schema_vars='status,q39,q21',
                                        schema_filter="response:status='complete'"
                                        )
    print(dataset_from_api.meta('q39'))
    assert dataset_from_api.crosstab('q39').shape == (3, 1)
    print(dataset_from_api.meta('q21'))
    assert dataset_from_api.crosstab('q21').shape == (6, 1)
    print(dataset_from_api.crosstab('q39', 'q21'))
    assert dataset_from_api.crosstab('q39', 'q21').shape == (3, 5)
    assert dataset_from_api.meta()['columns']['q39'] == json.loads("""
    {"name": "q39",
    "parent": {},
    "type": "single",
    "values": [
        {"text": {"en": "yes"},
        "value": 1},
        {"text": {"en": "no"},
        "value": 2}],
    "text": {"en": "Use script to set values"}}""")
Example #5
 def _get_dataset(self):
     path = os.path.dirname(os.path.abspath(__file__)) + '/'
     name = 'Example Data (A)'
     casedata = '{}.csv'.format(name)
     metadata = '{}.json'.format(name)
     dataset = qp.DataSet(name)
     dataset.set_verbose_infomsg(False)
     dataset.read_quantipy(path + metadata, path + casedata)
     return dataset
Example #6
def test_writer_to_api():
    dataset = qp.DataSet("confirmit")
    response = dataset.write_confirmit_api(projectid="p913481003361",
                                           public_url="https://ws.euro.confirmit.com/",
                                           idp_url="https://idp.euro.confirmit.com/",
                                           client_id="71a15e5d-b52d-4534-b54b-fa6e2a9da8a7",
                                           client_secret="2a943d4d-58ab-42b8-a276-53d07ad34064",
                                           schema_vars=["q7", "q9", "q11"])

    assert response.status_code == 200
    assert b'insertedRecords' in response.content
    assert b'updatedRecords' in response.content
    print(response.content)
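
The client_id and client_secret in the two Confirmit API examples above are hard-coded test values. A hedged sketch of the more typical setup, pulling the credentials from environment variables before calling the reader or writer (the environment variable names are assumptions):

import os

import quantipy as qp

def read_from_api_with_env_credentials():
    ds = qp.DataSet("confirmit")
    ds.read_confirmit_api(projectid="p913481003361",
                          public_url="https://ws.euro.confirmit.com/",
                          idp_url="https://idp.euro.confirmit.com/",
                          client_id=os.environ['CONFIRMIT_CLIENT_ID'],          # assumed env var name
                          client_secret=os.environ['CONFIRMIT_CLIENT_SECRET'],  # assumed env var name
                          schema_vars='status,q39,q21')
    return ds
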
Example #7
def process_file():
    try:         
        uid = str(uuid.uuid4())  # use uid to track multiple instances in separate tabs/windows
        # reset and initialize session
        session[uid] = {}
        session[uid]['active'] = True
        file = request.files['file']  # .sav file user selected
        filename = file.filename
        print("name of file: " + filename)
        session[uid]['filename'] = filename
        # create temp directory if it does not exist
        if not os.path.isdir('./temp'):
            print('Creating temp directory')
            os.makedirs('./temp')
        # NOTE: the filename comes straight from the upload and is not sanitized here.
        file.save(os.path.join('./temp', filename))
        print("Temporary file saved")


        # read dataset from saved .sav file
        ds = qp.DataSet('data')
        ds.read_spss('./temp/' + filename, ioLocale=None, detect_dichot=False)
        

        # get file metadata
        meta_data = ds.meta()['columns'].values()
        meta_data = list(meta_data)  # try to remove this to make upload faster

        if ('uuid' not in ds.meta()['columns']):
            return {'success': 'false', 'message': 'This file does not have a uuid variable.'}

        return {'success': 'true', 'uid': uid, 'meta_data_array': meta_data, 'meta_data_obj': ds.meta()['columns']}

    except Exception as e:
        print(e)
        print(sys.exc_info()[0])
        traceback.print_exc()
        return {'success': 'false'}
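
process_file leaves the uploaded .sav file in ./temp. A small hedged sketch of a cleanup helper that could run once the metadata has been returned (the helper name and its call site are assumptions, not part of the original view):

import os

def remove_temp_file(filename):
    # Delete the previously saved upload, if it is still there.
    path = os.path.join('./temp', filename)
    if os.path.isfile(path):
        os.remove(path)
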
Example #8
    def to_dataset(self, mode=None, from_set="data file", additions="sort_within",
                   manifest_edits="keep", integrate_rc=(["_rc", "_rb"], True),
                   misc=["RecordNo", "caseid", "identity"]):
        """
        Create a qp.DataSet instance out of the batch settings.

        Parameters
        ----------
        mode: list of str {'x', 'y', 'v', 'oe', 'w', 'f'}
            Variables to keep.
        from_set: str or list of str, default "data file"
            Set name or a list of variables to sort against.
        additions: str {'sort_within', 'sort_between', False}
            Add variables from additional batches.
        manifest_edits: str {'keep', 'apply', False}
            Keep meta from edits or apply rules.
        """
        batches = self._meta['sets']['batches']
        adds = batches[self.name]['additions']

        # prepare variable list
        if not mode:
            mode = ['x', 'y', 'v', 'oe', 'w', 'f']
        vlist = self._get_vlist(batches[self.name], mode)
        if additions == "sort_between":
            for add in adds:
                vlist += self._get_vlist(batches[add], mode)
        if not from_set:
            from_set = vlist
        vlist = self.align_order(vlist, from_set, integrate_rc, fix=misc)
        if additions == "sort_within":
            for add in adds:
                add_list = self._get_vlist(batches[add], mode)
                add_list = self.align_order(add_list, from_set, integrate_rc,
                                            fix=misc)
                vlist += add_list
        vlist = self.de_duplicate(vlist)
        vlist = self.roll_up(vlist)

        # handle filters
        merge_f = False
        f = self.filter
        if adds:
            filters = [self.filter] + [batches[add]['filter'] for add in adds]
            filters = [fi for fi in filters if fi]
            if len(filters) == 1:
                f = filters[0]
            elif not self.compare_filter(filters[0], filters[1:]):
                f = "merge_filter"
                merge_f = filters
            else:
                f = filters[0]

        # create ds
        ds = qp.DataSet(self.name, self._dimensions_comp)
        ds.from_components(self._data.copy(), org_copy.deepcopy(self._meta),
                           True, self.language)
        for b in ds.batches():
            if not (b in adds or b == ds.name):
                del ds._meta['sets']['batches'][b]

        if merge_f:
            ds.merge_filter(f, filters)
            if not manifest_edits:
                vlist.append(f)
        if f and manifest_edits:
            ds.filter(self.name, {f: 0}, True)
            if merge_f:
                ds.drop(f)

        ds.create_set(str(self.name), included=vlist, overwrite=True)
        ds.subset(from_set=self.name, inplace=True)
        ds.order(vlist)

        # manifest edits
        if manifest_edits in ['apply', 'keep']:
            b_meta = batches[self.name]['meta_edits']
            for v in ds.variables():
                if ds.is_array(v) and b_meta.get(v):
                    ds._meta['masks'][v] = b_meta[v]
                    try:
                        ds._meta['lib']['values'][v] = b_meta['lib'][v]
                    except:
                        pass
                elif b_meta.get(v):
                    ds._meta['columns'][v] = b_meta[v]
                if manifest_edits == "apply" and not ds._is_array_item(v):
                    for axis in ['x', 'y']:
                        if all(rule in ds._get_rules(v, axis)
                               for rule in ['dropx', 'slicex']):
                            drops = ds._get_rules(v, axis)['dropx']['values']
                            slicer = ds._get_rules(v, axis)['slicex']['values']
                        elif 'dropx' in ds._get_rules(v, axis):
                            drops = ds._get_rules(v, axis)['dropx']['values']
                            slicer = ds.codes(v)
                        elif 'slicex' in ds._get_rules(v, axis):
                            drops = []
                            slicer = ds._get_rules(v, axis)['slicex']['values']
                        else:
                            drops = slicer = []
                        if drops or slicer:
                            if not all(isinstance(c, int) for c in drops):
                                item_no = [ds.item_no(d) for d in drops]
                                ds.remove_items(v, item_no)
                            else:
                                codes = ds.codes(v)
                                n_codes = [c for c in slicer if not c in drops]
                                if not len(n_codes) == len(codes):
                                    remove = [c for c in codes
                                              if not c in n_codes]
                                    ds.remove_values(v, remove)
                                ds.reorder_values(v, n_codes)
                                if ds.is_array(v):
                                    ds._meta['masks'][v].pop('rules')
                                else:
                                    ds._meta['columns'][v].pop('rules')
        return ds
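
to_dataset is meant to be called on a Batch instance. A hypothetical usage sketch, assuming the usual quantipy Batch workflow (DataSet.add_batch, Batch.add_x, Batch.add_y); the file paths and variable names are placeholders:

import quantipy as qp

dataset = qp.DataSet('example')
dataset.read_quantipy('example_meta.json', 'example_data.csv')  # placeholder paths
batch = dataset.add_batch('client_tables')  # assumed quantipy Batch helpers
batch.add_x(['q1', 'q2'])                   # assumed
batch.add_y(['gender'])                     # assumed
subset = batch.to_dataset(mode=['x', 'y'], manifest_edits='keep')
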
Example #9
 def test_read_spss(self):
     dataset = qp.DataSet('spss')
     dataset.read_spss('tests/Example Data (A) - with multi choice q2.sav')
     self.assertTrue(dataset.meta('q2').shape == (8, 3))
Example #10
def dataset():
    _dataset = qp.DataSet(NAME_PROJ, dimensions_comp=False)
    _dataset.read_quantipy(PATH_META, PATH_DATA)
    yield _dataset.split()
    del _dataset
Example #11
def main(spss_filename):
    """
        Purpose: Run program from terminal
        Parameters:
            spss_filename - name of .sav file, extension not included
        Returns: None
    """

    ### OPEN SPSS DATA
    # edit main() based on desired weighting scheme
    ds = qp.DataSet(spss_filename)
    # ds.read_spss('./' + spss_filename, ioLocale=None, detect_dichot=False)
    ds.read_spss(
        'C://Users//Jamie Smart//Dropbox (Latitude)//Active Projects//AETN - Lifetime - KFC (10478)//Fieldwork//data management//'
        + spss_filename,
        ioLocale=None,
        detect_dichot=False)

    scheme = create_scheme()

    ### ADD TARGETS TO SCHEME
    tv_only_targets = []
    tv_social_targets = []

    #add_target(name, variable, dictionary_of_targets, target_list_to_add_to)
    add_target("age_targets", "S1_RC", {1: 46.6, 2: 53.4}, tv_only_targets)
    add_target("age_targets", "S1_RC", {1: 66.0, 2: 34.0}, tv_social_targets)
    add_target("gender_targets", "S2", {1: 32.4, 2: 67.6}, tv_only_targets)
    add_target("gender_targets", "S2", {1: 17.0, 2: 83.0}, tv_social_targets)
    add_target("ethnicity_targets", "Ethnicity", {
        1: 80.4,
        2: 4.7,
        3: 8.8,
        4: 6.1
    }, tv_only_targets)
    add_target("ethnicity_targets", "Ethnicity", {
        1: 54.2,
        2: 10.5,
        3: 17.0,
        4: 18.3
    }, tv_social_targets)
    add_target("usage_targets", "A10r1", {0: 54.1, 1: 45.9}, tv_only_targets)
    add_target("usage_targets", "A10r1", {0: 35.9, 1: 64.1}, tv_social_targets)
    add_target("frequency_targets", "A11_KFC", {
        0: 73.0,
        1: 27.0
    }, tv_only_targets)
    add_target("frequency_targets", "A11_KFC", {
        0: 52.9,
        1: 47.1
    }, tv_social_targets)
    # apply_targets(scheme, tv_only_targets)
    # apply_targets(scheme, tv_social_targets)

    ds_group = ds[[
        'CellSocial', 'S1_RC', 'S2', 'Ethnicity', 'A10r1', 'A11_KFC'
    ]]
    add_group(ds_group, scheme, "tv only primary", "CellSocial", "2",
              tv_only_targets)
    add_group(ds_group, scheme, "tv only secondary", "CellSocial", "3",
              tv_only_targets)
    add_group(ds_group, scheme, "tv+social primary", "CellSocial", "5",
              tv_social_targets)
    add_group(ds_group, scheme, "tv+social secondary", "CellSocial", "6",
              tv_social_targets)

    ds.weight(scheme, weight_name="weight", unique_key='uuid')
    ds["weight"].fillna(1, inplace=True)

    check_weights(ds, ['S1_RC', 'S2', 'Ethnicity', 'A10r1', 'A11_KFC'],
                  group='CellSocial')

    save_file(
        ds,
        'C://Users//Jamie Smart//Dropbox (Latitude)//Active Projects//AETN - Lifetime - KFC (10478)//Fieldwork//data management//'
        + spss_filename + '_weighted.sav')
    save_syntax_file(
        ds,
        'C://Users//Jamie Smart//Dropbox (Latitude)//Active Projects//AETN - Lifetime - KFC (10478)//Fieldwork//data management//'
        + spss_filename + '_syntax.sps')
Example #12
def weight_data(variables,
                mapping,
                grouping,
                file_name,
                weight_name="weight",
                unique_key="uuid"):
    """
        Purpose: Takes variables, groupings and uses them to weight the dataset
        Parameters:
            variables - array of variables to use for weighting
            mapping - dict with weighting targets for each variable in variables array
            grouping - None or a metadata dict for a variable to group by
            file_name - name of original data file
            weight_name - name for weight variable
        Returns: 
            file_location - location to save weighted file to
            syntax_location - location to save syntax file to
            crosstabs - unweighted and weighted crosstabs of variables used to set targets
            report - weighting summary
    """

    # text description of weighting scheme
    weighting_desc = "* Weighting targets: " + str(
        mapping) + "\n* Grouping variables: " + (str(grouping['name']) if
                                                 grouping else "None") + "\n\n"

    ds = qp.DataSet('data')
    ds.read_spss('./temp/' + file_name, ioLocale=None, detect_dichot=False)
    #meta, data = read_spss('./temp/' + file_name, ioLocale=None, detect_dichot=False)

    scheme = create_scheme("rake_scheme")

    ### ADD TARGETS TO SCHEME
    all_targets = []
    for i in range(len(variables)):  # loop through target variables
        target_var = variables[i]
        target_dict = mapping[target_var]

        str_keys = list(target_dict.keys())
        for key in str_keys:  # loop through keys and replace string key with integer key
            int_key = int(key)
            val_copy = target_dict[key]
            del target_dict[key]
            target_dict[int_key] = val_copy

        add_target("targets_" + str(i), target_var, target_dict, all_targets)

    apply_targets(scheme, all_targets)

    if grouping:
        grouping_var = grouping['name']
        for v in grouping['values']:
            val = v['value']
            label = list(v['text'].values())[0]
            ds_minified = ds[variables + [
                grouping_var
            ]]  ## create smaller version of ds to pass to add_group function
            add_group(ds_minified, scheme, label, grouping_var, val,
                      all_targets)
        grouping = grouping_var
    else:
        grouping = None  # set for crosstabs

    ds.weight(scheme, weight_name=weight_name, unique_key=unique_key)
    ds[weight_name].fillna(1, inplace=True)
    ds.meta()['measureLevels'][weight_name] = 'scale'

    ### Relabel weight
    weight_label_key = list(ds.meta()['columns'][weight_name]['text'])[
        0]  ## get key from weight dict
    ds.meta()['columns'][weight_name]['text'][weight_label_key] = weight_name

    ### Create name for saved weighted dataset using the time it was created
    dt = str(datetime.now()).replace(':', '').replace('.', '')
    file_location = './temp/' + file_name.replace(
        ".sav", "") + "_" + dt.replace(" ", "_") + '_weighted.sav'
    syntax_location = './temp/' + file_name.replace(
        ".sav", "") + "_" + dt.replace(" ", "_") + '_weight.sps'

    crosstabs = check_weights(ds,
                              variables,
                              group=grouping,
                              weight=weight_name)

    report = generate_report(scheme)

    save_file(ds, file_location)
    save_syntax_file(ds, syntax_location, weight_name, unique_key,
                     weighting_desc)

    return file_location, syntax_location, crosstabs, report
Example #13
def confirmit_dataset():
    dataset = qp.DataSet("confirmit")
    dataset.read_confirmit_from_files('tests/confirmit_meta.json',
                                      'tests/confirmit_data.json')
    return dataset
Example #14
def quantipy_dataset():
    dataset = qp.DataSet("quantipy test data")
    dataset.read_quantipy('tests/Example Data (A).json',
                          'tests/Example Data (A).csv')
    return dataset
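
A hypothetical check built on the two loaders above; q2 is taken from the other examples in this collection, while the test name and the exact assertions are assumptions:

def test_datasets_load():
    confirmit = confirmit_dataset()
    quantipy = quantipy_dataset()
    assert len(confirmit.meta()['columns']) > 0
    assert not quantipy.crosstab('q2').empty
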
Example #15
import json
import os
import sys

import quantipy as qp

data_dir = '.'  # default location when no CLI arguments are given
weighting_column = 'weighting'
if len(sys.argv) > 2:
    data_dir = sys.argv[1]
    weighting_column = sys.argv[2]
weights_filename = 'weights_def.json'
results_filename = 'results.json'
schema_filename = 'schema.json'
data_filename = 'data.json'

schema_file = os.path.join(data_dir, schema_filename)
data_file = os.path.join(data_dir, data_filename)
weights_file = os.path.join(data_dir, weights_filename)
results_file = os.path.join(data_dir, 'results', results_filename)

# READ DATA AND SCHEMA
dataset = qp.DataSet("test")
dataset.read_confirmit_from_files(schema_file, data_file)

# READ WEIGHTS DEF
with open(weights_file, "r") as weights_json:
    weights_def = json.load(weights_json)

# DEFINE WEIGHTS
scheme = qp.Rim('w')
group_targets = {}
for group_def in weights_def:
    var_targets = []
    for target_def in group_def['targets']:
        targets = dict()
        for var_target_def in target_def['targets']:
            targets.update({var_target_def['code']: var_target_def['target']})
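
The snippet above is cut off before the collected targets are attached to the scheme. A hedged, self-contained sketch of what typically follows (it restates the loop in compact form); the weights_def keys ('name', 'variable', 'filter'), the Rim.add_group signature, and the unique_key value are assumptions, not taken from the original script:

for group_def in weights_def:
    var_targets = [
        {t['variable']: {v['code']: v['target'] for v in t['targets']}}
        for t in group_def['targets']
    ]
    group_targets[group_def['name']] = var_targets
    # Assumed Rim API: register each group with its filter and targets.
    scheme.add_group(name=group_def['name'],
                     filter_def=group_def.get('filter'),
                     targets=var_targets)

dataset.weight(scheme, weight_name='w', unique_key='responseid')  # unique_key assumed
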
Example #16
def confirmit_dataset_verbose():
    dataset = qp.DataSet("confirmit")
    dataset.read_confirmit_from_files('tests/confirmit_meta.json',
                                      'tests/confirmit_data.json',
                                      verbose=True)
    return dataset
Example #17
def dataset():
    ds = qp.DataSet(NAME_PROJ, dimensions_comp=False)
    ds.read_quantipy(PATH_META, PATH_DATA)
    meta, data = ds.split()
    return meta, data.head(250)