Exemple #1
0
    def to_representation(self, instance):
        """
        Serialize ``instance`` and unwrap its ``json`` payload when present.

        The parent serializer's output is returned untouched unless it
        contains a ``json`` key. In that case only the value stored under
        that key is returned, after ``remove_metadata_fields`` has been
        called on it.
        """
        representation = super(
            TableauDataSerializer, self).to_representation(instance)
        if 'json' not in representation:
            return representation

        payload = representation['json']
        # The helper's return value is not used here; presumably it strips
        # the metadata keys from the payload in place -- confirm.
        remove_metadata_fields(payload)
        return payload
Exemple #2
0
    def get_tableau_column_headers(self):
        '''
        Build the Tableau column definitions for this form's headers.

        Returns a list of dicts, one per unique header left after
        ``remove_metadata_fields`` has processed ``self.xform_headers``.
        Each dict carries the header as both ``id`` and ``alias`` plus the
        ``dataType`` to use for that column.
        '''
        def resolve_data_type(header):
            # A header belongs to a question when it equals the question
            # name or ends with "_<question name>"; the first match wins.
            # An empty/None type for a matched question falls back to
            # 'string'.
            for name, data_type in self.flattened_dict.items():
                if header == name or header.endswith('_%s' % name):
                    return data_type or 'string'
            # No question matched: '_id' is an int column, anything else
            # is a string column.
            return 'int' if header == '_id' else 'string'

        # A set collapses the duplicate headers produced by group data.
        unique_headers = set(remove_metadata_fields(self.xform_headers))

        # 'alias' mirrors the header for now; it can be switched to the
        # question label in the future.
        return [
            {
                'id': header,
                'dataType': resolve_data_type(header),
                'alias': header
            }
            for header in unique_headers
        ]
Exemple #3
0
def process_tableau_data(data, xform):
    """
    Streamlines the row header fields
    with the column header fields for the same form.
    Handles flattening repeat data for tableau.

    :param data: iterable of row dicts; a falsy value yields ``[]``.
    :param xform: form object providing ``get_headers()`` and
        ``get_child_elements()``.
    :returns: list with one flat dict per row. Headers missing from a row
        are present with value ``None``; list values (other than the
        attachments, notes and geolocation keys) are expanded into indexed
        keys, one set per list entry.
    """
    def get_ordered_repeat_value(key, item, index):
        """
        Flatten one repeat entry into ``{indexed_xpath: value}``.

        Children are visited in the order ``xform.get_child_elements``
        returns them; a child missing from ``item`` gets
        ``DEFAULT_NA_REP``.
        """
        index_tags = DEFAULT_INDEX_TAGS
        # The index is attached to the repeat's own xpath, e.g. key
        # "children" and index 2 give the prefix "children[2]" (assuming
        # the index tags are "[" and "]").
        prefix = '{key}{open_tag}{index}{close_tag}'.format(
            key=key,
            open_tag=index_tags[0],
            index=index,
            close_tag=index_tags[1])
        key_depth = len(key.split('/'))
        flattened = {}

        for elem in xform.get_child_elements(
                key, split_select_multiples=False):
            if question_types_to_exclude(elem.type):
                continue
            child_xpath = elem.get_abbreviated_xpath()
            # Keep everything below the repeat so that nested children
            # such as "children/immunization/polio_1" become
            # "children[2]/immunization/polio_1" instead of being cut
            # down to their second segment, which made siblings collide.
            remainder = child_xpath.split('/')[key_depth:]
            flattened['/'.join([prefix] + remainder)] = item.get(
                child_xpath, DEFAULT_NA_REP)
        return flattened

    if not data:
        return []

    # The header set is identical for every row, so build it once.
    tableau_headers = set(remove_metadata_fields(xform.get_headers()))
    # List-valued keys that are not repeats and must stay as they are.
    plain_list_keys = (ATTACHMENTS, NOTES, GEOLOCATION)

    result = []
    for row in data:
        # Start from the headers this row lacks so every row ends up with
        # the same set of columns.
        flat_dict = dict.fromkeys(
            tableau_headers.difference(set(row)), None)
        for (key, value) in row.items():
            if isinstance(value, list) and key not in plain_list_keys:
                # Repeat indexes are 1-based.
                for index, item in enumerate(value, start=1):
                    flat_dict.update(
                        get_ordered_repeat_value(key, item, index))
            else:
                flat_dict[key] = value
        result.append(flat_dict)
    return result
Exemple #4
0
def process_tableau_data(data, xform):
    """
    Streamlines the row header fields
    with the column header fields for the same form.
    Handles flattening repeat data for tableau.

    :param data: iterable of row dicts; a falsy value yields ``[]``.
    :param xform: form object providing ``get_headers()``,
        ``get_element()`` and ``get_child_elements()``.
    :returns: list with one flat dict per row. Headers missing from a row
        are present with value ``None``.
    """
    def get_xpath(key, nested_key, index):
        """
        Return ``key[index]/<rest>`` where ``<rest>`` is the part of
        ``nested_key`` below ``key``'s own path segments.
        """
        key_depth = len(key.split('/'))
        remainder = nested_key.split('/')[key_depth:]
        return key + f'[{index}]/' + '/'.join(remainder)

    def get_updated_data_dict(key, value, data_dict, index=None):
        """
        Generates key, value pairs for select multiple question types.
        Defining the new xpaths from the
        question name(key) and the choice name(value)
        in accordance with how we generate the tableau schema.

        A string value is split on spaces into ``key/choice`` entries,
        but only when ``data_dict`` already holds something. A list value
        is flattened through ``get_xpath`` using ``index``; without an
        index, or when its entries are not dicts, the raw list is stored
        under ``key``. Returns ``data_dict``.
        """
        if isinstance(value, str) and data_dict:
            for choice in value.split(" "):
                data_dict[f'{key}/{choice}'] = choice
        elif isinstance(value, list):
            if index is None:
                # Outside a repeat there is no index to build xpaths
                # from, so keep the list as it is.
                data_dict[key] = value
            else:
                try:
                    for item in value:
                        for (nested_key, nested_val) in item.items():
                            xpath = get_xpath(key, nested_key, index)
                            data_dict[xpath] = nested_val
                except AttributeError:
                    # Entries without .items() cannot be flattened.
                    data_dict[key] = value

        return data_dict

    def get_ordered_repeat_value(key, item, index):
        """
        Flatten one repeat entry into ``{indexed_xpath: value}``.

        Children are visited in the order ``xform.get_child_elements``
        returns them; a child missing from ``item`` gets
        ``DEFAULT_NA_REP``.
        """
        children = xform.get_child_elements(key, split_select_multiples=False)
        item_list = OrderedDict()
        data = {}
        expandable_types = (MULTIPLE_SELECT_TYPE, REPEAT_SELECT_TYPE)

        for elem in children:
            if question_types_to_exclude(elem.type):
                continue
            new_xpath = elem.get_abbreviated_xpath()
            item_list[new_xpath] = item.get(new_xpath, DEFAULT_NA_REP)
            # Every entry collected so far is walked again for each child.
            # This is kept on purpose: get_updated_data_dict skips string
            # values while ``data`` is still empty, so a leading select
            # multiple is only expanded on a later pass.
            for (nested_key, nested_val) in item_list.items():
                qstn_type = xform.get_element(nested_key).type
                xpaths = get_xpath(key, nested_key, index)
                if qstn_type in expandable_types:
                    data = get_updated_data_dict(
                        xpaths, nested_val, data, index)
                else:
                    data[xpaths] = nested_val
        return data

    if not data:
        return []

    # The header set is identical for every row, so build it once.
    tableau_headers = set(remove_metadata_fields(xform.get_headers()))
    # List-valued keys that are not repeats.
    plain_list_keys = (ATTACHMENTS, NOTES, GEOLOCATION)

    result = []
    for row in data:
        # Start from the headers this row lacks so every row ends up with
        # the same set of columns.
        flat_dict = dict.fromkeys(
            tableau_headers.difference(set(row)), None)
        for (key, value) in row.items():
            if isinstance(value, list) and key not in plain_list_keys:
                # Repeat indexes are 1-based.
                for index, item in enumerate(value, start=1):
                    flat_dict.update(
                        get_ordered_repeat_value(key, item, index))
                continue

            try:
                qstn_type = xform.get_element(key).type
                if qstn_type == MULTIPLE_SELECT_TYPE:
                    flat_dict = get_updated_data_dict(
                        key, value, flat_dict)
                if qstn_type == 'geopoint':
                    parts = value.split(' ')
                    gps_xpaths = \
                        DataDictionary.get_additional_geopoint_xpaths(key)
                    # NOTE(review): a geopoint that does not split into
                    # exactly four parts is not written at all -- confirm
                    # this is intended.
                    if len(parts) == 4:
                        flat_dict.update(zip(gps_xpaths, parts))
                else:
                    # Also reached for select multiples, which therefore
                    # keep their raw value next to the expanded choices.
                    flat_dict[key] = value
            except AttributeError:
                # Raised e.g. when the lookup result has no ``type`` or
                # the value has no ``split``; store the value unchanged.
                flat_dict[key] = value

        result.append(flat_dict)
    return result