Ejemplo n.º 1
0
    def _get_multi_choice_data(
            self, question_metadata: QuestionMetadata
    ) -> Series:
        """
        Merge the columns of a multi-choice question into a single Series.

        Columns of ``survey_data`` are selected by their level-0 label: by
        equality with ``question_metadata.text`` when no expression is set,
        otherwise by ``match(question_metadata.expression, label)``.

        :param question_metadata: Metadata of the question to collect.
        :return: Series indexed like ``survey_data`` and named after the
                 level-0 label of the first selected column. Each value is
                 the row's non-null cells (as strings) joined with
                 CATEGORY_SPLITTER, or nan when the row has no non-null cell.
        :raises ValueError: If no column is selected.
        """
        expression = question_metadata.expression
        select_by_text = expression is None

        def is_selected(label):
            # truthiness of the returned value decides column selection
            if select_by_text:
                return label == question_metadata.text
            return match(expression, label)

        columns = self.survey_data.columns
        column_labels = zip(
            columns.get_level_values(0).to_list(),
            columns.get_level_values(1).to_list(),
        )
        match_cols = [
            (top, sub) for top, sub in column_labels if is_selected(top)
        ]
        if not match_cols:
            if select_by_text:
                message = (
                    f'Could not find any columns matching '
                    f'"{question_metadata.text}"'
                )
            else:
                message = (
                    f'Could not match expression '
                    f'"{expression}" '
                    f'for MultiChoice question "{question_metadata}"'
                )
            raise ValueError(message)

        def merge_row(row):
            # rows without any answer stay null instead of becoming ''
            answered = row.dropna()
            if len(answered):
                return CATEGORY_SPLITTER.join(answered.astype(str))
            return nan

        merged = self.survey_data[match_cols].apply(merge_row, axis=1)
        return Series(
            name=match_cols[0][0],
            index=self.survey_data.index,
            data=merged
        )
Ejemplo n.º 2
0
 def replace_item(item: str):
     """
     Apply ``replacements`` to each category of a joined answer string.

     :param item: Categories joined with CATEGORY_SPLITTER, or a null value.
     :return: nan for a null ``item``; otherwise the string re-joined with
              CATEGORY_SPLITTER after swapping every part that is a key of
              ``replacements`` for its mapped value.
     """
     if isnull(item):
         return nan
     swapped = []
     for part in item.split(CATEGORY_SPLITTER):
         if part in replacements.keys():
             swapped.append(replacements[part])
         else:
             # parts without a replacement pass through unchanged
             swapped.append(part)
     return CATEGORY_SPLITTER.join(swapped)
Ejemplo n.º 3
0
        def rank_choices(value: str):
            """
            Order the choices of a 'choice:rank | choice:rank' string by rank.

            :param value: Entries separated by ' | ', each of the form
                          '<choice>:<integer rank>'.
            :return: The choices joined with CATEGORY_SPLITTER in ascending
                     rank order (ties are ordered by choice text).
            """
            rank_choice_pairs = []
            for entry in value.split(' | '):
                fields = entry.split(':')
                # (rank, choice) tuples sort by rank first, then by choice
                rank_choice_pairs.append((int(fields[1]), fields[0]))
            rank_choice_pairs.sort()
            return CATEGORY_SPLITTER.join(
                choice for _, choice in rank_choice_pairs
            )
Ejemplo n.º 4
0
 def get_selected(item: str):
     """
     Split a '; '-delimited answer into known categories and re-join them.

     The text is consumed from the left: on each pass every category in
     ``categories`` is tried in order and removed from the front of the
     remaining text when it is followed by '; ' or is all that remains.

     :param item: Answer text made of categories separated by '; ', or a
                  null value.
     :return: nan for a null ``item``; otherwise the recognised categories
              joined with CATEGORY_SPLITTER.
     :raises ValueError: If the remaining text does not start with any
                         category. Without this check the loop would never
                         terminate for such input.
     """
     if isnull(item):
         return nan
     selected = []
     while len(item) > 0:
         length_before_pass = len(item)
         for category in categories:
             if item == category:
                 selected.append(category)
                 item = ''
             elif item.startswith(category + '; '):
                 selected.append(category)
                 item = item[len(category) + 2:]
         if len(item) == length_before_pass:
             # every successful match shortens item, so an unchanged length
             # means no category can ever match and we must stop
             raise ValueError(
                 f'Could not match any category at the start of "{item}"'
             )
     return CATEGORY_SPLITTER.join(selected)
Ejemplo n.º 5
0
    def _get_multi_choice_data(self,
                               question_metadata: QuestionMetadata) -> Series:
        """
        Merge the columns of a multi-choice question into a single Series.

        :param question_metadata: Metadata of the question; its
                                  ``expression`` is matched against each
                                  column label of ``survey_data``.
        :return: For each row, the non-null cells of the matching columns
                 (as strings) joined with CATEGORY_SPLITTER.
        :raises ValueError: If the metadata has no expression, or if the
                            expression matches no column.
        """
        pattern = question_metadata.expression
        if pattern is None:
            raise ValueError('Need a regular expression to match '
                             'MultiChoice question columns.')
        match_cols = list(filter(
            lambda column: match(pattern, column),
            self.survey_data.columns
        ))
        if not match_cols:
            raise ValueError(
                f'Could not match expression "{pattern}" '
                f'for MultiChoice question "{question_metadata}"')

        def join_answers(row):
            # null cells are dropped before joining
            return CATEGORY_SPLITTER.join(row.dropna().astype(str))

        return self.survey_data[match_cols].apply(join_answers, axis=1)
Ejemplo n.º 6
0
    def _get_multi_choice_data(
            self, question_metadata: QuestionMetadata
    ) -> Series:
        """
        Merge the columns of a multi-choice question into a single Series.

        If the question's text is listed in ``questions_metadata`` and its
        metadata row has a true ``other`` flag, every non-null answer that is
        not one of the question's known category values is replaced by
        'Other', and an 'Other' row is added to ``orders_metadata``.

        :param question_metadata: Metadata of the question to collect.
        :return: For each row, the non-null answers (as strings) joined with
                 CATEGORY_SPLITTER.
        """
        # local import keeps this method independent of which pandas names
        # the module imports at file level
        import pandas as pd

        # get data
        data: DataFrame = self.survey_data[question_metadata.text]
        # replace other values
        if question_metadata.text in self.questions_metadata['text'].to_list():
            # this if condition excludes repeated questions that have had their
            # text changed
            metadata: Series = self.questions_metadata.loc[
                self.questions_metadata['text'] == question_metadata.text
            ].iloc[0]
            # explicit comparison on purpose: a NaN flag must count as false
            if 'other' in metadata.keys() and metadata['other'] == True:  # noqa: E712
                category_name = metadata['categories']
                categories = self.orders_metadata.loc[
                    self.orders_metadata['category'] == category_name,
                    'value'
                ].to_list()

                def replace_other(value):
                    # nulls and known categories are kept as they are
                    if isnull(value) or value in categories:
                        return value
                    return 'Other'

                # element-wise replacement; per-column Series.map is used
                # because DataFrame.applymap is deprecated in recent pandas
                data = data.apply(lambda column: column.map(replace_other))
                # update categories in orders_metadata (DataFrame.append was
                # removed in pandas 2.0, so concat a one-row frame instead)
                # NOTE(review): the row is filed under question_metadata.name
                # while the categories above were looked up under
                # metadata['categories'] - confirm this is intended.
                other_row = pd.DataFrame([{'category': question_metadata.name,
                                           'value': 'Other'}])
                self.orders_metadata = pd.concat(
                    [self.orders_metadata, other_row],
                    ignore_index=True
                )
        # merge multi-choice questions to single column
        return data.apply(
            lambda row: CATEGORY_SPLITTER.join(row.dropna().astype(str)),
            axis=1
        )
Ejemplo n.º 7
0
    def _get_ranked_choice_data(self,
                                question_metadata: QuestionMetadata) -> list:
        """
        Merge the columns of a ranked-choice question into one value per row.

        Each column whose label matches ``question_metadata.expression``
        stands for one choice (named via ``_get_ranked_choice_name``) and
        holds the rank given to that choice.

        :param question_metadata: Metadata of the question to collect.
        :return: One string per row of ``survey_data``: the choice names
                 joined with CATEGORY_SPLITTER in ascending rank order.
        :raises ValueError: If the metadata has no expression.
        """
        pattern = question_metadata.expression
        if pattern is None:
            raise ValueError('Need a regular expression to match '
                             'RankedChoice question columns.')
        match_cols = []
        for column in self.survey_data.columns:
            if match(pattern, column):
                match_cols.append(column)
        choices = list(map(self._get_ranked_choice_name, match_cols))

        def order_by_rank(ranks):
            # sorting (rank, choice) pairs orders the choices by rank
            ordered = sorted(zip(ranks, choices))
            return CATEGORY_SPLITTER.join(str(choice) for _, choice in ordered)

        return [
            order_by_rank(row.tolist())
            for _, row in self.survey_data[match_cols].iterrows()
        ]
Ejemplo n.º 8
0
    def stack(self, name: str,
              drop_na: bool = True,
              null_category: Optional[str] = None,
              name_index: Optional[str] = None,
              key_index: Optional[str] = None,
              number_index: Optional[str] = None,
              **kwargs):
        """
        Stack the responses to each question in the group into a new question.

        :param name: Name for the new question.
        :param drop_na: For MultiChoiceQuestions, whether to drop rows where the
                        respondent was not asked the question.
        :param null_category: For MultiChoiceQuestions, optional response to
                              exclude from the new question group.
        :param name_index: Name of a new index column to create with values
                           corresponding to the name of the question the data
                           comes from.
        :param key_index: Name of a new index column to create with values
                          corresponding to the question's key in the group.
        :param number_index: Name of a new index column to create with values
                             corresponding to the number of the question the
                             data comes from.
        :param kwargs: Optional new attribute values to override in the new
                       question.
        :return: A shallow copy of the first question in the group, renamed
                 to ``name`` and holding the stacked data.
        :raises ValueError: If ``name`` is an empty string.
        """
        if name == '':
            raise ValueError('Name cannot be empty.')
        # create index names: the original index first, then one level per
        # requested extra index, in the same order the levels are built below
        index_names = [self._questions[0].data.index.name]
        index_names.extend(
            extra_name
            for extra_name in (name_index, key_index, number_index)
            if extra_name is not None
        )

        question_datas = []
        question: MultiChoiceQuestion

        for question in self._questions:
            # create data
            # presumably one 0/1 indicator column per choice, named after
            # the choice - confirm against make_features
            question_data = question.make_features(
                naming='{{choice}}', drop_na=drop_na,
                null_category=null_category
            )
            # create index
            n_rows = len(question_data)
            index_levels = [question_data.index.to_list()]
            if name_index is not None:
                index_levels.append([question.name] * n_rows)
            if key_index is not None:
                question_key = [k for k in self._item_dict.keys()
                                if self._item_dict[k] == question][0]
                index_levels.append([question_key] * n_rows)
            if number_index is not None:
                index_levels.append(
                    [self._questions.index(question)] * n_rows
                )
            if len(index_levels) == 1:
                # no extra levels requested: keep a flat index
                question_data.index = Index(data=index_levels[0],
                                            name=index_names[0])
            else:
                question_data.index = MultiIndex.from_tuples(
                    tuples=list(zip(*index_levels)), names=index_names
                )
            question_datas.append(question_data)
        new_data = concat(question_datas, axis=0)
        # turn each indicator into its column label (1) or '' (0);
        # DataFrame.items() replaces iteritems(), removed in pandas 2.0
        new_data = DataFrame({
            column: column_data.replace({1: column, 0: ''})
            for column, column_data in new_data.items()
        })
        # join the labels kept in each row into a single string
        new_data = Series(
            data=[
                CATEGORY_SPLITTER.join(row.replace('', nan).dropna().to_list())
                for _, row in new_data.iterrows()
            ],
            index=new_data.index, name=name
        )

        # copy question
        new_question = copy(self._questions[0])
        new_question.name = name
        new_question._data = new_data
        if isinstance(new_question, MultiChoiceQuestion):
            if null_category is not None:
                new_question._categories = [
                    c for c in new_question._categories
                    if c != null_category
                ]
        for kw, arg in kwargs.items():
            setattr(new_question, kw, arg)
        return new_question