예제 #1
0
    async def get_clan_members(self, clan_ids, sorted_by=None):
        username = Case(ClanMember.platform_id,
                        ((constants.PLATFORM_XBOX, Member.xbox_username),
                         (constants.PLATFORM_PSN, Member.psn_username),
                         (constants.PLATFORM_BLIZZARD, Member.psn_username),
                         (constants.PLATFORM_STEAM, Member.steam_username),
                         (constants.PLATFORM_STADIA, Member.stadia_username)))

        query = Member.select(
            Member, ClanMember, Clan, username.alias('username')).join(
                ClanMember).join(Clan).where(Clan.clan_id << clan_ids)

        if sorted_by == 'join_date':
            query = query.order_by(ClanMember.join_date)
        elif sorted_by == 'username':
            query = query.order_by(username)
        return await self.execute(query)
예제 #2
0
def sequential(*included_interviews,
               included_properties=None,
               client_as_numeric=True):
    """
    datasets.sequential(*included_interviews, included_properties) -> pandas.DataFrame
    Builds a sequential dataset with including those interviews specified in included_interviews and the
    properties specified in included_properties
    :param included_interviews: sequence of interviews to be included in the dataset
    :param included_properties: Sequence of CodingProperty whose data is to be included (can be ID as well)
    :return: pandas.DataFrame
    """

    # No need to include properties twice
    included_properties = sorted(set(included_properties))
    property_ctes = []
    display_names = []
    property_cases = []
    cast_columns = []
    property_query = UtteranceCode.select(UtteranceCode.utterance_id, PropertyValue.pv_value,
                                          CodingProperty.cp_data_type) \
        .join(PropertyValue)\
        .join(CodingProperty)

    STR_MSNG = '-999999999999999'
    NUM_MSNG = -999999999999999

    client_column = Cast(
        Interview.client_id,
        "INT").alias('client_id') if client_as_numeric else Interview.client_id

    # The dataset construction needs to be atomic to avoid race conditions
    with UtteranceCode._meta.database.atomic() as transaction:

        # Having the display name will be required for giving columns useful names, so fetch them
        cp_dict = {
            itm[0]: (
                itm[1],
                itm[2],
            )
            for itm in CodingProperty.select(
                CodingProperty.coding_property_id, CodingProperty.
                cp_display_name, CodingProperty.cp_data_type).where(
                    CodingProperty.coding_property_id.in_(
                        included_properties)).tuples().execute()
        }

        # Need a CTE for each property whose data is to be included, so construct queries and convert to CTE
        # Need to conditionally create a CAST expression as well because some properties are Numeric, some are STR
        for prop_pk in included_properties:
            cp_display_name, cp_data_type = cp_dict.get(
                int(prop_pk), (None, None))

            if cp_display_name is None:
                logging.warning(
                    f"CodingProperty with id of {prop_pk} not found. This data will not be included..."
                )
                continue

            if cp_data_type == 'numeric': cast_columns.append(cp_display_name)

            cte = property_query.where(PropertyValue.coding_property_id == prop_pk)\
                                .cte(f"cte_{cp_display_name}", columns=['utterance_id', cp_display_name, 'cp_data_type'])
            data_field = getattr(cte.c, cp_display_name)
            property_ctes.append(cte)

            pc = Case(None, ((data_field.is_null(), STR_MSNG), ), data_field)
            property_cases.append(pc)
            display_names.append(cp_display_name)

        # The outer query will select the Utterances of the interview.
        # any CTE will match on the Utterannce.utterance_id field and insert the appropriate fields with codes
        # outer query needs to include the fields of the CTE as well, so start there
        basic_query = Interview.select(Interview.interview_name, Interview.rater_id, client_column,
                                       Interview.session_number, Utterance.utt_line,
                                       Utterance.utt_enum, Utterance.utt_role,
                                       *(Cast(pc, "FLOAT").alias(name) if name in cast_columns
                                         else pc.alias(name) for name, pc in zip(display_names, property_cases)),
                                       Utterance.utt_text, Utterance.utt_start_time, Utterance.utt_end_time)\
                               .join(Utterance)

        # Once the basic query is constructed, the joins need to be added into the query
        # so that the fields of the CTE can be queried property
        for name, cte in zip(display_names, property_ctes):
            basic_query = basic_query.join(
                cte,
                JOIN.LEFT_OUTER,
                on=(Utterance.utterance_id == cte.c.utterance_id))

        # Final step of query preparation is to add in the CTE themselves and narrow the results
        basic_query = basic_query.with_cte(*property_ctes)

        basic_query = basic_query.where(Interview.interview_name.in_(included_interviews))\
            .order_by(client_column, Interview.session_number, Utterance.utt_enum)

        results = basic_query.tuples().execute()
        columns = [itm[0] for itm in results.cursor.description]
        df = pandas.DataFrame(data=results,
                              columns=columns).replace([NUM_MSNG, STR_MSNG],
                                                       [NaN, ''])

    return df