async def get_clan_members(self, clan_ids, sorted_by=None): username = Case(ClanMember.platform_id, ((constants.PLATFORM_XBOX, Member.xbox_username), (constants.PLATFORM_PSN, Member.psn_username), (constants.PLATFORM_BLIZZARD, Member.psn_username), (constants.PLATFORM_STEAM, Member.steam_username), (constants.PLATFORM_STADIA, Member.stadia_username))) query = Member.select( Member, ClanMember, Clan, username.alias('username')).join( ClanMember).join(Clan).where(Clan.clan_id << clan_ids) if sorted_by == 'join_date': query = query.order_by(ClanMember.join_date) elif sorted_by == 'username': query = query.order_by(username) return await self.execute(query)
def sequential(*included_interviews, included_properties=None, client_as_numeric=True): """ datasets.sequential(*included_interviews, included_properties) -> pandas.DataFrame Builds a sequential dataset with including those interviews specified in included_interviews and the properties specified in included_properties :param included_interviews: sequence of interviews to be included in the dataset :param included_properties: Sequence of CodingProperty whose data is to be included (can be ID as well) :return: pandas.DataFrame """ # No need to include properties twice included_properties = sorted(set(included_properties)) property_ctes = [] display_names = [] property_cases = [] cast_columns = [] property_query = UtteranceCode.select(UtteranceCode.utterance_id, PropertyValue.pv_value, CodingProperty.cp_data_type) \ .join(PropertyValue)\ .join(CodingProperty) STR_MSNG = '-999999999999999' NUM_MSNG = -999999999999999 client_column = Cast( Interview.client_id, "INT").alias('client_id') if client_as_numeric else Interview.client_id # The dataset construction needs to be atomic to avoid race conditions with UtteranceCode._meta.database.atomic() as transaction: # Having the display name will be required for giving columns useful names, so fetch them cp_dict = { itm[0]: ( itm[1], itm[2], ) for itm in CodingProperty.select( CodingProperty.coding_property_id, CodingProperty. cp_display_name, CodingProperty.cp_data_type).where( CodingProperty.coding_property_id.in_( included_properties)).tuples().execute() } # Need a CTE for each property whose data is to be included, so construct queries and convert to CTE # Need to conditionally create a CAST expression as well because some properties are Numeric, some are STR for prop_pk in included_properties: cp_display_name, cp_data_type = cp_dict.get( int(prop_pk), (None, None)) if cp_display_name is None: logging.warning( f"CodingProperty with id of {prop_pk} not found. This data will not be included..." ) continue if cp_data_type == 'numeric': cast_columns.append(cp_display_name) cte = property_query.where(PropertyValue.coding_property_id == prop_pk)\ .cte(f"cte_{cp_display_name}", columns=['utterance_id', cp_display_name, 'cp_data_type']) data_field = getattr(cte.c, cp_display_name) property_ctes.append(cte) pc = Case(None, ((data_field.is_null(), STR_MSNG), ), data_field) property_cases.append(pc) display_names.append(cp_display_name) # The outer query will select the Utterances of the interview. # any CTE will match on the Utterannce.utterance_id field and insert the appropriate fields with codes # outer query needs to include the fields of the CTE as well, so start there basic_query = Interview.select(Interview.interview_name, Interview.rater_id, client_column, Interview.session_number, Utterance.utt_line, Utterance.utt_enum, Utterance.utt_role, *(Cast(pc, "FLOAT").alias(name) if name in cast_columns else pc.alias(name) for name, pc in zip(display_names, property_cases)), Utterance.utt_text, Utterance.utt_start_time, Utterance.utt_end_time)\ .join(Utterance) # Once the basic query is constructed, the joins need to be added into the query # so that the fields of the CTE can be queried property for name, cte in zip(display_names, property_ctes): basic_query = basic_query.join( cte, JOIN.LEFT_OUTER, on=(Utterance.utterance_id == cte.c.utterance_id)) # Final step of query preparation is to add in the CTE themselves and narrow the results basic_query = basic_query.with_cte(*property_ctes) basic_query = basic_query.where(Interview.interview_name.in_(included_interviews))\ .order_by(client_column, Interview.session_number, Utterance.utt_enum) results = basic_query.tuples().execute() columns = [itm[0] for itm in results.cursor.description] df = pandas.DataFrame(data=results, columns=columns).replace([NUM_MSNG, STR_MSNG], [NaN, '']) return df