def user_pull():
    """
    Tabulate Mixpanel people profiles by email, username, distinct ID and MRR.

    Profiles are grouped on ``$email``, ``$username``, ``$distinct_id`` and
    ``hs_mrr``; every distinct combination becomes one output row.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret:
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email``, ``user_id`` (taken from ``$username``),
        ``distinct_id`` and ``hs_mrr``.
    """
    #people profiles grouped on the four properties of interest
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=[
            "e.properties.$email",
            "e.properties.$username",
            "e.properties.$distinct_id",
            "e.properties.hs_mrr",
        ],
        accumulator=Reducer.count(),
    )

    #each result row carries its group values, in key order, under 'key'
    records = [(row['key'][0], row['key'][1], row['key'][2], row['key'][3])
               for row in grouped_profiles.send()]

    #one column per grouped property
    return pd.DataFrame(data={
        'email': [rec[0] for rec in records],
        'user_id': [rec[1] for rec in records],
        'distinct_id': [rec[2] for rec in records],
        'hs_mrr': [rec[3] for rec in records],
    })
def AppStart_pull(from_date, to_date):
    """
    Count "AppStart" events per app user over a date range.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.

    Global Variables
    ----------
    api_user_secret: str
        Credential handed to ``JQL`` for the Mixpanel User Project.

    Returns
    ----------
    dataframe
        Columns ``user_id`` (integer, from ``zUserId``) and ``AppStart``
        (the count reducer value for that user).
    """
    #event stream restricted to AppStart within the requested window
    appstart_events = Events({
        'event_selectors': [{'event': "AppStart"}],
        'from_date': from_date,
        'to_date': to_date,
    })
    per_user_counts = JQL(api_user_secret, events=appstart_events).group_by(
        keys=["e.properties.zUserId"], accumulator=Reducer.count())

    #(user ID, count) pairs; groups without a zUserId are skipped
    pairs = [(int(row['key'][0]), row['value'])
             for row in per_user_counts.send()
             if row['key'][0] is not None]

    #assemble the dataframe, drop incomplete rows and force integer IDs
    df_AppStart = pd.DataFrame({
        'user_id': [uid for uid, _ in pairs],
        'AppStart': [count for _, count in pairs],
    })
    df_AppStart = df_AppStart.dropna()
    df_AppStart['user_id'] = df_AppStart['user_id'].astype(int)
    return df_AppStart
def get_new_signup(yesterday):
    """
    Get the creators attached to yesterday's "New Signup Web" events.

    The query joins "New Signup Web" events with people profiles; for every
    joined row that carries a ``user``, the profile's ``$username`` and
    ``$email`` are recorded together as one output row.

    Parameters
    ----------
    yesterday: date
        Yesterday's date. Used as both start and end date of the query.

    Global Variables
    ----------
    api_creator_secret:
        Mixpanel Creator Project API key. Use to make API calls to Mixpanel
        Creator Project.

    Returns
    ----------
    dataframe
        Columns ``user_id`` (from ``$username``) and ``email`` (from
        ``$email``). A profile exposing only one of the two properties yields
        ``None`` in the other column.
    """
    # New Signup Web events joined with people profiles
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "New Signup Web"
                    }],
                    'from_date': yesterday,
                    'to_date': yesterday
                }),
                people=People({'user_selectors': []}),
                join_params={
                    'type': 'full',
                    'selectors': [{
                        'event': "New Signup Web"
                    }]
                })

    #store user id and email; both lists are always appended together so
    #the two columns stay aligned and of equal length
    userid_list = []
    email_list = []
    for row in query.send():
        #joined rows without a people profile carry nothing to record
        if 'user' not in row:
            continue
        properties = row['user']['properties']
        #skip profiles that expose neither identifier
        if '$username' not in properties and '$email' not in properties:
            continue
        userid_list.append(properties.get('$username'))
        email_list.append(properties.get('$email'))

    #create dataframe
    data = {'user_id': userid_list, 'email': email_list}
    df_new_users = pd.DataFrame(data=data)
    return df_new_users
def New_Signup_Web_pull(from_date, to_date):
    """
    Count "New Signup Web" events per creator over a date range.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``user_id`` (integer, from the ``userId`` event property) and
        ``new_sign_up`` (the count reducer value for that user).
    """
    #sign-up events inside the requested window
    signup_events = Events({
        'event_selectors': [{'event': "New Signup Web"}],
        'from_date': from_date,
        'to_date': to_date,
    })
    #grouped on userId
    per_user_counts = JQL(api_creator_secret, events=signup_events).group_by(
        keys=["e.properties.userId"], accumulator=Reducer.count())

    #(user ID, count) pairs; groups without a userId are skipped
    pairs = [(int(row['key'][0]), row['value'])
             for row in per_user_counts.send()
             if row['key'][0] is not None]

    return pd.DataFrame({
        'user_id': [uid for uid, _ in pairs],
        'new_sign_up': [count for _, count in pairs],
    })
def creator_pull():
    """
    Pull app creators' email, user ID and HubSpot owner from Mixpanel.

    People profiles are grouped on ``$email``, ``$username`` and
    ``hs_owner``. Groups lacking an email or a username are skipped: the
    username is converted with ``int()``, which cannot accept ``None``.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret: str
        Client secret used to make calls to Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email``, ``user_id`` (integer, from ``$username``) and
        ``hs_owner``, restricted to rows whose ``hs_owner`` is not null.
    """
    #generate JQL query
    query = JQL(api_creator_secret,
                people=People({'user_selectors': [{}]})).group_by(
                    keys=[
                        "e.properties.$email", "e.properties.$username",
                        "e.properties.hs_owner"
                    ],
                    accumulator=Reducer.count())

    #store emails, user IDs, and HubSpot owners in lists
    email_list = []
    user_id_list = []
    hs_owner_list = []
    for row in query.send():
        key = row['key']
        email, username, hs_owner = key[0], key[1], key[2]
        #int(None) raises TypeError, so the username must be guarded as well
        if email is None or username is None:
            continue
        email_list.append(email)
        user_id_list.append(int(username))
        hs_owner_list.append(hs_owner)

    #create dataframe
    data = {
        'email': email_list,
        'user_id': user_id_list,
        'hs_owner': hs_owner_list
    }
    df_creators = pd.DataFrame(data=data)

    #only keep users with HubSpot owner field. Indicating that it exists in hubspot
    df_creators = df_creators[df_creators.hs_owner.notnull()]
    return df_creators
def app_creator_pull():
    """
    Tabulate creators' email, owner ID, distinct ID and active milestone.

    People profiles are grouped on ``$email``, ``$username``,
    ``$distinct_id`` and ``active_milestone``. Groups without a
    ``$username`` are skipped.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email``, ``app_owner_id`` (integer, from ``$username``),
        ``distinct_id`` and ``active_milestone``.
    """
    #people profiles grouped on the four properties of interest
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=[
            "e.properties.$email",
            "e.properties.$username",
            "e.properties.$distinct_id",
            "e.properties.active_milestone",
        ],
        accumulator=Reducer.count(),
    )

    #one tuple per kept group, with the username cast to int
    records = []
    for row in grouped_profiles.send():
        if row['key'][1] is None:
            continue
        records.append((row['key'][0], int(row['key'][1]), row['key'][2],
                        row['key'][3]))

    return pd.DataFrame(data={
        'email': [rec[0] for rec in records],
        'app_owner_id': [rec[1] for rec in records],
        'distinct_id': [rec[2] for rec in records],
        'active_milestone': [rec[3] for rec in records],
    })
def user_pull():
    """
    Tabulate Mixpanel people profiles by email, company domain and distinct ID.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email``, ``company_domain`` and ``distinct_id``; one row
        per distinct combination of the three grouped properties.
    """
    #people profiles grouped on the three properties of interest
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=[
            "e.properties.$email",
            "e.properties.company_domain",
            "e.properties.$distinct_id",
        ],
        accumulator=Reducer.count(),
    )

    #group values arrive under 'key' in the order the keys were declared
    records = [(row['key'][0], row['key'][1], row['key'][2])
               for row in grouped_profiles.send()]

    return pd.DataFrame(data={
        'email': [rec[0] for rec in records],
        'company_domain': [rec[1] for rec in records],
        'distinct_id': [rec[2] for rec in records],
    })
def creator_pull():
    """
    Tabulate creators' email, user ID and user journey stage from Mixpanel.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email``, ``creator_user_id`` (taken from ``$username``) and
        ``user_journey_stage``.
    """
    #people profiles grouped on the three properties of interest
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=[
            "e.properties.$email",
            "e.properties.$username",
            "e.properties.user_journey_stage",
        ],
        accumulator=Reducer.count(),
    )

    #group values arrive under 'key' in the order the keys were declared
    records = [(row['key'][0], row['key'][1], row['key'][2])
               for row in grouped_profiles.send()]

    return pd.DataFrame(data={
        'email': [rec[0] for rec in records],
        'creator_user_id': [rec[1] for rec in records],
        'user_journey_stage': [rec[2] for rec in records],
    })
def creator_pull():
    """
    Map Mixpanel distinct IDs to creator user IDs.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``distinct_id`` and ``user_id`` (taken from ``$username``).
    """
    #people profiles grouped on distinct ID and username
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=["e.properties.$distinct_id", "e.properties.$username"],
        accumulator=Reducer.count(),
    )

    #(distinct ID, user ID) pairs in result order
    pairs = [(row['key'][0], row['key'][1])
             for row in grouped_profiles.send()]

    return pd.DataFrame(data={
        'distinct_id': [pair[0] for pair in pairs],
        'user_id': [pair[1] for pair in pairs],
    })
def pull_creators():
    """
    Map creator emails to Mixpanel distinct IDs, dropping incomplete rows.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret:
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email`` and ``mixpanel_distinct_id``; rows with a missing
        value in either column are removed.
    """
    #people profiles grouped on email and distinct ID
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=["e.properties.$email", "e.properties.$distinct_id"],
        accumulator=Reducer.count(),
    )

    #(email, distinct ID) pairs in result order
    pairs = [(row['key'][0], row['key'][1])
             for row in grouped_profiles.send()]

    df_creators_mixpanel = pd.DataFrame(data={
        'email': [pair[0] for pair in pairs],
        'mixpanel_distinct_id': [pair[1] for pair in pairs],
    })

    #discard creators for whom either value is missing
    return df_creators_mixpanel.dropna()
def user_pull():
    """
    Map creator emails to user IDs from Mixpanel people profiles.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret:
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email`` and ``user_id`` (taken from ``$username``).
    """
    #people profiles grouped on email and username
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=["e.properties.$email", "e.properties.$username"],
        accumulator=Reducer.count(),
    )

    #(email, user ID) pairs in result order
    pairs = [(row['key'][0], row['key'][1])
             for row in grouped_profiles.send()]

    return pd.DataFrame(data={
        'email': [pair[0] for pair in pairs],
        'user_id': [pair[1] for pair in pairs],
    })
def creator_pull():
    """
    Tabulate creators' email and unsubscribe status from Mixpanel.

    Only groups where both ``$email`` and ``$unsubscribed`` are present are
    kept.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email`` and ``unsubscribed``.
    """
    #people profiles grouped on email and unsubscribe flag
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=["e.properties.$email", "e.properties.$unsubscribed"],
        accumulator=Reducer.count(),
    )

    #keep only accounts that have both an email and an unsubscribe status
    pairs = []
    for row in grouped_profiles.send():
        email, unsubscribed = row['key'][0], row['key'][1]
        if email is None or unsubscribed is None:
            continue
        pairs.append((email, unsubscribed))

    return pd.DataFrame(data={
        'email': [pair[0] for pair in pairs],
        'unsubscribed': [pair[1] for pair in pairs],
    })
def app_user_pull():
    """
    Map app users' emails to user IDs from Mixpanel people profiles.

    Parameters
    ----------

    Global Variables
    ----------
    api_user_secret: str
        Credential handed to ``JQL`` for the Mixpanel User Project.

    Returns
    ----------
    dataframe
        Columns ``user_email`` and ``app_user_id`` (taken from
        ``$username``).
    """
    #people profiles grouped on email and username
    grouped_profiles = JQL(
        api_user_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=["e.properties.$email", "e.properties.$username"],
        accumulator=Reducer.count(),
    )

    #(email, user ID) pairs in result order
    pairs = [(row['key'][0], row['key'][1])
             for row in grouped_profiles.send()]

    return pd.DataFrame(data={
        'user_email': [pair[0] for pair in pairs],
        'app_user_id': [pair[1] for pair in pairs],
    })
def creator_pull():
    """
    Map creator emails to Mixpanel distinct IDs.

    Groups without an ``$email`` are skipped.

    Parameters
    ----------

    Global Variables
    ----------
    api_creator_secret:
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email`` and ``distinct_id``.
    """
    #people profiles grouped on email and distinct ID
    grouped_profiles = JQL(
        api_creator_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=["e.properties.$email", "e.properties.$distinct_id"],
        accumulator=Reducer.count(),
    )

    #(email, distinct ID) pairs for groups that have an email
    pairs = [(row['key'][0], row['key'][1])
             for row in grouped_profiles.send()
             if row['key'][0] is not None]

    return pd.DataFrame(data={
        'email': [pair[0] for pair in pairs],
        'distinct_id': [pair[1] for pair in pairs],
    })
def user_appstart_pull(from_date, to_date):
    """
    Count "AppStart" events per (app user, app owner) pair over a date range.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.

    Global Variables
    ----------
    api_user_secret: str
        Credential handed to ``JQL`` for the Mixpanel User Project.

    Returns
    ----------
    df_user_AppStart: dataframe
        Columns ``app_user_id`` (from ``zUserId``), ``creator_id`` (from
        ``zAppOwnerId``) and ``AppStart`` (the count reducer value), limited
        to rows where both IDs are greater than 1.
    """
    #AppStart events inside the requested window
    appstart_events = Events({
        'event_selectors': [{'event': "AppStart"}],
        'from_date': from_date,
        'to_date': to_date,
    })
    grouped_events = JQL(api_user_secret, events=appstart_events).group_by(
        keys=["e.properties.zUserId", "e.properties.zAppOwnerId"],
        accumulator=Reducer.count())

    #one (app user, owner, count) tuple per group that has both IDs
    records = []
    for row in grouped_events.send():
        app_user, owner = row['key'][0], row['key'][1]
        if app_user is None or owner is None:
            continue
        records.append((int(app_user), int(owner), row['value']))

    df_AppStart = pd.DataFrame({
        'app_user_id': [rec[0] for rec in records],
        'creator_id': [rec[1] for rec in records],
        'AppStart': [rec[2] for rec in records],
    })

    #IDs of 1 or below are treated as invalid and removed
    has_valid_ids = (df_AppStart['app_user_id'] > 1) & (df_AppStart['creator_id'] > 1)
    return df_AppStart[has_valid_ids]
def EditorAction_pull(from_date, to_date):
    """
    Count "EditorAction" events with ``zEvent == 'Save'`` per creator email.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``email`` (from ``zUserEmail``), ``zevent`` and
        ``EditorAction`` (the count reducer value), limited to rows whose
        ``zevent`` equals ``'Save'``.
    """
    #EditorAction events inside the requested window
    editor_events = Events({
        'event_selectors': [{'event': "EditorAction"}],
        'from_date': from_date,
        'to_date': to_date,
    })
    grouped_events = JQL(api_creator_secret, events=editor_events).group_by(
        keys=["e.properties.zUserEmail", "e.properties.zEvent"],
        accumulator=Reducer.count())

    #(email, zEvent, count) tuples; groups without an email are skipped
    records = [(row['key'][0], row['key'][1], row['value'])
               for row in grouped_events.send()
               if row['key'][0] is not None]

    df_editor_action = pd.DataFrame({
        'email': [rec[0] for rec in records],
        'zevent': [rec[1] for rec in records],
        'EditorAction': [rec[2] for rec in records],
    })

    #restrict to "Save" editor events
    is_save = df_editor_action['zevent'] == 'Save'
    return df_editor_action[is_save]
def app_user_pull():
    """
    Tabulate app users' identifiers and current creator / active-user flags.

    People profiles are grouped on ``$email``, ``$username``,
    ``$distinct_id``, ``creator`` and ``active_user``. Rows with any missing
    value are dropped before the user ID is cast to integer.

    Parameters
    ----------

    Global Variables
    ----------
    api_user_secret: str
        Credential handed to ``JQL`` for the Mixpanel User Project.

    Returns
    ----------
    dataframe
        Columns ``email``, ``app_user_id`` (integer, from ``$username``),
        ``distinct_id``, ``creator_current`` and ``active_user_current``.
    """
    #people profiles grouped on the five properties of interest
    grouped_profiles = JQL(
        api_user_secret,
        people=People({'user_selectors': [{}]}),
    ).group_by(
        keys=[
            "e.properties.$email",
            "e.properties.$username",
            "e.properties.$distinct_id",
            "e.properties.creator",  #current creator status
            "e.properties.active_user",  #current active user status
        ],
        accumulator=Reducer.count(),
    )

    #group values arrive under 'key' in the order the keys were declared
    records = [(row['key'][0], row['key'][1], row['key'][2], row['key'][3],
                row['key'][4])
               for row in grouped_profiles.send()]

    df_app_users = pd.DataFrame(data={
        'email': [rec[0] for rec in records],
        'app_user_id': [rec[1] for rec in records],
        'distinct_id': [rec[2] for rec in records],
        'creator_current': [rec[3] for rec in records],
        'active_user_current': [rec[4] for rec in records],
    })

    #incomplete rows go first so the integer cast never sees a missing ID
    df_app_users = df_app_users.dropna()
    df_app_users['app_user_id'] = df_app_users['app_user_id'].astype(int)
    return df_app_users
def get_users_stats_from_mixpanel(user_data_dict,
                                  is_single_user=False,
                                  include_stats=False):
    """
    This method will fetch user stats from MixPanel using JQL and from the
    Candidate, TalentPipeline, EmailCampaign and EmailCampaignSend tables
    using SQL.

    When ``include_stats`` is false the input is returned untouched. Otherwise
    six statistics are first zeroed and then filled in place:
    ``logins_per_month`` / ``searches_per_month`` from Mixpanel "Login" /
    "Search" events of the last 30 days, and ``candidates_count``,
    ``pipelines_count``, ``campaigns_count``, ``emails_count`` from the
    database.

    :param user_data_dict: With ``is_single_user`` a single user's data dict
        (must contain ``'id'``); otherwise a dict mapping user id to that
        user's data dict
    :param is_single_user: Are we getting stats for a single user
    :param include_stats: Include statistics of user in response
    :return: The same ``user_data_dict`` object, updated in place
    :rtype: dict
    :raises InvalidUsage: if building or running the Mixpanel query fails
    """
    if not include_stats:
        return user_data_dict

    stat_keys = ('candidates_count', 'logins_per_month', 'searches_per_month',
                 'campaigns_count', 'pipelines_count', 'emails_count')

    # Zero every statistic up front so each key exists even if no data comes back.
    # .values() (rather than the Python-2-only .iteritems()) works on both 2 and 3.
    stat_targets = [user_data_dict] if is_single_user else user_data_dict.values()
    for user_data in stat_targets:
        for stat_key in stat_keys:
            user_data[stat_key] = 0

    request_origin = request.environ.get('HTTP_ORIGIN', '')
    logger.info('Request Origin for users GET request is: %s', request_origin)

    # Without an Origin header fall back to an environment-based URL prefix
    if not request_origin:
        url_prefix = 'staging.gettalent' if app.config[
            TalentConfigKeys.ENV_KEY] in (
                TalentEnvs.QA, TalentEnvs.DEV,
                TalentEnvs.JENKINS) else 'app.gettalent'
    else:
        parsed_url = urlparse(request_origin)
        url_prefix = parsed_url.netloc

    # Query window: the last 30 days up to now (UTC)
    to_date = datetime.utcnow()
    from_date = to_date - timedelta(days=30)

    if is_single_user:
        selector = '"{}" in properties["$current_url"] and properties["id"] == {}'.format(
            url_prefix, user_data_dict['id'])
    else:
        selector = '"{}" in properties["$current_url"]'.format(url_prefix)

    params = {
        'event_selectors': [{
            'event': 'Login',
            'selector': selector
        }, {
            'event': 'Search',
            'selector': selector
        }],
        'from_date': str(from_date.date()),
        'to_date': str(to_date.date())
    }

    try:
        query = JQL(app.config[TalentConfigKeys.MIXPANEL_API_KEY],
                    params).group_by(keys=["e.properties.id", "e.name"],
                                     accumulator=Reducer.count())
        # Materialize inside the try: if send() yields its rows lazily, request
        # errors would otherwise surface in the loop below, outside this handler.
        rows = list(query.send())
    except Exception as e:
        # Log the exception itself; e.message does not exist on Python 3
        logger.error(
            "Error while fetching user stats from MixPanel because: %s", e)
        raise InvalidUsage("Error while fetching user stats")

    # Each row is keyed by (user id, event name) and carries the event count
    for row in rows:
        user_dict_key = 'logins_per_month' if row['key'][
            1] == 'Login' else 'searches_per_month'
        if is_single_user and row['key'][0] == user_data_dict['id']:
            user_data_dict[user_dict_key] = row['value']
        elif (not is_single_user) and (row['key'][0] in user_data_dict):
            user_data_dict[row['key'][0]][user_dict_key] = row['value']

    # Get Candidate, Pipeline and Campaigns Stats of a User
    if is_single_user:
        user_data_dict['pipelines_count'] = TalentPipeline.query.filter_by(
            user_id=user_data_dict['id']).count()
        user_data_dict['campaigns_count'] = EmailCampaign.query.filter_by(
            user_id=user_data_dict['id']).count()
        user_data_dict['candidates_count'] = Candidate.query.filter_by(
            user_id=user_data_dict['id']).count()
        user_data_dict['emails_count'] = EmailCampaignSend.query.join(
            EmailCampaign).filter(
                EmailCampaign.user_id == user_data_dict['id']).count()
    else:
        # One grouped query per statistic, each yielding (user_id, count) pairs
        users_candidate_count = db.session.query(
            Candidate.user_id,
            func.count(Candidate.user_id)).group_by(Candidate.user_id).all()
        users_pipelines_count = db.session.query(
            TalentPipeline.user_id,
            func.count(TalentPipeline.user_id)).group_by(
                TalentPipeline.user_id).all()
        users_campaigns_count = db.session.query(
            EmailCampaign.user_id,
            func.count(EmailCampaign.user_id)).group_by(
                EmailCampaign.user_id).all()
        users_emails_count = EmailCampaignSend.query.join(
            EmailCampaign).with_entities(
                EmailCampaign.user_id,
                func.count(EmailCampaignSend.id)).group_by(
                    EmailCampaign.user_id).all()

        # Copy each count onto the matching user; ids absent from the input are ignored
        counts_by_stat = (
            ('candidates_count', users_candidate_count),
            ('pipelines_count', users_pipelines_count),
            ('campaigns_count', users_campaigns_count),
            ('emails_count', users_emails_count),
        )
        for stat_key, user_counts in counts_by_stat:
            for user_id, count in user_counts:
                if user_id in user_data_dict:
                    user_data_dict[user_id][stat_key] = count

    return user_data_dict
def New_Signup_Web_pull(from_date, to_date):
    """
    Count "New Signup Web" events per creator and event timestamp.

    Events are grouped on ``userId`` and the ISO-8601 string of the event
    time; the first ten characters of that string (``YYYY-MM-DD``) are parsed
    into the ``date`` column.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``app_owner_id`` (integer), ``date`` (datetime at midnight)
        and ``new_sign_up`` (the count reducer value), limited to rows whose
        ``app_owner_id`` is greater than 1.
    """
    #sign-up events inside the requested window
    signup_events = Events({
        'event_selectors': [{'event': "New Signup Web"}],
        'from_date': from_date,
        'to_date': to_date,
    })
    grouped_events = JQL(api_creator_secret, events=signup_events).group_by(
        keys=["e.properties.userId", "new Date(e.time).toISOString()"],
        accumulator=Reducer.count())

    #(owner ID, day, count) tuples; groups without a userId are skipped
    records = [(int(row['key'][0]),
                datetime.strptime(row['key'][1][:10], '%Y-%m-%d'),
                row['value'])
               for row in grouped_events.send()
               if row['key'][0] is not None]

    df_New_Signup_Web = pd.DataFrame({
        'app_owner_id': [rec[0] for rec in records],
        'date': [rec[1] for rec in records],
        'new_sign_up': [rec[2] for rec in records],
    })

    #IDs of 1 or below are not treated as actual IDs
    has_real_id = df_New_Signup_Web['app_owner_id'] > 1
    return df_New_Signup_Web[has_real_id]
def Editor_pull(from_date, to_date):
    """
    Count "Editor" events per creator and event timestamp.

    Events are grouped on ``zUserId`` and the ISO-8601 string of the event
    time; the first ten characters of that string (``YYYY-MM-DD``) are parsed
    into the ``Editor_datetime`` column.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.

    Global Variables
    ----------
    api_creator_secret: str
        Credential handed to ``JQL`` for the Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``user_id`` (integer), ``Editor_datetime`` (datetime at
        midnight) and ``Editor`` (the count reducer value).
    """
    #Editor events inside the requested window
    editor_events = Events({
        'event_selectors': [{'event': "Editor"}],
        'from_date': from_date,
        'to_date': to_date,
    })
    grouped_events = JQL(api_creator_secret, events=editor_events).group_by(
        keys=["e.properties.zUserId", "new Date(e.time).toISOString()"],
        accumulator=Reducer.count())

    #(user ID, day, count) tuples; groups without a zUserId are skipped
    records = [(int(row['key'][0]),
                datetime.strptime(row['key'][1][:10], '%Y-%m-%d'),
                row['value'])
               for row in grouped_events.send()
               if row['key'][0] is not None]

    return pd.DataFrame({
        'user_id': [rec[0] for rec in records],
        'Editor_datetime': [rec[1] for rec in records],
        'Editor': [rec[2] for rec in records],
    })
def user_appstart_pull(from_date, to_date):
    """
    Count distinct app users with an "AppStart" per app owner and day.

    Events are grouped on ``zUserId``, ``zAppOwnerId`` and the ISO-8601
    string of the event time. The timestamp is truncated to its day, rows are
    de-duplicated on (owner, day, app user), and the remaining app users are
    counted per (owner, day).

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.

    Global Variables
    ----------
    api_user_secret: str
        Credential handed to ``JQL`` for the Mixpanel User Project.

    Returns
    ----------
    df_user_AppStart: dataframe
        Columns ``user_id`` (from ``zAppOwnerId``), ``date`` and
        ``num_app_users``.
    """
    #AppStart events inside the requested window
    appstart_events = Events({
        'event_selectors': [{'event': "AppStart"}],
        'from_date': from_date,
        'to_date': to_date,
    })
    grouped_events = JQL(api_user_secret, events=appstart_events).group_by(
        keys=[
            "e.properties.zUserId",
            "e.properties.zAppOwnerId",
            "new Date(e.time).toISOString()",
        ],
        accumulator=Reducer.count())

    #one (app user, owner, day, count) tuple per group that has both IDs
    records = []
    for row in grouped_events.send():
        app_user, owner = row['key'][0], row['key'][1]
        if app_user is None or owner is None:
            continue
        records.append((int(app_user), int(owner),
                        datetime.strptime(row['key'][2][:10], '%Y-%m-%d'),
                        row['value']))

    df_AppStart = pd.DataFrame({
        'date': [rec[2] for rec in records],
        'app_user_id': [rec[0] for rec in records],
        'user_id': [rec[1] for rec in records],
        'AppStart': [rec[3] for rec in records],
    })

    #IDs of 1 or below are treated as invalid and removed
    has_valid_ids = (df_AppStart['app_user_id'] > 1) & (df_AppStart['user_id'] > 1)
    df_AppStart = df_AppStart[has_valid_ids]

    #grouping was per timestamp, so one app user can occupy several rows for
    #the same day; keep a single row per (owner, day, app user)
    df_AppStart = df_AppStart.drop_duplicates(
        ['user_id', 'date', 'app_user_id'])

    #number of app users per owner and day
    users_per_day = df_AppStart.groupby(['user_id', 'date'])['app_user_id'].count()
    df_user_AppStart = users_per_day.reset_index()
    return df_user_AppStart.rename(columns={'app_user_id': 'num_app_users'})
def New_Signup_App_pull(yesterday):
    """
    Count "New Signup App" events per app user and event timestamp for one day.

    Events are grouped on ``zUserId`` and the ISO-8601 string of the event
    time; the first ten characters of that string (``YYYY-MM-DD``) are parsed
    into the ``sign_up_datetime`` column.

    Parameters
    ----------
    yesterday: date
        Yesterday's date. Used as both start and end date of the query.

    Global Variables
    ----------
    api_user_secret: str
        Credential handed to ``JQL`` for the Mixpanel User Project.

    Returns
    ----------
    dataframe
        Columns ``app_user_id`` (integer), ``sign_up_datetime`` (datetime at
        midnight) and ``new_signup_app`` (the count reducer value).
    """
    #sign-up events restricted to the single requested day
    signup_events = Events({
        'event_selectors': [{'event': "New Signup App"}],
        'from_date': yesterday,
        'to_date': yesterday,
    })
    grouped_events = JQL(api_user_secret, events=signup_events).group_by(
        keys=["e.properties.zUserId", "new Date(e.time).toISOString()"],
        accumulator=Reducer.count())

    #(user ID, day, count) tuples; groups without a zUserId are skipped
    records = [(int(row['key'][0]),
                datetime.strptime(row['key'][1][:10], '%Y-%m-%d'),
                row['value'])
               for row in grouped_events.send()
               if row['key'][0] is not None]

    return pd.DataFrame({
        'app_user_id': [rec[0] for rec in records],
        'sign_up_datetime': [rec[1] for rec in records],
        'new_signup_app': [rec[2] for rec in records],
    })
def pull_usage(from_date, to_date, demo_owner_ids=(10305, 71626)):
    """
    Pull usage events in Mixpanel.

    "Usage" events are grouped on ``OwnerId`` and ``UserId``. Groups lacking
    either ID are skipped: both IDs are converted with ``int()``, which
    cannot accept ``None``.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.
    demo_owner_ids: iterable of int, optional
        App owner IDs to exclude from the result (demo accounts). Defaults to
        ``(10305, 71626)``.

    Global Variables
    ----------
    api_creator_secret: str
        Client secret used to make calls to Mixpanel Creator Project.

    Returns
    ----------
    dataframe
        Columns ``app_owner_id``, ``app_user_id`` and ``usage`` (the count
        reducer value), limited to rows where both IDs are greater than 1 and
        the owner is not in ``demo_owner_ids``.
    """
    #generate JQL query
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "Usage"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(
                    keys=["e.properties.OwnerId", "e.properties.UserId"],
                    accumulator=Reducer.count())

    #store app owner IDs, app user IDs, and usage counts
    app_owner_id_list = []
    app_user_id_list = []
    usage_list = []
    for row in query.send():
        owner_id, user_id = row['key'][0], row['key'][1]
        #int(None) raises TypeError, so the user ID must be guarded as well
        if owner_id is None or user_id is None:
            continue
        app_owner_id_list.append(int(owner_id))
        app_user_id_list.append(int(user_id))
        usage_list.append(row['value'])

    #generate dataframe
    data = {
        'app_owner_id': app_owner_id_list,
        'app_user_id': app_user_id_list,
        'usage': usage_list
    }
    df_usage = pd.DataFrame(data)

    #only keep app owners and users with proper IDs
    df_usage = df_usage[(df_usage.app_owner_id > 1) &
                        (df_usage.app_user_id > 1)]

    #remove demo accounts
    df_usage = df_usage[~df_usage.app_owner_id.isin(list(demo_owner_ids))]
    return df_usage
def user_appstart_pull(from_date, to_date, df_app_users):
    """
    Pull app user AppStart events in Mixpanel and summarise them per app
    creator.

    Parameters
    ----------
    from_date: date
        Start date of query.
    to_date: date
        End date of query.
    df_app_users: dataframe
        App user lookup table. It is left-merged on ``app_user_id`` and must
        also supply the ``user_email`` column used below.

    Global Variables
    ----------
    api_user_secret: str
        Client secret used to make calls to Mixpanel User Project.

    Returns
    ----------
    df_user_AppStart: dataframe
        Columns ``user_id`` (app creator, from ``zAppOwnerId``),
        ``num_app_users`` and ``user_email_domains`` (a list, empty when the
        creator has no app user with an email domain).
    df_top_appstarts: dataframe
        One row per ``user_id`` holding the ``app_name`` with the highest
        summed AppStart count, stored in ``appstart_by_top_app``.
    """
    #generate JQL query: AppStart events grouped by app user, app owner and app name
    query = JQL(api_user_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "AppStart"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(keys=[
                    "e.properties.zUserId", "e.properties.zAppOwnerId",
                    "e.properties.zAppName"
                ],
                             accumulator=Reducer.count())

    #initialize lists to record app user IDs, app creator IDs, app name, and number of AppStarts
    app_user_id_list = []
    owner_id_list = []
    app_name_list = []
    AppStart_list = []

    #process query results; groups without an app user ID are skipped
    #NOTE(review): unlike the owner/app name, IDs are stored as returned (no int cast),
    #so their type must match df_app_users.app_user_id for the merge below - confirm
    for row in query.send():
        if row['key'][0] is not None:
            app_user_id_list.append(row['key'][0])
            owner_id_list.append(row['key'][1])
            app_name_list.append(row['key'][2])
            AppStart_list.append(row['value'])

    #create dataframe
    data = {
        'app_user_id': app_user_id_list,
        'user_id': owner_id_list,
        'app_name': app_name_list,
        'AppStart': AppStart_list
    }
    df_AppStart = pd.DataFrame(data)

    #merge AppStart and app user dataframes to associate app user emails to number of AppStarts
    df_AppStart = pd.merge(df_AppStart,
                           df_app_users,
                           on='app_user_id',
                           how='left')

    #get total users for each app creator:
    #first collapse to one row per (creator, app user), then count app users per creator
    df_user_AppStart = df_AppStart.groupby(
        ['user_id', 'app_user_id'])['app_name'].count().reset_index()
    df_user_AppStart = df_user_AppStart.groupby(
        'user_id')['app_user_id'].count().reset_index()
    df_user_AppStart = df_user_AppStart.rename(
        columns={'app_user_id': 'num_app_users'})

    #get app user domain list for each app creator
    #one row per (creator, app user, email); presumably rows with a missing email
    #drop out here because pandas groupby excludes NaN keys by default - verify
    df_AppStart_temp = df_AppStart.groupby(
        ['user_id', 'app_user_id',
         'user_email'])['app_name'].count().reset_index()
    #domain is the part after the first '@'; emails without one become '' and are removed
    df_AppStart_temp['user_email_domains'] = df_AppStart_temp[
        'user_email'].str.split('@').str[1].fillna('')
    df_AppStart_temp = df_AppStart_temp[
        df_AppStart_temp['user_email_domains'] != '']
    #collect the domains into one list per creator (not de-duplicated)
    df_user_domains_by_creators = df_AppStart_temp.groupby(
        'user_id')['user_email_domains'].apply(list).reset_index()
    df_user_AppStart = pd.merge(df_user_AppStart,
                                df_user_domains_by_creators,
                                on='user_id',
                                how='left')
    #creators with no domains come out of the left merge as null; replace with empty lists
    df_user_AppStart.loc[df_user_AppStart['user_email_domains'].isnull(),
                         ['user_email_domains']] = df_user_AppStart.loc[
                             df_user_AppStart['user_email_domains'].isnull(),
                             'user_email_domains'].apply(lambda x: [])

    #only keep the app with the highest number of AppStarts for each app creator:
    #sort descending by summed AppStart so the first row per creator is the top app
    df_top_appstarts = df_AppStart.groupby(
        ['user_id', 'app_name']).AppStart.sum().reset_index().sort_values(
            'AppStart', ascending=False).drop_duplicates('user_id',
                                                         keep='first')
    df_top_appstarts = df_top_appstarts.rename(
        columns={'AppStart': 'appstart_by_top_app'})

    return df_user_AppStart, df_top_appstarts