def get_timing_for_funnel(eventsfull: DataFrame, funnel: list, useResolvedUrls: bool) -> list:
    """Get a list of funnel step times (amounts of time users spend before navigating to next step) for a funnel

    Non-navigation events are removed first. Only sessions reported by
    ``analyze_traffic.get_sessions_with_ordered(..., strict=True)`` contribute.
    Every index returned by ``analyze_traffic.get_sublist_indices`` is treated as
    the row position at which one occurrence of the funnel starts in the session.

    :param eventsfull: full events DataFrame (that includes non-navigate events)
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :return: list with ``len(funnel)`` entries; entry ``k`` collects, for every funnel occurrence,
        the number of seconds between the ``EVENTSTART`` values of step ``k`` and step ``k + 1``.
        The last entry is always empty because the final step has no following step.
    """
    events = analyze_clicks.remove_non_navigation(eventsfull)

    if useResolvedUrls:
        columnToUse = analyze_traffic.RESOLVEDURL
        # Return value is ignored: presumably fills the resolved-URL column of
        # `events` in place -- TODO confirm against url_regex_resolver.
        url_regex_resolver.resolve_urls(events, manage_resolutions.get_regex_dict(),
                                        analyze_traffic.PAGEURL, analyze_traffic.RESOLVEDURL)
    else:
        columnToUse = analyze_traffic.PAGEURL

    si = analyze_traffic.build_session_index(events, columnToUse)
    sessFound = analyze_traffic.get_unordered_sessions_for_funnel(si, funnel)
    sessOrdered = analyze_traffic.get_sessions_with_ordered(events, sessFound, funnel, columnToUse, strict=True)

    # One bucket per funnel step; bucket k receives the step k -> k + 1 durations.
    funneltimes = [[] for _ in funnel]
    steps = len(funnel)

    for sid in sessOrdered:
        sess_df = events.loc[sid]
        indices = analyze_traffic.get_sublist_indices(funnel, sess_df[columnToUse].tolist(), True)
        for start in indices:
            # Start time of each funnel step for this occurrence.
            timestamps = [sess_df.iloc[start + offset].loc[EVENTSTART] for offset in range(steps)]
            # Pair consecutive steps to get the time spent on each step.
            for step, (previous, current) in enumerate(zip(timestamps, timestamps[1:])):
                funneltimes[step].append((current - previous).total_seconds())

    return funneltimes
def get_funnel_stats(events: DataFrame, funnel: list, useResolvedUrls: bool, limit_rows: int = 0) -> list:
    """Compute conversion statistics for a funnel

    :param events: events DataFrame
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param limit_rows: number of leading rows of the events DataFrame to use (use all rows if 0)
    :return: funnel conversions by step, i.e. the result of
        ``analyze_traffic.get_funnel_conversion_stats`` materialized as a list
    """
    # Optionally restrict the analysis to the first `limit_rows` events.
    if limit_rows != 0:
        events = events.head(limit_rows)

    if useResolvedUrls:
        url_column = analyze_traffic.RESOLVEDURL
        # Return value is ignored: presumably fills the resolved-URL column in
        # place -- TODO confirm against url_regex_resolver.
        url_regex_resolver.resolve_urls(
            events,
            manage_resolutions.get_regex_dict(),
            analyze_traffic.PAGEURL,
            analyze_traffic.RESOLVEDURL,
        )
    else:
        url_column = analyze_traffic.PAGEURL

    session_index = analyze_traffic.build_session_index(events, url_column)
    conversions = analyze_traffic.get_funnel_conversion_stats(events, session_index, funnel, url_column)
    return list(conversions)
def get_sessions_for_funnel(
        events: pd.DataFrame,
        funnel: list,
        useResolvedUrls: bool,
        OrgId: str = None,
        is_staging: bool = False,
        strict: bool = True,
        numSessions: int = 0,
) -> list:
    """Build session links for the sessions that contain the given funnel

    :param events: events DataFrame
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param OrgId: FullStory OrgId for the organization
    :param is_staging: set to True if FullStory staging environment should be used (for debugging purposes)
    :param strict: If `True`, the session has to follow the funnel steps in exact order
        (with no diversions between the steps). The `False` option is currently not supported.
    :param numSessions: number of sessions to return (if 0, return all available)
    :return: list of session URLs, one per matching session id
    """
    if useResolvedUrls:
        url_column = RESOLVEDURL
        # Return value is ignored: presumably fills the resolved-URL column in
        # place -- TODO confirm against url_regex_resolver.
        url_regex_resolver.resolve_urls(events, manage_resolutions.get_regex_dict(), PAGEURL, RESOLVEDURL)
    else:
        url_column = PAGEURL

    matching_sids = build_and_get_sids_for_funnel(events, funnel, url_column, strict)

    # Keep only the leading `numSessions` ids when a limit was requested.
    if numSessions != 0:
        matching_sids = matching_sids[:numSessions]

    return [get_session_link(sid, OrgId, is_staging) for sid in matching_sids]
def get_in_outs(events: DataFrame, funnel: list, useResolvedUrls: bool, limit_rows: int = 0) -> (dict, dict):
    """Collect inflow and outflow information for a funnel

    :param events: events DataFrame
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param limit_rows: number of leading rows of the events DataFrame to use (use all rows if 0)
    :return: a pair ``(inflows, outflows)`` of dictionaries with URL frequency counts, as produced
        by ``analyze_traffic.get_funnel_in_outs``
    """
    url_column = analyze_traffic.RESOLVEDURL if useResolvedUrls else analyze_traffic.PAGEURL

    # Optionally restrict the analysis to the first `limit_rows` events.
    if limit_rows != 0:
        events = events.head(limit_rows)

    if useResolvedUrls:
        # Return value is ignored: presumably fills the resolved-URL column in
        # place -- TODO confirm against url_regex_resolver.
        url_regex_resolver.resolve_urls(
            events,
            manage_resolutions.get_regex_dict(),
            analyze_traffic.PAGEURL,
            analyze_traffic.RESOLVEDURL,
        )

    session_index = analyze_traffic.build_session_index(events, url_column)
    inflows, outflows = analyze_traffic.get_funnel_in_outs(
        events, session_index, funnel, url_column, analyze_traffic.REFERAL)
    return inflows, outflows
def get_top_funnels_df(funurl: str, funlen: int, useResolvedUrls: bool, events: DataFrame, limit_rows: int = 0) -> dict:
    """Find the top funnels of a given length that contain a given URL

    :param funurl: URL that should be contained in the funnel
    :param funlen: funnel length
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param events: events DataFrame
    :param limit_rows: number of leading rows of the events DataFrame to use (use all rows if 0)
    :return: dictionary of funnels and their frequencies, as produced by ``get_funnel_lists``
    """
    # Optionally restrict the analysis to the first `limit_rows` events.
    if limit_rows != 0:
        events = events.head(limit_rows)

    if not useResolvedUrls:
        url_column = analyze_traffic.PAGEURL
    else:
        url_column = analyze_traffic.RESOLVEDURL
        # Return value is ignored: presumably fills the resolved-URL column in
        # place -- TODO confirm against url_regex_resolver.
        url_regex_resolver.resolve_urls(
            events,
            manage_resolutions.get_regex_dict(),
            analyze_traffic.PAGEURL,
            analyze_traffic.RESOLVEDURL,
        )

    session_index = analyze_traffic.build_session_index(events, url_column)
    return get_funnel_lists(events, session_index, funurl, funlen, url_column)
def get_popular(events: pd.DataFrame, useResolvedUrls: bool, limit_rows: int = 0) -> dict:
    """Return a dictionary of visited URLs and visit counts for each URL

    :param events: events DataFrame
    :param useResolvedUrls: boolean indicating whether original or resolved URLs should be used
    :param limit_rows: number of leading rows from the original DataFrame to use
        (if 0, then use entire DataFrame)
    :return: the URL counts computed by ``analyze_traffic.get_counts_for_url`` from the session index
    """
    url_column = analyze_traffic.RESOLVEDURL if useResolvedUrls else analyze_traffic.PAGEURL

    # Optionally restrict the analysis to the first `limit_rows` events.
    if limit_rows != 0:
        events = events.head(limit_rows)

    if useResolvedUrls:
        # Return value is ignored: presumably fills the resolved-URL column in
        # place -- TODO confirm against url_regex_resolver.
        url_regex_resolver.resolve_urls(
            events,
            manage_resolutions.get_regex_dict(),
            analyze_traffic.PAGEURL,
            analyze_traffic.RESOLVEDURL,
        )

    session_index = analyze_traffic.build_session_index(events, url_column)
    return analyze_traffic.get_counts_for_url(session_index)