コード例 #1
0
def get_timing_for_funnel(eventsfull: DataFrame, funnel: list, useResolvedUrls: bool) -> list:
    """Collect the time users spend on each funnel step before reaching the next step.

    Non-navigation events are removed first. Sessions are then narrowed down to
    those returned by ``analyze_traffic.get_sessions_with_ordered(..., strict=True)``,
    and for every funnel occurrence inside such a session the difference between
    the ``EVENTSTART`` values of consecutive steps is recorded in seconds.

    :param eventsfull: full events DataFrame (that includes non-navigate events)
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :return: list containing one inner list per funnel step; inner list ``k`` holds
        the seconds elapsed between step ``k`` and step ``k + 1`` for every funnel
        occurrence found. The last inner list is always empty because the final
        step has no following step to measure against.
    """
    # One bucket per funnel step; only the first len(funnel) - 1 ever receive values.
    funneltimes = [[] for _ in range(len(funnel))]
    events = analyze_clicks.remove_non_navigation(eventsfull)
    if useResolvedUrls:
        columnToUse = analyze_traffic.RESOLVEDURL
        # The resolved column has to be populated before the session index is built on it.
        url_regex_resolver.resolve_urls(events, manage_resolutions.get_regex_dict(), analyze_traffic.PAGEURL, analyze_traffic.RESOLVEDURL)
    else:
        columnToUse = analyze_traffic.PAGEURL
    si = analyze_traffic.build_session_index(events, columnToUse)
    sessFound = analyze_traffic.get_unordered_sessions_for_funnel(si, funnel)
    sessOrdered = analyze_traffic.get_sessions_with_ordered(events, sessFound, funnel, columnToUse, strict=True)
    for sid in sessOrdered:
        sess_df = events.loc[sid]
        indices = analyze_traffic.get_sublist_indices(funnel, sess_df[columnToUse].tolist(), True)
        for index in indices:
            # Start time of each funnel step for the occurrence beginning at `index`.
            timestamps = [sess_df.iloc[index + offset].loc[EVENTSTART] for offset in range(len(funnel))]
            # Pair every step with its successor; the gap is the time spent on the earlier step.
            for step, (earlier, later) in enumerate(zip(timestamps, timestamps[1:])):
                funneltimes[step].append((later - earlier).total_seconds())
    return funneltimes
コード例 #2
0
def get_funnel_stats(events: DataFrame,
                     funnel: list,
                     useResolvedUrls: bool,
                     limit_rows: int = 0) -> list:
    """Compute conversion statistics for a funnel.

    :param events: events DataFrame
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param limit_rows: when non-zero, only the first ``limit_rows`` rows of
        ``events`` (via ``DataFrame.head``) are analysed; 0 means use all rows
    :return: the result of ``analyze_traffic.get_funnel_conversion_stats``
        materialised as a list (funnel conversions by step)
    """
    if limit_rows != 0:
        events = events.head(limit_rows)

    if useResolvedUrls:
        url_column = analyze_traffic.RESOLVEDURL
        # Fill in the resolved-URL column before it is indexed below.
        regex_dict = manage_resolutions.get_regex_dict()
        url_regex_resolver.resolve_urls(
            events, regex_dict, analyze_traffic.PAGEURL, analyze_traffic.RESOLVEDURL)
    else:
        url_column = analyze_traffic.PAGEURL

    session_index = analyze_traffic.build_session_index(events, url_column)
    return list(
        analyze_traffic.get_funnel_conversion_stats(
            events, session_index, funnel, url_column))
コード例 #3
0
def get_sessions_for_funnel(
    events: pd.DataFrame,
    funnel: list,
    useResolvedUrls: bool,
    OrgId: str = None,
    is_staging: bool = False,
    strict: bool = True,
    numSessions: int = 0,
) -> list:
    """Return links to the sessions that contain the specified funnel.

    :param events: events DataFrame
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param OrgId: FullStory OrgId for the organization; passed through to ``get_session_link``
    :param is_staging: set to True if FullStory staging environment should be used
        (for debugging purposes); passed through to ``get_session_link``
    :param strict: If `True`, the session has to follow the funnel steps in exact
        order (with no diversions between the steps). The `False` option is
        currently not supported.
    :param numSessions: maximum number of sessions to return (if 0, return all available)
    :return: list of session URLs, one per matching session id
    """
    if useResolvedUrls:
        url_column = RESOLVEDURL
        # Populate the resolved-URL column before sessions are matched against it.
        url_regex_resolver.resolve_urls(
            events, manage_resolutions.get_regex_dict(), PAGEURL, RESOLVEDURL)
    else:
        url_column = PAGEURL

    matching_ids = build_and_get_sids_for_funnel(events, funnel, url_column, strict)
    if numSessions != 0:
        matching_ids = matching_ids[:numSessions]

    return [get_session_link(session_id, OrgId, is_staging)
            for session_id in matching_ids]
コード例 #4
0
def get_in_outs(events: DataFrame, funnel: list, useResolvedUrls: bool, limit_rows: int = 0) -> (dict, dict):
    """Gather inflow and outflow information for a funnel.

    :param events: events DataFrame
    :param funnel: funnel of interest
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param limit_rows: when non-zero, only the first ``limit_rows`` rows of
        ``events`` (via ``DataFrame.head``) are analysed; 0 means use all rows
    :return: a pair ``(inflow, outflow)`` of dictionaries with URL frequency
        counts, as produced by ``analyze_traffic.get_funnel_in_outs``
    """
    if limit_rows != 0:
        events = events.head(limit_rows)

    if useResolvedUrls:
        url_column = analyze_traffic.RESOLVEDURL
        # Fill in the resolved-URL column before it is indexed below.
        regex_dict = manage_resolutions.get_regex_dict()
        url_regex_resolver.resolve_urls(events, regex_dict, analyze_traffic.PAGEURL, analyze_traffic.RESOLVEDURL)
    else:
        url_column = analyze_traffic.PAGEURL

    session_index = analyze_traffic.build_session_index(events, url_column)
    inflow, outflow = analyze_traffic.get_funnel_in_outs(
        events, session_index, funnel, url_column, analyze_traffic.REFERAL)
    return inflow, outflow
コード例 #5
0
def get_top_funnels_df(funurl: str, funlen: int, useResolvedUrls: bool, events: DataFrame, limit_rows: int = 0) -> dict:
    """Find the top funnels of a given length that contain a given URL.

    :param funurl: URL that should be contained in the funnel
    :param funlen: funnel length
    :param useResolvedUrls: indicates whether original or resolved URLs should be used
    :param events: events DataFrame
    :param limit_rows: when non-zero, only the first ``limit_rows`` rows of
        ``events`` (via ``DataFrame.head``) are analysed; 0 means use all rows
    :return: dictionary of funnels and their frequencies, as produced by
        ``get_funnel_lists``
    """
    if limit_rows != 0:
        events = events.head(limit_rows)

    if useResolvedUrls:
        url_column = analyze_traffic.RESOLVEDURL
        # Fill in the resolved-URL column before it is indexed below.
        regex_dict = manage_resolutions.get_regex_dict()
        url_regex_resolver.resolve_urls(events, regex_dict, analyze_traffic.PAGEURL, analyze_traffic.RESOLVEDURL)
    else:
        url_column = analyze_traffic.PAGEURL

    session_index = analyze_traffic.build_session_index(events, url_column)
    return get_funnel_lists(events, session_index, funurl, funlen, url_column)
コード例 #6
0
def get_popular(events: pd.DataFrame,
                useResolvedUrls: bool,
                limit_rows: int = 0) -> dict:
    """Return a dictionary of visited URLs and the visit count for each URL.

    :param events: events DataFrame
    :param useResolvedUrls: boolean indicating whether original or resolved URLs should be used
    :param limit_rows: when non-zero, only the first ``limit_rows`` rows of
        ``events`` (via ``DataFrame.head``) are analysed; 0 means use the entire DataFrame
    :return: URL visit counts as produced by ``analyze_traffic.get_counts_for_url``
        from the session index
    """
    if limit_rows != 0:
        events = events.head(limit_rows)

    if useResolvedUrls:
        url_column = analyze_traffic.RESOLVEDURL
        # Fill in the resolved-URL column before it is indexed below.
        regex_dict = manage_resolutions.get_regex_dict()
        url_regex_resolver.resolve_urls(
            events, regex_dict, analyze_traffic.PAGEURL, analyze_traffic.RESOLVEDURL)
    else:
        url_column = analyze_traffic.PAGEURL

    session_index = analyze_traffic.build_session_index(events, url_column)
    return analyze_traffic.get_counts_for_url(session_index)