Esempio n. 1
0
    def plot_cluster_track(self, bbox):
        """
        Draw the transition graph for the users located in the selected area.

        The data for the area is obtained from ``self._get_data_from_plot``,
        aggregated by ``trans_count`` and handed to ``plot.plot_graph``
        together with this object's export folder.

        :param bbox: coordinates of the top-left and bottom-right corners of the area
        :type bbox: List[List[float]]
        :return: None
        """
        metric = 'trans_count'
        selected = self._get_data_from_plot(bbox)
        aggregated = get_all_agg(selected, [metric])
        plot.plot_graph(
            aggregated,
            metric,
            {'export_folder': self.export_folder},
        )
Esempio n. 2
0
def export_tracks(df, settings, users='all', task='lost', order='all', treshold=0.5,
                  start_event=None, end_event=None):
    """
    Aggregate a clickstream into a transition graph and export it to disk.

    The (optionally user-filtered) events are passed through ``get_session``
    and ``get_all_agg``; the result is written as a CSV file and the updated
    settings are dumped as JSON, both into ``settings['export_folder']``.

    :param df: event clickstream; must expose a ``user_pseudo_id`` column
    :param settings: experiment config; it is passed through ``check_folder``,
        updated with the export parameters and written to the settings file
    :param users: any string (e.g. ``'all'``) keeps every user, otherwise an
        iterable of user ids to keep
    :param task: task type; ``'lost'`` selects the default start event
    :param order: session order, forwarded to ``get_session``
    :param treshold: session splitting threshold, forwarded to ``get_session``
    :param start_event: only checked for ``None``; the value is not stored
    :param end_event: currently unused
    :return: tuple ``(export_folder, graph_name, set_name)``
    """
    settings = check_folder(settings)
    export_folder = settings['export_folder']
    # NOTE(review): an explicitly passed ``start_event`` is not written to the
    # settings; only the default for the 'lost' task is -- confirm intent.
    if task == 'lost' and start_event is None:
        settings['start_event'] = 'welcome_see_screen'

    agg_list = ['trans_count', 'dt_mean', 'dt_median', 'dt_min', 'dt_max']

    # Create the nested section up front so that both branches can write to it
    # (previously a user list with no 'users' section raised KeyError).
    if settings.get('users') is None:
        settings['users'] = {}
    if isinstance(users, str):
        settings['users']['userlist'] = 'all'
    else:
        # Materialise once: ``users`` may be a one-shot iterable and is needed
        # both for filtering and for the settings dump.
        users = list(users)
        df = df[df.user_pseudo_id.isin(users)]
        settings['users']['userlist'] = users

    df = get_session(df, order=order, treshold=treshold)
    if settings.get('events') is None:
        settings['events'] = {}
    settings['events']['session_order'] = order
    # Must be computed before aggregation replaces the per-event frame.
    settings['total_count'] = df.user_pseudo_id.nunique()
    df = get_all_agg(df, agg_list)

    # NOTE(review): 1e8 * 36 == 3.6e9, which looks like hours -> microseconds;
    # confirm the unit of ``treshold``.
    settings['events']['session_thr_time'] = treshold * 1e8 * 36

    # Reuse the settings file already present in the export folder (the last
    # match wins, as before); fall back to a default name instead of failing
    # with an unbound variable when there is none.
    set_name = 'settings.json'
    for entry in os.listdir(export_folder):
        if 'settings' in entry:
            set_name = entry

    with open(os.path.join(export_folder, set_name), 'w') as f:
        json.dump(settings, f)
    # NOTE(review): str(datetime.now()) contains spaces and colons, which are
    # not valid in file names on every platform.
    graph_name = 'graph_{}.csv'.format(datetime.now())
    df.to_csv(os.path.join(export_folder, graph_name), index=False)
    return export_folder, graph_name, set_name
Esempio n. 3
0
def export_tracks(df, settings, users='all', task='lost', order='all', treshold=0.5,
                  start_event=None, end_event=None):
    """
    Export aggregated trajectories from an event clickstream for visualization
    (with Mathematica).

    The (optionally user-filtered) events are passed through ``_get_session``
    and ``get_all_agg``; the result is written as a CSV file and the updated
    settings are dumped as JSON, both into ``settings['export_folder']``.

    :param df: event clickstream
    :param settings: experiment config (can be empty dict here)
    :param users: `all` or list of user ids to plot specific group
    :param task: type of task for different visualization (can be `lost` or `prunned_welcome`)
    :param order: depth in sessions for filtering
    :param treshold: threshold for session splitting
    :param start_event: name of start event in trajectory (only checked for
        ``None``; the value itself is not stored)
    :param end_event: name of last event in trajectory (currently unused)

    :type df: pd.DataFrame
    :type settings: dict
    :type users: str or list
    :type task: str
    :type order: int
    :type treshold: float
    :type start_event: str
    :type end_event: str

    :return: tuple ``(export_folder, graph_name, set_name)``
    """
    settings = _check_folder(settings)
    export_folder = settings['export_folder']
    # NOTE(review): an explicitly passed ``start_event`` is not written to the
    # settings; only the default for the 'lost' task is -- confirm intent.
    if task == 'lost' and start_event is None:
        settings['start_event'] = 'welcome_see_screen'

    agg_list = ['trans_count', 'dt_mean', 'dt_median', 'dt_min', 'dt_max']

    # Create the nested section up front so that both branches can write to it
    # (previously a user list with no 'users' section raised KeyError).
    if settings.get('users') is None:
        settings['users'] = {}
    if isinstance(users, str):
        settings['users']['userlist'] = 'all'
    else:
        # Materialise once: ``users`` may be a one-shot iterable and is needed
        # both for filtering and for the settings dump.
        users = list(users)
        df = df[df.user_pseudo_id.isin(users)]
        settings['users']['userlist'] = users

    df = _get_session(df, order=order, treshold=treshold)
    if settings.get('events') is None:
        settings['events'] = {}
    settings['events']['session_order'] = order
    # Must be computed before aggregation replaces the per-event frame.
    settings['total_count'] = df.user_pseudo_id.nunique()
    df = get_all_agg(df, agg_list)

    # NOTE(review): 1e8 * 36 == 3.6e9, which looks like hours -> microseconds;
    # confirm the unit of ``treshold``.
    settings['events']['session_thr_time'] = treshold * 1e8 * 36

    # Reuse the settings file already present in the export folder (the last
    # match wins, as before); fall back to a default name instead of failing
    # with an unbound variable when there is none.
    set_name = 'settings.json'
    for entry in os.listdir(export_folder):
        if 'settings' in entry:
            set_name = entry

    with open(os.path.join(export_folder, set_name), 'w') as f:
        json.dump(settings, f)
    # NOTE(review): str(datetime.now()) contains spaces and colons, which are
    # not valid in file names on every platform.
    graph_name = 'graph_{}.csv'.format(datetime.now())
    df.to_csv(os.path.join(export_folder, graph_name), index=False)
    return export_folder, graph_name, set_name
 def plot_cluster_track(self, bbox):
     """
     Plot the transition graph for the data selected by ``bbox``.

     The selection is obtained from ``self._get_data_from_plot``, aggregated
     by ``trans_count`` and handed to ``plot_graph_python`` together with
     this object's export folder.

     :param bbox: selection forwarded to ``self._get_data_from_plot``
         (presumably the corner coordinates of an area -- confirm)
     :return: None
     """
     metric = 'trans_count'
     selected = self._get_data_from_plot(bbox)
     aggregated = get_all_agg(selected, [metric])
     plot_graph_python(
         aggregated,
         metric,
         {'export_folder': self.export_folder},
     )