Ejemplo n.º 1
0
    def _group_set(self, method, interaction):
        """
        Merge the grouped records of ``self.user`` into a single set.

        ``self.user`` is first passed through ``bc.helper.group.filter_user``
        with the given ``interaction``; the result is chunked by
        ``group_records`` using ``method`` as the ``groupby`` argument, and
        every record of every chunk is added to the returned set.
        """
        records_for_interaction = bc.helper.group.filter_user(
            self.user, interaction=interaction)

        merged = set()
        for chunk in group_records(records_for_interaction, groupby=method):
            merged.update(chunk)
        return merged
Ejemplo n.º 2
0
    def _group_set(self, method, interaction):
        """
        Return the set of all records found in the groups of ``self.user``.

        The user is filtered with ``bc.helper.group.filter_user`` for the
        requested ``interaction``, grouped by ``group_records`` according to
        ``method``, and the groups are flattened into one set.
        """
        selection = bc.helper.group.filter_user(self.user,
                                                interaction=interaction)
        return {record
                for group in group_records(selection, groupby=method)
                for record in group}
Ejemplo n.º 3
0
def all(user,
        groupby='week',
        summary='default',
        attributes=True,
        flatten=False):
    """
    Compute every bandicoot indicator for a user.

    The result is an ordered dictionary holding the user's name, a
    'reporting' dictionary describing the user and this run, and one entry
    per indicator, keyed by the indicator function's ``__name__``.  When
    ``attributes`` is true and ``user.attributes`` is not an empty dict, the
    attributes are appended as well.  When ``flatten`` is True, the result is
    passed through the module-level ``flatten`` function before returning.
    """

    # Weekly grouping selects the 'distribution_*' datatypes.
    weekly = groupby == 'week'
    scalar_type = 'distribution_scalar' if weekly else 'scalar'
    summary_type = 'distribution_summarystats' if weekly else 'summarystats'

    individual = bc.individual
    spatial = bc.spatial

    # partial objects carry no __name__, and the name is used as result key.
    interactions_in = partial(individual.number_of_interactions,
                              direction='in')
    interactions_in.__name__ = 'number_of_interaction_in'
    interactions_out = partial(individual.number_of_interactions,
                               direction='out')
    interactions_out.__name__ = 'number_of_interaction_out'

    indicators = [
        (individual.active_days, scalar_type),
        (individual.number_of_contacts, scalar_type),
        (individual.call_duration, summary_type),
        (individual.percent_nocturnal, scalar_type),
        (individual.percent_initiated_conversation, scalar_type),
        (individual.percent_initiated_interactions, scalar_type),
        (individual.response_delay_text, summary_type),
        (individual.response_rate_text, scalar_type),
        (individual.entropy_of_contacts, scalar_type),
        (individual.balance_contacts, summary_type),
        (individual.interactions_per_contact, summary_type),
        (individual.interevents_time, summary_type),
        (individual.number_of_contacts_xpercent_interactions, scalar_type),
        (individual.number_of_contacts_xpercent_durations, scalar_type),
        (individual.number_of_interactions, scalar_type),
        (interactions_in, scalar_type),
        (interactions_out, scalar_type),
        (spatial.number_of_places, scalar_type),
        (spatial.entropy_places, scalar_type),
        (spatial.percent_at_home, scalar_type),
        (spatial.radius_of_gyration, scalar_type),
        (spatial.frequent_locations, scalar_type),
    ]

    bins = [list(chunk) for chunk in group_records(user, groupby=groupby)]

    report = OrderedDict()
    report['antennas_path'] = user.antennas_path
    report['attributes_path'] = user.attributes_path
    report['version'] = bc.__version__
    report['groupby'] = groupby
    # A falsy start/end time is reported as is instead of being stringified.
    report['start_time'] = user.start_time and str(user.start_time)
    report['end_time'] = user.end_time and str(user.end_time)
    report['bins'] = len(bins)
    report['has_call'] = user.has_call
    report['has_text'] = user.has_text
    report['has_home'] = user.has_home
    report['percent_records_missing_location'] = \
        bc.helper.tools.percent_records_missing_location(user)
    report['antennas_missing_locations'] = \
        bc.helper.tools.antennas_missing_locations(user)
    report['percent_outofnetwork_calls'] = user.percent_outofnetwork_calls
    report['percent_outofnetwork_texts'] = user.percent_outofnetwork_texts
    report['percent_outofnetwork_contacts'] = \
        user.percent_outofnetwork_contacts
    report['percent_outofnetwork_call_durations'] = \
        user.percent_outofnetwork_call_durations
    report['number_of_records'] = (
        sum(len(chunk) for chunk in bins) if user.records is not None else 0.)

    if user.ignored_records is not None:
        report['ignored_records'] = user.ignored_records

    result = OrderedDict()
    result['name'] = user.name
    result['reporting'] = report

    for indicator, datatype in indicators:
        try:
            value = indicator(user,
                              groupby=groupby,
                              summary=summary,
                              datatype=datatype)
        except ValueError:
            # Retry without the summary argument.
            value = indicator(user, groupby=groupby, datatype=datatype)
        result[indicator.__name__] = value

    if attributes and user.attributes != {}:
        result['attributes'] = user.attributes

    if flatten is True:
        # The 'flatten' parameter shadows the module-level function.
        return globals()['flatten'](result)
    return result
Ejemplo n.º 4
0
def all(user, groupby='week', summary='default', network=False, split_week=False, split_day=False, attributes=True, flatten=False):
    """
    Returns a dictionary containing all bandicoot indicators for the user,
    as well as reporting variables.

    Relevant indicators are defined in the 'individual', and 'spatial' modules.

    Parameters
    ----------
    user : object
        The user whose records are analysed.
    groupby : str or None
        Grouping method, forwarded to ``group_records`` and to every indicator.
    summary : str
        Summary mode forwarded to every indicator; an indicator call that
        raises ``ValueError`` is retried without it.
    network : bool
        If true and ``user.has_network`` is set, the network indicators are
        added to the result.
    split_week, split_day : bool
        Forwarded to every indicator and recorded in the reporting variables.
    attributes : bool
        If true and ``user.attributes`` is not an empty dict, it is added
        under the ``'attributes'`` key.
    flatten : bool
        If ``True``, the result is passed through the module-level ``flatten``
        function before being returned.

    =================================== =======================================================================
    Reporting variables                 Description
    =================================== =======================================================================
    antennas_path                       path of the CSV file containing antennas locations
    attributes_path                     directory where attributes were loaded
    version                             bandicoot version
    groupby                             grouping method ('week' or None)
    split_week                          whether or not indicators are also computed for weekday and weekend
    split_day                           whether or not indicators are also computed for day and night
    start_time                          time of the first record
    end_time                            time of the last record
    night_start, night_end              start and end time to define nights
    weekend                             days used to define the weekend (``[6, 7]`` by default, where 1 is Monday)
    bins                                number of weeks if the records are grouped
    has_call                            whether or not records include calls
    has_text                            whether or not records include texts
    has_home                            whether or not a :meth:`home location <bandicoot.core.User.recompute_home>` has been found
    has_network                         whether or not correspondents were loaded
    percent_records_missing_location    percentage of records without location
    antennas_missing_locations          number of antennas missing a location
    percent_outofnetwork_calls          percentage of calls, received or emitted, made with a correspondent not loaded in the network
    percent_outofnetwork_texts          percentage of texts with contacts not loaded in the network
    percent_outofnetwork_contacts       percentage of contacts not loaded in the network
    percent_outofnetwork_call_durations percentage of minutes of calls where the contact was not loaded in the network
    number_of_records                   total number of records
    =================================== =======================================================================

    We also include a last set of reporting variables, for the records ignored
    at load-time. Values can be ignored due to missing or inconsistent fields
    (e.g., not including a valid 'datetime' value).

    .. code-block:: python

        {
            'all': 0,
            'interaction': 0,
            'direction': 0,
            'correspondent_id': 0,
            'datetime': 0,
            'call_duration': 0
        }

    with the total number of records ignored (key ``'all'``), as well as the
    number of records with faulty values for each column.
    """

    # Warn the user if they are grouping and all records fall in one group.
    # print() with a single argument behaves the same on Python 2 and 3.
    if groupby is not None:
        if len(set(DATE_GROUPERS[groupby](r.datetime) for r in user.records)) <= 1:
            print(warning_str('Grouping by %s, but all data is from the same %s!' % (groupby, groupby)))

    scalar_type = 'distribution_scalar' if groupby == 'week' else 'scalar'
    summary_type = 'distribution_summarystats' if groupby == 'week' else 'summarystats'

    # partial objects have no __name__; it is set because the name is used
    # as the key of the indicator in the returned dictionary.
    number_of_interactions_in = partial(bc.individual.number_of_interactions, direction='in')
    number_of_interactions_in.__name__ = 'number_of_interaction_in'
    number_of_interactions_out = partial(bc.individual.number_of_interactions, direction='out')
    number_of_interactions_out.__name__ = 'number_of_interaction_out'

    functions = [
        (bc.individual.active_days, scalar_type),
        (bc.individual.number_of_contacts, scalar_type),
        (bc.individual.call_duration, summary_type),
        (bc.individual.percent_nocturnal, scalar_type),
        (bc.individual.percent_initiated_conversations, scalar_type),
        (bc.individual.percent_initiated_interactions, scalar_type),
        (bc.individual.response_delay_text, summary_type),
        (bc.individual.response_rate_text, scalar_type),
        (bc.individual.entropy_of_contacts, scalar_type),
        (bc.individual.balance_of_contacts, summary_type),
        (bc.individual.interactions_per_contact, summary_type),
        (bc.individual.interevent_time, summary_type),
        (bc.individual.percent_pareto_interactions, scalar_type),
        (bc.individual.percent_pareto_durations, scalar_type),
        (bc.individual.number_of_interactions, scalar_type),
        (number_of_interactions_in, scalar_type),
        (number_of_interactions_out, scalar_type),
        (bc.spatial.number_of_antennas, scalar_type),
        (bc.spatial.entropy_of_antennas, scalar_type),
        (bc.spatial.percent_at_home, scalar_type),
        (bc.spatial.radius_of_gyration, scalar_type),
        (bc.spatial.frequent_antennas, scalar_type),
        (bc.spatial.churn_rate, scalar_type)
    ]

    network_functions = [
        bc.network.clustering_coefficient_unweighted,
        bc.network.clustering_coefficient_weighted,
        bc.network.assortativity_attributes,
        bc.network.assortativity_indicators
    ]

    # Materialise the groups so they can be counted.
    groups = [list(g) for g in group_records(user, groupby=groupby)]

    reporting = OrderedDict([
        ('antennas_path', user.antennas_path),
        ('attributes_path', user.attributes_path),
        ('version', bc.__version__),
        ('groupby', groupby),
        ('split_week', split_week),
        ('split_day', split_day),
        # A falsy start/end time is reported as is instead of stringified.
        ('start_time', user.start_time and str(user.start_time)),
        ('end_time', user.end_time and str(user.end_time)),
        ('night_start', str(user.night_start)),
        ('night_end', str(user.night_end)),
        ('weekend', user.weekend),
        ('bins', len(groups)),
        ('has_call', user.has_call),
        ('has_text', user.has_text),
        ('has_home', user.has_home),
        ('has_network', user.has_network),
        ('percent_records_missing_location', bc.helper.tools.percent_records_missing_location(user)),
        ('antennas_missing_locations', bc.helper.tools.antennas_missing_locations(user)),
        ('percent_outofnetwork_calls', user.percent_outofnetwork_calls),
        ('percent_outofnetwork_texts', user.percent_outofnetwork_texts),
        ('percent_outofnetwork_contacts', user.percent_outofnetwork_contacts),
        ('percent_outofnetwork_call_durations', user.percent_outofnetwork_call_durations),
    ])

    if user.records is not None:
        reporting['number_of_records'] = len(user.records)
    else:
        reporting['number_of_records'] = 0.

    if user.ignored_records is not None:
        reporting['ignored_records'] = user.ignored_records

    returned = OrderedDict([
        ('name', user.name),
        ('reporting', reporting)
    ])

    for fun, datatype in functions:
        try:
            metric = fun(user, groupby=groupby, summary=summary, datatype=datatype, split_week=split_week, split_day=split_day)
        except ValueError:
            # Retry without the summary argument.
            metric = fun(user, groupby=groupby, datatype=datatype, split_week=split_week, split_day=split_day)

        returned[fun.__name__] = metric

    if network and user.has_network:
        for fun in network_functions:
            returned[fun.__name__] = fun(user)

    if attributes and user.attributes != {}:
        returned['attributes'] = user.attributes

    if flatten is True:
        # The 'flatten' parameter shadows the module-level function of the
        # same name, hence the lookup through globals().
        return globals()['flatten'](returned)

    return returned
Ejemplo n.º 5
0
 def _group_set(self, method, interaction):
     """
     Return the set of all records found in the groups of ``self.user``.

     ``group_records`` is called with ``method`` as the ``groupby`` argument
     and with the given ``interaction``; every record of every resulting
     chunk is added to the returned set.
     """
     # Forward the caller's grouping method. The literal string 'method' was
     # passed here before, which left the ``method`` parameter unused.
     chunks = group_records(self.user,
                            groupby=method,
                            interaction=interaction)
     new_records = set(r for c in chunks for r in c)
     return new_records
Ejemplo n.º 6
0
 def _group_set(self, method, interaction):
     """
     Flatten the grouped records of ``self.user`` into one set.

     The groups come from ``group_records``, called with ``method`` as the
     ``groupby`` argument and with the given ``interaction``.
     """
     merged = set()
     for group in group_records(self.user, groupby=method,
                                interaction=interaction):
         for record in group:
             merged.add(record)
     return merged
Ejemplo n.º 7
0
def create_punchcards(user, split_interval=60):
    """
    Computes raw indicators (e.g. number of outgoing calls) for intervals of ~1 hour
    across each week of user data. These "punchcards" are returned in a nested list
    with each sublist containing [user.name, channel, weekday, section, value].

    Parameters
    ----------
    user : object
        The user to create punchcards for.
    split_interval : int
        The interval in minutes for which each indicator is computed. Defaults to 60.
        Needs to be able to split a day (24*60 minutes) evenly.

    Raises
    ------
    ValueError
        If ``split_interval`` is not positive or does not evenly divide the
        24*60 minutes of a day.
    """

    minutes_per_day = 24 * 60

    # Use the remainder rather than "/" followed by is_integer(): with
    # integers, Python 2's "/" truncates, so that check could never fail.
    if split_interval <= 0 or minutes_per_day % split_interval != 0:
        raise ValueError(
            "The minute interval set for the punchcard structure does not evenly divide the day!"
        )

    contacts_in = partial(bc.individual.number_of_contacts,
                          direction='in',
                          interaction='callandtext',
                          summary=None)
    contacts_out = partial(bc.individual.number_of_contacts,
                           direction='out',
                           interaction='callandtext',
                           summary=None)
    calls_in = partial(bc.individual.number_of_interactions,
                       direction='in',
                       interaction='call',
                       summary=None)
    calls_out = partial(bc.individual.number_of_interactions,
                        direction='out',
                        interaction='call',
                        summary=None)
    texts_in = partial(bc.individual.number_of_interactions,
                       direction='in',
                       interaction='text',
                       summary=None)
    texts_out = partial(bc.individual.number_of_interactions,
                        direction='out',
                        interaction='text',
                        summary=None)
    time_spent_in = partial(bc.individual.call_duration,
                            direction='in',
                            interaction='call',
                            summary=None)
    time_spent_out = partial(bc.individual.call_duration,
                             direction='out',
                             interaction='call',
                             summary=None)

    core_func = [(contacts_in, "scalar"), (contacts_out, "scalar"),
                 (calls_in, "scalar"), (calls_out, "scalar"),
                 (texts_in, "scalar"), (texts_out, "scalar")]

    time_func = [(time_spent_in, "summarystats"),
                 (time_spent_out, "summarystats")]

    pc = []
    # End minute of every section of the week. Floor division keeps the
    # count an int on Python 3, where "/" would hand range() a float.
    sections = [(i + 1) * split_interval
                for i in range(7 * minutes_per_day // split_interval)]
    temp_user = _extract_user_info(user)

    for grouped_records in group_records(user, groupby='week'):
        week_records = list(grouped_records)
        time_spent_rec = _transform_to_time_spent(week_records, split_interval,
                                                  sections)
        pc.extend(
            _calculate_channels(week_records, sections, split_interval,
                                core_func, temp_user))
        # len(core_func) is passed as an extra argument for the time-based
        # channels -- presumably a channel offset; confirm against
        # _calculate_channels.
        pc.extend(
            _calculate_channels(time_spent_rec, sections, split_interval,
                                time_func, temp_user, len(core_func)))

    return pc
Ejemplo n.º 8
0
def create_punchcards(user, split_interval=60):
    """
    Computes raw indicators (e.g. number of outgoing calls) for intervals of ~1 hour
    across each week of user data. These "punchcards" are returned in a nested list
    with each sublist containing [user.name, channel, weekday, section, value].

    Parameters
    ----------
    user : object
        The user to create punchcards for.
    split_interval : int
        The interval in minutes for which each indicator is computed. Defaults to 60.
        Needs to be able to split a day (24*60 minutes) evenly.

    Raises
    ------
    ValueError
        If ``split_interval`` is not positive or does not evenly divide the
        24*60 minutes of a day.
    """

    # Test the remainder directly. The former float(24 * 60 / x).is_integer()
    # check was always true under Python 2, where "/" on ints truncates.
    if split_interval <= 0 or (24 * 60) % split_interval != 0:
        raise ValueError(
            "The minute interval set for the punchcard structure does not evenly divide the day!")

    contacts_in = partial(bc.individual.number_of_contacts,
                          direction='in', interaction='callandtext', summary=None)
    contacts_out = partial(bc.individual.number_of_contacts,
                           direction='out', interaction='callandtext', summary=None)
    calls_in = partial(bc.individual.number_of_interactions,
                       direction='in', interaction='call', summary=None)
    calls_out = partial(bc.individual.number_of_interactions,
                        direction='out', interaction='call', summary=None)
    texts_in = partial(bc.individual.number_of_interactions,
                       direction='in', interaction='text', summary=None)
    texts_out = partial(bc.individual.number_of_interactions,
                        direction='out', interaction='text', summary=None)
    time_spent_in = partial(bc.individual.call_duration,
                            direction='in', interaction='call', summary=None)
    time_spent_out = partial(bc.individual.call_duration,
                             direction='out', interaction='call', summary=None)

    core_func = [
        (contacts_in, "scalar"),
        (contacts_out, "scalar"),
        (calls_in, "scalar"),
        (calls_out, "scalar"),
        (texts_in, "scalar"),
        (texts_out, "scalar")
    ]

    time_func = [
        (time_spent_in, "summarystats"),
        (time_spent_out, "summarystats")
    ]

    pc = []
    # "//" keeps the section count an int on both Python 2 and Python 3;
    # with "/" Python 3 would pass a float to range() and raise TypeError.
    section_count = 7 * 24 * 60 // split_interval
    sections = [(i + 1) * split_interval for i in range(section_count)]
    temp_user = _extract_user_info(user)

    for grouped_records in group_records(user, groupby='week'):
        week_records = list(grouped_records)
        time_spent_rec = _transform_to_time_spent(
            week_records, split_interval, sections)
        pc.extend(_calculate_channels(
            week_records, sections, split_interval, core_func, temp_user))
        # The trailing len(core_func) argument is presumably a channel
        # offset for the time-based indicators; confirm against
        # _calculate_channels.
        pc.extend(_calculate_channels(
            time_spent_rec, sections, split_interval, time_func, temp_user, len(core_func)))

    return pc
Ejemplo n.º 9
0
def all(user, groupby='week', summary='default', split_week=False, split_day=False, attributes=True, flatten=False):
    """
    Returns a dictionary containing all bandicoot indicators for the user,
    as well as reporting variables.

    The reporting variables include:

    * the path of files containing the antennas and attributes,
    * the current version of bandicoot,
    * the *groupby* method (``'week'`` or ``None``) and the day/night, weekday/weekend filters,
    * the date and time for the first and last records,
    * the range of hours used to detect interactions at night, and the weekend range,
    * the number of bins if the records are grouped weekly,
    * the binary properties ``has_call``, ``has_text``, ``has_home``,
    * the percentage of records missing antennas, and antennas missing (lat, lon) locations,
    * the percentage of contacts not in the network, as well as interactions (for calls, texts, and call durations),
    * the total number of records for the user

    We also include a last set of reporting variables, for the records ignored
    at the loading, due to faulty or incorrect values:

    .. code-block:: python

        {
            'all': 0,
            'interaction': 0,
            'direction': 0,
            'correspondent_id': 0,
            'datetime': 0,
            'call_duration': 0
        }

    with the total number of records ignored (key ``'all'``), as well as the
    number of records with faulty values for each column.

    Each indicator is stored under the ``__name__`` of its function and is
    called with ``groupby``, ``summary``, ``split_week`` and ``split_day``;
    a call that raises ``ValueError`` is retried without ``summary``. If
    ``attributes`` is true and ``user.attributes`` is not an empty dict, it
    is added under ``'attributes'``. If ``flatten`` is ``True``, the result
    is passed through the module-level ``flatten`` function before being
    returned.
    """

    # Warn the user if they are selecting weekly and there's only one week.
    # print() with a single argument behaves the same on Python 2 and 3.
    if groupby == 'week':
        if len(set(r.datetime.isocalendar()[:2] for r in user.records)) <= 1:
            print(warning_str('Grouping by week, but all data is from the same week!'))

    scalar_type = 'distribution_scalar' if groupby == 'week' else 'scalar'
    summary_type = 'distribution_summarystats' if groupby == 'week' else 'summarystats'

    # partial objects have no __name__; it is set because the name is used
    # as the key of the indicator in the returned dictionary.
    number_of_interactions_in = partial(bc.individual.number_of_interactions, direction='in')
    number_of_interactions_in.__name__ = 'number_of_interaction_in'
    number_of_interactions_out = partial(bc.individual.number_of_interactions, direction='out')
    number_of_interactions_out.__name__ = 'number_of_interaction_out'

    functions = [
        (bc.individual.active_days, scalar_type),
        (bc.individual.number_of_contacts, scalar_type),
        (bc.individual.call_duration, summary_type),
        (bc.individual.percent_nocturnal, scalar_type),
        (bc.individual.percent_initiated_conversations, scalar_type),
        (bc.individual.percent_initiated_interactions, scalar_type),
        (bc.individual.response_delay_text, summary_type),
        (bc.individual.response_rate_text, scalar_type),
        (bc.individual.entropy_of_contacts, scalar_type),
        (bc.individual.balance_of_contacts, summary_type),
        (bc.individual.interactions_per_contact, summary_type),
        (bc.individual.interevent_time, summary_type),
        (bc.individual.percent_pareto_interactions, scalar_type),
        (bc.individual.percent_pareto_durations, scalar_type),
        (bc.individual.number_of_interactions, scalar_type),
        (number_of_interactions_in, scalar_type),
        (number_of_interactions_out, scalar_type),
        (bc.spatial.number_of_antennas, scalar_type),
        (bc.spatial.entropy_of_antennas, scalar_type),
        (bc.spatial.percent_at_home, scalar_type),
        (bc.spatial.radius_of_gyration, scalar_type),
        (bc.spatial.frequent_antennas, scalar_type)
    ]

    # Materialise the groups so they can be counted.
    groups = [list(g) for g in group_records(user, groupby=groupby)]

    reporting = OrderedDict([
        ('antennas_path', user.antennas_path),
        ('attributes_path', user.attributes_path),
        ('version', bc.__version__),
        ('groupby', groupby),
        ('split_week', split_week),
        ('split_day', split_day),
        # A falsy start/end time is reported as is instead of stringified.
        ('start_time', user.start_time and str(user.start_time)),
        ('end_time', user.end_time and str(user.end_time)),
        ('night_start', str(user.night_start)),
        ('night_end', str(user.night_end)),
        ('weekend', user.weekend),
        ('bins', len(groups)),
        ('has_call', user.has_call),
        ('has_text', user.has_text),
        ('has_home', user.has_home),
        ('percent_records_missing_location', bc.helper.tools.percent_records_missing_location(user)),
        ('antennas_missing_locations', bc.helper.tools.antennas_missing_locations(user)),
        ('percent_outofnetwork_calls', user.percent_outofnetwork_calls),
        ('percent_outofnetwork_texts', user.percent_outofnetwork_texts),
        ('percent_outofnetwork_contacts', user.percent_outofnetwork_contacts),
        ('percent_outofnetwork_call_durations', user.percent_outofnetwork_call_durations),
    ])

    if user.records is not None:
        reporting['number_of_records'] = len(user.records)
    else:
        reporting['number_of_records'] = 0.

    if user.ignored_records is not None:
        reporting['ignored_records'] = user.ignored_records

    returned = OrderedDict([
        ('name', user.name),
        ('reporting', reporting)
    ])

    for fun, datatype in functions:
        try:
            metric = fun(user, groupby=groupby, summary=summary, datatype=datatype, split_week=split_week, split_day=split_day)
        except ValueError:
            # Retry without the summary argument.
            metric = fun(user, groupby=groupby, datatype=datatype, split_week=split_week, split_day=split_day)

        returned[fun.__name__] = metric

    if attributes and user.attributes != {}:
        returned['attributes'] = user.attributes

    if flatten is True:
        # The 'flatten' parameter shadows the module-level function of the
        # same name, hence the lookup through globals().
        return globals()['flatten'](returned)

    return returned
Ejemplo n.º 10
0
def all(user, groupby='week', summary='default', attributes=True, flatten=False):
    """
    Gather every bandicoot indicator for the user into one ordered dictionary.

    The dictionary starts with the user's name and a 'reporting' entry
    describing the user and this run, followed by one entry per indicator
    keyed by the indicator function's ``__name__``.  ``user.attributes`` is
    appended when ``attributes`` is true and the attributes are not an empty
    dict.  With ``flatten`` set to True, the dictionary is handed to the
    module-level ``flatten`` function and that function's result is returned.
    """

    if groupby == 'week':
        scalar_type = 'distribution_scalar'
        summary_type = 'distribution_summarystats'
    else:
        scalar_type = 'scalar'
        summary_type = 'summarystats'

    def directed_interactions(direction, label):
        # partial objects have no __name__; the label becomes the result key.
        counter = partial(bc.individual.number_of_interactions, direction=direction)
        counter.__name__ = label
        return counter

    inbound = directed_interactions('in', 'number_of_interaction_in')
    outbound = directed_interactions('out', 'number_of_interaction_out')

    individual_indicators = [
        (bc.individual.active_days, scalar_type),
        (bc.individual.number_of_contacts, scalar_type),
        (bc.individual.call_duration, summary_type),
        (bc.individual.percent_nocturnal, scalar_type),
        (bc.individual.percent_initiated_conversation, scalar_type),
        (bc.individual.percent_initiated_interactions, scalar_type),
        (bc.individual.response_delay_text, summary_type),
        (bc.individual.response_rate_text, scalar_type),
        (bc.individual.entropy_of_contacts, scalar_type),
        (bc.individual.balance_contacts, summary_type),
        (bc.individual.interactions_per_contact, summary_type),
        (bc.individual.interevents_time, summary_type),
        (bc.individual.number_of_contacts_xpercent_interactions, scalar_type),
        (bc.individual.number_of_contacts_xpercent_durations, scalar_type),
        (bc.individual.number_of_interactions, scalar_type),
        (inbound, scalar_type),
        (outbound, scalar_type),
    ]
    spatial_indicators = [
        (bc.spatial.number_of_places, scalar_type),
        (bc.spatial.entropy_places, scalar_type),
        (bc.spatial.percent_at_home, scalar_type),
        (bc.spatial.radius_of_gyration, scalar_type),
        (bc.spatial.frequent_locations, scalar_type),
    ]

    grouped = []
    for chunk in group_records(user, groupby=groupby):
        grouped.append(list(chunk))

    report_items = [
        ('antennas_path', user.antennas_path),
        ('attributes_path', user.attributes_path),
        ('version', bc.__version__),
        ('groupby', groupby),
        # A falsy start/end time is reported as is instead of stringified.
        ('start_time', user.start_time and str(user.start_time)),
        ('end_time', user.end_time and str(user.end_time)),
        ('bins', len(grouped)),
        ('has_call', user.has_call),
        ('has_text', user.has_text),
        ('has_home', user.has_home),
        ('percent_records_missing_location', bc.helper.tools.percent_records_missing_location(user)),
        ('antennas_missing_locations', bc.helper.tools.antennas_missing_locations(user)),
        ('percent_outofnetwork_calls', user.percent_outofnetwork_calls),
        ('percent_outofnetwork_texts', user.percent_outofnetwork_texts),
        ('percent_outofnetwork_contacts', user.percent_outofnetwork_contacts),
        ('percent_outofnetwork_call_durations', user.percent_outofnetwork_call_durations),
    ]

    if user.records is None:
        report_items.append(('number_of_records', 0.))
    else:
        report_items.append(('number_of_records', sum(len(chunk) for chunk in grouped)))

    if user.ignored_records is not None:
        report_items.append(('ignored_records', user.ignored_records))

    output = OrderedDict()
    output['name'] = user.name
    output['reporting'] = OrderedDict(report_items)

    for indicator, datatype in individual_indicators + spatial_indicators:
        common = {'groupby': groupby, 'datatype': datatype}
        try:
            value = indicator(user, summary=summary, **common)
        except ValueError:
            # Retry without the summary argument.
            value = indicator(user, **common)
        output[indicator.__name__] = value

    if attributes and user.attributes != {}:
        output['attributes'] = user.attributes

    if flatten is not True:
        return output

    # The 'flatten' parameter shadows the module-level function.
    return globals()['flatten'](output)