def _group_set(self, method, interaction):
    """Return the set of the user's records after filtering and grouping.

    Records are first filtered by *interaction*, then grouped with the
    given *method*; the groups are flattened back into a single set.
    """
    records = bc.helper.group.filter_user(self.user, interaction=interaction)
    grouped = group_records(records, groupby=method)
    return {record for bucket in grouped for record in bucket}
def _group_set(self, method, interaction):
    """Return the set of the user's records after filtering and grouping.

    Records matching *interaction* are grouped with *method*, and all
    groups are merged back into one flat set.
    """
    flattened = set()
    filtered = bc.helper.group.filter_user(self.user, interaction=interaction)
    for bucket in group_records(filtered, groupby=method):
        flattened.update(bucket)
    return flattened
def all(user, groupby='week', summary='default', attributes=True, flatten=False):
    """
    Returns a dictionary containing all bandicoot indicators for the user,
    as well as reporting variables.

    Parameters
    ----------
    user : User
        The user to compute indicators for.
    groupby : 'week' or None
        If 'week', indicators are computed per week and aggregated into
        distributions; otherwise over the whole timeframe.
    summary : str
        Summary mode forwarded to indicators that accept it.
    attributes : bool
        Whether to include the user's attributes in the result.
    flatten : bool
        If True, the nested result is flattened into a single-level dict.
    """
    # 'distribution_*' datatypes aggregate per-week values; the plain
    # variants are used when records are not grouped by week.
    scalar_type = 'distribution_scalar' if groupby == 'week' else 'scalar'
    summary_type = 'distribution_summarystats' if groupby == 'week' else 'summarystats'

    # Directional variants of number_of_interactions. partial objects have
    # no __name__, so one is set explicitly: it becomes the key under which
    # the metric is stored in the returned dict.
    number_of_interactions_in = partial(bc.individual.number_of_interactions, direction='in')
    number_of_interactions_in.__name__ = 'number_of_interaction_in'
    number_of_interactions_out = partial(bc.individual.number_of_interactions, direction='out')
    number_of_interactions_out.__name__ = 'number_of_interaction_out'

    # (indicator function, datatype) pairs evaluated for the user.
    functions = [
        (bc.individual.active_days, scalar_type),
        (bc.individual.number_of_contacts, scalar_type),
        (bc.individual.call_duration, summary_type),
        (bc.individual.percent_nocturnal, scalar_type),
        (bc.individual.percent_initiated_conversation, scalar_type),
        (bc.individual.percent_initiated_interactions, scalar_type),
        (bc.individual.response_delay_text, summary_type),
        (bc.individual.response_rate_text, scalar_type),
        (bc.individual.entropy_of_contacts, scalar_type),
        (bc.individual.balance_contacts, summary_type),
        (bc.individual.interactions_per_contact, summary_type),
        (bc.individual.interevents_time, summary_type),
        (bc.individual.number_of_contacts_xpercent_interactions, scalar_type),
        (bc.individual.number_of_contacts_xpercent_durations, scalar_type),
        (bc.individual.number_of_interactions, scalar_type),
        (number_of_interactions_in, scalar_type),
        (number_of_interactions_out, scalar_type),
        (bc.spatial.number_of_places, scalar_type),
        (bc.spatial.entropy_places, scalar_type),
        (bc.spatial.percent_at_home, scalar_type),
        (bc.spatial.radius_of_gyration, scalar_type),
        (bc.spatial.frequent_locations, scalar_type)
    ]

    # Materialize the grouped records; used only for counting bins/records.
    groups = [[r for r in g] for g in group_records(user, groupby=groupby)]

    # Reporting variables describing the input data and load-time context.
    reporting = OrderedDict([
        ('antennas_path', user.antennas_path),
        ('attributes_path', user.attributes_path),
        ('version', bc.__version__),
        ('groupby', groupby),
        ('start_time', user.start_time and str(user.start_time)),
        ('end_time', user.end_time and str(user.end_time)),
        ('bins', len(groups)),
        ('has_call', user.has_call),
        ('has_text', user.has_text),
        ('has_home', user.has_home),
        ('percent_records_missing_location', bc.helper.tools.percent_records_missing_location(user)),
        ('antennas_missing_locations', bc.helper.tools.antennas_missing_locations(user)),
        ('percent_outofnetwork_calls', user.percent_outofnetwork_calls),
        ('percent_outofnetwork_texts', user.percent_outofnetwork_texts),
        ('percent_outofnetwork_contacts', user.percent_outofnetwork_contacts),
        ('percent_outofnetwork_call_durations', user.percent_outofnetwork_call_durations),
    ])

    if user.records is not None:
        reporting['number_of_records'] = sum(map(len, groups))
    else:
        reporting['number_of_records'] = 0.

    if user.ignored_records is not None:
        reporting['ignored_records'] = user.ignored_records

    returned = OrderedDict([('name', user.name), ('reporting', reporting)])

    for fun, datatype in functions:
        try:
            metric = fun(user, groupby=groupby, summary=summary, datatype=datatype)
        except ValueError:
            # Some indicators do not accept a 'summary' argument; retry
            # without it.
            metric = fun(user, groupby=groupby, datatype=datatype)
        returned[fun.__name__] = metric

    if attributes and user.attributes != {}:
        returned['attributes'] = user.attributes

    if flatten is True:
        # The 'flatten' parameter shadows the module-level flatten helper,
        # hence the lookup through globals().
        return globals()['flatten'](returned)

    return returned
def all(user, groupby='week', summary='default', network=False, split_week=False, split_day=False, attributes=True, flatten=False):
    """
    Returns a dictionary containing all bandicoot indicators for the user,
    as well as reporting variables.

    Relevant indicators are defined in the 'individual' and 'spatial'
    modules.

    Reporting variables
    -------------------
    - antennas_path: path of the CSV file containing antennas locations
    - attributes_path: directory where attributes were loaded
    - version: bandicoot version
    - groupby: grouping method ('week' or None)
    - split_week: whether or not indicators are also computed for weekday and weekend
    - split_day: whether or not indicators are also computed for day and night
    - start_time / end_time: time of the first and last records
    - night_start, night_end: start and end time to define nights
    - weekend: days used to define the weekend (``[6, 7]`` by default, where 1 is Monday)
    - bins: number of weeks if the records are grouped
    - has_call / has_text: whether or not records include calls / texts
    - has_home: whether or not a home location has been found
    - has_network: whether or not correspondents were loaded
    - percent_records_missing_location: percentage of records without location
    - antennas_missing_locations: number of antennas missing a location
    - percent_outofnetwork_calls / _texts / _contacts / _call_durations:
      percentage of interactions with correspondents not loaded in the network
    - number_of_records: total number of records

    We also include a last set of reporting variables, for the records
    ignored at load-time. Values can be ignored due to missing or
    inconsistent fields (e.g., not including a valid 'datetime' value).

    .. code-block:: python

        {
            'all': 0,
            'interaction': 0,
            'direction': 0,
            'correspondent_id': 0,
            'datetime': 0,
            'call_duration': 0
        }

    with the total number of records ignored (key ``'all'``), as well as the
    number of records with faulty values for each column.
    """
    # Warn the user if they are selecting weekly and there's only one week
    if groupby is not None:
        if len(set(DATE_GROUPERS[groupby](r.datetime) for r in user.records)) <= 1:
            print warning_str('Grouping by week, but all data is from the same week!')

    # 'distribution_*' datatypes aggregate per-week values; plain variants
    # are used when records are not grouped by week.
    scalar_type = 'distribution_scalar' if groupby == 'week' else 'scalar'
    summary_type = 'distribution_summarystats' if groupby == 'week' else 'summarystats'

    # Directional variants of number_of_interactions. partial objects have
    # no __name__, so one is set explicitly: it becomes the key under which
    # the metric is stored in the returned dict.
    number_of_interactions_in = partial(bc.individual.number_of_interactions, direction='in')
    number_of_interactions_in.__name__ = 'number_of_interaction_in'
    number_of_interactions_out = partial(bc.individual.number_of_interactions, direction='out')
    number_of_interactions_out.__name__ = 'number_of_interaction_out'

    # (indicator function, datatype) pairs evaluated for the user.
    functions = [
        (bc.individual.active_days, scalar_type),
        (bc.individual.number_of_contacts, scalar_type),
        (bc.individual.call_duration, summary_type),
        (bc.individual.percent_nocturnal, scalar_type),
        (bc.individual.percent_initiated_conversations, scalar_type),
        (bc.individual.percent_initiated_interactions, scalar_type),
        (bc.individual.response_delay_text, summary_type),
        (bc.individual.response_rate_text, scalar_type),
        (bc.individual.entropy_of_contacts, scalar_type),
        (bc.individual.balance_of_contacts, summary_type),
        (bc.individual.interactions_per_contact, summary_type),
        (bc.individual.interevent_time, summary_type),
        (bc.individual.percent_pareto_interactions, scalar_type),
        (bc.individual.percent_pareto_durations, scalar_type),
        (bc.individual.number_of_interactions, scalar_type),
        (number_of_interactions_in, scalar_type),
        (number_of_interactions_out, scalar_type),
        (bc.spatial.number_of_antennas, scalar_type),
        (bc.spatial.entropy_of_antennas, scalar_type),
        (bc.spatial.percent_at_home, scalar_type),
        (bc.spatial.radius_of_gyration, scalar_type),
        (bc.spatial.frequent_antennas, scalar_type),
        (bc.spatial.churn_rate, scalar_type)
    ]

    # Network indicators are only computed on demand (see 'network' flag
    # below); they take the user alone, with no grouping options.
    network_functions = [
        bc.network.clustering_coefficient_unweighted,
        bc.network.clustering_coefficient_weighted,
        bc.network.assortativity_attributes,
        bc.network.assortativity_indicators
    ]

    # Materialize the grouped records; used only for counting bins.
    groups = [[r for r in g] for g in group_records(user, groupby=groupby)]

    # Reporting variables describing the input data and load-time context.
    reporting = OrderedDict([
        ('antennas_path', user.antennas_path),
        ('attributes_path', user.attributes_path),
        ('version', bc.__version__),
        ('groupby', groupby),
        ('split_week', split_week),
        ('split_day', split_day),
        ('start_time', user.start_time and str(user.start_time)),
        ('end_time', user.end_time and str(user.end_time)),
        ('night_start', str(user.night_start)),
        ('night_end', str(user.night_end)),
        ('weekend', user.weekend),
        ('bins', len(groups)),
        ('has_call', user.has_call),
        ('has_text', user.has_text),
        ('has_home', user.has_home),
        ('has_network', user.has_network),
        ('percent_records_missing_location', bc.helper.tools.percent_records_missing_location(user)),
        ('antennas_missing_locations', bc.helper.tools.antennas_missing_locations(user)),
        ('percent_outofnetwork_calls', user.percent_outofnetwork_calls),
        ('percent_outofnetwork_texts', user.percent_outofnetwork_texts),
        ('percent_outofnetwork_contacts', user.percent_outofnetwork_contacts),
        ('percent_outofnetwork_call_durations', user.percent_outofnetwork_call_durations),
    ])

    if user.records is not None:
        reporting['number_of_records'] = len(user.records)
    else:
        reporting['number_of_records'] = 0.

    if user.ignored_records is not None:
        reporting['ignored_records'] = user.ignored_records

    returned = OrderedDict([
        ('name', user.name),
        ('reporting', reporting)
    ])

    for fun, datatype in functions:
        try:
            metric = fun(user, groupby=groupby, summary=summary, datatype=datatype, split_week=split_week, split_day=split_day)
        except ValueError:
            # Some indicators do not accept a 'summary' argument; retry
            # without it.
            metric = fun(user, groupby=groupby, datatype=datatype, split_week=split_week, split_day=split_day)
        returned[fun.__name__] = metric

    if network and user.has_network:
        for fun in network_functions:
            returned[fun.__name__] = fun(user)

    if attributes and user.attributes != {}:
        returned['attributes'] = user.attributes

    if flatten is True:
        # The 'flatten' parameter shadows the module-level flatten helper,
        # hence the lookup through globals().
        return globals()['flatten'](returned)

    return returned
def _group_set(self, method, interaction):
    """Return the set of the user's records grouped by *method*.

    Records matching *interaction* are grouped with the given method,
    then the groups are flattened back into a single set.

    Bug fix: ``groupby`` previously received the literal string
    ``'method'`` instead of the *method* argument, so the caller's
    grouping choice was silently ignored.
    """
    chunks = group_records(self.user, groupby=method, interaction=interaction)
    new_records = set(r for c in chunks for r in c)
    return new_records
def _group_set(self, method, interaction):
    """Return the set of the user's records grouped by *method*.

    Records matching *interaction* are grouped with the given method,
    then the groups are flattened back into a single set.
    """
    result = set()
    for group in group_records(self.user, groupby=method, interaction=interaction):
        result.update(group)
    return result
def create_punchcards(user, split_interval=60):
    """
    Computes raw indicators (e.g. number of outgoing calls) for intervals
    of ~1 hour across each week of user data. These "punchcards" are
    returned in a nested list with each sublist containing
    [user.name, channel, weekday, section, value].

    Parameters
    ----------
    user : object
        The user to create punchcards for.
    split_interval : int
        The interval in minutes for which each indicator is computed.
        Defaults to 60. Needs to be able to split a day (24*60 minutes)
        evenly.

    Raises
    ------
    ValueError
        If split_interval does not evenly divide a day.
    """
    # Bug fix: the previous guard, float(24 * 60 / split_interval).is_integer(),
    # is vacuously true under Python 2 integer division, so invalid
    # intervals were never rejected. A modulo test is correct on both
    # Python 2 and Python 3.
    if (24 * 60) % split_interval != 0:
        raise ValueError(
            "The minute interval set for the punchcard structure does not evenly divide the day!"
        )

    # Per-channel indicator functions, each fixed to one direction and
    # interaction type; summary=None yields raw (unsummarized) values.
    contacts_in = partial(bc.individual.number_of_contacts,
                          direction='in', interaction='callandtext', summary=None)
    contacts_out = partial(bc.individual.number_of_contacts,
                           direction='out', interaction='callandtext', summary=None)
    calls_in = partial(bc.individual.number_of_interactions,
                       direction='in', interaction='call', summary=None)
    calls_out = partial(bc.individual.number_of_interactions,
                        direction='out', interaction='call', summary=None)
    texts_in = partial(bc.individual.number_of_interactions,
                       direction='in', interaction='text', summary=None)
    texts_out = partial(bc.individual.number_of_interactions,
                        direction='out', interaction='text', summary=None)
    time_spent_in = partial(bc.individual.call_duration,
                            direction='in', interaction='call', summary=None)
    time_spent_out = partial(bc.individual.call_duration,
                             direction='out', interaction='call', summary=None)

    core_func = [(contacts_in, "scalar"), (contacts_out, "scalar"),
                 (calls_in, "scalar"), (calls_out, "scalar"),
                 (texts_in, "scalar"), (texts_out, "scalar")]
    time_func = [(time_spent_in, "summarystats"), (time_spent_out, "summarystats")]

    pc = []
    # Section boundaries (in minutes) across one full week; '//' keeps the
    # count integral on both Python 2 and Python 3.
    sections = [(i + 1) * split_interval
                for i in range(7 * 24 * 60 // split_interval)]
    temp_user = _extract_user_info(user)

    for grouped_records in group_records(user, groupby='week'):
        week_records = list(grouped_records)
        time_spent_rec = _transform_to_time_spent(week_records, split_interval, sections)
        pc.extend(
            _calculate_channels(week_records, sections, split_interval, core_func, temp_user))
        pc.extend(
            _calculate_channels(time_spent_rec, sections, split_interval, time_func, temp_user,
                                len(core_func)))
    return pc
def create_punchcards(user, split_interval=60):
    """
    Computes raw indicators (e.g. number of outgoing calls) for intervals
    of ~1 hour across each week of user data. These "punchcards" are
    returned in a nested list with each sublist containing
    [user.name, channel, weekday, section, value].

    Parameters
    ----------
    user : object
        The user to create punchcards for.
    split_interval : int
        The interval in minutes for which each indicator is computed.
        Defaults to 60. Needs to be able to split a day (24*60 minutes)
        evenly.

    Raises
    ------
    ValueError
        If split_interval does not evenly divide a day.
    """
    # Bug fix: the previous guard, float(24 * 60 / split_interval).is_integer(),
    # is vacuously true under Python 2 integer division, so invalid
    # intervals were never rejected. A modulo test is correct on both
    # Python 2 and Python 3.
    if (24 * 60) % split_interval != 0:
        raise ValueError(
            "The minute interval set for the punchcard structure does not evenly divide the day!")

    # Per-channel indicator functions, each fixed to one direction and
    # interaction type; summary=None yields raw (unsummarized) values.
    contacts_in = partial(bc.individual.number_of_contacts,
                          direction='in', interaction='callandtext', summary=None)
    contacts_out = partial(bc.individual.number_of_contacts,
                           direction='out', interaction='callandtext', summary=None)
    calls_in = partial(bc.individual.number_of_interactions,
                       direction='in', interaction='call', summary=None)
    calls_out = partial(bc.individual.number_of_interactions,
                        direction='out', interaction='call', summary=None)
    texts_in = partial(bc.individual.number_of_interactions,
                       direction='in', interaction='text', summary=None)
    texts_out = partial(bc.individual.number_of_interactions,
                        direction='out', interaction='text', summary=None)
    time_spent_in = partial(bc.individual.call_duration,
                            direction='in', interaction='call', summary=None)
    time_spent_out = partial(bc.individual.call_duration,
                             direction='out', interaction='call', summary=None)

    core_func = [
        (contacts_in, "scalar"),
        (contacts_out, "scalar"),
        (calls_in, "scalar"),
        (calls_out, "scalar"),
        (texts_in, "scalar"),
        (texts_out, "scalar")
    ]
    time_func = [
        (time_spent_in, "summarystats"),
        (time_spent_out, "summarystats")
    ]

    pc = []
    # Section boundaries (in minutes) across one full week; '//' keeps the
    # count integral on both Python 2 and Python 3.
    sections = [
        (i + 1) * split_interval for i in range(7 * 24 * 60 // split_interval)]
    temp_user = _extract_user_info(user)

    for grouped_records in group_records(user, groupby='week'):
        week_records = list(grouped_records)
        time_spent_rec = _transform_to_time_spent(
            week_records, split_interval, sections)
        pc.extend(_calculate_channels(
            week_records, sections, split_interval, core_func, temp_user))
        pc.extend(_calculate_channels(
            time_spent_rec, sections, split_interval, time_func, temp_user,
            len(core_func)))
    return pc
def all(user, groupby='week', summary='default', split_week=False, split_day=False, attributes=True, flatten=False):
    """
    Returns a dictionary containing all bandicoot indicators for the user,
    as well as reporting variables.

    The reporting variables include:

    * the path of files containing the antennas and attributes,
    * the current version of bandicoot,
    * the *groupby* method (``'week'`` or ``None``) and the day/night,
      weekday/weekend filters,
    * the date and time for the first and last records,
    * the range of hours used to detect interactions at night, and the
      weekend range,
    * the number of bins if the records are grouped weekly,
    * the binary properties ``has_call``, ``has_text``, ``has_home``,
    * the percentage of records missing antennas, and antennas missing
      (lat, lon) locations,
    * the percentage of contacts not in the network, as well as
      interactions (for calls, texts, and call durations),
    * the total number of records for the user

    We also include a last set of reporting variables, for the records
    ignored at the loading, due to faulty or incorrect values:

    .. code-block:: python

        {
            'all': 0,
            'interaction': 0,
            'direction': 0,
            'correspondent_id': 0,
            'datetime': 0,
            'call_duration': 0
        }

    with the total number of records ignored (key ``'all'``), as well as the
    number of records with faulty values for each column.
    """
    # Warn the user if they are selecting weekly and there's only one week
    if groupby == 'week':
        # isocalendar()[:2] is (ISO year, ISO week number).
        if len(set(r.datetime.isocalendar()[:2] for r in user.records)) <= 1:
            print warning_str('Grouping by week, but all data is from the same week!')

    # 'distribution_*' datatypes aggregate per-week values; plain variants
    # are used when records are not grouped by week.
    scalar_type = 'distribution_scalar' if groupby == 'week' else 'scalar'
    summary_type = 'distribution_summarystats' if groupby == 'week' else 'summarystats'

    # Directional variants of number_of_interactions. partial objects have
    # no __name__, so one is set explicitly: it becomes the key under which
    # the metric is stored in the returned dict.
    number_of_interactions_in = partial(bc.individual.number_of_interactions, direction='in')
    number_of_interactions_in.__name__ = 'number_of_interaction_in'
    number_of_interactions_out = partial(bc.individual.number_of_interactions, direction='out')
    number_of_interactions_out.__name__ = 'number_of_interaction_out'

    # (indicator function, datatype) pairs evaluated for the user.
    functions = [
        (bc.individual.active_days, scalar_type),
        (bc.individual.number_of_contacts, scalar_type),
        (bc.individual.call_duration, summary_type),
        (bc.individual.percent_nocturnal, scalar_type),
        (bc.individual.percent_initiated_conversations, scalar_type),
        (bc.individual.percent_initiated_interactions, scalar_type),
        (bc.individual.response_delay_text, summary_type),
        (bc.individual.response_rate_text, scalar_type),
        (bc.individual.entropy_of_contacts, scalar_type),
        (bc.individual.balance_of_contacts, summary_type),
        (bc.individual.interactions_per_contact, summary_type),
        (bc.individual.interevent_time, summary_type),
        (bc.individual.percent_pareto_interactions, scalar_type),
        (bc.individual.percent_pareto_durations, scalar_type),
        (bc.individual.number_of_interactions, scalar_type),
        (number_of_interactions_in, scalar_type),
        (number_of_interactions_out, scalar_type),
        (bc.spatial.number_of_antennas, scalar_type),
        (bc.spatial.entropy_of_antennas, scalar_type),
        (bc.spatial.percent_at_home, scalar_type),
        (bc.spatial.radius_of_gyration, scalar_type),
        (bc.spatial.frequent_antennas, scalar_type)
    ]

    # Materialize the grouped records; used only for counting bins.
    groups = [[r for r in g] for g in group_records(user, groupby=groupby)]

    # Reporting variables describing the input data and load-time context.
    reporting = OrderedDict([
        ('antennas_path', user.antennas_path),
        ('attributes_path', user.attributes_path),
        ('version', bc.__version__),
        ('groupby', groupby),
        ('split_week', split_week),
        ('split_day', split_day),
        ('start_time', user.start_time and str(user.start_time)),
        ('end_time', user.end_time and str(user.end_time)),
        ('night_start', str(user.night_start)),
        ('night_end', str(user.night_end)),
        ('weekend', user.weekend),
        ('bins', len(groups)),
        ('has_call', user.has_call),
        ('has_text', user.has_text),
        ('has_home', user.has_home),
        ('percent_records_missing_location', bc.helper.tools.percent_records_missing_location(user)),
        ('antennas_missing_locations', bc.helper.tools.antennas_missing_locations(user)),
        ('percent_outofnetwork_calls', user.percent_outofnetwork_calls),
        ('percent_outofnetwork_texts', user.percent_outofnetwork_texts),
        ('percent_outofnetwork_contacts', user.percent_outofnetwork_contacts),
        ('percent_outofnetwork_call_durations', user.percent_outofnetwork_call_durations),
    ])

    if user.records is not None:
        reporting['number_of_records'] = len(user.records)
    else:
        reporting['number_of_records'] = 0.

    if user.ignored_records is not None:
        reporting['ignored_records'] = user.ignored_records

    returned = OrderedDict([
        ('name', user.name),
        ('reporting', reporting)
    ])

    for fun, datatype in functions:
        try:
            metric = fun(user, groupby=groupby, summary=summary, datatype=datatype, split_week=split_week, split_day=split_day)
        except ValueError:
            # Some indicators do not accept a 'summary' argument; retry
            # without it.
            metric = fun(user, groupby=groupby, datatype=datatype, split_week=split_week, split_day=split_day)
        returned[fun.__name__] = metric

    if attributes and user.attributes != {}:
        returned['attributes'] = user.attributes

    if flatten is True:
        # The 'flatten' parameter shadows the module-level flatten helper,
        # hence the lookup through globals().
        return globals()['flatten'](returned)

    return returned
def all(user, groupby='week', summary='default', attributes=True, flatten=False):
    """
    Returns a dictionary containing all bandicoot indicators for the user,
    as well as reporting variables.

    Parameters
    ----------
    user : User
        The user to compute indicators for.
    groupby : 'week' or None
        If 'week', indicators are computed per week and aggregated into
        distributions; otherwise over the whole timeframe.
    summary : str
        Summary mode forwarded to indicators that accept it.
    attributes : bool
        Whether to include the user's attributes in the result.
    flatten : bool
        If True, the nested result is flattened into a single-level dict.
    """
    # 'distribution_*' datatypes aggregate per-week values; the plain
    # variants are used when records are not grouped by week.
    scalar_type = 'distribution_scalar' if groupby == 'week' else 'scalar'
    summary_type = 'distribution_summarystats' if groupby == 'week' else 'summarystats'

    # Directional variants of number_of_interactions. partial objects have
    # no __name__, so one is set explicitly: it becomes the key under which
    # the metric is stored in the returned dict.
    number_of_interactions_in = partial(bc.individual.number_of_interactions, direction='in')
    number_of_interactions_in.__name__ = 'number_of_interaction_in'
    number_of_interactions_out = partial(bc.individual.number_of_interactions, direction='out')
    number_of_interactions_out.__name__ = 'number_of_interaction_out'

    # (indicator function, datatype) pairs evaluated for the user.
    functions = [
        (bc.individual.active_days, scalar_type),
        (bc.individual.number_of_contacts, scalar_type),
        (bc.individual.call_duration, summary_type),
        (bc.individual.percent_nocturnal, scalar_type),
        (bc.individual.percent_initiated_conversation, scalar_type),
        (bc.individual.percent_initiated_interactions, scalar_type),
        (bc.individual.response_delay_text, summary_type),
        (bc.individual.response_rate_text, scalar_type),
        (bc.individual.entropy_of_contacts, scalar_type),
        (bc.individual.balance_contacts, summary_type),
        (bc.individual.interactions_per_contact, summary_type),
        (bc.individual.interevents_time, summary_type),
        (bc.individual.number_of_contacts_xpercent_interactions, scalar_type),
        (bc.individual.number_of_contacts_xpercent_durations, scalar_type),
        (bc.individual.number_of_interactions, scalar_type),
        (number_of_interactions_in, scalar_type),
        (number_of_interactions_out, scalar_type),
        (bc.spatial.number_of_places, scalar_type),
        (bc.spatial.entropy_places, scalar_type),
        (bc.spatial.percent_at_home, scalar_type),
        (bc.spatial.radius_of_gyration, scalar_type),
        (bc.spatial.frequent_locations, scalar_type)
    ]

    # Materialize the grouped records; used only for counting bins/records.
    groups = [[r for r in g] for g in group_records(user, groupby=groupby)]

    # Reporting variables describing the input data and load-time context.
    reporting = OrderedDict([
        ('antennas_path', user.antennas_path),
        ('attributes_path', user.attributes_path),
        ('version', bc.__version__),
        ('groupby', groupby),
        ('start_time', user.start_time and str(user.start_time)),
        ('end_time', user.end_time and str(user.end_time)),
        ('bins', len(groups)),
        ('has_call', user.has_call),
        ('has_text', user.has_text),
        ('has_home', user.has_home),
        ('percent_records_missing_location', bc.helper.tools.percent_records_missing_location(user)),
        ('antennas_missing_locations', bc.helper.tools.antennas_missing_locations(user)),
        ('percent_outofnetwork_calls', user.percent_outofnetwork_calls),
        ('percent_outofnetwork_texts', user.percent_outofnetwork_texts),
        ('percent_outofnetwork_contacts', user.percent_outofnetwork_contacts),
        ('percent_outofnetwork_call_durations', user.percent_outofnetwork_call_durations),
    ])

    if user.records is not None:
        reporting['number_of_records'] = sum(map(len, groups))
    else:
        reporting['number_of_records'] = 0.

    if user.ignored_records is not None:
        reporting['ignored_records'] = user.ignored_records

    returned = OrderedDict([
        ('name', user.name),
        ('reporting', reporting)
    ])

    for fun, datatype in functions:
        try:
            metric = fun(user, groupby=groupby, summary=summary, datatype=datatype)
        except ValueError:
            # Some indicators do not accept a 'summary' argument; retry
            # without it.
            metric = fun(user, groupby=groupby, datatype=datatype)
        returned[fun.__name__] = metric

    if attributes and user.attributes != {}:
        returned['attributes'] = user.attributes

    if flatten is True:
        # The 'flatten' parameter shadows the module-level flatten helper,
        # hence the lookup through globals().
        return globals()['flatten'](returned)

    return returned