def setUp(self):
    """Create the user fixtures used by the regression tests.

    The first call changes the working directory to the directory that
    contains this file and records that on ``TestRegressions._dir_changed``
    so the change happens only once; every path below is relative.
    """
    already_moved = getattr(TestRegressions, '_dir_changed', False)
    if not already_moved:
        this_file = os.path.abspath(__file__)
        # Cut the absolute path right before the file name.
        cut = this_file.index(os.path.basename(__file__))
        os.chdir(this_file[:cut])
        TestRegressions._dir_changed = True

    samples_manual = 'samples/manual'
    towers = 'samples/towers.csv'
    attributes_dir = 'samples/attributes'
    recharges_dir = 'samples/manual/recharges'

    self.empty_user = bc.User()
    self.empty_user.attributes['empty'] = True

    self.sample_user = bc.tests.generate_user.sample_user()

    self.network_ego = bc.read_csv(
        'ego', 'samples/network', towers,
        attributes_path=attributes_dir,
        network=True,
        describe=False)

    # Manual users
    self.user_a = bc.read_csv(
        'A', samples_manual, towers,
        recharges_path=recharges_dir,
        network=False,
        describe=False)
    self.user_a_network = bc.read_csv(
        'A', samples_manual, towers,
        attributes_path=attributes_dir,
        network=True,
        describe=False)

    # NOTE(review): 'sample/towers.csv' (no trailing "s") differs from the
    # towers path used above; kept as-is because existing regression
    # baselines may depend on it -- confirm before changing.
    self.user_a_orange = bc.io.read_orange(
        'A_orange', samples_manual, 'sample/towers.csv',
        recharges_path=recharges_dir,
        network=False,
        describe=False)
    self.user_a_orange_network = bc.io.read_orange(
        'A_orange', samples_manual,
        network=True,
        attributes_path=attributes_dir,
        describe=False)
def main():
    """Compute bandicoot indicators for every CDR file and export them.

    Reads its configuration from the module-level ``args`` object:

    * ``args.cdrs_input_dir`` -- directory whose sub-directories hold one
      CDR file per user; the file name is used as the user name.
    * ``args.antennas_input_file`` -- passed to ``bandicoot.read_csv``.
    * ``args.attributes_input_file`` -- optional; when set, each user's
      gender and age are written to a temporary ``key,value`` file that is
      handed to ``bandicoot.read_csv`` as ``attributes_path``.
    * ``args.indicators_output`` -- destination of the indicators CSV.
    * ``args.delete_bad_columns`` -- when set, ``delete_bad_columns`` is run
      on the output file.

    Exits the process through ``sys.exit`` when attributes were requested
    but none exist for a user.
    """
    # Read in attributes for all users.
    if args.attributes_input_file:
        user_attributes = read_user_attributes(args.attributes_input_file)

    indicators = []
    for dir_name in os.listdir(args.cdrs_input_dir):
        cdrs_dir = os.path.join(args.cdrs_input_dir, dir_name)
        if not os.path.isdir(cdrs_dir):
            continue

        for file_name in os.listdir(cdrs_dir):
            cdr_input_file = os.path.join(cdrs_dir, file_name)
            user_name = file_name

            # Should we add attributes to the indicators?
            if args.attributes_input_file:
                if user_name not in user_attributes:
                    sys.exit('Could not find attributes for user ' + user_name)

                gender = user_attributes[user_name][0]
                age = user_attributes[user_name][1] or "None"

                # Text mode: the default 'w+b' rejects str on Python 3.
                # The context manager removes the file even if read_csv
                # raises.
                with tempfile.NamedTemporaryFile(mode='w+') as attributes_file:
                    attributes_file.write("key,value\n")
                    attributes_file.write("gender," + str(gender) + "\n")
                    attributes_file.write("age," + str(age))
                    # The file is re-opened by name below, so the buffered
                    # content has to reach the disk first.
                    attributes_file.flush()

                    user = bandicoot.read_csv(
                        cdr_input_file,
                        args.antennas_input_file,
                        attributes_path=attributes_file.name,
                        describe=False)
            else:
                user = bandicoot.read_csv(cdr_input_file,
                                          args.antennas_input_file,
                                          describe=False)

            # Set the username to the file name.
            user.name = user_name

            # Extended summary also computes skewness and kurtosis.
            user_indicators = bandicoot.utils.all(user,
                                                  weekly=True,
                                                  summary='extended',
                                                  attributes=True)
            indicators.append(user_indicators)

    bandicoot.io.to_csv(indicators, args.indicators_output)

    if args.delete_bad_columns:
        delete_bad_columns(args.indicators_output)
def load_and_compute(f):
    """Return the bandicoot indicators for the records file ``f``.

    The user id is the base name of ``f`` without its last four characters
    (the ``.csv`` suffix). ``records_path`` and ``antenna_file`` are read
    from the enclosing scope. Any exception raised while loading or
    computing is swallowed and reported as
    ``{'name': user_id, 'error': True}``.
    """
    user_id = os.path.basename(f)[:-4]

    try:
        loaded = bc.read_csv(user_id, records_path, antenna_file,
                             describe=False)
        return bc.utils.all(loaded)
    except Exception:
        # Best effort: flag the user instead of aborting the whole run.
        return {'name': user_id, 'error': True}
def test_read_csv_attributes(self):
    """Attributes given through ``attributes_path`` end up on the user."""
    expected_attributes = {
        'gender': 'male',
        'age': '42',
        'is_subscriber': 'True',
        'individual_id': '7atr8f53fg41',
    }

    loaded = bc.read_csv("u_test2", "samples",
                         attributes_path="samples/attributes",
                         describe=False)

    self.assertEqual(loaded.attributes, expected_attributes)
def setUp(self):
    """Create the user fixtures used by the regression tests.

    The first call changes the working directory to the directory that
    contains this file and records that on ``TestRegressions._dir_changed``
    so the change happens only once; every path below is relative. All
    users are loaded with ``warnings=False`` and ``describe=False``.
    """
    if not getattr(TestRegressions, '_dir_changed', False):
        module_path = os.path.abspath(__file__)
        # Everything before the file name is the containing directory.
        prefix_len = module_path.index(os.path.basename(__file__))
        os.chdir(module_path[:prefix_len])
        TestRegressions._dir_changed = True

    # Keyword arguments shared by every loader call below.
    quiet = dict(warnings=False, describe=False)
    manual_dir = 'samples/manual'
    towers_csv = 'samples/towers.csv'
    attributes_dir = 'samples/attributes'

    self.empty_user = bc.User()
    self.empty_user.attributes['empty'] = True

    self.sample_user = bc.tests.generate_user.sample_user()

    self.network_ego = bc.read_csv('ego', 'samples/network', towers_csv,
                                   attributes_path=attributes_dir,
                                   network=True, **quiet)

    # Manual users
    self.user_a = bc.read_csv('A', manual_dir, towers_csv,
                              network=False, **quiet)
    self.user_a_network = bc.read_csv('A', manual_dir, towers_csv,
                                      attributes_path=attributes_dir,
                                      network=True, **quiet)
    self.user_a_orange = bc.io.read_orange('A_orange', manual_dir,
                                           network=False, **quiet)
    self.user_a_orange_network = bc.io.read_orange(
        'A_orange', manual_dir,
        network=True,
        attributes_path=attributes_dir,
        **quiet)
def test_read_csv_no_position(self):
    """The second record of a position-less file has an empty Position."""
    loaded = bc.read_csv("u_test_no_position", "samples", describe=False)

    second_record = loaded.records[1]
    expected = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
        call_duration=0,
        position=Position())

    self.assertEqual(second_record, expected)
def test_read_csv_no_position(self):
    """The second record of a position-less file has an empty Position."""
    loaded = bc.read_csv("u_test_no_position", "samples", describe=False)

    second_record = loaded.records[1]
    when = datetime.datetime(2013, 12, 16, 5, 39, 30)
    expected = Record(interaction='call',
                      direction='in',
                      correspondent_id='770000001',
                      datetime=when,
                      duration=0,
                      position=Position())

    self.assertEqual(second_record, expected)
def test_read_csv_antenna_id_no_places(self):
    """Without an antennas file the record keeps only the antenna id.

    The second record must carry ``Position('13084', None)`` and the
    radius of gyration must contain two ``None`` values.
    """
    user = bc.read_csv("u_test_antennas", "samples", describe=False)

    self.assertEqual(
        user.records[1],
        Record(interaction='call',
               direction='in',
               correspondent_id='770000001',
               datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
               call_duration=0,
               position=Position('13084', None)))

    # On Python 3 ``.values()`` is a view object that never compares equal
    # to a list, so materialise it before comparing.
    radius_values = list(bc.spatial.radius_of_gyration(user).values())
    self.assertEqual(radius_values, [None] * 2)
def test_read_csv_antenna_id(self):
    """With an antennas file the mean radius of gyration is positive."""
    loaded = bc.read_csv("u_test_antennas", "samples",
                         antennas_path="samples/towers.csv",
                         describe=False)

    second_record = loaded.records[1]
    expected = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
        call_duration=0,
        position=Position('13084', None))
    self.assertEqual(second_record, expected)

    gyration = bc.spatial.radius_of_gyration(loaded)
    self.assertGreater(gyration['mean'], 0)
def test_read_csv_antenna_id_no_places(self):
    """Without an antennas file the ungrouped radius of gyration is None."""
    loaded = bc.read_csv("u_test_antennas", "samples", describe=False)

    second_record = loaded.records[1]
    expected_record = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
        duration=0,
        position=Position('13084', None))
    self.assertEqual(second_record, expected_record)

    expected_radius = {'allweek': {'allday': None}}
    radius = bc.spatial.radius_of_gyration(loaded, groupby=None)
    self.assertEqual(radius, expected_radius)
def network_sampling(n, filename, directory=None, snowball=False, user=None):
    """
    Selects a few users and exports a CSV of indicators for them.

    TODO: Returns the network/graph between the selected users.

    Parameters
    ----------
    n : int
        Number of users to select.
    filename : string
        File to export to.
    directory: string
        Directory to select users from if using the default random selection.
    snowball : bool
        Use snowball selection starting from ``user`` instead of the
        default random selection.
    user : object
        Starting user for the snowball selection; its ``network`` attribute
        is read as a mapping whose values are neighbouring users (or None).

    Selection options
    -----------------
    random (default): selects n users at random

    snowball: starts from a specified user, iterates over neighbors, and does a BFS until n neighbors are reached

    Raises
    ------
    ValueError
        If ``snowball`` is set without a starting ``user``, or if fewer
        than ``n`` users could be selected.
    """
    if snowball:
        if user is None:
            raise ValueError(
                "Must specify a starting user from whom to initiate the snowball"
            )

        # Local import: only the snowball branch needs a FIFO queue.
        from collections import deque

        users, agenda = [user], deque([user])
        while agenda and len(users) < n:
            # popleft() gives first-in-first-out order, i.e. a real BFS.
            parent = agenda.popleft()
            # Random sort key: visit the neighbours in shuffled order.
            shuffled_network = sorted(parent.network.items(),
                                      key=lambda k: random.random())
            for _name, neighbor in shuffled_network:
                if len(users) >= n:
                    break
                if neighbor is None or neighbor in users:
                    continue
                users.append(neighbor)
                if neighbor.network:
                    agenda.append(neighbor)
    else:
        files = [
            x for x in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, x))
        ]
        shuffled_files = sorted(files, key=lambda k: random.random())
        user_names = shuffled_files[:n]
        # u[:-4] drops the four-character file extension (e.g. ".csv").
        users = [bc.read_csv(u[:-4], directory) for u in user_names]

    if len(users) < n:
        raise ValueError(
            "Specified more users than records that exist, only {} records available"
            .format(len(users)))

    bc.to_csv([bc.utils.all(u) for u in users], filename)
def test_read_csv_antenna_id_no_places(self):
    """Without an antennas file the ungrouped radius of gyration is None."""
    loaded = bc.read_csv("u_test_antennas", "samples", describe=False)

    second_record = loaded.records[1]
    expected_record = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=dt(2013, 12, 16, 5, 39, 30),
        call_duration=0,
        position=Position('13084', None))
    self.assertEqual(second_record, expected_record)

    expected_radius = {'allweek': {'allday': None}}
    self.assertEqual(bc.spatial.radius_of_gyration(loaded, groupby=None),
                     expected_radius)
def test_read_csv_antenna_id_no_places(self):
    """Without an antennas file the record keeps only the antenna id.

    The second record must carry ``Position('13084', None)`` and the
    radius of gyration must contain two ``None`` values.
    """
    user = bc.read_csv("u_test_antennas", "samples", describe=False)

    self.assertEqual(
        user.records[1],
        Record(interaction='call',
               direction='in',
               correspondent_id='770000001',
               datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
               call_duration=0,
               position=Position('13084', None)))

    # ``.values()`` returns a view on Python 3, which is never equal to a
    # list; convert it so the comparison is meaningful on every version.
    self.assertEqual(
        list(bc.spatial.radius_of_gyration(user).values()), [None] * 2)
def test_read_csv_antenna_id(self):
    """With an antennas file the ungrouped radius of gyration is positive."""
    loaded = bc.read_csv("u_test_antennas", "samples",
                         antennas_path="samples/towers.csv",
                         describe=False)

    second_record = loaded.records[1]
    expected_record = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=dt(2013, 12, 16, 5, 39, 30),
        call_duration=0,
        position=Position('13084', None))
    self.assertEqual(second_record, expected_record)

    gyration = bc.spatial.radius_of_gyration(loaded, groupby=None)
    self.assertGreater(gyration['allweek']['allday'], 0)
def metrics():
    """Compute indicators for every ``records/*.csv`` file and export them.

    Each file's base name, minus its last four characters, is used as the
    user id. A user whose loading or computation raises is reported as
    ``{'name': user_id, 'error': True}`` instead of stopping the run. The
    collected results are written to ``bandicoot_indicators_full.csv``.
    """
    records_path = 'records/'
    antenna_file = 'antennas.csv'

    def compute(user_id):
        # Best effort: any failure is turned into an error marker.
        try:
            loaded = bc.read_csv(user_id, records_path, antenna_file,
                                 describe=False)
            return bc.utils.all(loaded)
        except Exception:
            return {'name': user_id, 'error': True}

    indicators = []
    for csv_path in glob.glob(records_path + '*.csv'):
        indicators.append(compute(os.path.basename(csv_path)[:-4]))

    bc.io.to_csv(indicators, 'bandicoot_indicators_full.csv')
def network_sampling(n, filename, directory=None, snowball=False, user=None):
    """
    Selects a few users and exports a CSV of indicators for them.

    TODO: Returns the network/graph between the selected users.

    Parameters
    ----------
    n : int
        Number of users to select.
    filename : string
        File to export to.
    directory: string
        Directory to select users from if using the default random selection.
    snowball : bool
        Use snowball selection starting from ``user`` instead of the
        default random selection.
    user : object
        Starting user for the snowball selection; its ``network`` attribute
        is read as a mapping whose values are neighbouring users (or None).

    Selection options
    -----------------
    random (default): selects n users at random

    snowball: starts from a specified user, iterates over neighbors, and does a BFS until n neighbors are reached

    Raises
    ------
    ValueError
        If ``snowball`` is set without a starting ``user``, or if fewer
        than ``n`` users could be selected.
    """
    if snowball:
        if user is None:
            raise ValueError("Must specify a starting user from whom to initiate the snowball")

        # Local import: only the snowball branch needs a FIFO queue.
        from collections import deque

        users = [user]
        agenda = deque([user])
        while agenda and len(users) < n:
            # FIFO order makes this the breadth-first search promised above.
            parent = agenda.popleft()
            # Random sort key: visit the neighbours in shuffled order.
            shuffled_network = sorted(parent.network.items(), key=lambda k: random.random())
            for _name, neighbor in shuffled_network:
                if len(users) >= n:
                    break
                if neighbor is None or neighbor in users:
                    continue
                users.append(neighbor)
                if neighbor.network:
                    agenda.append(neighbor)
    else:
        files = [x for x in os.listdir(directory) if os.path.isfile(os.path.join(directory, x))]
        shuffled_files = sorted(files, key=lambda k: random.random())
        user_names = shuffled_files[:n]
        # u[:-4] drops the four-character file extension (e.g. ".csv").
        users = [bc.read_csv(u[:-4], directory) for u in user_names]

    if len(users) < n:
        raise ValueError("Specified more users than records that exist, only {} records available".format(len(users)))

    bc.to_csv([bc.utils.all(u) for u in users], filename)
def load_and_compute(user_id, attributes):
    """
    Bandicoot helper that loads one user and computes its indicators,
    configured through the given attributes object.

    Inputs
    ------
    user_id    : ID of the user whose bandicoot features are calculated.
    attributes : Object providing ``bandicoot_path``, ``weekend_days`` and
                 ``noct_time`` (read with the keys 'begin' and 'end').

    Output
    ------
    Dictionary of calculated bandicoot indicators, or
    ``{'name': user_id, 'error': True}`` if anything raised.
    """
    try:
        data_dir = attributes.bandicoot_path

        # Warnings are switched off when loading the user.
        loaded = bc.read_csv(user_id=user_id,
                             records_path=data_dir,
                             antennas_path=data_dir + 'antennas.csv',
                             describe=False,
                             warnings=False)

        # Override the weekend days and the nocturnal time window.
        loaded.weekend = attributes.weekend_days
        night = attributes.noct_time
        loaded.night_start = datetime.time(night['begin'])
        loaded.night_end = datetime.time(night['end'])

        # No time grouping (groupby=None), split by week part and by
        # day part, with the extended summary.
        return bc.utils.all(loaded,
                            groupby=None,
                            split_week=True,
                            split_day=True,
                            summary='extended')
    except Exception:
        # Best effort: report the failure instead of raising.
        return {'name': user_id, 'error': True}
def test_read_csv(self):
    """Loading ``u_test2`` from ``samples`` yields 500 records."""
    loaded = bc.read_csv("u_test2", "samples", describe=False)
    record_count = len(loaded.records)
    self.assertEqual(record_count, 500)
def setUp(self):
    """Load the ``user_ignored`` sample user for each test."""
    user_id, records_dir = 'user_ignored', 'samples'
    self.user = bc.read_csv(user_id, records_dir)
import bandicoot as bc
import pprint

u = bc.read_csv("sanjay", "C:\\Users\\Sahil\\Desktop")
rec = u.records

# Other features that could be included: call duration, the people being
# called, and the number of times each of them has been called in total.
t = []  # POSIX timestamp of every record, in record order
c = 0   # number of records processed
for i in rec:
    # Convert i.datetime to a numeric timestamp.
    t.append(i.datetime.timestamp())
    c += 1


def grouper(iterable, max_gap=1000):
    """Yield lists of consecutive items that lie close to each other.

    An item joins the current group when it is at most ``max_gap`` above
    the previous item; otherwise the current group is yielded and a new
    one is started. Nothing is yielded for an empty iterable.
    """
    prev = None
    group = []
    for item in iterable:
        # Compare against None explicitly: a previous value of 0 is a
        # valid item and must not be mistaken for "no previous item".
        if prev is None or item - prev <= max_gap:
            group.append(item)
        else:
            yield group
            group = [item]
        prev = item
    if group:
        yield group


# Groups numbered from 1.
di = dict(enumerate(grouper(t), 1))
import sys
sys.path.append("../")  # make the bandicoot package one level up importable

import bandicoot as bc
import glob
import os

records_path = 'users_bandicoot/'
antenna_file = 'towers.csv'

# Compute the indicators of every user file found in records_path.
indicators = []
for f in glob.glob(records_path + '*.csv'):
    # The user id is the file name without its ".csv" suffix.
    user_id = os.path.basename(f)[:-4]

    try:
        B = bc.read_csv(user_id, records_path, antenna_file, describe=False)
        metrics_dict = bc.utils.all(B)
    except Exception:
        # Best effort: flag the failing user instead of stopping the run.
        # This must bind the same name that is appended below; the former
        # "metrics_dic" spelling made the append reuse the previous user's
        # result (or fail with NameError on the first file).
        metrics_dict = {'name': user_id, 'error': True}

    indicators.append(metrics_dict)

bc.io.to_csv(indicators, 'bandicoot_indicators_full.csv')
# Walk-through of basic bandicoot usage: load a user, export the
# visualization, then run a few individual and spatial indicators.
# NOTE(review): `grouping` is not used in the part of the script shown
# here -- presumably it is needed further down; confirm.
from bandicoot.helper.group import grouping
import bandicoot as bc

# Loading a User
U = bc.read_csv('ego', 'data/', 'data/antennas.csv')

########################
# Export visualization #
########################

bc.visualization.export(U, 'my-viz-path')

#########################################
# Run individual and spatial indicators #
#########################################

# The return values are not stored here.
bc.individual.percent_initiated_conversations(U)
bc.spatial.number_of_antennas(U)
bc.spatial.radius_of_gyration(U)

######################################
# Group indicators by weeks or month #
######################################

# The groupby keyword controls the aggregation:
# - groupby='week' to divide by week (by default),
# - groupby='month' to divide by month,
def test_read_csv_with_recharges(self):
    """Loading user ``A`` with a recharges path yields 5 recharges."""
    loaded = bc.read_csv("A", "samples/manual",
                         describe=False,
                         recharges_path="samples/manual/recharges")
    recharge_count = len(loaded.recharges)
    self.assertEqual(recharge_count, 5)