def test_weekday_filter(self):
    """Only the record dated on a weekday survives the 'weekday' filter."""
    # One timestamp per record: a Friday, a Sunday and a Saturday.
    timestamps = [
        datetime.datetime(2014, 8, 22),
        datetime.datetime(2014, 8, 31),
        datetime.datetime(2014, 10, 18),
    ]
    records = [Record("test_itr", "in", "1", stamp, 1, Position())
               for stamp in timestamps]

    user = bc.User()
    user.records = records

    kept = bc.helper.group.filter_user(user, part_of_week='weekday')
    self.assertEqual(kept, [records[0]])
def test_weekend_filter(self):
    """The 'weekend' filter keeps the Sunday and Saturday records only."""
    friday = Record("test_itr", "in", "1", dt(2014, 8, 22), 1, Position())
    sunday = Record("test_itr", "in", "1", dt(2014, 8, 31), 1, Position())
    saturday = Record("test_itr", "in", "1", dt(2014, 10, 18), 1,
                      Position())
    records = [friday, sunday, saturday]

    user = bc.User()
    user.records = records

    weekend_only = bc.helper.group.filter_user(user, part_of_week='weekend')
    self.assertEqual(weekend_only, [sunday, saturday])
def test_none_group(self):
    """With groupby=None all records come back as one single group."""
    september_days = (4, 5, 11, 12)
    records = [Record("call", "in", "1", dt(2014, 9, day), 1, Position())
               for day in september_days]

    groups = bc.helper.group.group_records(records, groupby=None)

    only_group = list(next(groups))
    self.assertEqual(records, only_group)

    # After that one group the iterator has nothing left to yield.
    with self.assertRaises(StopIteration):
        next(groups)
def test_weekly_group(self):
    """Three records in three different weeks form three weekly groups."""
    timestamps = (dt(2014, 8, 24), dt(2014, 9, 4), dt(2014, 9, 11))
    records = [Record("test_itr", "in", "1", stamp, 1, Position())
               for stamp in timestamps]

    user = bc.User()
    user.records = records

    weekly = bc.helper.group.group_records(user.records, groupby='week')
    # Materialise every lazily-yielded group so it can be compared.
    groups = [list(chunk) for chunk in weekly]

    expected = [[records[0]], [records[1]], [records[2]]]
    self.assertEqual(groups, expected)
def test_weekend_group(self):
    """Weekend records from three separate weeks give one group each."""
    # Saturday, Sunday and Saturday, each falling in a different week.
    timestamps = [
        datetime.datetime(2014, 8, 23),
        datetime.datetime(2014, 8, 31),
        datetime.datetime(2014, 10, 18),
    ]
    records = [Record("test_itr", "in", "1", stamp, 1, Position())
               for stamp in timestamps]

    user = bc.User()
    user.records = records

    weekly = bc.helper.group.group_records(user, groupby='week',
                                           part_of_week='weekend')
    groups = [list(chunk) for chunk in weekly]

    expected = [[records[0]], [records[1]], [records[2]]]
    self.assertEqual(groups, expected)
def test_none_group(self):
    """groupby=None yields one group holding every record, then stops."""
    records = [
        Record("call", "in", "1", datetime.datetime(2014, 9, 4), 1,
               Position()),
        Record("call", "in", "1", datetime.datetime(2014, 9, 5), 1,
               Position()),
        Record("call", "in", "1", datetime.datetime(2014, 9, 11), 1,
               Position()),
        Record("call", "in", "1", datetime.datetime(2014, 9, 12), 1,
               Position()),
    ]

    user = bc.User()
    user.records = records

    grouping = bc.helper.group.group_records(user, groupby=None)
    self.assertEqual(records, list(next(grouping)))

    # Use the next() builtin instead of the Python 2-only ``.next`` method
    # so the exhaustion check also works with Python 3 iterators.
    self.assertRaises(StopIteration, next, grouping)
def test_daily_filter(self):
    """filter_user splits records between 'night' and 'day' by hour."""
    # Three daytime timestamps followed by one at 02:00.
    timestamps = [
        datetime.datetime(2014, 8, 22, 10, 0),
        datetime.datetime(2014, 8, 23, 10, 0),
        datetime.datetime(2014, 9, 7, 11, 0),
        datetime.datetime(2014, 10, 18, 2, 0),
    ]
    records = [Record("test_itr", "in", "1", stamp, 1, Position())
               for stamp in timestamps]

    user = bc.User()
    user.records = records

    at_night = bc.helper.group.filter_user(user, part_of_day='night')
    self.assertEqual(at_night, [records[3]])

    at_day = bc.helper.group.filter_user(user, part_of_day='day')
    self.assertEqual(at_day, records[:3])
def _transform_to_time_spent(records, split_interval, sections): """ Each call that crosses a boundary of the sections in the punchcard is split. These new records contain the amount of time (in record.call_duration) spent talking in that specific section. """ t_records = [] week_nr = records[0].datetime.isocalendar()[1] # contrary to the rest of the binning process, this is done with second # precision for r in filter( lambda rec: rec.interaction == 'call' and rec.call_duration > 0, records): t_left = r.call_duration t_to_next_section = _seconds_to_section_split(r, sections) t_spent_total = 0 while (t_left > 0): t_spent = min(t_to_next_section, t_left) dt_new = r.datetime + dt.timedelta(seconds=t_spent_total) if dt_new.isocalendar()[1] > week_nr: dt_new -= dt.timedelta(days=7) t_records.append( Record('call', r.direction, None, dt_new, t_spent, None)) t_left -= t_spent t_spent_total += t_spent t_to_next_section = split_interval * 60 return sorted(t_records, key=lambda r: _find_weektime(r.datetime))
def _parse(reader): records = [] antennas = dict() for row in reader: direction = 'out' if row[0] == '1' else 'in' interaction = 'call' if row[1] in ['11', '12'] else 'text' contact = row[3] date = datetime.strptime(row[4], "%Y-%m-%d %H:%M:%S") call_duration = float(row[5]) lon, lat = float(row[6]), float(row[7]) latlon = (lat, lon) antenna = None for key, value in antennas.items(): if latlon == value: antenna = key break if antenna is None: antenna = len(antennas) + 1 antennas[antenna] = latlon position = Position(antenna=antenna, location=latlon) record = Record(direction=direction, interaction=interaction, correspondent_id=contact, call_duration=call_duration, datetime=date, position=position) records.append(record) return records, antennas
def random_record(**kwargs):
    """
    Build one Record with randomly drawn fields.

    Any keyword argument overrides the randomly generated field of the
    same name. Text records get a call_duration of None unless
    ``call_duration`` is overridden through ``kwargs``.
    """
    n_users = 48
    rate = 1e-4

    year = 2012
    month = _choice(range(1, 3))
    day = _choice(range(1, 28))

    # Correspondent index in [0, n_users).
    correspondent = int(random.random() * n_users)

    # The values below are drawn in a fixed order so that a seeded
    # generator keeps producing the same records.
    offset_seconds = math.floor(-1 / rate * math.log(random.random()))
    moment = (datetime.datetime(year, month, day) +
              datetime.timedelta(seconds=offset_seconds))
    interaction = _choice(['text', 'text', 'text', 'call', 'call'])
    direction = _choice(['in', 'in', 'out'])
    duration = int(random.random() * 1000)
    location = (_uniform(-5, 5), _uniform(-5, 5))

    # The decision is based on the drawn interaction, before overrides.
    if interaction == "text":
        duration = None

    fields = {
        'datetime': moment,
        'interaction': interaction,
        'correspondent_id': "correspondent_{}".format(correspondent),
        'direction': direction,
        'call_duration': duration,
        'position': Position(location=location),
    }
    fields.update(kwargs)
    return Record(**fields)
def test_daily_group(self):
    """Weekly grouping respects the part_of_day filter."""
    timestamps = [
        datetime.datetime(2014, 8, 22, 10, 0),
        datetime.datetime(2014, 8, 23, 10, 0),
        datetime.datetime(2014, 9, 7, 11, 0),
        datetime.datetime(2014, 10, 18, 2, 0),
    ]
    records = [Record("test_itr", "in", "1", stamp, 1, Position())
               for stamp in timestamps]

    user = bc.User()
    user.records = records

    # Night: only the 02:00 record qualifies.
    by_week = bc.helper.group.group_records(user, groupby='week',
                                            part_of_day='night')
    night_groups = [list(chunk) for chunk in by_week]
    self.assertEqual(night_groups, [[records[3]]])

    # Day: the first two records share a week, the third is on its own.
    by_week = bc.helper.group.group_records(user, groupby='week',
                                            part_of_day='day')
    day_groups = [list(chunk) for chunk in by_week]
    self.assertEqual(day_groups, [[records[0], records[1]], [records[2]]])
def random_records(n, antennas, number_of_users=150, ingoing=0.7,
                   percent_text=0.3, rate=1e-4):
    """
    Generate ``n`` random records as dictionaries.

    Timestamps start at 2013-01-01 00:00:00 and strictly follow each other:
    every record adds a random, exponentially distributed number of seconds
    (parameter ``rate``) to the previous timestamp.

    Parameters
    ----------
    n : int
        Number of records to generate.
    antennas : sequence
        Candidate positions; one is picked at random per record. Must be
        non-empty when ``n`` is greater than 0.
    number_of_users : int
        Correspondents are drawn uniformly from 1..number_of_users and
        stored as the MD5 hex digest of that number.
    ingoing : float
        Probability that a record has direction 'in' (otherwise 'out').
    percent_text : float
        Probability that a record is a 'text' (otherwise a 'call').
    rate : float
        Rate used for the random gap between consecutive records.

    Returns
    -------
    list of dict
        ``Record._asdict()`` of every generated record. ``datetime`` is
        stored as a string and ``call_duration`` is '' for texts.
    """
    # Plain decimal literals: leading-zero forms such as ``01`` are a
    # syntax error on Python 3.
    current_date = datetime.datetime(2013, 1, 1, 0, 0, 0)
    results = []

    for _ in range(n):
        current_date += datetime.timedelta(
            seconds=math.floor(-1 / rate * math.log(random.random())))
        interaction = "text" if random.random() < percent_text else "call"

        # The draws below keep their original order so that seeded runs
        # produce the same sequence of records.
        direction = 'in' if random.random() < ingoing else 'out'
        # hashlib only accepts bytes on Python 3, hence the explicit
        # encode (a no-op for these ASCII digits on Python 2).
        user_number = str(random.randint(1, number_of_users))
        correspondent_id = hashlib.md5(
            user_number.encode('utf-8')).hexdigest()
        call_duration = (random.randint(1, 1000)
                         if interaction == "call" else '')

        r = Record(
            interaction=interaction,
            direction=direction,
            correspondent_id=correspondent_id,
            datetime=str(current_date),
            call_duration=call_duration,
            position=random.choice(antennas)
        )
        results.append(r._asdict())

    return results
def test_read_csv_no_position(self):
    """The second record of the no-position sample has an empty Position."""
    user = bc.read_csv("u_test_no_position", "samples", describe=False)

    expected = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
        duration=0,
        position=Position(),
    )
    self.assertEqual(user.records[1], expected)
def test_read_orange_stdin(self):
    """Check the first record produced by read_orange without a path."""
    user = bc.io.read_orange(describe=False)

    expected_position = Position(1, (42.3987, -71.575))
    expected = Record(
        interaction='text',
        direction='out',
        correspondent_id='770000005',
        datetime=datetime.datetime(2013, 12, 16, 7, 30, 30),
        call_duration=0.0,
        position=expected_position,
    )
    self.assertEqual(user.records[0], expected)
def test_weekly_group(self):
    """Records from three different weeks are yielded as three groups."""
    records = [
        Record("test_itr", "in", "1", datetime.datetime(2014, 8, 24), 1,
               Position()),
        Record("test_itr", "in", "1", datetime.datetime(2014, 9, 4), 1,
               Position()),
        Record("test_itr", "in", "1", datetime.datetime(2014, 9, 11), 1,
               Position()),
    ]

    user = bc.User()
    user.records = records

    grouping = bc.helper.group.group_records(user, groupby='week')

    # assertEqual really compares the two timestamps; assertTrue(a, b)
    # only checked that ``a`` is truthy and used ``b`` as failure message.
    # The next() builtin replaces the Python 2-only ``.next()`` method.
    for expected in records:
        group = next(grouping)
        self.assertEqual(next(group).datetime, expected.datetime)
def random_records(n, antennas, number_of_users=150, ingoing=0.7,
                   percent_text=0.3, rate=1e-4):
    """
    Generate ``n`` random records as dictionaries.

    Timestamps start at 2013-01-01 00:00:00 and strictly follow each other:
    every record adds a random, exponentially distributed number of seconds
    (parameter ``rate``) to the previous timestamp.

    Parameters
    ----------
    n : int
        Number of records to generate.
    antennas : sequence
        Candidate positions; one is picked at random per record. Must be
        non-empty when ``n`` is greater than 0.
    number_of_users : int
        Correspondents are drawn uniformly from 1..number_of_users and
        stored as the MD5 hex digest of that number.
    ingoing : float
        Probability that a record has direction 'in' (otherwise 'out').
    percent_text : float
        Probability that a record is a 'text' (otherwise a 'call').
    rate : float
        Rate used for the random gap between consecutive records.

    Returns
    -------
    list of dict
        ``Record._asdict()`` of every generated record. ``datetime`` is
        stored as a string and ``duration`` is '' for texts.
    """
    # Plain decimal literals: leading-zero forms such as ``01`` are a
    # syntax error on Python 3.
    current_date = datetime.datetime(2013, 1, 1, 0, 0, 0)
    results = []

    for _ in range(n):
        current_date += datetime.timedelta(
            seconds=math.floor(-1 / rate * math.log(random.random())))
        interaction = "text" if random.random() < percent_text else "call"

        # The draws below keep their original order so that seeded runs
        # produce the same sequence of records.
        direction = 'in' if random.random() < ingoing else 'out'
        # hashlib only accepts bytes on Python 3, hence the explicit
        # encode (a no-op for these ASCII digits on Python 2).
        user_number = str(random.randint(1, number_of_users))
        correspondent_id = hashlib.md5(
            user_number.encode('utf-8')).hexdigest()
        duration = random.randint(1, 1000) if interaction == "call" else ''

        r = Record(
            interaction=interaction,
            direction=direction,
            correspondent_id=correspondent_id,
            datetime=str(current_date),
            duration=duration,
            position=random.choice(antennas)
        )
        results.append(r._asdict())

    return results
def test_none_group(self):
    """groupby=None yields a single group with all of the user's records."""
    records = [
        Record("call", "in", "1", datetime.datetime(2014, 9, 5), 1,
               Position()),
        Record("call", "in", "1", datetime.datetime(2014, 9, 4), 1,
               Position()),
        Record("call", "in", "1", datetime.datetime(2014, 9, 11), 1,
               Position()),
        Record("call", "in", "1", datetime.datetime(2014, 9, 12), 1,
               Position()),
    ]

    user = bc.User()
    user.records = records

    grouping = bc.helper.group.group_records(user, groupby=None)
    group = next(grouping)

    # assertEqual really compares the timestamps; assertTrue(a, b) only
    # checked that ``a`` is truthy and used ``b`` as failure message.
    # The expected order is read back from user.records because the input
    # list is not chronological.
    # NOTE(review): whether User re-orders records on assignment is not
    # visible here. Confirm this is the ordering the test should pin.
    for expected in user.records:
        self.assertEqual(next(group).datetime, expected.datetime)

    # Both the group and the outer iterator must now be exhausted; the
    # next() builtin replaces the Python 2-only ``.next`` method.
    self.assertRaises(StopIteration, next, group)
    self.assertRaises(StopIteration, next, grouping)
def test_read_csv_antenna_id_no_places(self):
    """Records keep their antenna id but have no location to work with."""
    user = bc.read_csv("u_test_antennas", "samples", describe=False)

    expected = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
        call_duration=0,
        position=Position('13084', None),
    )
    self.assertEqual(user.records[1], expected)

    # On Python 3 ``.values()`` is a view that never compares equal to a
    # list, so materialise it before comparing.
    radius_values = list(bc.spatial.radius_of_gyration(user).values())
    self.assertEqual(radius_values, [None] * 2)
def test_read_csv_antenna_id_no_places(self):
    """Without locations the ungrouped radius of gyration is None."""
    user = bc.read_csv("u_test_antennas", "samples", describe=False)

    expected_record = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=dt(2013, 12, 16, 5, 39, 30),
        call_duration=0,
        position=Position('13084', None),
    )
    self.assertEqual(user.records[1], expected_record)

    expected_radius = {'allweek': {'allday': None}}
    self.assertEqual(bc.spatial.radius_of_gyration(user, groupby=None),
                     expected_radius)
def test_read_csv_antenna_id(self):
    """With a towers file supplied the radius of gyration is positive."""
    user = bc.read_csv("u_test_antennas", "samples",
                       antennas_path="samples/towers.csv",
                       describe=False)

    expected_record = Record(
        interaction='call',
        direction='in',
        correspondent_id='770000001',
        datetime=dt(2013, 12, 16, 5, 39, 30),
        call_duration=0,
        position=Position('13084', None),
    )
    self.assertEqual(user.records[1], expected_record)

    ungrouped = bc.spatial.radius_of_gyration(user, groupby=None)
    self.assertGreater(ungrouped['allweek']['allday'], 0)
def random_record(**kwargs):
    """
    Build one Record with randomly drawn fields.

    Any keyword argument overrides the randomly generated field of the
    same name. Text records get a call_duration of '' unless
    ``call_duration`` is overridden through ``kwargs``.
    """
    n_users = 150
    rate = 1e-4

    # Every value is drawn in a fixed order so that a seeded generator
    # keeps producing the same records.
    year = random.choice(range(2012, 2015))
    month = random.choice(range(1, 12))
    day = random.choice(range(1, 28))

    offset_seconds = math.floor(-1 / rate * math.log(random.random()))
    moment = (datetime.datetime(year, month, day) +
              datetime.timedelta(seconds=offset_seconds))
    interaction = random.choice(['text', 'call'])
    correspondent = random.randint(1, n_users)
    direction = random.choice(['in', 'out'])
    duration = random.randint(1, 1000)
    location = (random.uniform(-5, 5), random.uniform(-5, 5))

    # The decision is based on the drawn interaction, before overrides.
    if interaction == "text":
        duration = ''

    fields = {
        'datetime': moment,
        'interaction': interaction,
        'correspondent_id': correspondent,
        'direction': direction,
        'call_duration': duration,
        'position': Position(location=location),
    }
    fields.update(kwargs)
    return Record(**fields)
def _parse_record(data):
    """
    Build a Record from a mapping of raw string fields.

    Parameters
    ----------
    data : mapping
        Must contain 'interaction', 'direction', 'correspondent_id',
        'datetime' (formatted as ``%Y-%m-%d %H:%M:%S``) and
        'call_duration' (an integer string, or '' for no duration). It may
        contain 'antenna_id', or both 'latitude' and 'longitude'.

    Returns
    -------
    Record
        The parsed record. Its position holds the antenna id when
        'antenna_id' is present, otherwise the (latitude, longitude) pair
        when both are present, otherwise it is an empty Position.
    """

    def _map_duration(s):
        # An empty field means that no duration was recorded.
        return int(s) if s != '' else None

    def _map_position(data):
        antenna = Position()

        # An antenna id takes precedence over raw coordinates.
        if 'antenna_id' in data:
            antenna.antenna = data['antenna_id']
            return antenna

        if 'latitude' in data and 'longitude' in data:
            # Store the coordinates on ``location``, the field Position is
            # constructed with elsewhere (Position(antenna=...,
            # location=...)); the previous ``position`` attribute is not
            # one of those fields.
            antenna.location = (float(data['latitude']),
                                float(data['longitude']))

        return antenna

    return Record(interaction=data['interaction'],
                  direction=data['direction'],
                  correspondent_id=data['correspondent_id'],
                  datetime=datetime.strptime(data['datetime'],
                                             "%Y-%m-%d %H:%M:%S"),
                  call_duration=_map_duration(data['call_duration']),
                  position=_map_position(data))
def parse_record(raw):
    """
    Convert one raw row (a mapping of column name to string) into a Record.

    The correspondent and the cell are read from opposite sides of the
    call: for an incoming call the contact is the A side and the cell the
    B side, and the other way round otherwise. Date and time are accepted
    under either the short (CDATE/CTIME) or the long (CALL_DATE/CALL_TIME)
    column names.
    """
    direction = parse_direction(raw['CALL_TYPE'].strip())

    # Pick the column names that apply to this direction.
    if direction == 'in':
        party_key, number_key, cell_key = 'A_PARTY', 'A_NUMBER', 'B_CELL'
    else:
        party_key, number_key, cell_key = 'B_PARTY', 'B_NUMBER', 'A_CELL'

    contact = raw.get(party_key, raw.get(number_key))
    cell_id = raw[cell_key]
    position = Position(antenna=cell_id, location=cells.get(cell_id))

    date_part = raw.get('CDATE', raw.get('CALL_DATE'))
    time_part = raw.get('CTIME', raw.get('CALL_TIME'))
    timestamp = datetime.strptime(date_part + time_part, "%Y%m%d%H:%M:%S")

    return Record(interaction='call',
                  direction=direction,
                  correspondent_id=contact,
                  call_duration=float(raw['DURATION'].strip()),
                  datetime=timestamp,
                  position=position)