コード例 #1
0
ファイル: test_regressions.py プロジェクト: jcgdata/bandicoot
    def setUp(self):
        """Build the fixture users shared by the regression tests.

        The first call also changes the working directory to the prefix of
        this file's absolute path that precedes its base name (presumably so
        the relative ``samples/`` paths below resolve).
        """
        # Change directory only once per test class.
        if not getattr(TestRegressions, '_dir_changed', False):
            this_file = os.path.abspath(__file__)
            cut = this_file.index(os.path.basename(__file__))
            os.chdir(this_file[:cut])
            TestRegressions._dir_changed = True

        blank = bc.User()
        blank.attributes['empty'] = True
        self.empty_user = blank

        self.sample_user = bc.tests.generate_user.sample_user()

        self.network_ego = bc.read_csv(
            'ego',
            'samples/network',
            'samples/towers.csv',
            attributes_path='samples/attributes',
            network=True,
            describe=False)

        # Manual users
        self.user_a = bc.read_csv(
            'A',
            'samples/manual',
            'samples/towers.csv',
            recharges_path='samples/manual/recharges',
            network=False,
            describe=False)
        self.user_a_network = bc.read_csv(
            'A',
            'samples/manual',
            'samples/towers.csv',
            attributes_path='samples/attributes',
            network=True,
            describe=False)
        # NOTE(review): 'sample/towers.csv' (singular) differs from the
        # 'samples/towers.csv' used everywhere else -- confirm whether this
        # is intentional before changing it.
        self.user_a_orange = bc.io.read_orange(
            'A_orange',
            'samples/manual',
            'sample/towers.csv',
            recharges_path='samples/manual/recharges',
            network=False,
            describe=False)
        self.user_a_orange_network = bc.io.read_orange(
            'A_orange',
            'samples/manual',
            network=True,
            attributes_path='samples/attributes',
            describe=False)
コード例 #2
0
def main():
    """Compute bandicoot indicators for every CDR file and export them to CSV.

    Every sub-directory of ``args.cdrs_input_dir`` is scanned; each file in
    it is loaded with ``bandicoot.read_csv`` and passed to
    ``bandicoot.utils.all``. When ``args.attributes_input_file`` is set, the
    user's gender and age are written to a temporary ``key,value`` file that
    is handed to ``read_csv`` as ``attributes_path``.

    Relies on the module-level ``args``, ``read_user_attributes`` and
    ``delete_bad_columns`` names, which are defined outside this function.

    Exits the process (``sys.exit``) if attributes were requested but none
    exist for a user.
    """
    # read in attributes for all users
    if args.attributes_input_file:
        user_attributes = read_user_attributes(args.attributes_input_file)

    indicators = []
    for dir_name in os.listdir(args.cdrs_input_dir):
        cdrs_dir = os.path.join(args.cdrs_input_dir, dir_name)

        if not os.path.isdir(cdrs_dir):
            continue
        for file_name in os.listdir(cdrs_dir):
            cdr_input_file = os.path.join(cdrs_dir, file_name)
            user_name = file_name

            # should we add attributes to indicators?
            if args.attributes_input_file:
                # find the attributes for this user and write them to a
                # temporary file
                if user_name not in user_attributes:
                    sys.exit('Could not find attributes for user ' + user_name)
                gender = user_attributes[user_name][0]
                age = user_attributes[user_name][1] or "None"

                # Text mode ('w+') so that str can be written on Python 3
                # (the default mode is binary); the context manager removes
                # the file even if read_csv raises.
                with tempfile.NamedTemporaryFile(mode='w+') as attributes_file:
                    attributes_file.write("key,value\n")
                    attributes_file.write("gender," + str(gender) + "\n")
                    attributes_file.write("age," + str(age))
                    # flush so the content is on disk when the file is
                    # re-opened by name
                    attributes_file.flush()
                    user = bandicoot.read_csv(
                        cdr_input_file,
                        args.antennas_input_file,
                        attributes_path=attributes_file.name,
                        describe=False)
            else:
                user = bandicoot.read_csv(cdr_input_file,
                                          args.antennas_input_file,
                                          describe=False)

            # set the username to filename
            user.name = user_name

            # Extended summary also computes skewness and kurtosis
            user_indicators = bandicoot.utils.all(user,
                                                  weekly=True,
                                                  summary='extended',
                                                  attributes=True)

            indicators.append(user_indicators)

    bandicoot.io.to_csv(indicators, args.indicators_output)

    if args.delete_bad_columns:
        delete_bad_columns(args.indicators_output)
コード例 #3
0
def load_and_compute(f):
    """Return the bandicoot indicators for the user stored in file ``f``.

    The user id is the base name of ``f`` without its last four characters
    (e.g. a ``.csv`` suffix). Any exception raised while loading or
    computing is swallowed and replaced by an error marker dictionary
    ``{'name': user_id, 'error': True}``.
    """
    user_id = os.path.basename(f)[:-4]
    try:
        loaded = bc.read_csv(user_id, records_path, antenna_file,
                             describe=False)
        return bc.utils.all(loaded)
    except Exception:
        # Best effort: report the failure instead of aborting the batch.
        return {'name': user_id, 'error': True}
コード例 #4
0
ファイル: test_parsers.py プロジェクト: aujacquet/bandicoot
 def test_read_csv_attributes(self):
     """Attributes read for ``u_test2`` equal the expected mapping."""
     loaded = bc.read_csv("u_test2", "samples",
                          attributes_path="samples/attributes",
                          describe=False)
     expected_attributes = {
         'gender': 'male',
         'age': '42',
         'is_subscriber': 'True',
         'individual_id': '7atr8f53fg41',
     }
     self.assertEqual(loaded.attributes, expected_attributes)
コード例 #5
0
def load_and_compute(f):
    """Compute all bandicoot indicators for the records file ``f``.

    The last four characters of the file's base name are stripped to obtain
    the user id. If anything goes wrong, an error marker dictionary
    ``{'name': user_id, 'error': True}`` is returned instead.
    """
    user_id = os.path.basename(f)[:-4]
    try:
        user = bc.read_csv(user_id, records_path, antenna_file, describe=False)
        result = bc.utils.all(user)
    except Exception:
        # Deliberately broad: one bad user must not stop the whole run.
        result = {'name': user_id, 'error': True}
    return result
コード例 #6
0
    def setUp(self):
        """Create the users that the regression tests operate on.

        On the first call the working directory is changed to the part of
        this file's absolute path that precedes its base name.
        """
        # Guard so that chdir happens only once per test class.
        if not getattr(TestRegressions, '_dir_changed', False):
            full_path = os.path.abspath(__file__)
            base_at = full_path.index(os.path.basename(__file__))
            os.chdir(full_path[:base_at])
            TestRegressions._dir_changed = True

        empty = bc.User()
        empty.attributes['empty'] = True
        self.empty_user = empty

        self.sample_user = bc.tests.generate_user.sample_user()

        self.network_ego = bc.read_csv(
            'ego', 'samples/network', 'samples/towers.csv',
            attributes_path='samples/attributes', network=True,
            warnings=False, describe=False)

        # Manual users
        self.user_a = bc.read_csv(
            'A', 'samples/manual', 'samples/towers.csv',
            network=False, warnings=False, describe=False)
        self.user_a_network = bc.read_csv(
            'A', 'samples/manual', 'samples/towers.csv',
            attributes_path='samples/attributes', network=True,
            warnings=False, describe=False)
        self.user_a_orange = bc.io.read_orange(
            'A_orange', 'samples/manual',
            network=False, warnings=False, describe=False)
        self.user_a_orange_network = bc.io.read_orange(
            'A_orange', 'samples/manual', network=True,
            attributes_path='samples/attributes',
            warnings=False, describe=False)
コード例 #7
0
ファイル: test_parsers.py プロジェクト: aujacquet/bandicoot
 def test_read_csv_no_position(self):
     """The second record of ``u_test_no_position`` has an empty Position."""
     loaded = bc.read_csv("u_test_no_position", "samples", describe=False)
     actual = loaded.records[1]
     expected = Record(
         interaction='call',
         direction='in',
         correspondent_id='770000001',
         datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
         call_duration=0,
         position=Position())
     self.assertEqual(actual, expected)
 def test_read_csv_attributes(self):
     """Reading ``u_test2`` with an attributes path yields the known dict."""
     loaded = bc.read_csv("u_test2",
                          "samples",
                          attributes_path="samples/attributes",
                          describe=False)
     wanted = {
         'gender': 'male',
         'age': '42',
         'is_subscriber': 'True',
         'individual_id': '7atr8f53fg41',
     }
     self.assertEqual(loaded.attributes, wanted)
コード例 #9
0
 def test_read_csv_no_position(self):
     """Record #1 of ``u_test_no_position`` is parsed with a blank Position."""
     loaded = bc.read_csv("u_test_no_position", "samples", describe=False)
     second_record = loaded.records[1]
     expected_record = Record(interaction='call', direction='in',
                              correspondent_id='770000001',
                              datetime=datetime.datetime(2013, 12, 16,
                                                         5, 39, 30),
                              duration=0,
                              position=Position())
     self.assertEqual(second_record, expected_record)
コード例 #10
0
ファイル: test_parsers.py プロジェクト: aujacquet/bandicoot
    def test_read_csv_antenna_id_no_places(self):
        """Without an antennas file, antenna ids are kept but have no location.

        The second record must carry antenna ``'13084'`` with no coordinates,
        and every value returned by ``radius_of_gyration`` must be ``None``.
        """
        user = bc.read_csv("u_test_antennas", "samples", describe=False)
        self.assertEqual(user.records[1],
                         Record(interaction='call',
                                direction='in',
                                correspondent_id='770000001',
                                datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
                                call_duration=0,
                                position=Position('13084', None)))

        # list(...) is required on Python 3, where .values() is a view object
        # that never compares equal to a list.
        radius_values = list(bc.spatial.radius_of_gyration(user).values())
        self.assertEqual(radius_values, [None] * 2)
コード例 #11
0
ファイル: test_parsers.py プロジェクト: aujacquet/bandicoot
    def test_read_csv_antenna_id(self):
        """With an antennas file, the mean radius of gyration is positive."""
        loaded = bc.read_csv("u_test_antennas", "samples",
                             antennas_path="samples/towers.csv",
                             describe=False)
        actual = loaded.records[1]
        expected = Record(
            interaction='call',
            direction='in',
            correspondent_id='770000001',
            datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
            call_duration=0,
            position=Position('13084', None))
        self.assertEqual(actual, expected)

        mean_radius = bc.spatial.radius_of_gyration(loaded)['mean']
        self.assertGreater(mean_radius, 0)
コード例 #12
0
ファイル: test_parsers.py プロジェクト: ulfaslak/bandicoot
    def test_read_csv_antenna_id_no_places(self):
        """Without an antennas file the radius of gyration is ``None``."""
        loaded = bc.read_csv("u_test_antennas", "samples", describe=False)
        actual = loaded.records[1]
        expected = Record(
            interaction='call',
            direction='in',
            correspondent_id='770000001',
            datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
            duration=0,
            position=Position('13084', None))
        self.assertEqual(actual, expected)

        radius = bc.spatial.radius_of_gyration(loaded, groupby=None)
        self.assertEqual(radius, {'allweek': {'allday': None}})
コード例 #13
0
def network_sampling(n, filename, directory=None, snowball=False, user=None):
    """
    Selects a few users and exports a CSV of indicators for them.

    TODO: Returns the network/graph between the selected users.

    Parameters
    ----------
    n : int
        Number of users to select.
    filename : string
        File to export to.
    directory: string
        Directory to select users from if using the default random selection.
    snowball : bool
        If True, use snowball sampling starting from ``user`` instead of
        picking files at random from ``directory``.
    user : object with a ``network`` mapping, optional
        Starting user for the snowball selection (required when
        ``snowball`` is True).

    Raises
    ------
    ValueError
        If ``snowball`` is True and ``user`` is None, or if fewer than ``n``
        users could be selected.

    Selection options
    -----------------
    random (default): selects n users at random

    snowball: starts from a specified user, iterates over neighbors, and does a BFS until n neighbors are reached
    """
    if snowball:
        if user is None:
            raise ValueError(
                "Must specify a starting user from whom to initiate the snowball"
            )
        users, agenda = [user], [user]
        while agenda:
            # Take from the front (FIFO) so the traversal is breadth-first,
            # as documented; popping from the end would be depth-first.
            parent = agenda.pop(0)
            neighbors = list(parent.network.values())
            # Visit neighbors in random order rather than in key order.
            random.shuffle(neighbors)
            for neighbor in neighbors:
                if len(users) >= n:
                    break
                if neighbor is None or neighbor in users:
                    continue
                users.append(neighbor)
                if neighbor.network:
                    # Lists have no push(); append queues the neighbor.
                    agenda.append(neighbor)
    else:
        files = [
            x for x in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, x))
        ]
        random.shuffle(files)
        # Strip the last four characters (e.g. ".csv") to get the user id.
        users = [bc.read_csv(name[:-4], directory) for name in files[:n]]
    if len(users) < n:
        raise ValueError(
            "Specified more users than records that exist, only {} records available"
            .format(len(users)))

    bc.to_csv([bc.utils.all(u) for u in users], filename)
    def test_read_csv_antenna_id_no_places(self):
        """No antennas file: antenna id is kept, radius of gyration is None."""
        loaded = bc.read_csv("u_test_antennas", "samples", describe=False)
        actual = loaded.records[1]
        expected = Record(
            interaction='call',
            direction='in',
            correspondent_id='770000001',
            datetime=dt(2013, 12, 16, 5, 39, 30),
            call_duration=0,
            position=Position('13084', None))
        self.assertEqual(actual, expected)

        expected_radius = {'allweek': {'allday': None}}
        self.assertEqual(
            bc.spatial.radius_of_gyration(loaded, groupby=None),
            expected_radius)
コード例 #15
0
    def test_read_csv_antenna_id_no_places(self):
        """Without an antennas file, antenna ids are kept but have no location.

        Checks the second parsed record, then that every value returned by
        ``radius_of_gyration`` is ``None``.
        """
        user = bc.read_csv("u_test_antennas", "samples", describe=False)
        self.assertEqual(
            user.records[1],
            Record(interaction='call',
                   direction='in',
                   correspondent_id='770000001',
                   datetime=datetime.datetime(2013, 12, 16, 5, 39, 30),
                   call_duration=0,
                   position=Position('13084', None)))

        # Materialise the values: on Python 3 a dict view never equals a
        # list, so the bare ``.values()`` comparison could not pass.
        self.assertEqual(
            list(bc.spatial.radius_of_gyration(user).values()), [None] * 2)
    def test_read_csv_antenna_id(self):
        """With an antennas file, the all-week radius of gyration is > 0."""
        loaded = bc.read_csv("u_test_antennas",
                             "samples",
                             antennas_path="samples/towers.csv",
                             describe=False)

        actual = loaded.records[1]
        expected = Record(
            interaction='call',
            direction='in',
            correspondent_id='770000001',
            datetime=dt(2013, 12, 16, 5, 39, 30),
            call_duration=0,
            position=Position('13084', None))
        self.assertEqual(actual, expected)

        by_period = bc.spatial.radius_of_gyration(loaded, groupby=None)
        self.assertGreater(by_period['allweek']['allday'], 0)
コード例 #17
0
def metrics():
    """Compute bandicoot indicators for every CSV in ``records/``.

    Each ``records/*.csv`` file is loaded as a user (the user id is the file
    name without its last four characters) together with ``antennas.csv``.
    Users that fail to load or compute are recorded as
    ``{'name': user_id, 'error': True}``. All results are written to
    ``bandicoot_indicators_full.csv``.
    """
    records_path = 'records/'
    antenna_file = 'antennas.csv'

    indicators = []
    for f in glob.glob(records_path + '*.csv'):
        user_id = os.path.basename(f)[:-4]

        try:
            B = bc.read_csv(user_id, records_path, antenna_file, describe=False)
            metrics_dict = bc.utils.all(B)
        except Exception:
            # Best effort: flag the user as failed and keep going.
            metrics_dict = {'name': user_id, 'error': True}

        indicators.append(metrics_dict)

    bc.io.to_csv(indicators, 'bandicoot_indicators_full.csv')
コード例 #18
0
ファイル: network.py プロジェクト: jcgdata/bandicoot
def network_sampling(n, filename, directory=None, snowball=False, user=None):
    """
    Selects a few users and exports a CSV of indicators for them.

    TODO: Returns the network/graph between the selected users.

    Parameters
    ----------
    n : int
        Number of users to select.
    filename : string
        File to export to.
    directory: string
        Directory to select users from if using the default random selection.
    snowball : bool
        If True, select users by snowball sampling from ``user`` rather than
        at random from ``directory``.
    user : object with a ``network`` mapping, optional
        Starting point of the snowball selection; required when ``snowball``
        is True.

    Raises
    ------
    ValueError
        If ``snowball`` is True and ``user`` is None, or if fewer than ``n``
        users could be selected.

    Selection options
    -----------------
    random (default): selects n users at random

    snowball: starts from a specified user, iterates over neighbors, and does a BFS until n neighbors are reached
    """
    if snowball:
        if user is None:
            raise ValueError("Must specify a starting user from whom to initiate the snowball")
        users, agenda = [user], [user]
        while agenda:
            # FIFO order makes this a breadth-first search, as documented.
            parent = agenda.pop(0)
            neighbors = list(parent.network.values())
            # Randomise the visiting order instead of following key order.
            random.shuffle(neighbors)
            for neighbor in neighbors:
                if len(users) >= n:
                    break
                if neighbor is None or neighbor in users:
                    continue
                users.append(neighbor)
                if neighbor.network:
                    # list has no push(); use append to enqueue.
                    agenda.append(neighbor)
    else:
        files = [x for x in os.listdir(directory) if os.path.isfile(os.path.join(directory, x))]
        random.shuffle(files)
        # Drop the last four characters (e.g. ".csv") to obtain the user id.
        users = [bc.read_csv(name[:-4], directory) for name in files[:n]]
    if len(users) < n:
        raise ValueError("Specified more users than records that exist, only {} records available".format(len(users)))

    bc.to_csv([bc.utils.all(u) for u in users], filename)
コード例 #19
0
def load_and_compute(user_id, attributes):
    """
    Compute bandicoot indicators for one user using the run's settings.

    Inputs
    ------
    user_id : ID of the user whose bandicoot features are calculated.
    attributes : Object carrying the options of the current run; this
        function reads ``bandicoot_path``, ``weekend_days`` and
        ``noct_time`` (with ``'begin'`` and ``'end'`` keys) from it.

    Output
    ------
    Dictionary of calculated bandicoot indicators, or
    ``{'name': user_id, 'error': True}`` if anything fails.
    """
    try:
        # Build the user object with describe/warnings output switched off.
        user = bc.read_csv(
            user_id=user_id,
            records_path=attributes.bandicoot_path,
            antennas_path=attributes.bandicoot_path+'antennas.csv',
            describe=False,
            warnings=False)

        # Override the weekend days and the nocturnal time window.
        user.weekend = attributes.weekend_days
        user.night_start = datetime.time(attributes.noct_time['begin'])
        user.night_end = datetime.time(attributes.noct_time['end'])

        # No time grouping; week/weekend and day/night are split, and the
        # extended summary is requested.
        return bc.utils.all(user,
                            groupby=None,
                            split_week=True,
                            split_day=True,
                            summary='extended')
    except Exception:
        # Any failure is reported as an error row rather than raised.
        return {'name': user_id, 'error': True}
コード例 #20
0
 def test_read_csv(self):
     """``u_test2`` from ``samples`` is read with exactly 500 records."""
     loaded = bc.read_csv("u_test2", "samples", describe=False)
     record_count = len(loaded.records)
     self.assertEqual(record_count, 500)
コード例 #21
0
ファイル: test_group.py プロジェクト: aujacquet/bandicoot
 def setUp(self):
     """Load the ``user_ignored`` sample user for each test."""
     loaded = bc.read_csv('user_ignored', 'samples')
     self.user = loaded
コード例 #22
0
ファイル: bc.py プロジェクト: shahsahil/panic-detector
import bandicoot as bc
import pprint
# Load the user and collect the POSIX timestamp of every record.
u = bc.read_csv("sanjay", "C:\\Users\\Sahil\\Desktop")

rec = u.records
# Other features that could be included: call duration, the people being
# called, and the total number of times each of them has been called.
t = []
c = 0  # number of records seen so far
for c, i in enumerate(rec, 1):
    t.append(i.datetime.timestamp())


def grouper(iterable, max_gap=1000):
    """Yield runs of consecutive items that are close to each other.

    Items are read in order; an item joins the current group when it is at
    most ``max_gap`` greater than the previous item, otherwise the current
    group is yielded and a new one is started.

    Parameters
    ----------
    iterable : iterable of numbers
        Values to group (e.g. timestamps).
    max_gap : number, optional
        Largest allowed difference between two successive items of the same
        group. Defaults to 1000.

    Yields
    ------
    list
        Each non-empty group, in input order.
    """
    prev = None
    group = []
    for item in iterable:
        # Compare against None explicitly: a previous value of 0 is a real
        # item and must not be mistaken for "no previous item".
        if prev is None or item - prev <= max_gap:
            group.append(item)
        else:
            yield group
            group = [item]
        prev = item
    if group:
        yield group


# Map a 1-based group number to each group of nearby timestamps.
di = {number: members for number, members in enumerate(grouper(t), 1)}
コード例 #23
0
ファイル: full_pipeline.py プロジェクト: aujacquet/bandicoot
import sys
sys.path.append("../")

import bandicoot as bc
import glob
import os

# Compute the bandicoot indicators of every user file in ``records_path``
# and export them to a single CSV.
records_path = 'users_bandicoot/'
antenna_file = 'towers.csv'

indicators = []
for f in glob.glob(records_path + '*.csv'):
    # The user id is the file name without its last four characters.
    user_id = os.path.basename(f)[:-4]

    try:
        B = bc.read_csv(user_id, records_path, antenna_file, describe=False)
        metrics_dict = bc.utils.all(B)
    except Exception:
        # Must assign the same name that is appended below; the previous
        # spelling ("metrics_dic") left a stale or undefined value in place.
        metrics_dict = {'name': user_id, 'error': True}

    indicators.append(metrics_dict)

bc.io.to_csv(indicators, 'bandicoot_indicators_full.csv')
コード例 #24
0
ファイル: test_group.py プロジェクト: ThomasRoca/bandicoot
 def setUp(self):
     """Read the ``user_ignored`` sample user before every test."""
     sample = bc.read_csv('user_ignored', 'samples')
     self.user = sample
コード例 #25
0
from bandicoot.helper.group import grouping
import bandicoot as bc


# Loading a User
# Loading a User
# Positional arguments: user id, records directory, antennas file.
U = bc.read_csv('ego', 'data/', 'data/antennas.csv')


########################
# Export visualization #
########################

# Second argument is the export path for the visualization.
bc.visualization.export(U, 'my-viz-path')


#########################################
# Run individual and spatial indicators #
#########################################

# Return values are discarded here; the calls only demonstrate the API.
bc.individual.percent_initiated_conversations(U)
bc.spatial.number_of_antennas(U)
bc.spatial.radius_of_gyration(U)


######################################
# Group indicators by weeks or month #
######################################

# The groupby keyword controls the aggregation:
# - groupby='week' to divide by week (by default),
# - groupby='month' to divide by month,
コード例 #26
0
 def test_read_csv_with_recharges(self):
     """User ``A`` read with a recharges path has five recharges."""
     loaded = bc.read_csv("A", "samples/manual", describe=False,
                          recharges_path="samples/manual/recharges")
     recharge_count = len(loaded.recharges)
     self.assertEqual(recharge_count, 5)
コード例 #27
0
ファイル: test_parsers.py プロジェクト: jcgdata/bandicoot
 def test_read_csv_with_recharges(self):
     """Reading user ``A`` with its recharges file yields 5 recharges."""
     loaded = bc.read_csv(
         "A",
         "samples/manual",
         describe=False,
         recharges_path="samples/manual/recharges")
     how_many = len(loaded.recharges)
     self.assertEqual(how_many, 5)
コード例 #28
0
ファイル: test_parsers.py プロジェクト: aujacquet/bandicoot
 def test_read_csv(self):
     """Reading ``u_test2`` from ``samples`` gives 500 records."""
     parsed = bc.read_csv("u_test2", "samples", describe=False)
     total = len(parsed.records)
     self.assertEqual(total, 500)