Beispiel #1
0
def add_basic_data(entry):
    """
    Normalise the basic fields of ``entry`` in place and attach GeoIP data.

    The values stored under ORIGIN_TYPE and DESTINATION_TYPE are converted
    to ``int`` and the value under PEER_LATENCY to ``float``.  GeoIP data
    for the ORIGIN_IP and DESTINATION_IP values is looked up with
    ``get_geoip_data`` and stored under ORIGIN_GEOIP and DESTINATION_GEOIP.
    Finally the distance between both GeoIP records is stored under
    DISTANCE, or ``None`` when ``geoip_distance`` fails.

    :param entry: mutable mapping holding the keys named above; it is
        modified in place and nothing is returned.
    :raises ValueError, TypeError: if a type or latency value cannot be
        converted by ``int()`` / ``float()``.
    """
    entry[ORIGIN_TYPE] = int(entry[ORIGIN_TYPE])
    entry[DESTINATION_TYPE] = int(entry[DESTINATION_TYPE])
    entry[PEER_LATENCY] = float(entry[PEER_LATENCY])
    entry[ORIGIN_GEOIP] = get_geoip_data(entry[ORIGIN_IP])
    entry[DESTINATION_GEOIP] = get_geoip_data(entry[DESTINATION_IP])
    try:
        entry[DISTANCE] = geoip_distance(entry[ORIGIN_GEOIP],
                                         entry[DESTINATION_GEOIP])
    except Exception:
        # Best effort: the distance is optional.  ``Exception`` (instead of
        # a bare ``except``) keeps KeyboardInterrupt/SystemExit propagating.
        entry[DISTANCE] = None
Beispiel #2
0
def add_basic_data(entry):
    """
    Normalise the basic fields of ``entry`` in place and attach GeoIP data.

    The values stored under ORIGIN_TYPE and DESTINATION_TYPE are converted
    to ``int`` and the value under PEER_LATENCY to ``float``.  GeoIP data
    for the ORIGIN_IP and DESTINATION_IP values is looked up with
    ``get_geoip_data`` and stored under ORIGIN_GEOIP and DESTINATION_GEOIP.
    Finally the distance between both GeoIP records is stored under
    DISTANCE, or ``None`` when ``geoip_distance`` fails.

    :param entry: mutable mapping holding the keys named above; it is
        modified in place and nothing is returned.
    :raises ValueError, TypeError: if a type or latency value cannot be
        converted by ``int()`` / ``float()``.
    """
    entry[ORIGIN_TYPE] = int(entry[ORIGIN_TYPE])
    entry[DESTINATION_TYPE] = int(entry[DESTINATION_TYPE])
    entry[PEER_LATENCY] = float(entry[PEER_LATENCY])
    entry[ORIGIN_GEOIP] = get_geoip_data(entry[ORIGIN_IP])
    entry[DESTINATION_GEOIP] = get_geoip_data(entry[DESTINATION_IP])
    try:
        entry[DISTANCE] = geoip_distance(entry[ORIGIN_GEOIP],
                                         entry[DESTINATION_GEOIP])
    except Exception:
        # Best effort: the distance is optional.  ``Exception`` (instead of
        # a bare ``except``) keeps KeyboardInterrupt/SystemExit propagating.
        entry[DISTANCE] = None
Beispiel #3
0
 def __init__(self, ip, geoip=None, altitude=default_altitude):
     self.ip = ip
     if geoip is None:
         geoip = get_geoip_data(ip)
     if geoip is None:
         print "None geoip " + ip
         self.latitude = 0
         self.longitude = 0
         self.altitude = 0
         self.record = [altitude]
         self.distance_to = lambda x: 0            
         return
     self.geoip = geoip
     self.latitude = geoip['latitude']
     self.longitude = geoip['longitude']
     self.altitude = altitude
     self.record = [altitude]
Beispiel #4
0
def create_ic2012_pkl(set_id, skip_if_exists=False):
    file_name = '../data/ubi-data2/ic2012/%03i.csv' % set_id
    pkl_name = '../data/ubi-data2/ic2012/%03i.pkl' % set_id
    if skip_if_exists:
        if os.path.exists(pkl_name):
            return pkl_name

    if not os.path.exists(file_name):
        print 'skipping %s' % os.path.basename(file_name)
        return

    tmp_rows = []
    tmp_seen = set()
    tmp_ndup = 0
    tmp_ninvalid = 0
    tmp_nrows = 0
    for l in file(file_name).readlines():
        print '\r%i' % tmp_nrows,
        sys.stdout.flush()

        fields = l.rstrip().split(',')
        t_stamp, ip1, ip2, ping12 = fields
        t_stamp = int(t_stamp)
        ping12 = float(ping12)

        geo1 = get_geoip_data(ip1)
        geo2 = get_geoip_data(ip2)
        try:
            distance = geoip_distance(geo1, geo2)
        except:
            distance = 8927011  # average distance
        if not geo1:
            geo1 = defaultdict(unicode)
        if not geo2:
            geo2 = defaultdict(unicode)
        try:
            int_ip1, int_ip2 = map(get_int_from_ip, (ip1, ip2))
        except:
            tmp_ninvalid += 1
            continue
        type1, type2 = 3, 3  # different from mobile data

        _id = (t_stamp, int_ip1, int_ip2, ping12)
        if _id not in tmp_seen:
            tmp_seen.add(_id)
            try:
                row = [
                    geo1['country_code'], geo1['region_name'], geo1['city'],
                    type1, geo2['country_code'], geo2['region_name'],
                    geo2['city'], type2, distance, ping12, 0, 0, 0, int_ip1,
                    int_ip2, t_stamp
                ]
            except:
                import pdb
                pdb.set_trace()

            tmp_rows.append(row)
            tmp_nrows += 1
        else:
            tmp_ndup += 1

    print ' in %s, n_duplicates: %d, n_invalid: %d, total: %d' % (
        os.path.basename(file_name), tmp_ndup, tmp_ninvalid,
        tmp_ndup + tmp_ninvalid + tmp_nrows)
    print 'pickling to %s' % os.path.basename(pkl_name)
    pickle.dump(tmp_rows, open(pkl_name, 'wb'), -1)
    print '  done'
    return pkl_name
Beispiel #5
0
def create_ic2012_pkl(set_id, skip_if_exists=False):
    file_name = '../data/ubi-data2/ic2012/%03i.csv' % set_id
    pkl_name = '../data/ubi-data2/ic2012/%03i.pkl' % set_id
    if skip_if_exists:
        if os.path.exists(pkl_name):
            return pkl_name

    if not os.path.exists(file_name):
        print 'skipping %s' % os.path.basename(file_name)
        return

    tmp_rows = []
    tmp_seen = set()
    tmp_ndup = 0
    tmp_ninvalid = 0
    tmp_nrows = 0
    for l in file(file_name).readlines():
        print '\r%i' % tmp_nrows,
        sys.stdout.flush()

        fields = l.rstrip().split(',')
        t_stamp, ip1, ip2, ping12 = fields
        t_stamp = int(t_stamp)
        ping12 = float(ping12)

        geo1 = get_geoip_data(ip1)
        geo2 = get_geoip_data(ip2)
        try:
            distance = geoip_distance(geo1, geo2)
        except:
            distance = 8927011  # average distance
        if not geo1:
            geo1 = defaultdict(unicode)
        if not geo2:
            geo2 = defaultdict(unicode)
        try:
            int_ip1, int_ip2 = map(get_int_from_ip, (ip1, ip2))
        except:
            tmp_ninvalid += 1
            continue
        type1, type2 = 3, 3  # different from mobile data

        _id = (t_stamp, int_ip1, int_ip2, ping12)
        if _id not in tmp_seen:
            tmp_seen.add(_id)
            try:
                row = [geo1['country_code'], geo1['region_name'],
                   geo1['city'], type1,
                   geo2['country_code'], geo2['region_name'],
                   geo2['city'], type2,
                   distance, ping12, 0, 0, 0, int_ip1, int_ip2, t_stamp]
            except:
                import pdb; pdb.set_trace()

            tmp_rows.append(row)
            tmp_nrows += 1
        else:
            tmp_ndup += 1

    print ' in %s, n_duplicates: %d, n_invalid: %d, total: %d' % (
        os.path.basename(file_name),
        tmp_ndup,
        tmp_ninvalid,
        tmp_ndup + tmp_ninvalid + tmp_nrows)
    print 'pickling to %s' % os.path.basename(pkl_name)
    pickle.dump(tmp_rows, open(pkl_name, 'wb'), -1)
    print '  done'
    return pkl_name