def process_measure(measure_id, data, session):
    """Flatten one measure into per-cell and per-wifi dicts.

    The fields shared by the whole measure (id, position, time, accuracy
    and altitude values) are merged into every entry of ``data['cell']``
    and ``data['wifi']``. The entry dicts are updated in place and the
    very same list objects from ``data`` are returned.

    :param measure_id: identifier stored under ``measure_id`` in each entry.
    :param data: mapping with ``lat``, ``lon``, ``time``, ``accuracy``,
        ``altitude``, ``altitude_accuracy`` and ``radio`` keys, plus
        optional ``cell`` and ``wifi`` sequences of dicts.
    :param session: accepted for interface compatibility; not used here.
    :returns: ``(cell_measures, wifi_measures)``; either is an empty list
        when there is nothing to report for that kind.
    """
    shared = {
        'measure_id': measure_id,
        'lat': to_precise_int(data['lat']),
        'lon': to_precise_int(data['lon']),
        'time': encode_datetime(data['time']),
        'accuracy': data['accuracy'],
        'altitude': data['altitude'],
        'altitude_accuracy': data['altitude_accuracy'],
    }
    # unknown radio names map to -1
    fallback_radio = RADIO_TYPE.get(data['radio'], -1)

    cell_measures = []
    cells = data.get('cell')
    if cells:
        # flatten measure / cell data into a single dict
        for entry in cells:
            entry.update(shared)
            radio_name = entry['radio']
            # use the more specific cell type or fall back to the
            # less precise measure-level radio
            if radio_name == '':
                entry['radio'] = fallback_radio
            else:
                entry['radio'] = RADIO_TYPE.get(radio_name, -1)
        cell_measures = cells

    wifi_measures = []
    wifis = data.get('wifi')
    if wifis:
        # filter out old-style sha1 hashes: a single invalid key
        # discards every wifi entry of this measure
        for entry in wifis:
            normalized = normalize_wifi_key(entry['key'])
            entry['key'] = normalized
            if not valid_wifi_pattern(normalized):
                break
        else:
            # only reached when no key was rejected above
            # flatten measure / wifi data into a single dict
            for entry in wifis:
                entry.update(shared)
            wifi_measures = wifis

    return (cell_measures, wifi_measures)
def load_file(session, source_file, batch_size=100, userid=None):
    """Import wifi measures from a tab-separated file.

    Each row is read positionally: unix timestamp, wifi key, latitude,
    longitude, accuracy, altitude, altitude accuracy, channel, signal.
    A row is skipped without notice when it is too short, when a numeric
    column cannot be parsed, when its timestamp cannot be converted to a
    datetime, or when its key is rejected by ``valid_wifi_pattern``.

    Accepted rows are passed to ``process_measures`` in batches of
    ``batch_size``; ``session.flush()`` is called after every full batch
    and once more at the end.

    :param session: forwarded to ``process_measures``; must offer ``flush()``.
    :param source_file: path of the file to read.
    :param batch_size: number of accepted rows per batch.
    :param userid: forwarded to ``process_measures`` unchanged.
    :returns: the number of accepted rows.
    """
    # NOTE(review): another ``load_file`` with the same signature appears
    # later in this file; if both live at module level, the later
    # definition replaces this one.
    utcnow = util.utcnow()
    # rows with an unknown (zero) timestamp are dated 120 days back
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)

        counter = 0
        items = []
        for fields in reader:
            try:
                unixtime = int(fields[0])
                if unixtime == 0:  # pragma: no cover
                    # unknown time gets an old date
                    measured_at = utcmin
                else:
                    # convert from unixtime to utc
                    # NOTE(review): utcfromtimestamp() returns a naive
                    # datetime; whether util.utcnow() is timezone-aware is
                    # not visible here -- confirm both branches agree.
                    measured_at = datetime.datetime.utcfromtimestamp(
                        unixtime)
                # NOTE(review): other functions in this file call
                # ``normalize_wifi_key``; confirm this spelling is defined.
                key = normalized_wifi_key(str(fields[1]))
                if not valid_wifi_pattern(key):  # pragma: no cover
                    continue
                lat = float(fields[2])
                lon = float(fields[3])
                accuracy = int(fields[4])
                altitude = int(fields[5])
                altitude_accuracy = int(fields[6])
                channel = int(fields[7])
                signal = int(fields[8])
            except (ValueError, IndexError, OverflowError, OSError):
                # OverflowError / OSError: on Python 3 an out-of-range
                # timestamp raises these instead of ValueError; treat such
                # a row like any other malformed row and skip it.
                continue

            wifi = dict(
                key=key,
                channel=channel,
                signal=signal,
            )
            data = dict(
                lat=lat,
                lon=lon,
                time=measured_at,
                accuracy=accuracy,
                altitude=altitude,
                altitude_accuracy=altitude_accuracy,
                radio='',
                cell=(),
                wifi=[wifi],
                # heading and speed are not read from the file; fixed
                # values are sent instead.
                # not sure if the importer has an actual file
                # specification anywhere
                heading=-255,
                speed=-255,
            )
            items.append(data)
            counter += 1

            # flush every batch_size records
            if counter % batch_size == 0:
                process_measures(items, session, userid=userid)
                items = []
                session.flush()
                print('Added %s records.' % counter)

    # process the remaining items
    process_measures(items, session, userid=userid)
    print('Added %s records.' % counter)

    session.flush()
    return counter
def load_file(session, source_file, batch_size=100, userid=None):
    """Import wifi measures from a tab-separated file.

    Each row is read positionally: unix timestamp, wifi key, latitude,
    longitude, accuracy, altitude, altitude accuracy, channel, signal.
    Latitude and longitude are passed on as the raw strings found in the
    file; the remaining numeric columns are converted with ``int``.
    A row is skipped without notice when it is too short, when a numeric
    column cannot be parsed, when its timestamp cannot be converted to a
    datetime, or when its key is rejected by ``valid_wifi_pattern``.

    Accepted rows are passed to ``process_measures`` in batches of
    ``batch_size``; ``session.flush()`` is called after every full batch
    and once more at the end.

    :param session: forwarded to ``process_measures``; must offer ``flush()``.
    :param source_file: path of the file to read.
    :param batch_size: number of accepted rows per batch.
    :param userid: forwarded to ``process_measures`` unchanged.
    :returns: the number of accepted rows.
    """
    # NOTE(review): an earlier ``load_file`` with the same signature appears
    # in this file; if both live at module level, this later definition
    # is the one that stays bound to the name.
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    # rows with an unknown (zero) timestamp are dated 120 days back
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)

        counter = 0
        items = []
        for fields in reader:
            try:
                unixtime = int(fields[0])
                if unixtime == 0:  # pragma: no cover
                    # unknown time gets an old date
                    measured_at = utcmin
                else:
                    # convert from unixtime to utc
                    # NOTE(review): this value is a naive datetime while
                    # utcmin above carries tzinfo -- confirm downstream
                    # code copes with both kinds.
                    measured_at = datetime.datetime.utcfromtimestamp(
                        unixtime)
                key = normalize_wifi_key(str(fields[1]))
                if not valid_wifi_pattern(key):  # pragma: no cover
                    continue
                # lat / lon are deliberately left unconverted here
                lat = fields[2]
                lon = fields[3]
                accuracy = int(fields[4])
                altitude = int(fields[5])
                altitude_accuracy = int(fields[6])
                channel = int(fields[7])
                signal = int(fields[8])
            except (ValueError, IndexError, OverflowError, OSError):
                # OverflowError / OSError: on Python 3 an out-of-range
                # timestamp raises these instead of ValueError; treat such
                # a row like any other malformed row and skip it.
                continue

            wifi = dict(
                key=key,
                channel=channel,
                signal=signal,
            )
            data = dict(
                lat=lat,
                lon=lon,
                time=measured_at,
                accuracy=accuracy,
                altitude=altitude,
                altitude_accuracy=altitude_accuracy,
                radio='',
                cell=(),
                wifi=[wifi],
            )
            items.append(data)
            counter += 1

            # flush every batch_size records
            if counter % batch_size == 0:
                process_measures(items, session, userid=userid)
                items = []
                session.flush()
                print('Added %s records.' % counter)

    # process the remaining items
    process_measures(items, session, userid=userid)
    print('Added %s records.' % counter)

    session.flush()
    return counter