def add_file(self, filename):
    """Feed the receive timestamp of every usable line in filename to self.up.

    Skips short lines and '#' comment lines.  The timestamp is taken from
    the last comma separated field of the USCG formatted NMEA line.

    @param filename: USCG formatted NMEA log file to read
    """
    for line_num, line in enumerate(file(filename)):
        if len(line) < 10 or line[0] == '#':
            continue  # too short to be a sentence, or a comment line
        line = line.rstrip()
        try:
            # USCG logging appends the UNIX receive time as the last field.
            timestamp = float(line.split(',')[-1])
        except ValueError:
            # Was a bare except; only float() can raise here, since
            # split(',')[-1] always yields at least one element.
            sys.stderr.write('skipping line: %s\n' % (line, ))
            continue
        self.up.add_time(timestamp)
        # NOTE: removed a disabled `if False:` block of position-statistics
        # code (it referenced self.pos_stats and AisError* exceptions).
def nmea_summary(filename):
    """Tally message types, receive stations and channels in a log file.

    Returns a dict with keys 'msgs', 'stations' and 'channels'.
    """
    msgs = {}
    for val in ais.binary.encode:
        msgs[val] = 0
    station_counts = {}
    channel_counts = {'A': 0, 'B': 0}
    for line in file(filename):
        match_obj = uscg_ais_nmea_regex.search(line)
        if match_obj is None:
            continue
        msg = match_obj.groupdict()
        if msg['senNum'] == '1':
            # Only the first sentence of a sequence carries the type char.
            msgs[msg['body'][0]] += 1
        if msg['chan'] is not None:
            channel_counts[msg['chan']] += 1
        if msg['station'] is not None:
            station = msg['station']
            station_counts[station] = station_counts.get(station, 0) + 1
    return {'msgs': msgs, 'stations': station_counts, 'channels': channel_counts}
def nmea_summary(filename):
    """Summarize a NMEA log: message type, station and channel counts."""
    # Zero-initialize a counter for every possible armored 6-bit character.
    msgs = dict((val, 0) for val in binary.encode)
    station_counts = {}
    channel_counts = {'A': 0, 'B': 0}
    for line in file(filename):
        found = uscg_ais_nmea_regex.search(line)
        if found is None:
            continue
        fields = found.groupdict()
        # The first payload character of sentence 1 encodes the message type.
        if fields['senNum'] == '1':
            msgs[fields['body'][0]] += 1
        chan = fields['chan']
        if chan is not None:
            channel_counts[chan] += 1
        station = fields['station']
        if station is not None:
            if station not in station_counts:
                station_counts[station] = 0
            station_counts[station] += 1
    return {
        'msgs': msgs,
        'stations': station_counts,
        'channels': channel_counts
    }
def parse_msgs(infile, verbose=False): for line in infile: try: match = uscg_ais_nmea_regex.search(line).groupdict() except AttributeError: continue #if not match: continue if match['body'][0]!='8': continue #print line, bv = binary.ais6tobitvec(match['body']) r = {} r['MessageID']=int(bv[0:6]) r['RepeatIndicator']=int(bv[6:8]) r['UserID']=int(bv[8:38]) r['Spare']=int(bv[38:50]) #r['BinaryData']=bv[40:] r['dac']=int(bv[40:50]) r['fid']=int(bv[50:56]) #if 34==r['fid']: if verbose: print r['dac'], r['fid'], r['UserID'], line.rstrip() else: print r['dac'], r['fid'], r['UserID'], match['station']
def add_file(self, filename):
    """Scan an AIS log and record each line's receive time via self.up.

    @param filename: USCG formatted NMEA log file to read
    """
    for line_num, line in enumerate(file(filename)):
        # Ignore comments and lines too short to carry a timestamp.
        if len(line) < 10 or line[0] == '#':
            continue
        line = line.rstrip()
        try:
            timestamp = float(line.split(',')[-1])  # trailing USCG time field
        except ValueError:  # narrowed from a bare except: only float() raises
            sys.stderr.write('skipping line: %s\n' % (line, ))
            continue
        self.up.add_time(timestamp)
        # NOTE: dropped the disabled `if False:` position-statistics block.
def parse_msgs(infile, verbose=False): for line in infile: line = line.strip() try: match = uscg_ais_nmea_regex.search(line).groupdict() except AttributeError: continue msg_type = match['body'][0] if msg_type not in ('6', '8'): continue print 'cp 1' if msg_type == '6' and len(match['body']) < 15: continue if msg_type == '8' and len(match['body']) < 10: continue try: bv = binary.ais6tobitvec(match['body'][:15]) except ValueError: sys.stderr.write('bad msg: %s\n' % line.strip()) continue r = {} r['MessageID']=int(bv[0:6]) r['UserID']=int(bv[8:38]) if '6' == msg_type: dac = int(bv[72:82]) fi = int(bv[82:88]) elif '8' == msg_type: dac = int(bv[40:50]) fi = int(bv[50:56]) elif verbose: print 'not a bbm:', line if verbose: print msg_type, dac, fi, r['UserID'], line.rstrip() else: print msg_type, dac, fi, r['UserID'], match['station']
def parse_msgs(infile, verbose=False): for line in infile: line = line.strip() try: match = uscg_ais_nmea_regex.search(line).groupdict() except AttributeError: continue msg_type = match['body'][0] if msg_type not in ('6', '8'): continue if msg_type == '6' and len(match['body']) < 15: continue if msg_type == '8' and len(match['body']) < 10: continue try: bv = binary.ais6tobitvec(match['body'][:15]) except ValueError: sys.stderr.write('bad msg: %s\n' % line.strip()) continue r = {} r['MessageID'] = int(bv[0:6]) r['UserID'] = int(bv[8:38]) if '6' == msg_type: dac = int(bv[72:82]) fi = int(bv[82:88]) elif '8' == msg_type: dac = int(bv[40:50]) fi = int(bv[50:56]) elif verbose: print 'not a bbm:', line if verbose: print msg_type, dac, fi, r['UserID'], line.rstrip() else: print msg_type, dac, fi, r['UserID'], match['station']
def build_dist_database(database_filename, log_files, verbose=False): cx = sqlite3.connect(database_filename) print 'WARNING: not saving the station name' cx.execute(''' CREATE TABLE IF NOT EXISTS distance ( -- Save space, no key -- ts INTEGER, -- Save more space julian_day INTEGER, -- x REAL, -- y REAL, dist_km REAL --, --station VARCHAR(15) ); ''') cu = cx.cursor() counts = {'nogps': 0} for filename in log_files: if verbose: print 'file:', filename sys.stdout.flush() for line_num, line in enumerate(file(filename)): if 'AIVDM,1,1' not in line: continue match = uscg_ais_nmea_regex.search(line).groupdict() message_id = match['body'][0] # First letter is the message type if message_id not in ('1', '2', '3'): continue if len(match['body']) != 28: # 6 bits per character raise AisErrorBadNumBits('expected 168, got %d' % len(match['body']) / 6) bits = binary.ais6tobitvec( match['body'][:20] ) # Don't need any of the other bits, so do not waste time x = binary.signedIntFromBV(bits[61:89]) / 600000. y = binary.signedIntFromBV(bits[89:116]) / 600000. 
if x > 180 or y > 90: counts['nogps'] += 1 continue station = match['station'] julian_day = int( datetime.datetime.utcfromtimestamp(int( match['timeStamp'])).strftime('%j')) d_km = dist_utm_km((x, y), station_locations[station]) #cu.execute('INSERT INTO distance VALUES (:julian_day, :x, :y, :dist_km, :station)', #{'julian_day': julian_day, 'x':x, 'y':y, 'dist_km': d_km, 'station':station} ) #cu.execute('INSERT INTO distance VALUES (:julian_day, :x, :y, :dist_km)', # {'julian_day': julian_day, 'x':x, 'y':y, 'dist_km': d_km, } ) cu.execute('INSERT INTO distance VALUES (:julian_day, :dist_km)', { 'julian_day': julian_day, 'dist_km': d_km, }) if line_num % 10000 == 9999: cx.commit() cx.commit() if False: print 'Creating indexes' try: cx.execute('CREATE INDEX idx_dist_day ON distance(julian_day);') cx.execute('CREATE INDEX idx_dist_dist ON distance(dist_km);') #cx.execute('CREATE INDEX idx_dist_station ON distance(station);') cx.commit() except sqlite3.OperationalError: print 'Appears indexes were already created' return cx, counts
@requires: U{epydoc<http://epydoc.sourceforge.net/>} > 3.0 @since: 2010-Mar-26 @var __date__: Date of last svn commit @undocumented: __version__ __author__ __doc__ parser @status: In progress ''' import sys from aisutils.uscg import uscg_ais_nmea_regex use_line_num = True # else use timestamp msg_lut = {} for line_num, line in enumerate(file(sys.argv[1])): line = line.strip() try: match = uscg_ais_nmea_regex.search(line).groupdict() except: print 'ignoring line:',line.strip() if use_line_num: msg_lut[match['body']] = line_num + 1 else: msg_lut[match['body']] = msg['timeStamp'] print 'msgs in lut:', len(msg_lut),'from',line_num+1,'lines' #o = file(sys.argv[2]+'.inboth','w') if use_line_num: o = file(sys.argv[2]+'.linenum','w') else: o = file(sys.argv[2]+'.time','w')
# NOTE(review): top-level script fragment whose enclosing for-loop body
# continues beyond this chunk, so the code is left byte-identical and only
# comments are added.  It opens the output file, then walks each input log,
# skipping lines that do not match the USCG regex (echoed to stderr) and
# any sentence whose sequence number field is not '1'.
o = open(options.outputFilename,'w') verbose = options.verbose #print args for filename in args: print filename linenum=1 for line in file(filename): if linenum%1000==0: print 'line',linenum linenum += 1 #try: match_obj = uscg_ais_nmea_regex.search(line) if match_obj is None: sys.stderr.write(line) continue station = match_obj.group('station') #except: # sys.stderr.write('bad line: %s\n' %line) # continue fields = line.split(',')[:6] if '1'!=fields[2]: # Must be the start of a sequence #if verbose: # print 'skipping based on field 2',line continue
# NOTE(review): long, order-dependent loader; left byte-identical, comments
# only.  Decodes AIS messages 1-5/18/19 from a NMEA stream into sqlite via
# per-message sqlInsert helpers, adding USCG tail fields and duplicate-packet
# tracking.  Known issues spotted on review (fixed in the formatted copy of
# this function later in the file):
#   - `line[3:6] not in ('VDM|VDO')` is substring membership in ONE string,
#     so junk slices like 'DM|V' also pass; should be the tuple
#     ('VDM', 'VDO').
#   - a failed checksum is counted but the line is still processed -
#     presumably deliberate best-effort decoding; confirm.
def load_data(cx, datafile=sys.stdin, verbose=False, uscg=True): """Try to read data from an open file object. Not yet well tested. @param cx: database connection @param verbose: pring out more if true @param uscg: Process uscg tail information to get timestamp and receive station @rtype: None @return: Nothing @note: can not handle multiline AIS messages. They must be normalized first. """ v = verbose # Hmm... "v"... the irony cu = cx.cursor() lineNum = 0 next_key = 0 max_key = get_max_key(cx) if max_key is not None: next_key = max_key + 1 print 'keys_starting_at:',next_key message_set = (1,2,3,4,5,18,19) counts = {} for msg_num in message_set: counts[msg_num] = 0 counts['checksum_failed'] = 0 track_dups = TrackDuplicates(lookback_length=1000) for line in datafile: lineNum += 1 if lineNum%1000==0: print lineNum cx.commit() if len(line)<15 or line[3:6] not in ('VDM|VDO'): continue # Not an AIS VHF message #print 'FIX: validate checksum' if not nmea.checksum.isChecksumValid(line): print >> sys.stderr, 'WARNING: invalid checksum:\n\t',line, print >> sys.stderr, ' ',nmea.checksum.checksumStr(line) counts['checksum_failed'] += 1 fields=line.split(',') # FIX: use this split throughout below... try: msg_num = int(binary.ais6tobitvec(fields[5][0])) except: print 'line would not decode',line continue if verbose: print 'msg_num:',msg_num if msg_num not in message_set: if verbose: print 'skipping',line print ' not in msg set:',str(message_set) continue try: bv = binary.ais6tobitvec(fields[5]) except: print >> sys.stderr, 'ERROR: Unable to decode bits in line:\n\t',line traceback.print_exc(file=sys.stderr) continue # FIX: need to take padding into account ... 
right before the * if msg_num in (1,2,3,4,18): if len(bv) != 168: print 'ERROR: skipping bad one slot message, line:',lineNum print ' ',line, print ' Got length',len(bv), 'expected', 168 continue elif msg_num == 5: # 426 has 2 pad bits if len(bv) not in (424,426): print 'ERROR: skipping bad shipdata message, line:',lineNum print ' ',line, print ' Got length',len(bv), 'expected', 424 continue ins = None try: if msg_num== 1: ins = ais.ais_msg_1_handcoded.sqlInsert(ais.ais_msg_1_handcoded.decode(bv),dbType='sqlite') elif msg_num== 2: ins = ais.ais_msg_2_handcoded.sqlInsert(ais.ais_msg_2_handcoded.decode(bv),dbType='sqlite') elif msg_num== 3: ins = ais.ais_msg_3_handcoded.sqlInsert(ais.ais_msg_3_handcoded.decode(bv),dbType='sqlite') elif msg_num== 4: ins = ais.ais_msg_4_handcoded.sqlInsert(ais.ais_msg_4_handcoded.decode(bv),dbType='sqlite') elif msg_num== 5: ins = ais.ais_msg_5.sqlInsert(ais.ais_msg_5.decode(bv),dbType='sqlite') elif msg_num==18: ins = ais.ais_msg_18.sqlInsert(ais.ais_msg_18.decode(bv),dbType='sqlite') # Class B position elif msg_num==19: ins = ais.ais_msg_19.sqlInsert(ais.ais_msg_19.decode(bv),dbType='sqlite') # Class B position else: print 'Warning... 
not handling type',msg_num,'line:',lineNum continue except: print 'ERROR: some decode error?','line:',lineNum print ' ',line continue counts[msg_num] += 1 if uscg: from aisutils.uscg import uscg_ais_nmea_regex match = uscg_ais_nmea_regex.search(line).groupdict() try: cg_sec = int(float(match['timeStamp'])) ins.add('cg_sec', cg_sec) #ins.add('cg_sec', int(float(match['timeStamp'])) ) ins.add('cg_timestamp', str(datetime.datetime.utcfromtimestamp(float(match['timeStamp']))) ) ins.add('cg_r', match['station'] ) except: print >> sys.stderr, match print >> sys.stderr, 'bad uscg sections',line, continue # Optional fields that are not always there if match['time_of_arrival'] is not None: try: ins.add('cg_t_arrival', float(match['time_of_arrival'])) except: print >> sys.stderr, 'WARNING: corrupted time of arrival (T) in line. T ignored\n\t',line pass # Not critical if corrupted if match['slot'] is not None: ins.add('cg_s_slotnum', int(match['slot']) ) if msg_num in (1,2,3,4): pkt_id,dup_flag = track_dups.check_packet(cg_sec,fields[5]) # Pass in the NMEA payload string of data if v: print 'dup_check:',pkt_id,dup_flag,fields[5] ins.add('pkt_id',pkt_id) ins.add('dup_flag',dup_flag) ins.add('key',next_key) next_key += 1 if verbose: print str(ins) try: cu.execute(str(ins)) except pysqlite2.dbapi2.OperationalError, params: #except OperationalError, params: if -1 != params.message.find('no such table'): print 'ERROR:',params.message sys.exit('You probably need to run with --with-create') print 'params',params print type(params) print 'ERROR: sql error?','line:',lineNum print ' ', str(ins) print ' ',line if False: # Give some debugging flexibility from IPython.Shell import IPShellEmbed ipshell = IPShellEmbed(argv=[]) ipshell() sys.exit('Gave up')
def build_dist_database(database_filename, log_files, verbose=False): cx = sqlite3.connect(database_filename) print 'WARNING: not saving the station name' cx.execute(''' CREATE TABLE IF NOT EXISTS distance ( -- Save space, no key -- ts INTEGER, -- Save more space julian_day INTEGER, -- x REAL, -- y REAL, dist_km REAL --, --station VARCHAR(15) ); ''') cu = cx.cursor() counts = {'nogps': 0} for filename in log_files: if verbose: print 'file:',filename sys.stdout.flush() for line_num, line in enumerate(file(filename)): if 'AIVDM,1,1' not in line: continue match = uscg_ais_nmea_regex.search(line).groupdict() message_id = match['body'][0] # First letter is the message type if message_id not in ('1','2','3'): continue if len(match['body']) != 28: # 6 bits per character raise AisErrorBadNumBits('expected 168, got %d' % len(match['body']) / 6) bits = binary.ais6tobitvec(match['body'][:20]) # Don't need any of the other bits, so do not waste time x = binary.signedIntFromBV(bits[61:89]) / 600000. y = binary.signedIntFromBV(bits[89:116]) / 600000. 
if x > 180 or y > 90: counts['nogps'] += 1 continue station = match['station'] julian_day = int(datetime.datetime.utcfromtimestamp(int(match['timeStamp'])).strftime('%j')) d_km = dist_utm_km( (x,y), station_locations[station] ) #cu.execute('INSERT INTO distance VALUES (:julian_day, :x, :y, :dist_km, :station)', #{'julian_day': julian_day, 'x':x, 'y':y, 'dist_km': d_km, 'station':station} ) #cu.execute('INSERT INTO distance VALUES (:julian_day, :x, :y, :dist_km)', # {'julian_day': julian_day, 'x':x, 'y':y, 'dist_km': d_km, } ) cu.execute('INSERT INTO distance VALUES (:julian_day, :dist_km)', {'julian_day': julian_day, 'dist_km': d_km, } ) if line_num % 10000 == 9999: cx.commit() cx.commit() if False: print 'Creating indexes' try: cx.execute('CREATE INDEX idx_dist_day ON distance(julian_day);') cx.execute('CREATE INDEX idx_dist_dist ON distance(dist_km);') #cx.execute('CREATE INDEX idx_dist_station ON distance(station);') cx.commit() except sqlite3.OperationalError: print 'Appears indexes were already created' return cx, counts
# NOTE(review): formatted copy of the station-extraction script fragment.
# This chunk ends mid-statement (the `if len(fields[5]) < 39:` suite is not
# visible here), so the code is left byte-identical; comments only.
o = open(options.outputFilename, 'w') verbose = options.verbose #print args for filename in args: print filename linenum = 1 for line in file(filename): if linenum % 1000 == 0: print 'line', linenum linenum += 1 #try: match_obj = uscg_ais_nmea_regex.search(line) if match_obj is None: sys.stderr.write(line) continue station = match_obj.group('station') #except: # sys.stderr.write('bad line: %s\n' %line) # continue fields = line.split(',')[:6] if '1' != fields[2]: # Must be the start of a sequence #if verbose: # print 'skipping based on field 2',line continue if len(fields[5]) < 39:
def load_data(cx, datafile=sys.stdin, verbose=False, uscg=True): """Try to read data from an open file object. Not yet well tested. @param cx: database connection @param verbose: pring out more if true @param uscg: Process uscg tail information to get timestamp and receive station @rtype: None @return: Nothing @note: can not handle multiline AIS messages. They must be normalized first. """ v = verbose # Hmm... "v"... the irony cu = cx.cursor() lineNum = 0 next_key = 0 max_key = get_max_key(cx) if max_key is not None: next_key = max_key + 1 print 'keys_starting_at:', next_key message_set = (1, 2, 3, 4, 5, 18, 19) counts = {} for msg_num in message_set: counts[msg_num] = 0 counts['checksum_failed'] = 0 track_dups = TrackDuplicates(lookback_length=1000) for line in datafile: lineNum += 1 if lineNum % 1000 == 0: print lineNum cx.commit() if len(line) < 15 or line[3:6] not in ('VDM|VDO'): continue # Not an AIS VHF message #print 'FIX: validate checksum' if not nmea.checksum.isChecksumValid(line): print >> sys.stderr, 'WARNING: invalid checksum:\n\t', line, print >> sys.stderr, ' ', nmea.checksum.checksumStr(line) counts['checksum_failed'] += 1 fields = line.split(',') # FIX: use this split throughout below... try: msg_num = int(binary.ais6tobitvec(fields[5][0])) except: print 'line would not decode', line continue if verbose: print 'msg_num:', msg_num if msg_num not in message_set: if verbose: print 'skipping', line print ' not in msg set:', str(message_set) continue try: bv = binary.ais6tobitvec(fields[5]) except: print >> sys.stderr, 'ERROR: Unable to decode bits in line:\n\t', line traceback.print_exc(file=sys.stderr) continue # FIX: need to take padding into account ... 
right before the * if msg_num in (1, 2, 3, 4, 18): if len(bv) != 168: print 'ERROR: skipping bad one slot message, line:', lineNum print ' ', line, print ' Got length', len(bv), 'expected', 168 continue elif msg_num == 5: # 426 has 2 pad bits if len(bv) not in (424, 426): print 'ERROR: skipping bad shipdata message, line:', lineNum print ' ', line, print ' Got length', len(bv), 'expected', 424 continue ins = None try: if msg_num == 1: ins = ais.ais_msg_1_handcoded.sqlInsert( ais.ais_msg_1_handcoded.decode(bv), dbType='sqlite') elif msg_num == 2: ins = ais.ais_msg_2_handcoded.sqlInsert( ais.ais_msg_2_handcoded.decode(bv), dbType='sqlite') elif msg_num == 3: ins = ais.ais_msg_3_handcoded.sqlInsert( ais.ais_msg_3_handcoded.decode(bv), dbType='sqlite') elif msg_num == 4: ins = ais.ais_msg_4_handcoded.sqlInsert( ais.ais_msg_4_handcoded.decode(bv), dbType='sqlite') elif msg_num == 5: ins = ais.ais_msg_5.sqlInsert(ais.ais_msg_5.decode(bv), dbType='sqlite') elif msg_num == 18: ins = ais.ais_msg_18.sqlInsert( ais.ais_msg_18.decode(bv), dbType='sqlite') # Class B position elif msg_num == 19: ins = ais.ais_msg_19.sqlInsert( ais.ais_msg_19.decode(bv), dbType='sqlite') # Class B position else: print 'Warning... 
not handling type', msg_num, 'line:', lineNum continue except: print 'ERROR: some decode error?', 'line:', lineNum print ' ', line continue counts[msg_num] += 1 if uscg: from aisutils.uscg import uscg_ais_nmea_regex match = uscg_ais_nmea_regex.search(line).groupdict() try: cg_sec = int(float(match['timeStamp'])) ins.add('cg_sec', cg_sec) #ins.add('cg_sec', int(float(match['timeStamp'])) ) ins.add( 'cg_timestamp', str( datetime.datetime.utcfromtimestamp( float(match['timeStamp'])))) ins.add('cg_r', match['station']) except: print >> sys.stderr, match print >> sys.stderr, 'bad uscg sections', line, continue # Optional fields that are not always there if match['time_of_arrival'] is not None: try: ins.add('cg_t_arrival', float(match['time_of_arrival'])) except: print >> sys.stderr, 'WARNING: corrupted time of arrival (T) in line. T ignored\n\t', line pass # Not critical if corrupted if match['slot'] is not None: ins.add('cg_s_slotnum', int(match['slot'])) if msg_num in (1, 2, 3, 4): pkt_id, dup_flag = track_dups.check_packet( cg_sec, fields[5]) # Pass in the NMEA payload string of data if v: print 'dup_check:', pkt_id, dup_flag, fields[5] ins.add('pkt_id', pkt_id) ins.add('dup_flag', dup_flag) ins.add('key', next_key) next_key += 1 if verbose: print str(ins) try: cu.execute(str(ins)) except pysqlite2.dbapi2.OperationalError, params: #except OperationalError, params: if -1 != params.message.find('no such table'): print 'ERROR:', params.message sys.exit('You probably need to run with --with-create') print 'params', params print type(params) print 'ERROR: sql error?', 'line:', lineNum print ' ', str(ins) print ' ', line if False: # Give some debugging flexibility from IPython.Shell import IPShellEmbed ipshell = IPShellEmbed(argv=[]) ipshell() sys.exit('Gave up')