def report_observation_with_db(ndb, service, fp):
    """Insert or update an Observation record in the notary database.

    The existing observations for ``service`` are scanned to find the key
    with the latest 'end' time. If that key equals ``fp`` its 'end' time is
    extended to the current time; otherwise a new observation for ``fp`` is
    inserted and the previous key (if any) is ended one second earlier.

    Args:
        ndb: database object providing get_observations(), close_session(),
            update_observation_end_time(), insert_observation() and
            report_metric().
        service: the service the observation belongs to; passed through
            unchanged to every ``ndb`` call.
        fp: the key seen for ``service`` in the current scan.
    """
    cur_time = int(time.time())

    # Latest 'end' time recorded for each key of this service.
    most_recent_time_by_key = {}

    # The service field of each row is bound to a throwaway name on purpose:
    # unpacking it into `service` would overwrite the caller's argument and
    # every database call below would then use the row value instead.
    for (_row_service, key, _start, end) in ndb.get_observations(service):
        if (key not in most_recent_time_by_key
                or end > most_recent_time_by_key[key]):
            most_recent_time_by_key[key] = end

    # calculate the most recently seen key
    most_recent_key = None
    most_recent_time = 0
    for key, end in most_recent_time_by_key.items():
        if end > most_recent_time:
            most_recent_key = key
            most_recent_time = end

    ndb.close_session()

    if most_recent_key == fp:
        # this key matches the most recently seen key before this observation.
        # just update the observation 'end' time.
        ndb.update_observation_end_time(service, fp, most_recent_time, cur_time)
        ndb.report_metric('ServiceScanKeyUpdated', service)
        return

    # the key has changed or no observations exist yet for this service.
    # add a new entry for this key with start and end set to the current time
    ndb.insert_observation(service, fp, cur_time, cur_time)

    if most_recent_key is not None:
        # if there was a previous key, set its 'end' timespan value to be
        # the current time minus one second (ending just before the new key)
        ndb.update_observation_end_time(service, most_recent_key,
                                        most_recent_time, cur_time - 1)
        ndb.report_metric('ServiceScanPrevKeyUpdated', service)
def import_records(infile):
    """Read a file of tuples and extract service and observation data.

    Each line is validated against a strict pattern before any of its
    contents are used; lines that do not match are ignored. Every distinct
    service found is passed to ``ndb.insert_service()``, and unless
    ``args.services_only`` is set every observation is passed to
    ``ndb.insert_observation()``.

    Args:
        infile: an open, iterable text file object. It is always closed
            by this function, even if reading fails.

    Relies on the module-level names ``ndb`` and ``args``.
    """
    # tuples will be formatted like this:
    # (domain.com:443,2, aa:bb:cc:dd:ee:ff, 123, 456)
    #
    # The start timestamp must be captured as (\d+): a repeated group such
    # as (\d)+ only keeps its final repetition, i.e. the last digit.
    # NOTE(review): the service character class has no '-', so hostnames
    # containing hyphens are rejected - confirm whether that is intended.
    valid_tuple = re.compile(
        r"^\(([\w:,.]+), *([0-9a-fA-F:]+), *(\d+), *(\d+)\)$")

    service_names = {}  # keys are the distinct service names seen
    observations = []

    try:
        for num_lines, line in enumerate(infile, 1):
            # remember: ALL INPUT IS EVIL!
            # test each line before passing to the database.
            match = valid_tuple.match(line)
            if match:
                service = str(match.group(1))
                key = str(match.group(2))
                start = int(match.group(3))
                end = int(match.group(4))

                service_names[service] = True
                observations.append((service, key, start, end))

            if num_lines % 1000 == 0:
                print("Finished %s lines..." % num_lines)
    finally:
        infile.close()

    print("Found %s services. Adding to database." % len(service_names))
    for name in service_names:
        ndb.insert_service(name)

    if not args.services_only:
        print("Found %s observations. Adding to database." % len(observations))
        for (service, key, start, end) in observations:
            ndb.insert_observation(service, key, start, end)