def scrape_data(self):
    output = subprocess.check_output(
        self.connection_options['command'].split())
    line_buffer = list()
    for line in output.splitlines():
        line = line.strip()
        if line != "---------------------------------------------------":
            line_buffer.append(line)
        else:
            valuemap = dict()
            for l in line_buffer:
                l_split = l.split()
                value_flag = False
                title_holder = ''
                for word in l_split:
                    if value_flag:
                        valuemap[title_holder] = word
                        value_flag = False
                        title_holder = ''
                    elif word.endswith(':'):
                        title_holder = word.strip(':').lower()
                        value_flag = True
            if valuemap['ip'] != 'CannotFindIP':
                entry = Record(valuemap['ip'])
                for key in valuemap:
                    if key != 'ip':
                        entry.add_attribute(key, valuemap[key])
                self.add_entry(entry)
            line_buffer = list()
    return self.entries
def scrape_data(self):
    for ip in self.inventory.get_ips():
        r = Record(ip)
        r.add_attribute('ip', ip)
        self.add_entry(r)
    return self.entries
def scrape_data(self):
    for ip in self.inventory.get_ips():
        logger.debug("Looking up %s" % ip)
        reverse_result = None
        forward_result = None
        try:
            reverse_result = socket.gethostbyaddr(ip)[0].lower().strip()
        except socket.herror as e:
            # ignore lookups that can't be found
            pass
        if reverse_result is not None:
            try:
                forward_result = socket.gethostbyname(reverse_result).strip()
            except socket.gaierror as e:
                # ignore lookup errors
                pass
        if None not in (forward_result, reverse_result) and ip == forward_result:
            # make sure the result is an fqdn
            if self.is_valid_hostname(reverse_result):
                logger.debug("Forward and reverse match for %s. Setting fqdn to %s"
                             % (ip, reverse_result))
                r = Record(ip)
                r.add_attribute('fqdn', reverse_result)
                self.add_entry(r)
            else:
                logger.debug("The reverse does not look like a valid hostname. "
                             "Reverse: %s" % str(reverse_result))
        else:
            logger.debug("Forward and reverse did not match for %s. "
                         "Not adding fqdn." % ip)
    return self.entries
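# `is_valid_hostname` is used above but not shown here. A minimal sketch of one
# common RFC 1123-style check, offered only as an illustration of what such a
# helper might look like; the real implementation in this codebase may differ.
import re

def is_valid_hostname(self, hostname):
    """Rough check that `hostname` looks like a fully qualified domain name."""
    if len(hostname) > 253 or '.' not in hostname:
        return False
    allowed = re.compile(r'^(?!-)[a-z0-9-]{1,63}(?<!-)$', re.IGNORECASE)
    return all(allowed.match(label) for label in hostname.rstrip('.').split('.'))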
def test_store_from_file(self):
    res = [Record([1, 2, 3, 4, 5]), Record([1, 2, 3, 5, 6])]
    with patch.object(self.d, "store_records") as m:
        with tempfile.NamedTemporaryFile() as fd:
            fd.write(b'[1,2,3,4, 5]\n')
            fd.write(b'[1,2,3,5, 6]\n')
            fd.seek(0)
            self.d.store_from_file(fd.name)
            m.assert_called_with(res)
def scrape_data(self):
    with open(self.connection_options['path']) as csvfile:
        readCSV = csv.DictReader(csvfile, delimiter=',')
        data = [r for r in readCSV]
    '''
    Sample of the parsed rows:
    [..,
     {'class': 'Client',
      'collection': 'Win 7 x86 Clients',
      'fqdn': 'CMU-946702.GO.ECE.CMU.EDU',
      'ip': '128.2.57.195'}
    ]
    '''
    data_by_ip = {}
    ip4_pattern = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
    for d in data:
        if 'ip' in d and d['ip'] != '':
            good_ip = False
            if ' ' in d['ip']:
                for possible_ip4 in d['ip'].split():
                    if ip4_pattern.match(possible_ip4):
                        d['ip'] = possible_ip4
                        good_ip = True
                        break
            else:
                good_ip = True
            if good_ip:
                if d['ip'] in data_by_ip:
                    data_by_ip[d['ip']]['class'] = d.get(
                        'class', '').lower().strip()
                    data_by_ip[d['ip']]['fqdn'] = d.get(
                        'fqdn', '').lower().strip()
                    data_by_ip[d['ip']]['collection'].append(
                        d.get('collection', '').lower().strip())
                else:
                    data_by_ip[d['ip']] = {
                        'class': d.get('class', '').lower().strip(),
                        'fqdn': d.get('fqdn', '').lower().strip(),
                        'collection': [
                            d.get('collection', '').lower().strip(),
                        ]
                    }
    for ip in data_by_ip:
        entry = Record(ip)
        for key in data_by_ip[ip]:
            entry.add_attribute(key, data_by_ip[ip][key])
        self.add_entry(entry)
    return self.entries
def scrape_data(self):
    for ip in self.inventory.get_ips():
        r = Record(ip)
        logger.debug("Testing ping for %s" % str(ip))
        pingok = self.pingOk(ip, self.connection_options['wait'],
                             self.connection_options['count'])
        if pingok:
            logger.debug("Ping ok for %s" % ip)
        else:
            logger.debug("Ping failed for %s" % ip)
        r.add_attribute('pingable', pingok)
        self.add_entry(r)
    return self.entries
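# `pingOk` is not shown above. A minimal sketch of such a helper, assuming the
# system `ping` binary with Linux-style `-c` (count) and `-W` (timeout) flags;
# purely illustrative, not the project's confirmed implementation.
import subprocess

def pingOk(self, ip, wait, count):
    """Return True if `ip` answers `count` pings within `wait` seconds each."""
    try:
        subprocess.check_output(
            ['ping', '-c', str(count), '-W', str(wait), str(ip)],
            stderr=subprocess.STDOUT)
        return True
    except subprocess.CalledProcessError:
        return False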
def test_store_records(self):
    """Kind of an integration test; we only mock the server responses."""
    # Define server responses
    # 1 - Get token
    url = (f"https://{config.KEYSERVER_HOSTNAME}"
           f":{config.KEY_API_PORT}/provider/gen_token")
    j = {
        'success': True,
        'token': 'XIu2a9SDGURRTzQnJdDg19Ii_CS7wy810s3_Lrx-TY7Wvh2Hf0U4xLH'
                 'NwnY_byYJ71II3kfUXpSZHOqAxA3zrw'
    }
    responses.add(responses.GET, url, json=j, status=200)
    # 2 - Hash key
    url = f"{self.d.KEYSERVER}/hash_key"
    hash_key = to_base64(int(1).to_bytes(16, 'big'))
    j = {'success': True, 'hash_key': hash_key}
    responses.add(responses.GET, url, json=j, status=200)
    # 3 - Encryption keys
    j = {
        'success': True,
        'port': 50000,
        'host': "127.0.0.1",
        'totalOTs': 10,
        'tls': config.OT_TLS
    }
    url = f"https://localhost:" \
          f"{config.KEY_API_PORT}/provider/key_retrieval?totalOTs=3"
    responses.add(responses.GET, url, json=j, status=200)
    # Remember to mock the OT
    r1 = Record([1.0, 2.1, 3.3, 4.4, 5.0])
    r2 = Record([1.0532, 2.15423, 3.3453, 4.4, 5.0])
    r3 = Record([1.52340, 2.1523, 3.35423, 4.4, 5.0])
    records = [r1, r2, r3]
    # Log in user
    self.d.set_password("password")
    with patch.object(self.d, "_receive_ots",
                      return_value=[10, 9, 8]):  # Mock OT
        with patch.object(self.d, "_batch_store_records_on_server",
                          return_value=True):
            self.d.store_records(records)
def setUpClass(cls) -> None:
    """Disable logging."""
    logging.getLogger().setLevel(loglvl)
    warnings.filterwarnings("ignore", category=ResourceWarning)
    warnings.filterwarnings("ignore", category=ImportWarning)
    # Clear directory
    shutil.rmtree(test_dir, ignore_errors=True)
    os.makedirs(test_dir, exist_ok=True)
    # Records
    cls.sr: List[Record] = [
        Record([1.1, 2.01, 3.3, 4.4, 5.5]),   # Match
        Record([1.5, 4.4, 3.9, 5.0, 5.5]),    # No Match
        Record([1.0, 7.0, 3.0, 4.0, 5.5]),    # No Match
        Record([1.0, 2.0, 10.6, 10.0, 5.5]),  # Match
        Record([3.0, 2.0, 3.0, 4.0, 5.5]),    # No Match
        Record([1.1, 2.104, 5, 9, 5.5]),      # Match
        Record([2.0, 2.0, 3.0, 4.0, 5.5])     # No Match
    ]
    cls.matches = [cls.sr[0], cls.sr[3], cls.sr[5]]
    # Generate hash and encryption keys
    key_backend = KeyServer(test_dir)
    cls.hash_key = key_backend._hash_key
    for r in cls.sr:
        r.set_hash_key(cls.hash_key)
def test_data_provider_int(self):
    # Full integration test including Flask
    self.dp = data_provider.DataProvider(self.provider)
    self.dp.set_password(self.password)
    str_backend = StorageServer(test_dir)
    with self.str_app.app_context():
        for r in self.sr:
            # check that bloom filter is empty
            b = str_backend.bloom
            self.assertNotIn(to_base64(r.get_long_hash()), b)
        # Check that DB is empty
        res = str_backend.batch_get_records(
            [to_base64(r.get_long_hash()) for r in self.sr], "client")
        # Decrypt
        result = [
            Record.from_ciphertext(json.loads(r), self.enc_keys[0])
            for h, r in res
        ]
        self.assertEqual([], result)
    s = Session(True)
    with patch("requests.get", s.get), \
            patch("requests.post", s.post), \
            patch.object(self.dp, "_receive_ots",
                         Mock(return_value=self.enc_keys_int[:len(self.sr)])):
        self.dp.store_records(self.sr)
    str_backend = StorageServer(test_dir)
    with self.str_app.app_context():
        for r in self.sr:
            # check that records are in bloom filter
            b = str_backend.bloom
            self.assertIn(to_base64(r.get_long_hash()), b)
        # Check records in db
        res = str_backend.batch_get_records(
            [to_base64(r.get_long_hash()) for r in self.sr], "client")
        # Decrypt
        result = [
            Record.from_ciphertext(json.loads(r), self.enc_keys[0])
            for h, r in res
        ]
        for m in self.sr:
            self.assertIn(m, result)
        for r in result:
            self.assertIn(r, self.sr)
def scrape_data(self):
    zenoss = Zenoss(self.connection_options['url'],
                    self.connection_options['username'],
                    self.connection_options['password'],
                    ssl_verify=False)
    vsphere_devices = zenoss.get_devices(
        device_class='/zport/dmd/Devices/vSphere')['devices']
    for device in vsphere_devices:
        components = zenoss.get_components_by_uid(uid=device['uid'],
                                                  limit=None)['data']
        vms = [
            v for v in components if v['class_label'] == 'Virtual Machine'
        ]
        hosts = [v for v in components if v['class_label'] == 'Host']
        # map hosts to cluster names
        host_map = dict()
        for h in hosts:
            if 'cluster' in h:
                host_map[h['uid']] = h['cluster']['name']
        vsphere_data = dict()
        for v in vms:
            ip = self.get_ip(v['guestname'])
            if ip is not None:
                vsphere_data[ip] = {'guestname': v['guestname']}
                if 'host' in v and 'name' in v['host']:
                    vsphere_data[ip]['host'] = v['host']['name']
                if 'host' in v and 'uid' in v['host'] \
                        and v['host']['uid'] in host_map:
                    vsphere_data[ip]['cluster'] = host_map[v['host']['uid']]
        for ip in vsphere_data:
            r = Record(ip)
            for name in vsphere_data[ip]:
                r.add_attribute(name, vsphere_data[ip][name])
            self.add_entry(r)
    return self.entries
def setUpClass(cls) -> None:
    """Disable logging."""
    logging.getLogger().setLevel(logging.FATAL)
    shutil.rmtree(cls.test_dir, ignore_errors=True)
    os.makedirs(cls.test_dir, exist_ok=True)
    cls.records = [
        Record([0, 0, 0, 0, 0]),  # not in Bloom
        Record([1, 2, 3, 4, 5]),  # in Bloom
        Record([2, 2, 3, 4, 5]),  # in Bloom
        Record([3, 2, 3, 4, 5]),  # in Bloom
        Record([4, 2, 3, 4, 5]),  # not in Bloom
        Record([5, 2, 3, 4, 5]),  # not in Bloom
    ]
    for r in cls.records:
        r.set_hash_key(cls.hash_key)
    b = BloomFilter(100, 0.0001, cls.test_dir + "test.bloom")
    b.update([1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 'b', 'c'])
    b.add(b64encode(cls.records[1].get_long_hash()).decode())
    b.add(b64encode(cls.records[2].get_long_hash()).decode())
    b.add(b64encode(cls.records[3].get_long_hash()).decode())
    cls.b_encoded = b.to_base64().decode()
    cls.b = b
    cls.psi_ind = [
        cls.records[1].get_psi_index(),
        cls.records[2].get_psi_index(),
        cls.records[3].get_psi_index()
    ]
def scrape_data(self):
    zenoss = Zenoss(self.connection_options['url'],
                    self.connection_options['username'],
                    self.connection_options['password'],
                    ssl_verify=False)
    interesting_data = (
        'comments', 'created_timestamp', 'description', 'id', 'firstSeen',
        'groups', 'hwManufacturer', 'location', 'osModel', 'osManufacturer',
        'memory', 'priority', 'priorityLabel', 'productionState',
        'productionStateLabel', 'serialNumber', 'status', 'systems',
        'tagNumber', 'uid', 'uptime', 'severity', 'uuid',
    )
    interesting_device_class_data = (
        'uid',
        'description',
        'name',
    )
    for device in zenoss.get_devices_detailed():
        if 'data' in device and 'ipAddressString' in device['data'] \
                and device['data']['ipAddressString'] is not None \
                and len(device['data']['ipAddressString']) > 5:
            r = None
            try:
                r = Record(device['data']['ipAddressString'])
            except ValueError as e:
                # ip is probably not set in Zenoss
                pass
            if r is not None:
                for data in device['data']:
                    if data in interesting_data:
                        r.add_attribute(str(data), device['data'][data])
                if 'deviceClass' in device['data']:
                    for key in device['data']['deviceClass']:
                        if key in interesting_device_class_data:
                            r.add_attribute('dev_class_info_' + str(key),
                                            device['data']['deviceClass'][key])
                self.add_entry(r)
    return self.entries
def store_from_file(self, file: str) -> None:
    """
    Read all records from a file and store them at the storage server.

    :param file: path to the file containing the records
    :return: None
    """
    self.eval['start_time'] = time.monotonic()
    records = []
    with open(file, "r") as fd:
        for line in fd:
            records.append(Record(parse_list(line)))
    self.eval['parsed_list_time'] = time.monotonic()
    log.info(f"Parsed {len(records)} records.")
    self.store_records(records)
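# `parse_list` is assumed to turn one bracketed line such as "[1,2,3,4, 5]\n"
# (the format written by test_store_from_file above) into a list of numbers.
# A minimal sketch under that assumption; the real helper may differ.
import ast

def parse_list(line: str) -> list:
    """Parse a single bracketed, comma-separated line into a list of numbers."""
    return list(ast.literal_eval(line.strip()))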
def batch_get_records(self, candidates: List[Record]) -> List[Record]:
    """
    Retrieve the records for all hashes in the list.

    :param candidates: Record objects for all candidates (hash_set)
    :return: List of retrieved records (decrypted)
    """
    log.info("4.1 Retrieve encryption keys.")
    start = time.monotonic()
    ot_indices = []
    # No duplicates
    for r in candidates:
        if r.get_ot_index() not in ot_indices:
            ot_indices.append(r.get_ot_index())
    enc_keys = self._get_enc_keys(ot_indices)
    # Create mapping
    enc_keys = dict(zip(ot_indices, enc_keys))
    self.eval['key_retrieve_time'] = time.monotonic()
    log.info(
        f"4.1 - Retrieve keys took: {print_time(time.monotonic() - start)}")
    log.info("4.2 Retrieve encrypted records.")
    start = time.monotonic()
    hash_list = [to_base64(r.get_long_hash()) for r in candidates]
    records = self._batch_get_encrpyted_records(hash_list)
    if not records:
        self.eval['record_retrieve_time'] = time.monotonic()
        self.eval['decryption_time'] = time.monotonic()
        return []
    res_list = []
    self.eval['record_retrieve_time'] = time.monotonic()
    log.info(
        f"4.2 - Retrieve records: {print_time(time.monotonic() - start)}")
    log.info("4.3 Decrypting.")
    start = time.monotonic()
    for h, c in records:
        c = json.loads(c)
        key = enc_keys[hash_to_index(from_base64(h), config.OT_INDEX_LEN)]
        log.debug(f"Using key {key} for record {h}.")
        res_list.append(Record.from_ciphertext(c, key))
    self.eval['decryption_time'] = time.monotonic()
    log.info(
        f"4.3 - Decryption took: {print_time(time.monotonic() - start)}")
    return res_list
def compute_matches(t: List[float], num: int) -> List[Record]:
    """
    Compute matching vectors around the target.

    :param num: number of matches
    :param t: Target record
    :return: List of matching records
    """
    metric, ars = map_metric(METRIC)
    m = metric(t, *ars)
    for i in numpy.arange(0.1, ars[0], 0.1):
        m = metric(t, float(i))
        if len(m) > 2 * num:
            break
    candidates = list(m)
    random.shuffle(candidates)
    candidates = candidates[:num]
    if len(candidates) < num:
        raise RuntimeError("Not enough candidates!")
    log.info(f"Generated {len(candidates)} candidates.")
    return [Record(v) for v in candidates]
def test_get_all_record_psi_hashes(self):
    records = []
    correct = []
    for i in range(10):
        r = Record([1, 2, 3, 4, 5])
        r.set_hash_key(b'fake_key')
        m = Mock()
        m.hash = helpers.to_base64(r.get_long_hash())
        records.append(m)
        correct.append(r.get_psi_index())
    with patch("lib.storage_server_backend.StoredRecord") as c:
        c.query.all.return_value = records
        s = server.StorageServer()
        res = s.get_all_record_psi_hashes()
        self.assertEqual(correct, res)
def process(movie_path, record_dir_format, class_mapping):
    try:
        records = []
        for label in class_mapping.items():
            record = Record()
            record.dir_format = record_dir_format
            record.label = label
            record.threshold = config["threshold"]
            record.skip_frame_interval = config["skip_frame_interval"]
            record.prepare()
            records.append(record)
        model = keras.models.load_model(model_file_path, compile=False)
        predictor = Predictor(model, config["image_size_px"])
        movie = Movie(movie_path, config["skip_frame_interval"], records,
                      predictor)
        if movie.is_completed_clip():
            return
        movie.capture()
        movie.write_period_to_file()
    except Exception as e:
        print("record ERROR: ", movie_path)
        print(e)
        print(traceback.format_exc())
def scrape_data(self):
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    page = 1
    page_not_empty = True
    max_attempts = 50
    attempts = 0
    while page_not_empty and attempts < max_attempts:
        url = self.connection_options['url'] + '?page=' + str(page)
        page += 1
        req = requests.get(url,
                           auth=HTTPBasicAuth(
                               self.connection_options['username'],
                               self.connection_options['password']),
                           verify=False)
        full_results = req.json()
        for key in full_results:
            if key == 'results':
                if len(full_results[key]) == 0:
                    page_not_empty = False
                else:
                    results = full_results[key]
                    for host in results:
                        if 'ip' in host and host['ip'] is not None \
                                and len(host['ip']) >= 6:
                            r = Record(host['ip'].strip())
                            for attribute_name in host:
                                if attribute_name.lower().strip() == 'last_report' and \
                                        isinstance(host['last_report'], basestring):
                                    # determine if actively reporting to satellite
                                    # based on min_days_reported_considered_active
                                    r.add_attribute(
                                        'is_reporting_to_satellite',
                                        self.is_reporting_to_satellite(
                                            str(host['last_report'])))
                                elif attribute_name != 'ip':
                                    r.add_attribute(str(attribute_name),
                                                    host[attribute_name])
                            self.add_entry(r)
        attempts += 1
    return self.entries
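# `is_reporting_to_satellite` is referenced above but not shown. A rough sketch,
# assuming `last_report` arrives as an ISO-like "YYYY-MM-DD HH:MM:SS..." string
# and that the threshold (min_days_reported_considered_active) is available in
# connection_options; both are assumptions, not the project's confirmed API.
from datetime import datetime, timedelta

def is_reporting_to_satellite(self, last_report):
    """Return True if the host reported within the configured number of days."""
    max_age = timedelta(days=int(
        self.connection_options['min_days_reported_considered_active']))
    try:
        reported = datetime.strptime(last_report[:19], "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return False
    return datetime.utcnow() - reported <= max_age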
                except Exception as err:
                    err = str(err)
                    log.exception(err)
                    return None, err

            ram_usage, (result, error) = memory_usage(
                (execDP,),
                interval=config.RAM_INTERVAL,
                include_children=True,
                retval=True,
            )
            dp.eval['result'] = result
            dp.eval['ram_usage'] = ram_usage
            dp.eval['error'] = error
            with open(com_file, "wb") as fd:
                pickle.dump(dp.eval, fd)
        else:
            dp.store_from_file(args.file)
            print("> Successfully stored records on server.")
    elif args.add:
        string = args.add
        r_list = string.strip('][').split(',')
        r_list = [float(i) for i in r_list]
        log.debug(f"Got: {str(r_list)}")
        r = Record(r_list)
        dp.store_records([r])
        print("> Successfully stored records on server.")
except Exception as e:
    log.error(str(e), exc_info=True)
    sys.exit()
def test_full_retrieve(self):
    c = client.Client("userA")
    target = [2.0, 2.0, 3.0, 4.0, 5.0]
    # Server Records:
    sr: List[Record] = [
        [2.01, 2.01, 3.3, 4.4, 5.0],   # Match
        [2.5, 4.4, 3.9, 5.0, 5.0],     # No Match
        [2.0, 7.0, 3.0, 4.0, 5.0],     # No Match
        [2.0, 2.0, 10.6, 10.0, 5.0],   # Match
        [3.0, 2.0, 3.0, 4.0, 5.0],     # No Match
        [2.01, 2.004, 5, 9, 5.0],      # Match
        [2.0, 2.0, 3.0, 4.0, 5.0]      # No Match
    ]
    # Server Bloom Filter
    tmp = tempfile.NamedTemporaryFile(delete=False)
    b = BloomFilter(len(sr), 0.00001, tmp.name)
    c.metric = "offset-0.01"
    for i, r in enumerate(sr):
        sr[i]: Record = Record(r)
        sr[i].set_hash_key(self.hash_key)
    matches = [sr[0], sr[3], sr[5]]
    for m in matches:
        b.add(b64encode(m.get_long_hash()).decode())
    b_encoded = b.to_base64().decode()
    # Responses
    # -----------------------------------------------------------
    # 1. Hash Key
    url = f"https://localhost:" \
          f"{config.KEY_API_PORT}/client/hash_key"
    j = {'success': True, 'hash_key': b64encode(self.hash_key).decode()}
    responses.add(responses.GET, url, json=j, status=200)
    # 2. PSI
    url = (f"https://{config.STORAGESERVER_HOSTNAME}:"
           f"{config.STORAGE_API_PORT}/client/psi")
    j = {
        'success': True,
        'tls': False,
        'host': '127.0.0.1',
        'port': 1234,
        'setSize': 10
    }
    responses.add(GET, url, status=200, json=j)
    # 2. Bloom filter
    url = (f"https://{config.STORAGESERVER_HOSTNAME}:"
           f"{config.STORAGE_API_PORT}/"
           f"{UserType.CLIENT}/bloom")
    j = {'success': True, 'bloom': b_encoded}
    responses.add(GET, url, status=200, json=j)
    # 3. Encryption Keys
    url = f"https://localhost:" \
          f"{config.KEY_API_PORT}/client/key_retrieval?totalOTs=3"
    j = {
        'success': True,
        'port': 5000,
        'host': "127.0.0.1",
        'totalOTs': 3,
        'tls': False
    }
    responses.add(responses.GET, url, json=j, status=200)
    # 4. Ciphertexts
    url = (f"https://{config.STORAGESERVER_HOSTNAME}:"
           f"{config.STORAGE_API_PORT}/"
           f"{UserType.CLIENT}/batch_retrieve_records")
    j = {
        'success': True,
        'records': [(b64encode(m.get_long_hash()).decode(),
                     json.dumps(m.get_encrypted_record(self.enc_keys[i], b'0')))
                    for i, m in enumerate(matches)]
    }
    responses.add(POST, url, status=200, json=j)
    # ---------------------------------------------------------------------
    for psi in [True, False]:
        with patch.object(
                c, "_receive_psi",
                Mock(return_value=[m.get_psi_index() for m in matches])):
            c._psi_mode = psi
            res = c.full_retrieve(target)
            # Set hash key for comparison
            for r in res:
                r.set_hash_key(self.hash_key)
            # Compare
            self.assertEqual(matches, res)
def test_record_iterator(self):
    r = [100.0, 1.0, 3.0, 4.0]
    it = sm.RecordIterator([r], b"key")
    res = [i for i in it]
    self.assertEqual([Record(r, hash_key=b"key")], res)
def __next__(self) -> Record:
    vec = next(self._iterator)
    r = Record(vec, hash_key=self._hash_key)
    return r
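# A minimal sketch of the RecordIterator class that the __next__ method above
# belongs to, inferred from the test `sm.RecordIterator([r], b"key")`; anything
# beyond the attributes used in __next__ is an assumption.
class RecordIterator:
    """Wrap an iterable of raw vectors and yield Record objects with the hash key set."""

    def __init__(self, vectors, hash_key: bytes):
        self._iterator = iter(vectors)
        self._hash_key = hash_key

    def __iter__(self) -> "RecordIterator":
        return self

    # __next__ as defined above: build a Record from the next vector.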
def test__add_to_transaction_db(self, db, RecordRetrieval):
    r1 = Record([1, 2, 3, 4, 5])
    r2 = Record([1.1, 2, 3, 4, 5])
    r3 = Record([1, 2.2, 3, 4, 5])
    r4 = Record([1.1, 2.2, 3, 4, 5])
    r5 = Record([1.2, 2.22, 3, 4, 5])
    recs = [r1, r2, r3, r4, r5]
    for r in recs:
        r.set_hash_key(b'hash-key')
    hashes = [helpers.to_base64(r.get_long_hash()) for r in recs]
    records = [Mock() for _ in range(5)]
    records[0].hash = helpers.to_base64(r1.get_long_hash())
    records[1].hash = helpers.to_base64(r1.get_long_hash())  # Same
    records[2].hash = helpers.to_base64(r3.get_long_hash())
    records[3].hash = helpers.to_base64(r4.get_long_hash())
    records[4].hash = helpers.to_base64(r5.get_long_hash())
    server.StorageServer._add_to_transaction_db(records, "client", hashes)
    self.assertEqual(1, RecordRetrieval.call_count)
    # 2 owners
    expected = {
        "client": "client",
        "enc_keys_by_hash": 5,
        "enc_keys_by_records": 4
    }
    self.assertEqual(expected, RecordRetrieval.call_args[1])