def test_getUsersCount(self):
    s = Statistics(self.users)
    counts = s.getUsersCount()
    self.assertEqual(2, counts['male'])
    self.assertEqual(1, counts['female'])
    self.assertEqual(0, counts['undefined'])
    self.assertEqual(3, counts['total'])
def analyzeBlockSciCluster(self, data, ctag, addressType):
    filem = UtilFileManager()
    arqName = ("ClusterLSBAddressOutput_" + self.clusterType + "_"
               + str(ctag) + addressType.name + ".data")
    countBits = 0
    byteStr = ""
    byteOutput = 0
    addressesIterator = data.addresses
    for ad in addressesIterator.with_type(addressType):
        address = ad.address_string
        countBits = countBits + 1
        if self.extractLSBfromAddress(address) == 1:
            byteOutput = (byteOutput << 1) + 1
        else:
            byteOutput = (byteOutput << 1)
        if countBits == 8:
            # save this byte in the respective file
            filem.saveByte(arqName, byteOutput)
            byteStr = byteStr + chr(byteOutput)
            byteOutput = 0
            countBits = 0
    # compute statistics
    if len(byteStr) != 0:
        scalc = Statistics()
        scalc.printStatistics("LSB Analyzer", arqName + "pubkey.data",
                              filem, byteStr)
        self.keepExtractedData(arqName, byteStr, filem)
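# A minimal, standalone sketch of the bit-packing loop used above: eight
# least-significant bits are shifted into one byte, MSB first. The helper
# extract_lsb is a hypothetical stand-in for extractLSBfromAddress; how the
# real method derives the LSB from an address is not shown here.
def pack_lsb_bits(addresses, extract_lsb):
    """Return the extracted bits packed into a string of byte values (chr codes)."""
    byte_str = ""
    byte_output = 0
    count_bits = 0
    for address in addresses:
        count_bits += 1
        byte_output = (byte_output << 1) | extract_lsb(address)
        if count_bits == 8:
            byte_str += chr(byte_output)
            byte_output = 0
            count_bits = 0
    return byte_str

# Example: with a stand-in extractor that alternates 0/1, eight inputs
# produce one byte: pack_lsb_bits(range(8), lambda a: a % 2) == chr(0b01010101)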
def test_getUsersSentiment(self):
    s = Statistics(self.users)
    sentiments = s.getUsersSentiment()
    # Sentiment counts for female users only
    self.assertEqual(1, sentiments['female']['positive'])
    self.assertEqual(0, sentiments['female']['negative'])
    self.assertEqual(0, sentiments['female']['neutral'])
    self.assertEqual(1, sentiments['female']['count'])
    # Sentiment counts for male users only
    self.assertEqual(1, sentiments['male']['positive'])
    self.assertEqual(1, sentiments['male']['negative'])
    self.assertEqual(0, sentiments['male']['neutral'])
    self.assertEqual(2, sentiments['male']['count'])
    # Sentiment counts for users with undefined gender
    self.assertEqual(0, sentiments['undefined']['positive'])
    self.assertEqual(0, sentiments['undefined']['negative'])
    self.assertEqual(0, sentiments['undefined']['neutral'])
    self.assertEqual(0, sentiments['undefined']['count'])
    # Sentiment counts across all users
    self.assertEqual(2, sentiments['all']['positive'])
    self.assertEqual(1, sentiments['all']['negative'])
    self.assertEqual(0, sentiments['all']['neutral'])
    self.assertEqual(3, sentiments['all']['count'])
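# A hypothetical fixture consistent with the two tests above: three users,
# two male (one positive and one negative sentiment) and one female
# (positive). The User constructor and the gender/sentiment keyword names
# are illustrative assumptions, not the project's actual API.
def setUp(self):
    self.users = [
        User(gender='male', sentiment='positive'),
        User(gender='male', sentiment='negative'),
        User(gender='female', sentiment='positive'),
    ]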
def noncesRelatedToBitiodineAddresses(self, caddresses, ctag):
    filem = UtilFileManager()
    arqName = "ClusterNoncesOutput_" + self.clusterType + "_" + str(ctag)
    countFindings = 0
    blockNumber = 0
    blockchain = Blockchain(
        os.path.expanduser(sp.configBlockchainPath + 'blocks'))
    for block in blockchain.get_ordered_blocks(
            os.path.expanduser(sp.configBlockchainPath + "blocks/index"),
            start=0):
        blockNumber = blockNumber + 1
        nonce = block.header.nonce
        transaction = block.transactions[0]
        # Get outputs from the coinbase transaction
        for output in transaction.outputs:
            # Get addresses
            for outAddr in output.addresses:
                for strAddr in caddresses:
                    if outAddr._address == strAddr:
                        # save that nonce
                        filem.saveInFile(arqName, nonce)
                        self.append(nonce)
                        countFindings = countFindings + 1
    if countFindings > 0:
        scalc = Statistics()
        scalc.printStatistics("Nonces", arqName, filem)
    return countFindings
def analyzeBitIodineOrEtherclustCluster(self, data, ctag):
    filem = UtilFileManager()
    arqName = ("ClusterLSBAddressOutput_" + self.clusterType + "_"
               + str(ctag))
    countBits = 0
    byteStr = ""
    byteOutput = 0
    # compute data bytes
    for address in data:
        countBits = countBits + 1
        if self.extractLSBfromBitiodineAddress(address) == 1:
            byteOutput = (byteOutput << 1) + 1
        else:
            byteOutput = (byteOutput << 1)
        if countBits == 8:
            # save this byte in the respective file
            filem.saveByte(arqName, byteOutput)
            byteStr = byteStr + chr(byteOutput)  # or .decode("utf-8")
            byteOutput = 0
            countBits = 0
    if len(byteStr) != 0:
        # compute statistics
        savedData = filem.openByteFile(arqName)
        scalc = Statistics()
        pvalue = scalc.monobitTest(savedData)
        print("\t\t\'Message\':" + byteStr)
        print("\t\tAM:" + str(scalc.computeAM(savedData)))
        print("\t\tEntropy:" + str(scalc.computeEntropy(savedData)))
        if pvalue < 0.01:
            monobitresult = 1
        else:
            monobitresult = 0
        print("\t\tMonobit test (p-value):" + str(pvalue) + ", PASS:"
              + str(monobitresult))
        self.keepExtractedData(arqName, byteStr, filem)
def analyzeSequentialAddresses(self, data, ctag):
    filem = UtilFileManager()
    print("Analyzing Addresses...")
    arqName = "SequentialLSBAddressOutput_0.data"
    countBits = 0
    byteOutput = 0
    countAddresses = 0
    countChunk = 0
    # compute data bytes
    for address in data:
        countBits = countBits + 1
        if self.extractLSBfromAddress(address) == 1:
            byteOutput = (byteOutput << 1) + 1
        else:
            byteOutput = (byteOutput << 1)
        if countBits == 8:
            # save this byte in the respective file
            filem.saveByte(arqName, byteOutput)
            byteOutput = 0
            countBits = 0
        countAddresses = countAddresses + 1
        if countAddresses % 100000 == 0:
            # compute statistics for the chunk processed so far
            savedData = filem.openByteFile(arqName)
            scalc = Statistics()
            pvalue = scalc.monobitTest(savedData)
            print("\t\tAM:" + str(scalc.computeAM(savedData)))
            print("\t\tEntropy:" + str(scalc.computeEntropy(savedData)))
            if pvalue < 0.01:
                monobitresult = 1
            else:
                monobitresult = 0
            print("\t\tMonobit test (p-value):" + str(pvalue) + ", PASS:"
                  + str(monobitresult))
            # start a new output chunk
            countChunk = countChunk + 1
            arqName = ("SequentialLSBAddressOutput_" + str(countChunk)
                       + ".data")
class TestStatistics(unittest.TestCase):
    def setUp(self):
        self.stats = Statistics()

    def populate(self, data):
        self.stats.update(data)

    def tearDown(self):
        del self.stats

    def test_empty_sum(self):
        self.assertEqual(self.stats.sum, 0.0)

    def test_zero_sum(self):
        self.populate(0)
        self.assertEqual(self.stats.sum, 0.0)

    def test_single_sum(self):
        self.populate(1)
        self.assertEqual(self.stats.sum, 1.0)

    def test_double_sum(self):
        self.populate([1, 2])
        self.assertEqual(self.stats.sum, 3.0)

    def test_triple_sum(self):
        self.populate([1, 2, 3])
        self.assertEqual(self.stats.sum, 6.0)

    def test_many_values_sum(self):
        self.populate([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 123, 789, 1234567890])
        self.assertEqual(self.stats.sum, 1234568857)

    def test_multiple_value_sum(self):
        exp = 0.0
        for x in range(1000):
            with self.subTest(x=x):
                exp += x
                self.populate(x)
                self.assertEqual(self.stats.sum, exp)

    def test_empty_n(self):
        self.assertEqual(self.stats.n, 0)

    def test_single_n(self):
        self.populate(range(1))
        self.assertEqual(self.stats.n, 1)

    def test_two_n(self):
        self.populate(range(2))
        self.assertEqual(self.stats.n, 2)

    def test_three_n(self):
        self.populate(range(3))
        self.assertEqual(self.stats.n, 3)

    def test_multiple_n(self):
        self.populate(range(1234))
        self.assertEqual(self.stats.n, 1234)

    def test_empty_mean(self):
        self.assertEqual(self.stats.mean, 0.0)

    def test_single_mean(self):
        self.populate(7)
        self.assertEqual(self.stats.mean, 7.0)

    def test_two_mean_same(self):
        self.populate([7, 7])
        self.assertEqual(self.stats.mean, 7.0)

    def test_three_mean_same(self):
        self.populate([7, 7, 7])
        self.assertEqual(self.stats.mean, 7.0)

    def test_many_mean_same(self):
        self.populate([7] * 77)
        self.assertEqual(self.stats.mean, 7.0)

    def test_two_mean_diff(self):
        self.populate([3, 7])
        self.assertEqual(self.stats.mean, (3 + 7) / 2.0)

    def test_three_mean_diff(self):
        self.populate([0, 3, 7])
        self.assertEqual(self.stats.mean, (0 + 3 + 7) / 3.0)

    def test_four_mean_diff(self):
        self.populate([0, 3, 7, 145])
        self.assertEqual(self.stats.mean, (0 + 3 + 7 + 145) / 4.0)

    def test_many_values_mean(self):
        exp = 0.0
        for n, x in enumerate(range(3, 3000, 7), start=1):
            with self.subTest(x=x):
                exp = (exp * (n - 1) + x) / n
                self.populate(x)
                self.assertEqual(self.stats.mean, exp)

    def test_empty_median(self):
        self.assertRaises(IndexError, lambda: self.stats.median)

    def test_single_median(self):
        self.populate(5)
        self.assertEqual(self.stats.median, 5)

    def test_two_median(self):
        self.populate([4, 5])
        self.assertEqual(self.stats.median, (4 + 5) / 2)

    def test_three_median(self):
        self.populate([4, 5, 6])
        self.assertEqual(self.stats.median, 5)

    def test_four_median(self):
        self.populate([4, 5, 6, 7])
        self.assertEqual(self.stats.median, (5 + 6) / 2)

    def test_many_median_even(self):
        self.populate(range(1000))
        self.assertEqual(self.stats.median, (1000 - 1) / 2)

    def test_many_median_odd(self):
        self.populate(range(1001))
        self.assertEqual(self.stats.median, int(1001 / 2))

    def test_many_median_unordered(self):
        self.populate([7, 8, 4, 123, 9001, 0, -1, -2, -3, 5, 6, 9, 88, -2, 0])
        self.assertEqual(self.stats.median, 5.0)

    def test_empty_std_dev(self):
        self.assertEqual(self.stats.std_dev, 0.0)

    def test_single_std_dev(self):
        self.populate(6)
        self.assertEqual(self.stats.std_dev, 0.0)

    def test_two_std_dev(self):
        self.populate([5, 6])
        self.assertEqual(self.stats.std_dev, sqrt(1 / 2))

    def test_three_std_dev(self):
        self.populate([5, 6, 7])
        self.assertEqual(self.stats.std_dev, 1.0)

    def test_four_std_dev(self):
        self.populate([5, 6, 7, 8])
        self.assertEqual(self.stats.std_dev, sqrt(5 / 3))

    def test_multiple_std_dev(self):
        self.populate([-5, -2, 0, 1, 5, 6, 7, 8, 19, 30, 43, 89, 112, 772])
        self.assertAlmostEqual(self.stats.std_dev, 202.99214, places=5)

    def test_multiple_std_dev_unordered(self):
        self.populate([6, 8, 1, 3, -17, 18, 32, -49, 7, 14, 21, 0, 3, -5, -9])
        self.assertAlmostEqual(self.stats.std_dev, 18.66318, places=5)
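# A minimal sketch of a Statistics class that would satisfy the test suite
# above, assuming update() accepts either a single number or any iterable,
# and that std_dev is the sample standard deviation (n - 1 denominator, as
# the [5, 6] -> sqrt(1/2) case implies). The real implementation may differ,
# for example by keeping running aggregates instead of storing every value.
from math import sqrt


class Statistics:
    def __init__(self):
        self._values = []

    def update(self, data):
        # Accept a single number or an iterable of numbers.
        try:
            self._values.extend(data)
        except TypeError:
            self._values.append(data)

    @property
    def n(self):
        return len(self._values)

    @property
    def sum(self):
        return float(sum(self._values))

    @property
    def mean(self):
        return self.sum / self.n if self.n else 0.0

    @property
    def median(self):
        # Indexing an empty list raises IndexError, as test_empty_median expects.
        ordered = sorted(self._values)
        mid = len(ordered) // 2
        if len(ordered) % 2:
            return ordered[mid]
        return (ordered[mid - 1] + ordered[mid]) / 2

    @property
    def std_dev(self):
        if self.n < 2:
            return 0.0
        m = self.mean
        return sqrt(sum((x - m) ** 2 for x in self._values) / (self.n - 1))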
class StatisticsTest(unittest.TestCase):
    def setUp(self):
        conf = 0.95     # Confidence
        n_bits = 1000   # Number of bits per packet
        tx_rate = 50    # Tx rate in Mbps
        self.stat = Statistics(n_bits, tx_rate, conf)

    def test_get_conf(self):
        self.assertEqual(0.95, self.stat.get_conf())

    def test_get_n_bits(self):
        self.assertEqual(1000, self.stat.get_n_bits())

    def test_get_tx_rate(self):
        self.assertEqual(50, self.stat.get_tx_rate())

    def test_get_n_pcks(self):
        self.assertEqual(0, self.stat.get_n_pcks())

    def test_get_n_pck_errors(self):
        self.assertEqual(0, self.stat.get_n_pck_errors())

    def test_get_per_list(self):
        self.assertEqual(0, len(self.stat.get_per_list()))

    def test_get_thrpt_list(self):
        self.assertEqual(0, len(self.stat.get_thrpt_list()))

    def test_pck_received(self):
        self.stat.pck_received(False)
        self.stat.pck_received(False)
        self.stat.pck_received(False)
        self.stat.pck_received(True)
        self.assertEqual(4, self.stat.get_n_pcks())
        self.assertEqual(1, self.stat.get_n_pck_errors())
        # Test calc_iteration_results() method
        per, thrpt = self.stat.calc_iteration_results()
        self.assertEqual(0.25, per)
        self.assertEqual(37.5, thrpt)
        self.assertEqual([0.25], self.stat.get_per_list())
        self.assertEqual([37.5], self.stat.get_thrpt_list())

    def test_conf_interval(self):
        # Data with zero standard deviation
        data = [5, 5, 5, 5, 5, 5, 5, 5]
        meanVal, interv = self.stat.conf_interval(data)
        self.assertEqual(5, meanVal)
        self.assertEqual(0, interv)
        # General data
        data = [1.0e-04, 5.0e-05, 1.0e-05, 2.0e-05]
        meanVal, interv = self.stat.conf_interval(data)
        self.assertAlmostEqual(4.5e-05, meanVal, delta=0.05e-05)
        self.assertAlmostEqual(6.42995e-05, interv, delta=0.05e-05)

    def test_wrap_up(self):
        # Simulating an iteration: 4 packets with the same seed
        self.stat.pck_received(False)
        self.stat.pck_received(False)
        self.stat.pck_received(False)
        self.stat.pck_received(True)
        self.assertEqual(4, self.stat.get_n_pcks())
        self.assertEqual(1, self.stat.get_n_pck_errors())
        # calc_iteration_results() should yield PER and Tput for this seed
        per, thrpt = self.stat.calc_iteration_results()
        self.assertEqual(0.25, per)
        self.assertEqual(37.5, thrpt)
        # calc_iteration_results() should also reset the packet counters
        self.assertEqual(0, self.stat.get_n_pcks())
        self.assertEqual(0, self.stat.get_n_pck_errors())
        # calc_iteration_results() should not reset the lists, though
        self.assertEqual([0.25], self.stat.get_per_list())
        self.assertEqual([37.5], self.stat.get_thrpt_list())
        # Simulating another iteration
        self.stat.pck_received(False)
        self.stat.pck_received(False)
        self.stat.pck_received(False)
        self.stat.pck_received(True)
        # calc_iteration_results() finishes the iteration
        per, thrpt = self.stat.calc_iteration_results()
        per_tpl, thrpt_tpl = self.stat.wrap_up()
        self.assertEqual(0.25, per_tpl[0])
        self.assertEqual(0, per_tpl[1])
        self.assertEqual(37.5, thrpt_tpl[0])
        self.assertEqual(0, thrpt_tpl[1])
        # wrap_up() should reset the lists
        self.assertEqual(0, len(self.stat.get_per_list()))
        self.assertEqual(0, len(self.stat.get_thrpt_list()))
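# The expected numbers in test_pck_received follow from a simple reading of
# the counters: PER = errors / packets = 1 / 4 = 0.25, and throughput appears
# to be tx_rate * (1 - PER) = 50 * 0.75 = 37.5 Mbps. That formula is inferred
# from the test values, not taken from the Statistics implementation itself.
def iteration_results(n_pcks, n_pck_errors, tx_rate):
    """Hypothetical per-iteration PER and throughput matching the test values."""
    per = n_pck_errors / n_pcks
    thrpt = tx_rate * (1 - per)
    return per, thrpt

# iteration_results(4, 1, 50) == (0.25, 37.5)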
""" This file contains modules used to calculate the statistics for elastic src """ import os from src.constants import Constants from src.elastic.elastic import Elastic from src.statistics import Statistics from src.utils import Utils if __name__ == '__main__': UTILS = Utils(os.path.join("resources")) FIELDS = Constants.name_search_fields ALL_SUBSETS = UTILS.get_subsets(FIELDS) MISSPELLED_NAMES = UTILS.read_csv("names-misspelled.csv") CORRECT_NAMES = UTILS.read_csv("names-expected.csv") elastic = Elastic() STATISTICS = Statistics().calculate_statistics(CORRECT_NAMES, MISSPELLED_NAMES, ALL_SUBSETS, elastic, generate_reports=True) Statistics().generate_f1() UTILS.write_json_to_directory(STATISTICS, "reports") print(UTILS.get_shortest_fields_with_highest_f1_score(STATISTICS))
def test_exercise():
    statistics = Statistics()
    statistics.add_number(3)
    assert statistics.get_count() == 1
    statistics.add_number(5)
    statistics.add_number(1)
    statistics.add_number(2)
    assert statistics.get_count() == 4
    assert statistics.sum == 11
    assert statistics.average() == 2.75
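# A minimal sketch of the Statistics class this exercise test implies:
# add_number() accumulates values, get_count() and average() are methods,
# and sum is exposed as a plain attribute (the test reads statistics.sum
# without calling it). The names match the test; the internals are assumptions.
class Statistics:
    def __init__(self):
        self.sum = 0
        self._count = 0

    def add_number(self, number):
        self.sum += number
        self._count += 1

    def get_count(self):
        return self._count

    def average(self):
        return self.sum / self._count if self._count else 0

# With the test's inputs: sum = 3 + 5 + 1 + 2 = 11, count = 4, average = 2.75.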
class RequestManager:
    def __init__(self, producer_count, alpha, beta, device_count,
                 lambda_param, buffer_size):
        self.__producers = [
            Producer(i, alpha, beta) for i in range(producer_count)
        ]
        self.__devices = [Device(lambda_param) for _ in range(device_count)]
        self.__current_device = 0
        self.__buffer = Buffer(buffer_size)
        self.__alpha = alpha
        self.__beta = beta
        self.__lambda = lambda_param
        self.__stat = Statistics(producer_count, device_count)
        self.__creation_log = []
        self.__setting_log = []
        self.__event_log = []
        self.__release_log = []
        self.__deny_log = []
        self.__buffer_log = []

    def get_producer_count(self):
        return len(self.__producers)

    def get_device_count(self):
        return len(self.__devices)

    def get_buffer_size(self):
        return self.__buffer.size()

    def get_alpha(self):
        return self.__alpha

    def get_beta(self):
        return self.__beta

    def get_lambda(self):
        return self.__lambda

    def get_logs(self):
        return (self.__event_log, self.__release_log, self.__deny_log,
                self.__buffer_log)

    def get_request_count(self):
        request_count = []
        for i in self.__producers:
            request_count.append(i.get_request_count())
        return request_count

    def get_deny_probability(self):
        return self.__stat.get_deny_probability(self.get_request_count())

    def get_utilization_rate(self):
        return self.__stat.get_utilization_rate(
            [i.release_time for i in self.__devices])

    def get_residence_time(self):
        return self.__stat.get_residence_time(self.get_request_count())

    def get_statistics(self):
        request_count, release_times = self.get_request_count(), []
        for i in self.__devices:
            release_times.append(i.release_time)
        return self.__stat.calculate_statistics(request_count, release_times)

    # process the next event
    def process_next_event(self):
        event = self.__get_next_event()
        self.__event_log.append(event)
        if isinstance(event, CreationEvent):
            self.__process_creation_event(event)
        elif isinstance(event, SettingEvent):
            self.__process_setting_event(event)
        self.__buffer_log.append(list(self.__buffer.get_data()))
        self.__pop_extra_from_logs()

    def process_remaining_requests(self):
        while not self.__buffer.empty():
            next_released_device = min(self.__devices,
                                       key=attrgetter('release_time'))
            next_released_device_time = next_released_device.release_time
            request = self.__buffer.pop()
            current = self.__current_device
            if request.creation_time <= next_released_device_time:
                current = self.__devices.index(next_released_device)
            else:
                for _ in range(len(self.__devices)):
                    if self.__devices[current].release_time <= request.creation_time:
                        break
                    current = self.__get_next_device_index(current)
            self.__release_log.append(
                ReleaseEvent(self.__devices[current].release_time, current,
                             self.__devices[current].get_current_request()))
            self.__devices[current].release()
            e = SettingEvent(
                max(self.__devices[current].release_time,
                    request.creation_time), current, request)
            self.__event_log.append(e)
            self.__process_setting_event(e)
            self.__buffer_log.append(list(self.__buffer.get_data()))
            self.__pop_extra_from_logs()

    def __pop_extra(self, log):
        while len(log) > 10:
            log.pop(0)

    # drop stale events and states from the logs
    def __pop_extra_from_logs(self):
        self.__pop_extra(self.__event_log)
        self.__pop_extra(self.__deny_log)
        self.__pop_extra(self.__release_log)
        self.__pop_extra(self.__buffer_log)

    # determine the next event
    def __get_next_event(self):
        next_creating_producer = min(self.__producers,
                                     key=attrgetter('next_creation_time'))
        next_request_creation_time = next_creating_producer.next_creation_time
        next_released_device = min(self.__devices,
                                   key=attrgetter('release_time'))
        next_released_device_time = next_released_device.release_time
        if next_request_creation_time <= next_released_device_time:
            producer_id = self.__producers.index(next_creating_producer)
            self.__release_log.append(None)
            return CreationEvent(
                next_request_creation_time, producer_id,
                self.__producers[producer_id].get_request_count())
        if self.__buffer.empty():
            device_id = self.__devices.index(next_released_device)
            self.__release_log.append(
                ReleaseEvent(next_released_device_time, device_id,
                             self.__devices[device_id].get_current_request()))
            self.__devices[device_id].release()
            producer_id = self.__producers.index(next_creating_producer)
            return CreationEvent(
                next_request_creation_time, producer_id,
                self.__producers[producer_id].get_request_count())
        request = self.__buffer.pop()
        current = self.__current_device
        if request.creation_time <= next_released_device_time:
            current = self.__devices.index(next_released_device)
        else:
            for _ in range(len(self.__devices)):
                if self.__devices[current].release_time <= request.creation_time:
                    break
                current = self.__get_next_device_index(current)
        self.__release_log.append(
            ReleaseEvent(self.__devices[current].release_time, current,
                         self.__devices[current].get_current_request()))
        self.__devices[current].release()
        return SettingEvent(
            max(self.__devices[current].release_time, request.creation_time),
            current, request)

    def __process_creation_event(self, e: CreationEvent):
        new_request = self.__producers[e.producer_id].produce()
        denied = self.__buffer.push(new_request)
        if denied is not None:
            # the pushed-out request is denied
            self.__stat.increase_denies(denied.producer_id)
            self.__stat.append_waiting_time(
                denied.producer_id,
                new_request.creation_time - denied.creation_time)
            self.__deny_log.append(DenyEvent(new_request.creation_time, denied))
        else:
            self.__deny_log.append(None)

    def __process_setting_event(self, e: SettingEvent):
        self.__stat.increase_downtime_time(
            e.device_index,
            e.time - self.__devices[e.device_index].release_time)
        self.__stat.append_waiting_time(e.request.producer_id,
                                        e.time - e.request.creation_time)
        self.__devices[e.device_index].process(e.request)
        self.__current_device = self.__get_next_device_index(e.device_index)
        self.__stat.append_processing_time(
            e.request.producer_id,
            self.__devices[e.device_index].release_time - e.time)
        self.__deny_log.append(None)

    # pick the next device round-robin
    def __get_next_device_index(self, device_index):
        return (device_index + 1) % len(self.__devices)

    def __get_prev_device_index(self, device_index):
        return (len(self.__devices) - 1 + device_index) % len(self.__devices)
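# A hypothetical driver showing how RequestManager might be used: process
# events until every producer has generated some number of requests, drain
# the buffer, then read the aggregated statistics. The constructor arguments
# and the stopping rule are illustrative assumptions, not values from the
# original project.
if __name__ == '__main__':
    manager = RequestManager(producer_count=3, alpha=1.0, beta=2.0,
                             device_count=2, lambda_param=1.5, buffer_size=5)
    while min(manager.get_request_count()) < 100:
        manager.process_next_event()
    manager.process_remaining_requests()
    print(manager.get_deny_probability())
    print(manager.get_utilization_rate())
    print(manager.get_residence_time())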