def check_cache_miss(keys, mhb, before=None):
    before = Counter() if before is None else before
    before_keys = before.keys()
    cnt = OrderedDict()
    log = logger.get_logger(check_cache_miss)
    was_not_cached_count = 0
    for key in keys:
        node = mhb.find_node_for_key(key=key)
        value, was_cached = node.get_or_if_not_present_set(key=key)
        if was_cached is False:
            # Count misses per node so hot spots are visible.
            if node.name not in cnt:
                cnt[node.name] = 1
            else:
                cnt[node.name] += 1
            was_not_cached_count += 1
    # Diff the key-space distribution before and after the topology change.
    after = mhb.calculate_distribution()
    after_keys = after.keys()
    out = []
    for before_key in before_keys:
        if before_key not in after_keys:
            out.append("REMOVE {} -{}".format(before_key, before[before_key]))
    for after_key in after_keys:
        if after_key in before_keys and before[after_key] != after[after_key]:
            out.append("CHANGE {} {}".format(
                after_key, after[after_key] - before[after_key]))
        if after_key not in before_keys:
            out.append("ADD {} +{}".format(after_key, after[after_key]))
    log.warn("CACHE TRANSFER(%) -> {} => {}".format(
        was_not_cached_count * 100.0 / len(keys), out))
    return after
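# Illustration of the distribution diff above, with hypothetical node names
# and percentages (not output from a real run):
#   before = {'alice': 40.0, 'bob': 60.0}
#   after  = {'bob': 55.0, 'carol': 45.0}
# yields out = ['REMOVE alice -40.0', 'CHANGE bob -5.0', 'ADD carol +45.0'],
# i.e. alice's share of the key space moved to bob and the new node carol.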
def main(number_of_nodes=15, dataset_size=10**5, number_of_virtual_nodes=1):
    log = logger.get_logger(main)
    log.info("NUMBER OF NODES: {}".format(number_of_nodes))
    log.info("DATASET SIZE: {}".format(dataset_size))
    log.info("NUMBER OF VIRTUAL NODES: {}".format(number_of_virtual_nodes))
    log.info("NUMBER OF TOTAL NODES: {} * {} = {}".format(
        number_of_nodes, number_of_virtual_nodes,
        number_of_nodes * number_of_virtual_nodes))
    mhb = ConsistentHashingBenchmark(
        number_of_nodes=number_of_nodes,
        cache_size=int(math.ceil(dataset_size * 1.2 / (number_of_nodes - 1))),
        number_of_virtual_nodes=number_of_virtual_nodes)
    keys = range(0, dataset_size)
    init_benchmark_data(keys, mhb)
    cnt = check_cache_miss(keys, mhb)
    # Repeatedly grow and shrink the cluster, measuring cache misses after
    # each topology change.
    for _ in range(4):
        mhb.add_node()
        cnt = check_cache_miss(keys, mhb, cnt)
        mhb.remove_node()
        cnt = check_cache_miss(keys, mhb, cnt)
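# With the defaults above, each node's cache is sized with ~20% headroom over
# an even split of the dataset across one node fewer than the starting count:
#   cache_size = ceil(10**5 * 1.2 / (15 - 1)) = ceil(8571.43) = 8572
# so the cluster can still hold the whole dataset while a node is removed.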
def main(number_of_nodes=15, dataset_size=10**5):
    log = logger.get_logger(main)
    log.info("Number of nodes - {}".format(number_of_nodes))
    log.info("Dataset Size - {}".format(dataset_size))
    mhb = ModuloHashingBenchmark(
        number_of_nodes=number_of_nodes,
        # Float division so the ceil actually rounds up under Python 2.
        cache_size=int(math.ceil(dataset_size / float(number_of_nodes - 1))))
    keys = range(0, dataset_size)
    init_benchmark_data(keys, mhb)
    check_cache_miss(keys, mhb)
    # Repeatedly grow and shrink the cluster, measuring cache misses after
    # each topology change.
    for _ in range(4):
        mhb.add_node()
        check_cache_miss(keys, mhb)
        mhb.remove_node()
        check_cache_miss(keys, mhb)
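# ModuloHashingBenchmark.find_node_for_key is not shown in this section. A
# minimal sketch of the usual modulo scheme, assuming self.nodes is the list
# maintained by add_node/remove_node below (the real method may hash the key
# differently):
def find_node_for_key(self, key):
    # Any change to len(self.nodes) remaps almost every key, which is exactly
    # the cache-miss spike this benchmark measures.
    return self.nodes[hash(key) % len(self.nodes)]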
def remove_node(self):
    index = random.randint(0, len(self.nodes) - 1)
    self.nodes.pop(index)
    log = logger.get_logger(self.remove_node)
    log.info(
        "Removed a random node from the cluster, Total Nodes - {} Index - {}"
        .format(len(self.nodes), index))
def create_node(self, cache_size):
    node = Node(name=fake.user_name(), ip=fake.ipv4(), cache_size=cache_size)
    log = logger.get_logger(self.create_node)
    log.info("Created a new node, Name - {}, IP - {}".format(
        node.name, node.ip))
    return node
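# Node and get_or_if_not_present_set are used throughout but not defined in
# this section. A minimal sketch, assuming a bounded FIFO cache keyed by the
# benchmark keys; the real class may differ in eviction policy and in how the
# value is computed:
from collections import OrderedDict

class Node(object):
    def __init__(self, name, ip, cache_size):
        self.name = name
        self.ip = ip
        self.cache_size = cache_size
        self.cache = OrderedDict()
        self.virtual_node_locations = []

    def get_or_if_not_present_set(self, key):
        # Returns (value, was_cached); on a miss the value is computed and
        # stored, evicting the oldest entry if the cache is full.
        if key in self.cache:
            return self.cache[key], True
        if len(self.cache) >= self.cache_size:
            self.cache.popitem(last=False)
        self.cache[key] = key  # placeholder value for the benchmark
        return self.cache[key], False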
def check_cache_miss(keys, mhb):
    was_not_cached_count = 0
    for key in keys:
        node = mhb.find_node_for_key(key=key)
        value, was_cached = node.get_or_if_not_present_set(key=key)
        if was_cached is False:
            was_not_cached_count += 1
    log = logger.get_logger(check_cache_miss)
    log.info("Cache Miss % - {}".format(
        was_not_cached_count * 100.0 / len(keys)))
def remove_node(self):
    index = random.randint(0, len(self.ordered_node_locations) - 1)
    location_hash = self.ordered_node_locations[index]
    node = self.location_to_node_map[location_hash]
    # Remove all of the node's virtual locations from the ordered list and
    # drop their mappings from the dict.
    for virtual_location in node.virtual_node_locations:
        self.ordered_node_locations.remove(virtual_location)
        del self.location_to_node_map[virtual_location]
    log = logger.get_logger(self.remove_node)
    log.error("REMOVED NODE - {}".format(node.name))
    log.error("ORDER - {}".format(
        [self.location_to_node_map[i].name
         for i in self.ordered_node_locations]))
    log.error("KEY-SPACE DISTRIBUTION - {}".format(
        self.calculate_distribution()))
    log.error("CACHE FILL(%) - {}".format(
        [(n.name, len(n.cache) * 100.0 / n.cache_size)
         for n in set(self.location_to_node_map.values())]))
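# calculate_distribution is referenced above but not shown here. A minimal
# sketch, assuming it reports each node's percentage share of the key space
# from the arc lengths between consecutive ring locations (field names as in
# remove_node, collections.Counter assumed imported; the real implementation
# may differ):
def calculate_distribution(self):
    distribution = Counter()
    locations = self.ordered_node_locations
    if len(locations) == 1:
        return Counter({self.location_to_node_map[locations[0]].name: 100.0})
    for i, location in enumerate(locations):
        # The node at `location` owns the arc from the previous location up
        # to this one; i == 0 wraps around to the last location.
        arc = (location - locations[i - 1]) % KEY_SPACE
        distribution[self.location_to_node_map[location].name] += (
            arc * 100.0 / KEY_SPACE)
    return distribution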
def add_node(self):
    node = self.create_node(cache_size=self.cache_size)
    for i in range(self.number_of_virtual_nodes):
        node_location_hash = long(
            hashlib.sha256(node.ip + "virtual_node:" + str(i)).hexdigest(),
            16) % KEY_SPACE
        self.location_to_node_map[node_location_hash] = node
        # Record the location on the node and keep the ring ordered.
        node.virtual_node_locations.append(node_location_hash)
        bisect.insort_right(self.ordered_node_locations, node_location_hash)
    log = logger.get_logger(self.add_node)
    log.debug("ADDED NODE - {}".format(node.name))
    log.debug("ORDER - {}".format(
        [self.location_to_node_map[i].name
         for i in self.ordered_node_locations]))
    log.debug("KEY-SPACE DISTRIBUTION - {}".format(
        self.calculate_distribution()))
    log.debug("CACHE FILL(%) - {}".format(
        [(n.name, len(n.cache) * 100.0 / n.cache_size)
         for n in set(self.location_to_node_map.values())]))
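# find_node_for_key is the other half of the ring logic and is not shown in
# this section. A minimal sketch matching add_node's hashing and fields (the
# real method may differ in detail):
def find_node_for_key(self, key):
    key_hash = long(hashlib.sha256(str(key)).hexdigest(), 16) % KEY_SPACE
    # The key belongs to the first node location clockwise of its position.
    index = bisect.bisect_right(self.ordered_node_locations, key_hash)
    if index == len(self.ordered_node_locations):
        index = 0  # wrap around past the largest location
    return self.location_to_node_map[self.ordered_node_locations[index]]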
def init_benchmark_data(keys, mhb):
    for key in keys:
        node = mhb.find_node_for_key(key=key)
        node.get_or_if_not_present_set(key=key)
    log = logger.get_logger(init_benchmark_data)
    log.warn("CREATING INITIAL BENCHMARK DATA COMPLETE")
# -*- coding: UTF-8 -*-
__author__ = 'aj'

# Uses the MySQL connector module PyMySQL (0.6.2).
import pymysql
import unittest

from base import baseutil
from base import logger

_log = logger.get_logger(baseutil.get_filename(__file__))


# Test class: tests OpenServiceDataImporter.
class _DbConnectionTest(unittest.TestCase):
    def test_dbConnectionTest(self):
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='******',
                               passwd='subway', db='apidata_subway')
        cur = conn.cursor()
        cur.execute("SELECT vid,value FROM testTable")
        print(cur.description)
        print()
        for row in cur:
            print(row)
        cur.close()
        conn.close()
        # self.assertNotEqual(None, None)
def add_node(self):
    self.nodes.append(self.create_node(cache_size=self.cache_size))
    log = logger.get_logger(self.add_node)
    log.info("Added a new node to the cluster, Total Nodes - {}".format(
        len(self.nodes)))
# -*- coding: UTF-8 -*-
__author__ = 'aj'

from externapi.seoul.imports.import_base import *
from externapi.seoul.SubwayOpenApi import *
from externapi.seoul.DbSubwaySeoul import DbSeoulSubway

import unittest

from base import logger

_log = logger.get_logger("IMPORTER-SEOUL")


# Combine all subway station codes with WEEK and INOUT, issue REST requests,
# and store the results in the DB.
def importStart():
    dss = DbSeoulSubway()
    if dss.open_api_service_connection() is False:
        raise OpenServiceDataImporterException(
            "Cannot open database connection")
    svc_nm = SB_SERVICE.SVC_SEARCHVIASTNARRIVALTIMEBYTRAINSERVICE
    dss.tables_truncate(svc_nm)
    trains = dss.get_all_train_no()
    inouts = SB_API_PARAM.INOUT_TAG.list()
    weeks = SB_API_PARAM.WEEK_TAG.list()
    for train in trains:
        for inout in inouts:
            for week in weeks:
                sub_result = _importSpecific(dss, svc_nm, train, week, inout)
                if sub_result is False:
def init_benchmark_data(keys, mhb):
    for key in keys:
        node = mhb.find_node_for_key(key=key)
        node.get_or_if_not_present_set(key=key)
    log = logger.get_logger(init_benchmark_data)
    log.info("Creating Initial Benchmark Data Complete")