def create_app():
    """Initialize the app: admin UI, per-cluster metrics and Pub/Sub plumbing."""
    hostname = utils.get_host_name()
    # Wire up the flask-admin settings screen.
    admin_ui = flask_admin.Admin(
        app, 'Admin', base_template='layout.html', template_mode='bootstrap3')
    admin_ui.add_view(AdminCustomView(settings.Settings))
    logging.info("Starting Shamash on %s", hostname)
    # Ensure every known cluster has its custom metric descriptors created.
    for cluster in settings.get_all_clusters_settings().iter():
        metrics.Metrics(cluster.Cluster).init_metrics()
    # Create the topics, then their subscriptions, then start pulling.
    client = pubsub.get_pubsub_client()
    for topic in ('shamash-monitoring', 'shamash-scaling'):
        pubsub.create_topic(client, topic)
    pubsub.create_subscriptions(client, 'monitoring', 'shamash-monitoring')
    pubsub.create_subscriptions(client, 'scaling', 'shamash-scaling')
    pubsub.pull(client, 'monitoring',
                'https://shamash-dot-{}/get_monitoring_data'.format(hostname))
    pubsub.pull(client, 'scaling',
                "https://shamash-dot-{}/scale".format(hostname))
def calc_slope(self, minuets):
    """
    Calculate the slope of available-memory change over recent history.

    :param minuets: how many minutes to look back in time (parameter name
        kept for backward compatibility; it means "minutes").
    :return: the fitted slope, or +/-1 (per scaling direction) when there
        is not enough data for a fit.
    """
    import warnings  # local import: top-of-file import block is not visible here
    logging.info("calc slope")
    met = metrics.Metrics(self.cluster_name)
    series = met.read_timeseries('YARNMemoryAvailablePercentage', minuets)
    points = list(series[0]['points'])
    # Build the fit inputs oldest-first (inserts at index 0 reverse the order).
    # NOTE(review): x holds metric values and y holds time indices — the axes
    # look swapped for a value-vs-time slope; preserved as-is since callers
    # may be tuned to this behavior. TODO confirm intent.
    x = []
    y = []
    i = len(points)
    for point in points:
        x.insert(0, point['value']['doubleValue'])
        y.insert(0, i)
        i -= 1
    try:
        # np.RankWarning is a *warning*, not an exception, so the except
        # clause below could never fire; promote it to an error locally so
        # an ill-conditioned fit takes the fallback path as intended.
        with warnings.catch_warnings():
            warnings.simplefilter('error', np.RankWarning)
            slope, intercept = np.polyfit(x, y, 1)
        logging.debug('Slope is %s', slope)
    except (np.RankWarning, TypeError):
        # Not enough data (TypeError covers an empty vector): assume a unit
        # slope in the current scaling direction.
        slope = 1 if self.scaling_direction == 'up' else -1
        logging.debug('No Data slope is %s', slope)
    logging.info("Slope %s", str(slope))
    return slope
def should_scale(payload):
    """
    Make a decision to scale or not.

    :param payload: base64-encoded JSON message with the cluster's current
        YARN stats, as published by the monitor.
    :return: ('OK', 204) Flask-style response tuple.
    """
    data = json.loads(base64.b64decode(payload))
    yarn_memory_available_percentage = data['yarn_memory_available_percentage']
    container_pending_ratio = data['container_pending_ratio']
    number_of_nodes = data['number_of_nodes']
    cluster_name = data['cluster']
    # NOTE(review): unused below; kept so a missing key still fails loudly.
    yarn_containers_pending = data['yarn_containers_pending']
    workers = data['worker_nodes']
    preemptible_workers = data['preemptible_workers']

    # The settings query yields entities for this cluster; keep the last one.
    cluster_settings = None
    for st in settings.get_cluster_settings(cluster_name):
        cluster_settings = st
    if cluster_settings is None:
        # Previously this fell through to an AttributeError on None; fail
        # soft instead and ack the message so it is not redelivered forever.
        logging.error('No settings found for cluster %s', cluster_name)
        return 'OK', 204

    logging.info(
        'Cluster %s YARNMemAvailPct %s ContainerPendingRatio %s number of '
        'nodes %s', cluster_name, yarn_memory_available_percentage,
        container_pending_ratio, number_of_nodes)

    # Record the current state as custom time series.
    met = metrics.Metrics(cluster_name)
    met.write_timeseries_value('YARNMemoryAvailablePercentage',
                               100 * yarn_memory_available_percentage)
    met.write_timeseries_value('ContainerPendingRatio',
                               container_pending_ratio)
    met.write_timeseries_value('YarnNodes',
                               str(int(workers) + int(preemptible_workers)))
    met.write_timeseries_value('Workers', workers)
    met.write_timeseries_value('PreemptibleWorkers', preemptible_workers)

    scaling_direction = None
    containerpendingratio = -1
    scale_to = -1
    # Pending containers are waiting -> we need more capacity.
    if container_pending_ratio > cluster_settings.UpContainerPendingRatio:
        scaling_direction = 'up'
        containerpendingratio = container_pending_ratio
        # (fixed stray '%' that was embedded in this log message)
        logging.info(
            "container_pending_ratio > cluster_settings.UpContainerPendingRatio"
        )
    elif container_pending_ratio < cluster_settings.DownContainerPendingRatio:
        scaling_direction = 'down'
        containerpendingratio = container_pending_ratio
        logging.info(
            "container_pending_ratio < cluster_settings.DownContainerPendingRatio"
        )
    # All memory is free -> no work at all; shrink to the minimum size.
    elif yarn_memory_available_percentage == 1:
        logging.info("yarn_memory_available_percentage == 1")
        if number_of_nodes > cluster_settings.MinInstances:
            logging.info("number_of_nodes > cluster_settings.MinInstances")
            scaling_direction = 'down'
            scale_to = cluster_settings.MinInstances
    # We don't have enough memory, let's go up.
    elif yarn_memory_available_percentage < cluster_settings.UpYARNMemAvailPct:
        logging.info(
            "yarn_memory_available_percentage < cluster_settings.UpYARNMemAvailPct"
        )
        scaling_direction = 'up'
    # We have too much free memory :)
    elif yarn_memory_available_percentage > \
            cluster_settings.DownYARNMemAvailePct:
        logging.info(
            "yarn_memory_available_percentage > cluster_settings.DownYARNMemAvailePct:"
        )
        scaling_direction = 'down'

    body = {
        'cluster': cluster_name,
        'scaling_direction': scaling_direction,
        'containerpendingratio': containerpendingratio,
        'scale_to': scale_to
    }
    if scaling_direction is not None:
        trigger_scaling(body)
    return 'OK', 204
def _post_put_hook(self, future):
    """After the entity is saved, (re)initialize its metric descriptors."""
    metrics.Metrics(self.Cluster).init_metrics()