from datetime import datetime

from vdi.models import UserExperience


def process_user_connections(app_node):
    from opus.lib import log
    log = log.get_logger('vdi')
    # Only consider experiences whose connection has not been closed yet
    user_experience = UserExperience.objects.exclude(connection_closed__isnull=False)
    for user_exp in user_experience:
        for session in app_node.sessions:
            if user_exp.user.username.split('++')[1].split('@')[0] == session['username']:
                if user_exp.connection_opened is None:
                    log.debug("Setting the connection_opened parameter")
                    user_exp.connection_opened = datetime.today()
                    user_exp.save()
                    log.debug("Connection opened after setting is: " + str(user_exp.connection_opened))
                # This user still has an active session, so do not close it below
                user_experience = user_experience.exclude(user=user_exp.user)
    # Anything left with an opened connection but no matching session is now closed
    user_experience = user_experience.exclude(connection_opened__isnull=True)
    for user_exp in user_experience:
        user_exp.connection_closed = datetime.today()
        user_exp.save()
        log.debug("Connection closed was set to: " + str(user_exp.connection_closed))
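A minimal sketch of how this helper might be driven for a single node, assuming an osutils-style object that exposes a sessions list; the FakeNode class and the sample usernames are hypothetical and only for illustration:

# Hypothetical stand-in for an osutils node object; only the attribute
# process_user_connections() actually reads (sessions) is provided.
class FakeNode(object):
    def __init__(self, ip, sessions):
        self.ip = ip
        self.sessions = sessions

# Usernames in UserExperience apparently follow an 'org++user@domain' pattern,
# so a session entry of {'username': 'jdoe'} would match 'org++jdoe@example.com'.
node = FakeNode('10.0.0.5', [{'username': 'jdoe'}])
process_user_connections(node)  # opens/closes UserExperience rows as needed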
def run(self, app):
    # Create an instance of the logger
    log = get_logger('vdi')
    # Create the cluster object to help us manage the cluster
    cluster = AppCluster(app.pk)

    # Clean up all idle users on all nodes for this application cluster
    log.debug('APP NAME %s' % app.name)
    cluster.logout_idle_users()

    log.debug("Checking for active clusters")
    for node in cluster.active:
        log.debug("Found active host")
        osutil_node = osutils.get_os_object(
            node.ip, settings.MEDIA_ROOT + str(node.application.ssh_key))
        user_experience_tools.process_user_connections(osutil_node)

    # Handle vms we were waiting on to boot up
    booting = driver_tools.get_instances(cluster.booting)
    # Iterate over a copy because booting is modified inside the loop
    for vm in list(booting):
        dns_name = vm.public_addresses[0]
        log.debug('Public DNS name of booting node: %s' % dns_name)
        if dns_name.find("amazonaws.com") > -1:
            # Resolve the domain name into an IP address.
            # This adds a dependency on the 'host' command.
            output = Popen(["host", dns_name], stdout=PIPE).communicate()[0]
            ip = '.'.join(re.findall(
                r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)',
                output)[0])
            try:
                # TODO: remove the hard coded '3389' & '22' below. '3389' is for RDP and '22' is for SSH
                # TODO: remove the arbitrary '3' second timeout below
                socket.create_connection((ip, 22), 3)
                #socket.create_connection((ip, 3389), 3)
            except Exception:
                log.debug("Server %s is not yet available" % ip)
            else:
                instance = Instance.objects.filter(instanceId=vm.id)[0]
                booting.remove(vm)
                instance.ip = ip
                instance.state = 2
                instance.save()
                log.debug("Moving instance %s into enabled state with ip %s"
                          % (instance.instanceId, ip))
    num_booting = len(booting)
    if num_booting > 0:
        log.debug("Application cluster '%s' is still waiting for %s cluster nodes to boot"
                  % (cluster.name, num_booting))

    # Consider if the cluster needs to be scaled
    log.debug('Considering %s app cluster for scaling ...' % cluster.name)

    # Should I scale up?
    log.debug('%s is avail (%s) < req (%s)?'
              % (cluster.app.name, cluster.avail_headroom, cluster.req_headroom))
    if cluster.avail_headroom < cluster.req_headroom:
        # Yes I should scale up
        space_needed = cluster.req_headroom - cluster.avail_headroom
        servers_needed = int(math.ceil(space_needed / float(cluster.app.users_per_small)))
        log.debug('Available headroom (%s) is less than the cluster headroom goal (%s). '
                  'Starting %s additional cluster nodes now'
                  % (cluster.avail_headroom, cluster.req_headroom, servers_needed))
        for i in range(servers_needed):
            cluster.start_node()

    # Handle instances we are supposed to shut down
    toTerminate = []
    for host in cluster.shutting_down:
        log.debug('Checking shutting-down host %s' % host.instanceId)
        try:
            osutil_node = osutils.get_os_object(
                host.ip, settings.MEDIA_ROOT + str(host.application.ssh_key))
            log.debug('Node %s is waiting to be shut down and has %s connections'
                      % (host.ip, osutil_node.sessions))
            if osutil_node.sessions == []:
                toTerminate.append(host)
                host.shutdownDateTime = datetime.now()
                host.save()
        except HostNotConnectableError:
            # Ignore this host that doesn't seem to be ssh'able, but log it as a warning
            log.warning('Node %s is NOT sshable and should be looked into. '
                        'It is currently waiting to shutdown' % host.ip)
    driver_tools.terminate_instances(toTerminate)

    # Should I scale down?
    overprov_num = cluster.avail_headroom - cluster.req_headroom
    log.debug('overprov (%s) avail (%s) required (%s)'
              % (overprov_num, cluster.avail_headroom, cluster.req_headroom))
    # Reverse the list to try to remove the servers at the end of the waterfall
    inuse_reverse = cluster.inuse_map
    inuse_reverse.reverse()
    for (host, inuse) in inuse_reverse:
        # The node must have 0 sessions and the cluster must be able to be
        # smaller while still leaving enough headroom
        if int(inuse) == 0 and overprov_num >= cluster.app.users_per_small:
            overprov_num = overprov_num - cluster.app.users_per_small
            host.state = 4
            host.save()
            log.debug('Application Server %s has no sessions. Removing that node from the cluster!'
                      % host.ip)
    return 'scaling complete @TODO put scaling event summary in this output'
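As a worked example of the scale-up arithmetic in run() above, assuming avail_headroom is 3, req_headroom is 10, and users_per_small is 4 (all values here are illustrative):

import math

avail_headroom, req_headroom, users_per_small = 3, 10, 4   # assumed values
space_needed = req_headroom - avail_headroom               # 7 user slots short
servers_needed = int(math.ceil(space_needed / float(users_per_small)))
assert servers_needed == 2                                 # two new small nodes started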
from datetime import datetime

from django.db import models
from django.contrib.auth.models import User
from django.db.models import signals
from django.conf import settings

from opus.lib import log
log = log.get_logger('vdi')

from vdi.signals import create_application_permission, delete_application_permission

from django.core.files.storage import FileSystemStorage
fs = FileSystemStorage(location=settings.OPUS_SECURE_UPLOADS)


class Application(models.Model):
    name = models.CharField(max_length=64)  # Pretty name of the application
    image_id = models.CharField(max_length=32)  # Image id of the image that the actual application lies on
    path = models.CharField(max_length=256, blank=True)  # Path of the application to be run on the host
    max_concurrent_instances = models.IntegerField(default=0)
    users_per_small = models.IntegerField(default=10)
    cluster_headroom = models.IntegerField(default=0)
    icon_url = models.URLField()
    ssh_key = models.FileField("SSH Key", upload_to='vdi/sshkeys', storage=fs)
    scale_interarrival = models.IntegerField(default=180)  # The interarrival time of the scale function running
from datetime import datetime
import time

from django.test import TestCase
from django.contrib.auth.models import User

from vdi.models import UserExperience, Application, Instance
from vdi import user_experience_tools, cost_tools

from opus.lib import log
log = log.get_logger('vdi')


class CostToolsTest(TestCase):

    def setUp(self):
        self.app1 = Application.objects.create(
            name="TestApp1", image_id="1234", max_concurrent_instances=3,
            users_per_small=4, cluster_headroom=5,
            icon_url="http://nopath", ssh_key='key.fake')
        self.app2 = Application.objects.create(
            name="TestApp2", image_id="12345", max_concurrent_instances=3,
            users_per_small=4, cluster_headroom=5,
            icon_url="http://nopath", ssh_key='key.fake')

        self.inst1 = Instance.objects.create(
            instanceId='10', application=self.app1, priority=0, state=5)
        self.inst1.shutdownDateTime = datetime(2010, 5, 13, 18, 00)
        self.inst1.startUpDateTime = datetime(2010, 5, 13, 12, 00)
        self.inst1.save()

        self.inst2 = Instance.objects.create(
            instanceId='11', application=self.app1, priority=0, state=2)
        self.inst2.shutdownDateTime = datetime(2010, 5, 14, 8, 00)
        self.inst2.startUpDateTime = datetime(2010, 5, 14, 8, 00)
        self.inst2.save()

        self.inst3 = Instance.objects.create(
            instanceId='12', application=self.app2, priority=0, state=5)
        self.inst3.shutdownDateTime = datetime(2010, 5, 12, 18, 00)
        self.inst3.startUpDateTime = datetime(2010, 5, 11, 8, 00)
        self.inst3.save()

        app = Application.objects.all()
        tmp_list = list(app)
# See the License for the specific language governing permissions and       #
# limitations under the License.                                            #
##############################################################################

import re

from django import forms
from django.forms.fields import *
from django.forms.widgets import *
from django.core.validators import RegexValidator
from django.forms.formsets import formset_factory
from django.conf import settings

from opus.lib.log import get_logger
log = get_logger()

id_re = re.compile(r"^[a-z][a-z0-9_]+$")
validate_identifier = RegexValidator(
    id_re,
    u"Enter a valid identifier consisting of letters, numbers, and underscores, not starting with a number.",
    "invalid",
)


class IdentifierField(forms.CharField):
    default_error_messages = {
        "invalid": u"A valid identifier is letters, numbers, and "
                   "underscores only. It cannot start with a number."
    }
    default_validators = [validate_identifier]
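A brief sketch of how IdentifierField might be used; the ProjectForm class and its field names are hypothetical and not part of the original module:

# Hypothetical form using IdentifierField for illustration only.
class ProjectForm(forms.Form):
    identifier = IdentifierField(max_length=32)
    description = forms.CharField(required=False)

form = ProjectForm({'identifier': '2bad'})        # starts with a digit
assert not form.is_valid()                        # fails the "invalid" validator
form = ProjectForm({'identifier': 'good_name1'})  # lowercase letter, then word characters
assert form.is_valid()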
#   http://www.apache.org/licenses/LICENSE-2.0                              #
#                                                                           #
# Unless required by applicable law or agreed to in writing, software       #
# distributed under the License is distributed on an "AS IS" BASIS,         #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
# See the License for the specific language governing permissions and       #
# limitations under the License.                                            #
##############################################################################

import string

from opus.lib.ssh_tools import NodeUtil, HostNotConnectableError
from generic import Generic

from opus.lib import log
log = log.get_logger("opus.lib.osutils")


class Linux(Generic):

    def __init__(self, ip, ssh_key):
        self.ip = ip
        self.ssh_key = ssh_key
        self.node = NodeUtil(ip, ssh_key)
        self.check_user_load()

    def add_user(self, username, password):
        try:
            output = self.node.ssh_run_command(["adduser", username])
            output = self.change_user_password(username, password)
            log.debug('THE PASSWORD WAS SET')
            log.debug("Added %s" % username)
import math
import datetime

from opus.lib import log
log = log.get_logger("vdi")

from vdi.app_cluster_tools import AppCluster
from vdi.models import Instance


def convert_to_date_time(date):
    """Assumes the date comes in the form year-month-dayThour:minute:second."""
    year = str(date[0]) + str(date[1]) + str(date[2]) + str(date[3])
    month = str(date[5]) + str(date[6])
    day = str(date[8]) + str(date[9])
    hour = str(date[11]) + str(date[12])
    minute = str(date[14]) + str(date[15])
    second = str(date[17]) + str(date[18])
    new_datetime = datetime.datetime(int(year), int(month), int(day),
                                     int(hour), int(minute), int(second))
    return new_datetime


def get_instance_hours_in_date_range(start_date, end_date):
    instances = Instance.objects.exclude(shutdownDateTime__gt=end_date).exclude(startUpDateTime__lt=start_date)
    total_hours = get_total_instance_hours(instances, start_date, end_date)
    return total_hours
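A small usage sketch for convert_to_date_time, assuming a fixed-width ISO-style timestamp string as described in the docstring (the sample value is arbitrary):

# Parse a fixed-width 'year-month-dayThour:minute:second' string.
stamp = "2010-05-13T18:30:00"
dt = convert_to_date_time(stamp)
assert dt == datetime.datetime(2010, 5, 13, 18, 30, 0)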
# distributed under the License is distributed on an "AS IS" BASIS,         #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
# See the License for the specific language governing permissions and       #
# limitations under the License.                                            #
##############################################################################

import subprocess
import time
import socket
import select

from django.conf import settings

# Provide logging
from opus.lib import log
log = log.get_logger("opus.lib.sshtools")


class HostNotConnectableError(Exception):
    pass


class NodeUtil(object):
    """Represents a remote machine."""

    # SSH commands should use -p SSH_PORT
    SSH_PORT = 22

    def __init__(self, ip, key, username='******'):
        """Stores the ip address that we use to connect to it."""
        self.ip = ip
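A minimal sketch of the error-handling pattern the cluster scaling code relies on, assuming that running a command through NodeUtil raises HostNotConnectableError when the host cannot be reached over SSH; the IP address, key path, and command below are placeholders:

# Illustrative only: placeholder ip and key path, and this assumes
# ssh_run_command() raises HostNotConnectableError for unreachable hosts.
node = NodeUtil('203.0.113.10', '/path/to/key.pem')
try:
    uptime = node.ssh_run_command(["uptime"])
except HostNotConnectableError:
    log.warning("Host %s is not reachable over ssh" % node.ip)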