def create_data_snapshot(self, node, prefix, zone='us-central1-c', devel=False):
    """
    Snapshot the data disks on the given machine.  Typically used for
    backing up very important data.

    The instance is looked up with ``gcloud compute instances describe``.
    Every attached disk that is not the boot disk and whose device name
    does not contain 'swap' is snapshotted under the name
    ``data-<deviceName>-<timestamp>``.

    A failure on one disk is logged and recorded; the remaining disks are
    still attempted.
    """
    zone = self.expand_zone(zone)
    instance_name = self.instance_name(node, prefix, zone, devel=devel)
    info = json.loads(cmd(['gcloud', 'compute', 'instances', 'describe',
                           instance_name, '--zone', zone, '--format=json'],
                          verbose=0))
    # NOTE(review): errors are collected but, as far as this block shows,
    # never returned or raised -- confirm that is intended.
    errors = []
    for disk in info['disks']:
        if disk.get('boot', False):
            continue
        src = disk['deviceName']
        if 'swap' in src:
            continue
        target = 'data-%s-%s' % (src, time.strftime(TIMESTAMP_FORMAT))
        log("%s --> %s", src, target)
        try:
            cmd(['gcloud', 'compute', 'disks', 'snapshot',
                 '--project', self.project, src,
                 '--snapshot-names', target,
                 '--zone', zone], system=True)
        except Exception as mesg:
            # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
            log("WARNING: issue making snapshot %s -- %s", target, mesg)
            errors.append(mesg)
def create_data_snapshot(self, node, prefix, zone='us-central1-c', devel=False):
    """
    Snapshot the data disks on the given machine.  Typically used for
    backing up very important data.

    Disks are taken from ``gcloud compute instances describe``.  Skipped:
    boot disks, disks whose mode is READ_ONLY, and disks whose name (the
    last path component of the disk's 'source') contains 'swap' or 'tmp'.
    Each remaining disk gets a snapshot named ``data-<disk>-<timestamp>``.

    A failure on one disk is logged and recorded; the remaining disks are
    still attempted.
    """
    zone = self.expand_zone(zone)
    instance_name = self.instance_name(node, prefix, zone, devel=devel)
    info = json.loads(cmd(['gcloud', 'compute', 'instances', 'describe',
                           instance_name, '--zone', zone, '--format=json'],
                          verbose=0))
    # NOTE(review): errors are collected but, as far as this block shows,
    # never returned or raised -- confirm that is intended.
    errors = []
    for disk in info['disks']:
        # ignore boot disks (would be True)
        if disk.get('boot', False):
            continue
        # ignore read-only disks (like for globally mounted data volumes);
        # they would be snapshotted manually
        if disk.get('mode', 'READ_WRITE') == 'READ_ONLY':
            continue
        src = disk['source'].split('/')[-1]
        if 'swap' in src:
            continue
        if 'tmp' in src:
            continue
        target = 'data-%s-%s' % (src, time.strftime(TIMESTAMP_FORMAT))
        log("%s --> %s", src, target)
        try:
            cmd(['gcloud', 'compute', 'disks', 'snapshot',
                 '--project', self.project, src,
                 '--snapshot-names', target,
                 '--zone', zone], system=True)
        except Exception as mesg:
            # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
            log("WARNING: issue making snapshot %s -- %s", target, mesg)
            errors.append(mesg)
def start_cassandra():
    """
    Start the 'cassandra' service from the conf/deploy_devel/ configuration,
    symlink its system.log to ~/logs/cassandra.log, then sleep 30 seconds.
    """
    import time  # function-local import, as in the original

    log("start_cassandra...")
    deploy = admin.Services('conf/deploy_devel/', password='')
    deploy.start('cassandra')

    home = os.environ['HOME']
    link_command = "ln -sf %s/data/cassandra-0/logs/system.log %s/logs/cassandra.log" % (SALVUS_ROOT, home)
    cmd(link_command)
    log("cassandra started")

    log("waiting 30 seconds...")
    time.sleep(30)
def setup_quota():
    """
    Install the quota/cgroup related packages, write an /etc/fstab entry
    that mounts / with usrquota, remount /, and initialise and enable
    quotas on /.
    """
    packages = [
        'libatlas3gf-base', 'liblapack-dev', 'quota', 'quotatool',
        'linux-image-extra-virtual', 'cgroup-lite', 'cgmanager-utils',
        'cgroup-bin', 'libpam-cgroup', 'cgmanager', 'cgmanager-utils',
        'cgroup-bin', 'smem',
    ]
    log("quota packages")
    cmd("sudo apt-get install -y " + " ".join(packages), system=True)

    log("quota stuff")
    # tee (without -a) replaces the whole of /etc/fstab with this one line.
    fstab_line = 'LABEL=cloudimg-rootfs / ext4 defaults,usrquota 0 0'
    cmd("echo '%s' | sudo tee /etc/fstab" % fstab_line)
    cmd("sudo mount -o remount /")

    log("initializing quota, which will take a while")
    for quota_command in ("sudo quotacheck -fucm /", "sudo quotaon /"):
        cmd(quota_command)
def delete_devel_instances(self):
    """
    Delete every instance whose name contains '-devel-'.

    Instances are read from the text output of
    ``gcloud compute instances list`` (header row skipped); the first
    column is used as the name and the second as the zone.
    """
    for x in cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0).splitlines()[1:]:
        v = x.split()
        name = v[0]
        if '-devel-' in name:
            zone = v[1]
            # The instance status is irrelevant for deletion, so it is no
            # longer read into an unused local.
            log("deleting devel instance: %s"%name)
            cmd(['gcloud', 'compute', 'instances', 'delete', '--zone', zone, name], system=True)
def start_devel_instances(self):
    """
    Start every instance whose name contains '-devel-' and whose status
    (last column of ``gcloud compute instances list``) is TERMINATED.
    """
    listing = cmd(['gcloud', 'compute', 'instances', 'list'])
    for row in listing.splitlines()[1:]:  # [1:] skips the header row
        fields = row.split()
        name = fields[0]
        if '-devel-' not in name:
            continue
        zone = fields[1]
        if fields[-1] != "TERMINATED":
            continue
        log("starting %s"%name)
        cmd(['gcloud', 'compute', 'instances', 'start', '--zone', zone, name])
def create_boot_snapshot(self, node, prefix, zone='us-central1-c', devel=False):
    """
    Snapshot the boot disk of the given machine.  Typically used for
    replicating configuration.

    The disk passed to gcloud is the instance name, and the snapshot is
    named ``<prefix><node>-<timestamp>``.
    """
    zone = self.expand_zone(zone)
    instance_name = self.instance_name(node, prefix, zone, devel=devel)
    stamp = time.strftime(TIMESTAMP_FORMAT)
    snapshot_name = "%s%s-%s" % (prefix, node, stamp)
    snapshot_cmd = ['gcloud', 'compute', 'disks', 'snapshot']
    snapshot_cmd += ['--project', self.project, instance_name]
    snapshot_cmd += ['--snapshot-names', snapshot_name]
    snapshot_cmd += ['--zone', zone]
    cmd(snapshot_cmd, system=True)
def create_dev(self, node, zone='us-central1-c', machine_type='n1-standard-1', size=30, preemptible=True, address=''):
    """
    Create the root disk for a 'dev' instance from the newest 'smc'
    snapshot.

    The disk is a pd-standard disk of ``size`` GB named after the
    instance.  An error whose text contains 'already exists' is ignored;
    any other error is re-raised.

    NOTE(review): machine_type, preemptible and address are not used in
    the portion of the function visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='dev', zone=zone)
    log("creating %sGB hard disk root filesystem image based on last smc snapshot", size)
    try:
        cmd(['gcloud', 'compute', '--project', self.project,
             'disks', 'create', name,
             '--zone', zone,
             '--source-snapshot', self.newest_snapshot('smc'),
             '--size', size,
             '--type', 'pd-standard'])
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
def autostart(self, instance):
    """
    Ensure that each instance in the input is running.

    ``instance`` is iterated as a collection of name prefixes.  Every
    listed instance that is not RUNNING and whose name starts with one of
    them is started with a backgrounded ``gcloud ... start`` shell command.
    """
    listing = cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0)
    for row in listing.splitlines()[1:]:
        fields = row.split()
        if len(fields) <= 2 or fields[-1] == 'RUNNING':
            continue
        name, zone = fields[0], fields[1]
        if any(name.startswith(wanted) for wanted in instance):
            log("Starting %s... at %s", name, time.asctime())
            start = ' '.join(['gcloud', 'compute', 'instances', 'start', '--zone', zone, name])
            # trailing '&' backgrounds the start so the loop does not wait
            cmd(start + '&', system=True)
def _create_smc_server(self, node, zone='us-central1-c', machine_type='n1-highmem-2', disk_size=100, network='default', devel=False):
    """
    Create the pd-standard root disk for an 'smc' instance from the newest
    'smc' snapshot.

    An error whose text contains 'already exists' is ignored; any other
    error is re-raised.

    NOTE(review): machine_type, disk_size, network and the local
    ``disk_name`` are not used in the portion of the function visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='smc', zone=zone, devel=devel)
    disk_name = "%s-cassandra"%name

    log("creating hard disk root filesystem image")
    try:
        cmd(['gcloud', 'compute', '--project', self.project,
             'disks', 'create', name,
             '--zone', zone,
             '--source-snapshot', self.newest_snapshot('smc'),
             '--type', 'pd-standard'])
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
def _create_storage_server(self, node, zone, machine_type, disk_size, network, devel):
    """
    Create the pd-standard root disk for a 'storage' instance from the
    newest 'storage' snapshot.

    An error whose text contains 'already exists' is ignored; any other
    error is re-raised.

    NOTE(review): machine_type, disk_size, network and the local
    ``disk_name`` are not used in the portion of the function visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='storage', zone=zone, devel=devel)
    disk_name = "%s-projects"%name

    log("creating hard disk root filesystem image")
    try:
        cmd(['gcloud', 'compute', '--project', self.project,
             'disks', 'create', name,
             '--zone', zone,
             '--source-snapshot', self.newest_snapshot('storage'),
             '--type', 'pd-standard'])
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
def devel_etc_hosts(self):
    """
    Refresh the devel-instance entries in /etc/hosts.

    For every listed instance whose name contains '-devel-', a hosts line
    ``<column 5> <name> <name up to and including '-devel'>`` is built.
    If any were found, they are printed, /etc/hosts is rewritten into
    /tmp/hosts with all existing '-devel-' lines replaced by the new ones,
    /etc/hosts is backed up to /etc/hosts.0, and /tmp/hosts is copied over
    /etc/hosts (both copies via sudo).
    """
    hosts = []
    for x in cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0).splitlines()[1:]:
        v = x.split()
        name = v[0]
        if '-devel-' in name:
            i = name.find('-devel')
            # v[4] is presumably the instance's IP column -- TODO confirm
            hosts.append("%s %s %s"%(v[4], v[0], v[0][:i+6]))

    # NOTE(review): the original layout was lost; it is assumed that
    # /etc/hosts is only rewritten when at least one devel host was found.
    if not hosts:
        return

    print("\n".join(hosts))
    # Context managers make sure both files are closed (and /tmp/hosts is
    # flushed) before the sudo copy runs.
    with open("/etc/hosts") as f:
        x = f.readlines()
    y = [a.strip() for a in x if '-devel-' not in a]
    with open('/tmp/hosts', 'w') as f:
        f.write('\n'.join(y+hosts))
    cmd("sudo cp -v /etc/hosts /etc/hosts.0 && sudo cp -v /tmp/hosts /etc/hosts", system=True)
def set_metadata(self, prefix=''):
    """
    Publish the names of the RUNNING, non-devel instances whose name
    starts with ``prefix`` as the project metadata key
    ``<prefix>-servers`` (names separated by spaces).

    With an empty prefix, this is done in turn for 'smc', 'compute',
    'admin' and 'storage'.
    """
    if not prefix:
        for each_prefix in ['smc', 'compute', 'admin', 'storage']:
            self.set_metadata(each_prefix)
        return

    rows = [line.split() for line in
            cmd(['gcloud', 'compute', 'instances', 'list']).splitlines()[1:]]
    # TODO (carried over from the original): devel instances are excluded
    running = [row[0] for row in rows
               if row[-1] == 'RUNNING'
               and row[0].startswith(prefix)
               and 'devel' not in row[0]]
    value = '%s-servers=%s' % (prefix, ' '.join(running))
    cmd(['gcloud', 'compute', 'project-info', 'add-metadata', '--metadata', value])
def dev_instances(self):
    """
    Return the names (first column of ``gcloud compute instances list``)
    of all instances whose name starts with 'dev'.
    """
    listing = cmd(['gcloud', 'compute', 'instances', 'list'])
    first_columns = (line.split()[0] for line in listing.splitlines()[1:])
    return [name for name in first_columns if name.startswith('dev')]
def instance_costs(self):
    """
    Estimate the monthly cost of the instances and log a summary.

    All listed instances are counted by name prefix (compute / smc /
    other); only RUNNING ones contribute to the cost.  Two figures are
    accumulated from the PRICING table: one from the '-month' price and one
    from the '-hour' price times 30.5*24 hours, each scaled by the cpu
    count parsed from the machine type and by the PRICING entry for the
    zone's leading component.

    Returns the figure derived from the hourly price.
    """
    cost = cost_upper = 0
    tally = {'compute': 0, 'smc': 0, 'other': 0}
    listing = cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0)
    for row in listing.splitlines()[1:]:
        fields = row.split()
        zone = fields[1]
        machine_type = fields[2]
        status = fields[-1]
        name = fields[0]

        if name.startswith('compute'):
            tally['compute'] += 1
        elif name.startswith('smc'):
            tally['smc'] += 1
        else:
            tally['other'] += 1

        if status != "RUNNING":
            continue

        # e.g. 'n1-highmem-4' -> base 'n1-highmem', 4 cpus; names that do
        # not have three parts are priced as a single unit.
        parts = machine_type.split('-')
        if len(parts) == 3:
            base, cpus = '-'.join(parts[:2]), int(parts[2])
        else:
            base, cpus = machine_type, 1
        area = zone.split('-')[0]
        cost += PRICING[base+'-month'] * cpus * PRICING[area]
        cost_upper += PRICING[base+'-hour'] *30.5*24* cpus * PRICING[area]

    log("INSTANCES    : compute=%s, smc=%s, other=%s: %s/month (or %s/month with sustained use)",
        tally['compute'], tally['smc'], tally['other'], money(cost_upper), money(cost))
    return cost_upper
def compute_nodes(self, zone='us-central1-c'):
    """
    Return the node identifiers of the compute instances in ``zone``.

    An identifier is the instance name with the leading 'compute' and
    everything from the last '-' onwards removed; empty identifiers are
    dropped.  ``zone`` is compared as-is with each instance's 'zone' field.
    """
    prefix_len = len("compute")
    listing = json.loads(cmd(['gcloud', 'compute', 'instances', 'list',
                              '-r', '^compute.*', '--format=json'], verbose=0))
    nodes = []
    for entry in listing:
        if entry['zone'] != zone:
            continue
        full_name = entry['name']
        short = full_name[prefix_len:full_name.rfind('-')]
        if short:
            nodes.append(short)
    return nodes
def delete_secrets():
    """
    Remove sensitive material left over from the production install:
    root's ssh key pair, salvus's ssh keys, the salvus secrets directory
    and the salvus logs.
    """
    log("delete any possible sensitive info from the production install")
    steps = [
        ("wipe root ssh keys", "sudo rm -f /root/.ssh/id_rsa /root/.ssh/id_rsa.pub"),
        ("wipe salvus ssh keys", "sudo rm -rf /home/salvus/.ssh/id_rsa*"),
        ("wipe salvus secrets", "sudo rm -rf /home/salvus/salvus/salvus/data/secrets/"),
        ("wipe production logs", "sudo rm -rf /home/salvus/logs/*"),
    ]
    for message, command in steps:
        log(message)
        cmd(command)
def instance_costs(self):
    """
    Estimate the monthly cost of the RUNNING instances and log a summary.

    Instances are read from ``gcloud compute instances list``.  Each
    running instance is counted by name prefix and priced from the PRICING
    table: 'lower' uses the '-month' price (for preemptible instances, the
    '-hour-pre' price times 24*30.5), 'upper' uses the hourly price times
    30.5*24.  Both are scaled by the cpu count parsed from the machine
    type and by the PRICING entry for the zone's leading component.

    Custom machine types are not priced; they are reported and skipped.

    Returns a dict ``{'lower': ..., 'upper': ...}``.
    """
    cost_lower = cost_upper = 0
    n_compute = 0
    n_web = 0
    n_db = 0
    n_other = 0
    n_dev = 0
    n_admin = 0
    n_storage = 0
    n_preempt = 0
    for x in cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0).splitlines()[1:]:
        v = x.split()
        zone = v[1]
        machine_type = v[2]
        status = v[-1]
        if status != 'RUNNING':
            continue
        # A row with 7 fields presumably has the PREEMPTIBLE column filled
        # in at v[3].  Count the instance as preemptible only if that
        # column really says so (the counter used to be bumped for every
        # 7-field row).
        preempt = (len(v) == 7 and v[3] == 'true')
        if preempt:
            n_preempt += 1
        if v[0].startswith('compute'):
            n_compute += 1
        elif v[0].startswith('web'):
            n_web += 1
        elif v[0].startswith('db'):
            n_db += 1
        elif v[0].startswith('dev'):
            n_dev += 1
        elif v[0].startswith('admin'):
            n_admin += 1
        elif v[0].startswith('storage'):
            n_storage += 1
        else:
            n_other += 1
        t = machine_type.split('-')
        # Custom types look like 'custom-4-5120'; the old test
        # (base == 'custom') could never match those because the base was
        # parsed as 'custom-4', leading to a PRICING lookup failure.
        if 'custom' in t:
            print("warning -custom machine types not supported; skipping ", x)
            continue
        if len(t) == 3:
            b = '-'.join(t[:2])
            cpus = int(t[2])
        else:
            b = machine_type
            cpus = 1
        if preempt:
            pricing_hour = PRICING[b+'-hour-pre']
            pricing_month = pricing_hour*24*30.5
        else:
            pricing_hour = PRICING[b+'-hour']
            pricing_month = PRICING[b+'-month']
        cost_lower += pricing_month * cpus * PRICING[zone.split('-')[0]]
        cost_upper += pricing_hour *30.5*24* cpus * PRICING[zone.split('-')[0]]
    log("INSTANCES    : %8s/month -- (or %8s/month without sustained!); compute=%s, web=%s, db=%s, dev=%s, admin=%s, storage=%s, other=%s (preempt=%s)",
        money(cost_lower), money(cost_upper), n_compute, n_web, n_db, n_dev, n_admin, n_storage, n_other, n_preempt)
    return {'lower':cost_lower, 'upper':cost_upper}
def _create_compute_server(self, node, zone='us-central1-c', machine_type='n1-highmem-4', network='default', projects_ssd=False, base_ssd=False, projects_size=150, devel=False):
    """
    Create the root disk for a 'compute' instance from the newest
    'compute' snapshot (a pd-ssd disk when ``base_ssd`` is true).

    An error whose text contains 'already exists' is logged and ignored;
    any other error is re-raised.

    NOTE(review): machine_type, network, projects_ssd and projects_size
    are not used in the portion of the function visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='compute', zone=zone, devel=devel)

    log("creating root filesystem image")
    try:
        opts = ['gcloud', 'compute', '--project', self.project,
                'disks', 'create', name,
                '--zone', zone,
                '--source-snapshot', self.newest_snapshot('compute')]
        if base_ssd:
            opts.extend(['--type', 'pd-ssd'])
        cmd(opts)
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
        log("%s already exists", name)
def _create_storage_server(self, node, zone, machine_type, disk_size, network, devel):
    """
    Create the pd-standard root disk for a 'storage' instance from the
    newest 'storage' snapshot.

    An error whose text contains 'already exists' is ignored; any other
    error is re-raised.

    NOTE(review): machine_type, disk_size, network and the local
    ``disk_name`` are not used in the portion of the function visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='storage', zone=zone, devel=devel)
    disk_name = "%s-projects" % name

    log("creating hard disk root filesystem image")
    try:
        cmd([
            'gcloud', 'compute', '--project', self.project,
            'disks', 'create', name,
            '--zone', zone,
            '--source-snapshot', self.newest_snapshot('storage'),
            '--type', 'pd-standard'
        ])
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
def create_data_snapshot(self, node, prefix, zone='us-central1-c', devel=False):
    """
    Snapshot the data disks on the given machine.  Typically used for
    backing up very important data.

    Disks are taken from ``gcloud compute instances describe``.  Skipped:
    boot disks, disks whose mode is READ_ONLY, and disks whose name (the
    last path component of the disk's 'source') contains 'swap' or 'tmp'.
    Each remaining disk gets a snapshot named ``data-<disk>-<timestamp>``.

    A failure on one disk is logged and recorded; the remaining disks are
    still attempted.
    """
    zone = self.expand_zone(zone)
    instance_name = self.instance_name(node, prefix, zone, devel=devel)
    info = json.loads(
        cmd([
            'gcloud', 'compute', 'instances', 'describe', instance_name,
            '--zone', zone, '--format=json'
        ], verbose=0))
    # NOTE(review): errors are collected but, as far as this block shows,
    # never returned or raised -- confirm that is intended.
    errors = []
    for disk in info['disks']:
        # ignore boot disks (would be True)
        if disk.get('boot', False):
            continue
        # ignore read-only disks (like for globally mounted data volumes);
        # they would be snapshotted manually
        if disk.get('mode', 'READ_WRITE') == 'READ_ONLY':
            continue
        src = disk['source'].split('/')[-1]
        if 'swap' in src:
            continue
        if 'tmp' in src:
            continue
        target = 'data-%s-%s' % (src, time.strftime(TIMESTAMP_FORMAT))
        log("%s --> %s", src, target)
        try:
            cmd([
                'gcloud', 'compute', 'disks', 'snapshot',
                '--project', self.project, src,
                '--snapshot-names', target,
                '--zone', zone
            ], system=True)
        except Exception as mesg:
            # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
            log("WARNING: issue making snapshot %s -- %s", target, mesg)
            errors.append(mesg)
def create_boot_snapshot(self, node, prefix, zone='us-central1-c', devel=False):
    """
    Snapshot the boot disk of the given machine.  Typically used for
    replicating configuration.

    The disk handed to gcloud is the instance name; the snapshot is called
    ``<prefix><node>-<timestamp>``.
    """
    zone = self.expand_zone(zone)
    instance_name = self.instance_name(node, prefix, zone, devel=devel)
    snapshot_name = "%s%s-%s" % (prefix, node, time.strftime(TIMESTAMP_FORMAT))

    gcloud_args = ['gcloud', 'compute', 'disks', 'snapshot']
    gcloud_args.extend(['--project', self.project])
    gcloud_args.append(instance_name)
    gcloud_args.extend(['--snapshot-names', snapshot_name])
    gcloud_args.extend(['--zone', zone])
    cmd(gcloud_args, system=True)
def snapshot_usage(self):
    """
    Return the total snapshot storage in gigabytes (10**9 bytes), rounded
    up to a whole number.

    Sizes come from the 'storageBytes' field of
    ``gcloud compute snapshots list --format json``.
    """
    usage = 0
    for s in json.loads(
            cmd([
                'gcloud', 'compute', 'snapshots', 'list', '--format', 'json'
            ], verbose=0)):
        # storageBytes need not be set, e.g., while the snapshot is still
        # being made; such snapshots count as 0 instead of raising KeyError.
        usage += float(s.get("storageBytes", 0)) / 1000 / 1000 / 1000.
    return int(math.ceil(usage))
def set_metadata(self, prefix=''):
    """
    Publish the names of the RUNNING, non-devel instances whose name
    starts with ``prefix`` as the project metadata key
    ``<prefix>-servers`` (names separated by commas).

    With an empty prefix, this is done in turn for 'smc', 'compute',
    'admin' and 'storage'.
    """
    if not prefix:
        for group in ['smc', 'compute', 'admin', 'storage']:
            self.set_metadata(group)
        return

    def wanted(fields):
        # TODO (carried over from the original): devel instances are excluded
        if fields[-1] != 'RUNNING':
            return False
        return fields[0].startswith(prefix) and 'devel' not in fields[0]

    listing = cmd(['gcloud', 'compute', 'instances', 'list']).splitlines()[1:]
    all_fields = [line.split() for line in listing]
    joined = ','.join(fields[0] for fields in all_fields if wanted(fields))
    cmd([
        'gcloud', 'compute', 'project-info', 'add-metadata', '--metadata',
        "%s-servers=%s" % (prefix, joined)
    ])
def autostart(self, instance):
    """
    Ensure that each instance in the input is running.

    ``instance`` is iterated as a collection of name prefixes; any listed
    instance that is not RUNNING and matches one of them is started by a
    backgrounded ``gcloud compute instances start`` shell command.
    """
    rows = cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0).splitlines()[1:]
    for fields in (row.split() for row in rows):
        if not (len(fields) > 2 and fields[-1] != 'RUNNING'):
            continue
        name = fields[0]
        zone = fields[1]
        for prefix in instance:
            if not name.startswith(prefix):
                continue
            log("Starting %s... at %s", name, time.asctime())
            start_line = ' '.join([
                'gcloud', 'compute', 'instances', 'start', '--zone', zone, name
            ])
            # '&' backgrounds the command so the next instance is not delayed
            cmd(start_line + '&', system=True)
            break  # one start per instance, even if several prefixes match
def snapshot_usage(self):
    """
    Return the total snapshot storage in gigabytes (10**9 bytes), rounded
    up to a whole number.  Snapshots without a 'storageBytes' field (e.g.
    while the snapshot is being made) count as 0.
    """
    listing = json.loads(
        cmd(['gcloud', 'compute', 'snapshots', 'list', '--format', 'json'],
            verbose=0))
    gigabytes = sum(
        float(entry.get("storageBytes", 0)) / 1000 / 1000 / 1000.
        for entry in listing)
    return int(math.ceil(gigabytes))
def _create_compute_server(self, node, zone='us-central1-c', machine_type='n1-highmem-4', network='default', projects_ssd=False, base_ssd=False, projects_size=150, devel=False, address=None, preemptible=False):
    """
    Create the root disk for a 'compute' instance from the newest
    'compute' snapshot (a pd-ssd disk when ``base_ssd`` is true).

    An error whose text contains 'already exists' is logged and ignored;
    any other error is re-raised.

    NOTE(review): machine_type, network, projects_ssd, projects_size,
    address and preemptible are not used in the portion of the function
    visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='compute', zone=zone, devel=devel)

    log("creating root filesystem image")
    try:
        opts = ['gcloud', 'compute', '--project', self.project,
                'disks', 'create', name,
                '--zone', zone,
                '--source-snapshot', self.newest_snapshot('compute')]
        if base_ssd:
            opts.extend(['--type', 'pd-ssd'])
        cmd(opts)
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
        log("%s already exists", name)
def delete_all_old_snapshots(self, max_age_days=7, quiet=False):
    """
    Delete snapshots whose trailing timestamp is older than
    ``max_age_days`` days.

    Snapshot names come from ``gcloud compute snapshots list``.  Only names
    ending in a timestamp that parses with TIMESTAMP_FORMAT are considered;
    they are grouped by the part of the name before the timestamp.  A group
    is left alone if it holds a single snapshot or if its oldest snapshot
    is not older than the cutoff; otherwise every snapshot of the group
    older than the cutoff is deleted in one gcloud call.

    :param max_age_days: age threshold in days.
    :param quiet: when true, ``--quiet`` is passed to gcloud.
    """
    snapshots = [x.split()[0] for x in cmd(['gcloud', 'compute', 'snapshots', 'list']).splitlines()[1:]]
    log("snapshots=%s", snapshots)
    # restrict to snapshots that end with a timestamp
    # and for each restructure by base
    w = {}
    n = len('2015-05-03-081013')
    for s in snapshots:
        stamp = s[-n:]
        try:
            time.strptime(stamp, TIMESTAMP_FORMAT)
        except ValueError:
            # not a timestamped name; only the parse failure is swallowed
            # now, rather than every exception
            continue
        w.setdefault(s[:-n], []).append(stamp)
    print(w)

    # now decide what to delete
    to_delete = []
    cutoff = time.strftime(TIMESTAMP_FORMAT, time.gmtime(time.time()-60*60*24*max_age_days))
    for base in w:
        v = w[base]
        v.sort()
        if len(v) <= 1 or v[0] >= cutoff:
            # definitely don't delete last one or if all are new
            continue
        # NOTE(review): if every snapshot of a multi-snapshot group is
        # older than the cutoff, all of them (including the newest) are
        # selected -- confirm that is intended.
        for x in v:
            if x < cutoff:
                to_delete.append(base + x)

    if not to_delete:
        log("no old snapshots to delete")
    else:
        log("deleting these snapshots: %s", to_delete)
        a = ['gcloud', 'compute', 'snapshots', 'delete']
        if quiet:
            a.append("--quiet")
        cmd(a + to_delete, system=True)
def compute_nodes(self, zone='us-central1-c'):
    """
    Return the node identifiers of the compute instances in ``zone``.

    An identifier is the instance name with the leading 'compute' and
    everything from the last '-' onwards removed.  ``zone`` is compared
    as-is with each instance's 'zone' field.
    """
    skip = len("compute")
    instances = json.loads(
        cmd(['gcloud', 'compute', 'instances', 'list',
             '-r', '^compute.*', '--format=json'], verbose=0))
    result = []
    for inst in instances:
        if inst['zone'] == zone:
            full = inst['name']
            result.append(full[skip:full.rfind('-')])
    return result
def create_dev(self, node, zone='us-central1-c', machine_type='n1-standard-1', size=30, preemptible=True, address=''):
    """
    Create the root disk for a 'dev' instance from the newest 'smc'
    snapshot.

    The disk is a pd-standard disk of ``size`` GB named after the
    instance.  An error whose text contains 'already exists' is ignored;
    any other error is re-raised.

    NOTE(review): machine_type, preemptible and address are not used in
    the portion of the function visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='dev', zone=zone)
    log(
        "creating %sGB hard disk root filesystem image based on last smc snapshot",
        size)
    try:
        cmd([
            'gcloud', 'compute', '--project', self.project,
            'disks', 'create', name,
            '--zone', zone,
            '--source-snapshot', self.newest_snapshot('smc'),
            '--size', size,
            '--type', 'pd-standard'
        ])
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
def init_compute_server():
    """
    Start the compute server, link its log file into ~/logs, wait five
    seconds, and then pipe a CoffeeScript snippet to ``coffee`` that adds
    the module-level ``hostname`` as a server in the 'salvus' keyspace.
    """
    import time  # function-local import, as in the original

    log("starting compute server")
    cmd("compute start")

    log("making log link: ~/logs/compute.log")
    home = os.environ['HOME']
    cmd("ln -sf /projects/conf/compute.log %s/logs/compute.log"%home)

    log("waiting a few seconds")
    time.sleep(5)

    log("adding compute server to database")
    add_server = r"""echo "require('compute').compute_server(keyspace:'salvus', cb:(e,s)->console.log(e); s.add_server(host:'%s', cb:(e)->console.log('done',e);process.exit(0)))" | coffee """
    cmd(add_server%hostname)
def snapshots(self, prefix, devel=False):
    """
    Return the sorted names of all snapshots whose name starts with
    ``prefix`` (or with ``devel-<prefix>`` when ``devel`` is true).

    Names are the first column of ``gcloud compute snapshots list``; blank
    lines are ignored.
    """
    if devel:
        p = 'devel-%s'%prefix
    else:
        p = prefix
    w = []
    for x in cmd(['gcloud', 'compute', 'snapshots', 'list'], verbose=0).splitlines()[1:]:
        v = x.split()
        # The second column used to be summed into an unused total; that
        # dead code (and its int() parse, which could raise on unexpected
        # output) has been removed.
        if len(v) > 0 and v[0].startswith(p):
            w.append(v[0])
    w.sort()
    return w
def create_dev(self, node, zone='us-central1-c', machine_type='n1-standard-1', size=30, preemptible=True, address=''):
    """
    Create a 'dev' instance together with its root disk.

    The root disk (pd-standard, ``size`` GB) is created from the newest
    'smc' snapshot; an 'already exists' error is ignored.  The instance is
    then created with that disk as boot disk, the tags
    'http-server,https-server,dev', optionally ``--preemptible`` and
    optionally a fixed ``--address``.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='dev', zone=zone)

    log("creating %sGB hard disk root filesystem image based on last smc snapshot", size)
    try:
        disk_cmd = ['gcloud', 'compute', '--project', self.project,
                    'disks', 'create', name,
                    '--zone', zone,
                    '--source-snapshot', self.newest_snapshot('smc'),
                    '--size', size,
                    '--type', 'pd-standard']
        cmd(disk_cmd)
    except Exception as err:
        if 'already exists' not in str(err):
            raise

    log("create and starting dev compute instance")
    instance_cmd = ['gcloud', 'compute', '--project', self.project,
                    'instances', 'create', name,
                    '--zone', zone,
                    '--machine-type', machine_type]
    if preemptible:
        instance_cmd.append('--preemptible')
    instance_cmd.extend(['--tags', 'http-server,https-server,dev'])
    boot_disk = 'name=%s,device-name=%s,mode=rw,boot=yes'%(name, name)
    instance_cmd.extend(['--disk', boot_disk])
    if address:
        instance_cmd.extend(["--address", address])
    cmd(instance_cmd, system=True)
def _create_smc_server(self, node, zone='us-central1-c', machine_type='n1-highmem-2', disk_size=100, network='default', devel=False):
    """
    Create the pd-standard root disk for an 'smc' instance from the newest
    'smc' snapshot.

    An error whose text contains 'already exists' is ignored; any other
    error is re-raised.

    NOTE(review): machine_type, disk_size, network and the local
    ``disk_name`` are not used in the portion of the function visible here.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='smc', zone=zone, devel=devel)
    disk_name = "%s-cassandra" % name

    log("creating hard disk root filesystem image")
    try:
        cmd([
            'gcloud', 'compute', '--project', self.project,
            'disks', 'create', name,
            '--zone', zone,
            '--source-snapshot', self.newest_snapshot('smc'),
            '--type', 'pd-standard'
        ])
    except Exception as mesg:
        # "as" form: valid on Python 2.6+ and 3, unlike "except X, e".
        if 'already exists' not in str(mesg):
            raise
def disk_costs(self):
    """
    Estimate the monthly cost of all persistent disks and log a summary.

    For each row of ``gcloud compute disks list``, the third column is
    read as the size and the fourth as the disk type; the cost is the size
    times the PRICING entry for that type.  Sizes of pd-standard and
    pd-ssd disks are also totalled for the log line.

    Returns the total cost.
    """
    total = 0
    by_type = {'pd-standard': 0, 'pd-ssd': 0}
    listing = cmd(['gcloud', 'compute', 'disks', 'list'], verbose=0)
    for row in listing.splitlines()[1:]:
        fields = row.split()
        size = int(fields[2])
        typ = fields[3]
        if typ in by_type:
            by_type[typ] += size
        # every disk type is priced, not only the two that are totalled
        total += size * PRICING[typ]
    log("DISK         : %8s/month -- storage (standard=%sGB, ssd=%sGB)",
        money(total), by_type['pd-standard'], by_type['pd-ssd'])
    return total
def _create_compute_server(self, node, zone='us-central1-c', machine_type='n1-highmem-4', network='default', projects_ssd=False, base_ssd=False, projects_size=150, devel=False, address=None, preemptible=False):
    """
    Create a 'compute' instance together with its two disks.

    1. Root disk, from the newest 'compute' snapshot (pd-ssd when
       ``base_ssd``); an 'already exists' error is logged and ignored.
    2. A ``<name>-projects`` disk of ``projects_size`` (pd-ssd when
       ``projects_ssd``); an 'already exists' error is ignored.
    3. The instance itself, with both disks attached, tag 'compute', the
       logging.write scope, and either ``--preemptible`` or the MIGRATE
       maintenance policy; ``address`` is passed on when given.

    For devel instances, ``set_boot_auto_delete`` is called afterwards.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='compute', zone=zone, devel=devel)

    log("creating root filesystem image")
    try:
        root_cmd = ['gcloud', 'compute', '--project', self.project,
                    'disks', 'create', name,
                    '--zone', zone,
                    '--source-snapshot', self.newest_snapshot('compute')]
        if base_ssd:
            root_cmd += ['--type', 'pd-ssd']
        cmd(root_cmd)
    except Exception as err:
        if 'already exists' not in str(err):
            raise
        log("%s already exists", name)

    log("creating /dev/sdb persistent disk")
    disk_name = "%s-projects"%name
    try:
        projects_cmd = ['gcloud', 'compute', '--project', self.project,
                        'disks', 'create', disk_name,
                        '--size', projects_size,
                        '--zone', zone]
        if projects_ssd:
            projects_cmd += ['--type', 'pd-ssd']
        cmd(projects_cmd)
    except Exception as err:
        if 'already exists' not in str(err):
            raise

    log("creating and starting compute instance")
    instance_cmd = ['gcloud', 'compute', '--project', self.project,
                    'instances', 'create', name,
                    '--zone', zone,
                    '--machine-type', machine_type,
                    '--network', network]
    if address:
        instance_cmd += ["--address", address]
    if preemptible:
        instance_cmd.append('--preemptible')
    else:
        instance_cmd += ['--maintenance-policy', 'MIGRATE']
    instance_cmd += [
        '--scopes', 'https://www.googleapis.com/auth/logging.write',
        '--disk', 'name=%s,device-name=%s,mode=rw,boot=yes'%(name, name),
        '--disk', 'name=%s'%disk_name, 'device-name=%s'%disk_name, 'mode=rw',
        '--tags', 'compute',
    ]
    cmd(instance_cmd, system=True)

    if devel:
        self.set_boot_auto_delete(name=name, zone=zone)
def instance_costs(self):
    """
    Estimate the monthly cost of the RUNNING instances and log a summary.

    Instances are read from ``gcloud compute instances list``.  Each
    running instance is counted by name prefix (compute / smc / other) and
    priced from the PRICING table: 'lower' uses the '-month' price (for
    preemptible instances, the '-hour-pre' price times 24*30.5), 'upper'
    uses the hourly price times 30.5*24.  Both are scaled by the cpu count
    parsed from the machine type and by the PRICING entry for the zone's
    leading component.

    Returns a dict ``{'lower': ..., 'upper': ...}``.
    """
    cost_lower = cost_upper = 0
    n_compute = 0
    n_smc = 0
    n_other = 0
    n_preempt = 0
    for x in cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0).splitlines()[1:]:
        v = x.split()
        zone = v[1]
        machine_type = v[2]
        status = v[-1]
        if status != 'RUNNING':
            continue
        # A row with 7 fields presumably has the PREEMPTIBLE column filled
        # in at v[3].  Count the instance as preemptible only if that
        # column really says so (the counter used to be bumped for every
        # 7-field row).
        preempt = (len(v) == 7 and v[3] == 'true')
        if preempt:
            n_preempt += 1
        if v[0].startswith('compute'):
            n_compute += 1
        elif v[0].startswith('smc'):
            n_smc += 1
        else:
            n_other += 1
        t = machine_type.split('-')
        if len(t) == 3:
            b = '-'.join(t[:2])
            cpus = int(t[2])
        else:
            b = machine_type
            cpus = 1
        if preempt:
            pricing_hour = PRICING[b + '-hour-pre']
            pricing_month = pricing_hour * 24 * 30.5
        else:
            pricing_hour = PRICING[b + '-hour']
            pricing_month = PRICING[b + '-month']
        cost_lower += pricing_month * cpus * PRICING[zone.split('-')[0]]
        cost_upper += pricing_hour * 30.5 * 24 * cpus * PRICING[zone.split('-')[0]]
    log(
        "INSTANCES    : compute=%s, smc=%s, other=%s (preempt=%s): %s/month (or %s/month with sustained use)",
        n_compute, n_smc, n_other, n_preempt, money(cost_upper),
        money(cost_lower))
    return {'lower': cost_lower, 'upper': cost_upper}
def init_compute_server():
    """
    Start the compute server, link its log into ~/logs/compute.log, pause
    five seconds, and register the module-level ``hostname`` in the
    'salvus' keyspace by piping a CoffeeScript one-liner to ``coffee``.
    """
    import time  # function-local import, as in the original

    log("starting compute server")
    cmd("compute start")

    log("making log link: ~/logs/compute.log")
    link_line = "ln -sf /projects/conf/compute.log %s/logs/compute.log" % os.environ['HOME']
    cmd(link_line)

    log("waiting a few seconds")
    time.sleep(5)

    log("adding compute server to database")
    register_line = r"""echo "require('compute').compute_server(keyspace:'salvus', cb:(e,s)->console.log(e); s.add_server(host:'%s', cb:(e)->console.log('done',e);process.exit(0)))" | coffee """ % hostname
    cmd(register_line)
def init_cassandra_users():
    """
    Create the Cassandra users 'hub' and 'salvus' and change the password
    of the default 'cassandra' user.

    Passwords are read from ``<SECRETS>/cassandra/hub`` and
    ``<SECRETS>/cassandra/salvus``.  All statements are run through cqlsh
    on localhost using the default cassandra/cassandra credentials.  The
    hub credentials are also written to ``~/.cqlshrc``, and the
    'cassandra' user's password is finally set to the hub password.
    """
    # Files are handled with context managers so that handles are closed
    # deterministically (in particular .cqlshrc is complete on disk before
    # the next cqlsh call).
    with open("%s/cassandra/hub"%SECRETS) as f:
        pw_hub = f.read()
    cmd("""echo "CREATE USER hub WITH PASSWORD '%s' SUPERUSER;" | cqlsh localhost -u cassandra -p cassandra"""%pw_hub, verbose=0)

    rc = "%s/.cqlshrc"%os.environ['HOME']
    log("writing %s", rc)
    # NOTE(review): the line breaks of this template were lost in the
    # source as received; one setting per line is assumed -- confirm.
    with open(rc, 'w') as f:
        f.write("""
[authentication]
username=hub
password=%s
"""%pw_hub)

    with open("%s/cassandra/salvus"%SECRETS) as f:
        pw_salvus = f.read()
    cmd("""echo "CREATE USER salvus WITH PASSWORD '%s' SUPERUSER;" | cqlsh localhost -u cassandra -p cassandra"""%pw_salvus, verbose=0)

    cmd("""echo "ALTER USER cassandra WITH PASSWORD '%s';" | cqlsh localhost -u cassandra -p cassandra"""%pw_hub, verbose=0)
def instance_costs(self):
    """
    Estimate the monthly cost of the RUNNING instances and log a summary.

    Instances are read from ``gcloud compute instances list``.  Each
    running instance is counted by name prefix (compute / smc / other) and
    priced from the PRICING table: 'lower' uses the '-month' price (for
    preemptible instances, the '-hour-pre' price times 24*30.5), 'upper'
    uses the hourly price times 30.5*24.  Both are scaled by the cpu count
    parsed from the machine type and by the PRICING entry for the zone's
    leading component.

    Returns a dict ``{'lower': ..., 'upper': ...}``.
    """
    cost_lower = cost_upper = 0
    n_compute = 0
    n_smc = 0
    n_other = 0
    n_preempt = 0
    for x in cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0).splitlines()[1:]:
        v = x.split()
        zone = v[1]
        machine_type = v[2]
        status = v[-1]
        if status != 'RUNNING':
            continue
        # A row with 7 fields presumably has the PREEMPTIBLE column filled
        # in at v[3].  Count the instance as preemptible only if that
        # column really says so (the counter used to be bumped for every
        # 7-field row).
        preempt = (len(v) == 7 and v[3] == 'true')
        if preempt:
            n_preempt += 1
        if v[0].startswith('compute'):
            n_compute += 1
        elif v[0].startswith('smc'):
            n_smc += 1
        else:
            n_other += 1
        t = machine_type.split('-')
        if len(t) == 3:
            b = '-'.join(t[:2])
            cpus = int(t[2])
        else:
            b = machine_type
            cpus = 1
        if preempt:
            pricing_hour = PRICING[b+'-hour-pre']
            pricing_month = pricing_hour*24*30.5
        else:
            pricing_hour = PRICING[b+'-hour']
            pricing_month = PRICING[b+'-month']
        cost_lower += pricing_month * cpus * PRICING[zone.split('-')[0]]
        cost_upper += pricing_hour *30.5*24* cpus * PRICING[zone.split('-')[0]]
    log("INSTANCES    : compute=%s, smc=%s, other=%s (preempt=%s): %s/month (or %s/month with sustained use)",
        n_compute, n_smc, n_other, n_preempt, money(cost_upper), money(cost_lower))
    return {'lower':cost_lower, 'upper':cost_upper}
def _create_storage_server(self, node, zone, machine_type, disk_size, network, devel):
    """
    Create a 'storage' instance together with its disks.

    1. pd-standard root disk from the newest 'storage' snapshot.
    2. If ``disk_size`` is truthy, a pd-standard ``<name>-projects`` disk
       of that size.
    3. The instance itself, tagged 'storage', with the MIGRATE maintenance
       policy and the logging.write scope.  Non-devel instances
       additionally get the devstorage.full_control scope and
       ``--no-boot-disk-auto-delete``.

    'already exists' errors are ignored in all three steps.  For devel
    instances, ``set_boot_auto_delete`` is called afterwards.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='storage', zone=zone, devel=devel)
    disk_name = "%s-projects"%name

    log("creating hard disk root filesystem image")
    try:
        root_cmd = ['gcloud', 'compute', '--project', self.project,
                    'disks', 'create', name,
                    '--zone', zone,
                    '--source-snapshot', self.newest_snapshot('storage'),
                    '--type', 'pd-standard']
        cmd(root_cmd)
    except Exception as err:
        if 'already exists' not in str(err):
            raise

    if disk_size:
        log("creating persistent disk on which to store projects")
        try:
            data_cmd = ['gcloud', 'compute', '--project', self.project,
                        'disks', 'create', disk_name,
                        '--size', disk_size,
                        '--zone', zone,
                        '--type', 'pd-standard']
            cmd(data_cmd)
        except Exception as err:
            if 'already exists' not in str(err):
                raise

    log("create storage compute instance")
    instance_cmd = ['gcloud', 'compute', '--project', self.project,
                    'instances', 'create', name,
                    '--zone', zone,
                    '--tags', 'storage',
                    '--machine-type', machine_type,
                    '--network', network,
                    '--maintenance-policy', 'MIGRATE',
                    '--scopes']
    if not devel:
        instance_cmd.append('https://www.googleapis.com/auth/devstorage.full_control')
    instance_cmd.append('https://www.googleapis.com/auth/logging.write')
    instance_cmd.append('--disk=name=%s,device-name=%s,mode=rw,boot=yes'%(name, name))
    if not devel:
        instance_cmd.append('--no-boot-disk-auto-delete')
    if disk_size:
        instance_cmd.append('--disk=name=%s,device-name=%s,mode=rw'%(disk_name, disk_name))
    try:
        cmd(instance_cmd)
    except Exception as err:
        if 'already exists' not in str(err):
            raise

    if devel:
        self.set_boot_auto_delete(name=name, zone=zone)
def init_cassandra_users():
    """
    Create the Cassandra users 'hub' and 'salvus' and change the password
    of the default 'cassandra' user.

    Passwords are read from ``<SECRETS>/cassandra/hub`` and
    ``<SECRETS>/cassandra/salvus``.  All statements are run through cqlsh
    on localhost using the default cassandra/cassandra credentials.  The
    hub credentials are also written to ``~/.cqlshrc``, and the
    'cassandra' user's password is finally set to the hub password.
    """
    # Files are handled with context managers so that handles are closed
    # deterministically (in particular .cqlshrc is complete on disk before
    # the next cqlsh call).
    with open("%s/cassandra/hub" % SECRETS) as f:
        pw_hub = f.read()
    cmd("""echo "CREATE USER hub WITH PASSWORD '%s' SUPERUSER;" | cqlsh localhost -u cassandra -p cassandra""" % pw_hub, verbose=0)

    rc = "%s/.cqlshrc" % os.environ['HOME']
    log("writing %s", rc)
    # NOTE(review): the line breaks of this template were lost in the
    # source as received; one setting per line is assumed -- confirm.
    with open(rc, 'w') as f:
        f.write("""
[authentication]
username=hub
password=%s
""" % pw_hub)

    with open("%s/cassandra/salvus" % SECRETS) as f:
        pw_salvus = f.read()
    cmd("""echo "CREATE USER salvus WITH PASSWORD '%s' SUPERUSER;" | cqlsh localhost -u cassandra -p cassandra""" % pw_salvus, verbose=0)

    cmd("""echo "ALTER USER cassandra WITH PASSWORD '%s';" | cqlsh localhost -u cassandra -p cassandra""" % pw_hub, verbose=0)
def _create_smc_server(self, node, zone='us-central1-c', machine_type='n1-highmem-2', disk_size=100, network='default', devel=False):
    """
    Create an 'smc' instance together with its disks.

    1. pd-standard root disk from the newest 'smc' snapshot.
    2. If ``disk_size`` is truthy, a pd-ssd ``<name>-cassandra`` disk of
       that size for Cassandra's files.
    3. The instance itself, tagged 'http-server,https-server,hub', with
       the MIGRATE maintenance policy, the logging.write scope, the root
       disk as boot disk and (if created) the Cassandra disk attached.

    'already exists' errors from the two disk creations are ignored.  For
    devel instances, ``set_boot_auto_delete`` is called afterwards.
    """
    zone = self.expand_zone(zone)
    name = self.instance_name(node=node, prefix='smc', zone=zone, devel=devel)
    disk_name = "%s-cassandra"%name

    log("creating hard disk root filesystem image")
    try:
        root_cmd = ['gcloud', 'compute', '--project', self.project,
                    'disks', 'create', name,
                    '--zone', zone,
                    '--source-snapshot', self.newest_snapshot('smc'),
                    '--type', 'pd-standard']
        cmd(root_cmd)
    except Exception as err:
        if 'already exists' not in str(err):
            raise

    if disk_size:
        log("creating persistent SSD disk on which to store Cassandra's files")
        try:
            data_cmd = ['gcloud', 'compute', '--project', self.project,
                        'disks', 'create', disk_name,
                        '--size', disk_size,
                        '--zone', zone,
                        '--type', 'pd-ssd']
            cmd(data_cmd)
        except Exception as err:
            if 'already exists' not in str(err):
                raise

    log("create and starting smc compute instance")
    instance_cmd = ['gcloud', 'compute', '--project', self.project,
                    'instances', 'create', name,
                    '--zone', zone,
                    '--machine-type', machine_type,
                    '--network', network,
                    '--maintenance-policy', 'MIGRATE',
                    '--scopes', 'https://www.googleapis.com/auth/logging.write',
                    '--tags', 'http-server,https-server,hub']
    # disk properties are passed as separate arguments, as in the original
    instance_cmd += ['--disk', 'name=%s'%name, 'device-name=%s'%name, 'mode=rw', 'boot=yes']
    if disk_size:
        instance_cmd += ['--disk', 'name=%s'%disk_name, 'device-name=%s'%disk_name, 'mode=rw']
    cmd(instance_cmd, system=True)

    if devel:
        self.set_boot_auto_delete(name=name, zone=zone)
def install_startup_script():
    """
    Install the crontab stored at ``<SALVUS_ROOT>/scripts/dev/crontab.bak``.

    Per the original notes, startup is meant to: run the part of the
    update script that builds things (no downloading), start the compute
    daemon, and start all services.
    """
    crontab_file = "%s/scripts/dev/crontab.bak" % SALVUS_ROOT
    cmd("crontab " + crontab_file)
def create_data_secrets():
    """
    Populate the SECRETS directory for a development install: a
    placeholder sendgrid password, freshly generated Cassandra passwords
    for 'hub' and 'salvus', and a self-signed certificate bundle at
    ``sagemath.com/nopassphrase.pem``.
    """
    cmd("mkdir -p %s"%SECRETS)

    log("sendgrid fake password (will not work)")
    cmd("echo 'will-not-work' > %s/sendgrid_email_password"%SECRETS)

    log("generate cassandra passwords")
    cmd("mkdir -p %s/cassandra"%SECRETS)
    for user in ('hub', 'salvus'):
        cmd("makepasswd -q > %s/cassandra/%s"%(SECRETS, user))

    cmd("mkdir -p %s/sagemath.com"%SECRETS)
    # key.pem and cert.pem are produced in the current working directory
    # and then concatenated into the bundle.
    make_cert = "yes US | openssl req -x509 -newkey rsa:2048 -keyout key.pem -out cert.pem -nodes -days 10000 && cat key.pem cert.pem > %s/sagemath.com/nopassphrase.pem"
    cmd(make_cert%SECRETS)
def create_ssh_keys():
    """
    Generate fresh passphrase-less 2048-bit RSA ssh keys for the current
    user and for root, authorise each key for its own account, and record
    localhost's host keys in the respective known_hosts file.
    """
    log("create new secrets for use in this dev image")

    log("generate salvus ssh key")
    cmd('ssh-keygen -b2048 -t rsa -N "" -f ~/.ssh/id_rsa',system=True)
    for follow_up in ('cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys',
                      'ssh-keyscan -H localhost >>  ~/.ssh/known_hosts'):
        cmd(follow_up)

    log("generate root ssh key")
    cmd('sudo ssh-keygen -b2048 -t rsa -N "" -f /root/.ssh/id_rsa', system=True)
    # root's files are written through "sudo tee" (overwriting them)
    for follow_up in ('sudo cat /root/.ssh/id_rsa.pub | sudo tee  /root/.ssh/authorized_keys',
                      'sudo ssh-keyscan -H localhost |  sudo tee  /root/.ssh/known_hosts'):
        cmd(follow_up)
def update_rep():
    """Stash any local changes (under the name 'xxx') and pull the repo."""
    log("update repo")
    stash_and_pull = "git stash save xxx; git pull"
    cmd(stash_and_pull)
def create_data_secrets():
    """
    Populate the SECRETS directory for a development install: a
    placeholder sendgrid password, freshly generated Cassandra passwords
    for 'hub' and 'salvus', and a self-signed certificate bundle at
    ``sagemath.com/nopassphrase.pem``.
    """
    cassandra_dir = "%s/cassandra" % SECRETS
    cert_dir = "%s/sagemath.com" % SECRETS

    cmd("mkdir -p %s" % SECRETS)

    log("sendgrid fake password (will not work)")
    cmd("echo 'will-not-work' > %s/sendgrid_email_password" % SECRETS)

    log("generate cassandra passwords")
    cmd("mkdir -p " + cassandra_dir)
    cmd("makepasswd -q > " + cassandra_dir + "/hub")
    cmd("makepasswd -q > " + cassandra_dir + "/salvus")

    cmd("mkdir -p " + cert_dir)
    # key.pem and cert.pem are produced in the current working directory
    # and then concatenated into the bundle.
    cmd("yes US | openssl req -x509 -newkey rsa:2048 -keyout key.pem -out cert.pem -nodes -days 10000 && cat key.pem cert.pem > " + cert_dir + "/nopassphrase.pem")
def setup_projects_path():
    """
    Create the /projects directory tree (including /projects/snapshots,
    /projects/conf owned by salvus, and /projects/sagemathcloud) and sync
    the local_hub_template into /projects/sagemathcloud/.
    """
    log("create paths")
    commands = (
        "sudo mkdir -p /projects",
        "sudo chmod a+x /projects",
        "sudo touch /projects/snapshots; sudo chmod a+r /projects/snapshots",
        "sudo mkdir -p /projects/conf",
        "sudo chown salvus. /projects/conf",
        "sudo mkdir -p /projects/sagemathcloud",
        "sudo rsync -LrxH --delete /home/salvus/salvus/salvus/local_hub_template/ /projects/sagemathcloud/",
    )
    for command in commands:
        cmd(command)
def snapshot_usage(self):
    """
    Return the total snapshot storage in gigabytes (10**9 bytes), rounded
    up to a whole number.
    """
    raw = cmd(['gcloud', 'compute', 'snapshots', 'list', '--format', 'json'], verbose=0)
    total_gb = 0
    for snapshot in json.loads(raw):
        # storageBytes need not be set, e.g., while snapshot is being made.
        size_bytes = float(snapshot.get("storageBytes",0))
        total_gb += size_bytes/1000/1000/1000.
    return int(math.ceil(total_gb))
def instance_costs(self):
    """
    Estimate the monthly cost of all RUNNING instances.

    Parses the whitespace-separated table printed by
    `gcloud compute instances list`, tallies the running instances by
    name prefix (compute, web, db, dev, admin, storage, other) and logs a
    one-line summary.

    Returns a dict with two keys:
      'lower' -- total using the PRICING '<type>-month' rates (for
                 preemptible instances: the '-hour-pre' rate * 24 * 30.5)
      'upper' -- total using the hourly rate * 30.5 * 24; the log message
                 labels this figure "without sustained"

    Both totals are scaled by the per-instance cpu count and by the
    PRICING factor keyed on the first component of the zone name.

    Instances with a custom machine type are counted in the tallies but
    skipped (with a warning) in the cost totals, since PRICING has no
    entries for them.
    """
    cost_lower = cost_upper = 0
    roles = ('compute', 'web', 'db', 'dev', 'admin', 'storage')
    counts = dict((role, 0) for role in roles)
    counts['other'] = 0
    n_preempt = 0

    listing = cmd(['gcloud', 'compute', 'instances', 'list'], verbose=0)
    # the first line of the listing is the column header
    for row in listing.splitlines()[1:]:
        v = row.split()
        if len(v) < 3:
            # blank or malformed line: name, zone and machine type are needed
            continue
        name, zone, machine_type, status = v[0], v[1], v[2], v[-1]
        if status != 'RUNNING':
            continue

        # NOTE(review): assumes a row has 7 columns only when the
        # PREEMPTIBLE column is filled in -- confirm against gcloud output.
        preempt = len(v) == 7 and v[3] == 'true'
        if preempt:
            # only count instances that really are preemptible
            n_preempt += 1

        for role in roles:
            if name.startswith(role):
                counts[role] += 1
                break
        else:
            counts['other'] += 1

        t = machine_type.split('-')
        if 'custom' in t:
            # Custom types look like "custom-<cpus>-<memory>", so they must be
            # detected before the family/cpu split below; otherwise the
            # PRICING lookup fails with a KeyError.
            print("warning -custom machine types not supported; skipping ", row)
            continue
        if len(t) == 3:
            # e.g. n1-highmem-4 --> family "n1-highmem" with 4 cpus
            b = '-'.join(t[:2])
            cpus = int(t[2])
        else:
            # shared-core types have no cpu count in their name
            b = machine_type
            cpus = 1

        if preempt:
            pricing_hour = PRICING[b + '-hour-pre']
            pricing_month = pricing_hour * 24 * 30.5
        else:
            pricing_hour = PRICING[b + '-hour']
            pricing_month = PRICING[b + '-month']

        zone_factor = PRICING[zone.split('-')[0]]
        cost_lower += pricing_month * cpus * zone_factor
        cost_upper += pricing_hour * 30.5 * 24 * cpus * zone_factor

    log(
        "INSTANCES : %8s/month -- (or %8s/month without sustained!); compute=%s, web=%s, db=%s, dev=%s, admin=%s, storage=%s, other=%s (preempt=%s)",
        money(cost_lower), money(cost_upper), counts['compute'],
        counts['web'], counts['db'], counts['dev'], counts['admin'],
        counts['storage'], counts['other'], n_preempt)
    return {'lower': cost_lower, 'upper': cost_upper}
opts =['gcloud', 'compute', '--project', self.project, 'instances', 'create', name, '--zone', zone, '--machine-type', machine_type, '--network', network] if address: opts.extend(["--address", address]) if preemptible: opts.append('--preemptible') else: opts.extend(['--maintenance-policy', 'MIGRATE']) opts.extend(['--scopes', 'https://www.googleapis.com/auth/logging.write', '--disk', 'name=%s,device-name=%s,mode=rw,boot=yes'%(name, name)]) opts.extend(['--disk', 'name=%s'%disk_name, 'device-name=%s'%disk_name, 'mode=rw']) opts.extend(['--tags', 'compute']) #if local_ssd: # opts.append('--local-ssd') #else: cmd(opts, system=True) if devel: self.set_boot_auto_delete(name=name, zone=zone) def create_compute_server0(self, node, zone='us-central1-c', machine_type='n1-highmem-4', preemptible=False, address=None): self._create_compute_server(node=node, zone=zone, machine_type=machine_type, projects_ssd=True, projects_size=150, base_ssd=True, network='default', address=address, preemptible=preemptible) def create_compute_server(self, node, zone='us-central1-c', machine_type='n1-highmem-4', projects_size=500, preemptible=False, address=None):
def set_boot_auto_delete(self, name, zone):
    """
    Turn on auto-delete for the disk called `name` that is attached to the
    instance of the same name in `zone`, within project self.project.
    """
    log("set boot disk of %s to auto-delete"%name)
    gcloud_args = ['gcloud', 'compute', '--project', self.project,
                   'instances', 'set-disk-auto-delete', name]
    # the disk is addressed by the instance's own name
    gcloud_args.extend(['--zone', zone, '--disk', name, '--auto-delete'])
    cmd(gcloud_args)
def get_sage_install():
    """
    Install a copy of Sage on this machine by copying it from compute_vm.

    Steps: install the system packages Sage and latex need, prepare
    /usr/local/sage/current, copy local_hub_template and the newest
    sage-* directory from compute_vm, copy the jupyter kernel
    configuration, link /usr/local/bin/sage and run sage once.
    """
    log("installling packages so that sage and latex will work")
    cmd("sudo apt-get install -y libatlas3gf-base liblapack-dev texlive",
        system=True)

    log("get copy of sage install (about 5-10 minutes)")
    for prepare_step in ("sudo mkdir -p /usr/local/sage/current",
                         "sudo chown -R salvus. /usr/local/sage",
                         "chmod a+rx /usr/local/sage/ /usr/local/sage/current/"):
        cmd(prepare_step)

    log("getting local_hub_template from a compute machine")
    host = compute_vm
    # note: '>' replaces the whole known_hosts file with the host's key
    cmd('ssh-keyscan -H %s > ~/.ssh/known_hosts' % host)
    template_dir = "/home/salvus/salvus/salvus/local_hub_template/"
    cmd("rsync -axH %s:%s %s" % (host, template_dir, template_dir))

    # "newest" means last in plain string order among the sage-* entries
    remote_entries = cmd("ssh %s ls /projects/sage/" % host).splitlines()
    cur = sorted(entry for entry in remote_entries
                 if entry.startswith('sage-'))[-1]
    log("newest version=%s", cur)
    cmd("rsync -axH %s:/projects/sage/%s/ /usr/local/sage/current/" % (host, cur),
        system=True)

    log('get jupyter kernel conf')
    # staged through /tmp/jupyter; the second rsync runs under sudo
    cmd("rsync -axH %s:/usr/local/share/jupyter/ /tmp/jupyter && sudo rsync -axH /tmp/jupyter/ /usr/local/share/jupyter/" % host,
        system=True)

    log("create link")
    cmd("sudo ln -sf /usr/local/sage/current/sage /usr/local/bin/sage")

    log("run sage once")
    cmd("umask 022; /usr/local/bin/sage -b < /dev/null")
opts.extend(['--maintenance-policy', 'MIGRATE']) opts.extend([ '--scopes', 'https://www.googleapis.com/auth/logging.write', '--disk', 'name=%s,device-name=%s,mode=rw,boot=yes' % (name, name) ]) opts.extend([ '--disk', 'name=%s' % disk_name, 'device-name=%s' % disk_name, 'mode=rw' ]) opts.extend(['--tags', 'compute']) #if local_ssd: # opts.append('--local-ssd') #else: cmd(opts, system=True) if devel: self.set_boot_auto_delete(name=name, zone=zone) def create_compute_server0(self, node, zone='us-central1-c', machine_type='n1-highmem-4', preemptible=False, address=None): self._create_compute_server(node=node, zone=zone, machine_type=machine_type, projects_ssd=True, projects_size=150,
def create_ssh_keys():
    """
    Generate passphrase-less 2048-bit RSA ssh keys for the current user and
    for root, then register each public key and localhost's host key so
    that each account can ssh to localhost.
    """
    log("create new secrets for use in this dev image")

    log("generate salvus ssh key")
    cmd('ssh-keygen -b2048 -t rsa -N "" -f ~/.ssh/id_rsa', system=True)
    # For the regular user the existing files are appended to (>>).
    for user_step in ('cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys',
                      'ssh-keyscan -H localhost >> ~/.ssh/known_hosts'):
        cmd(user_step)

    log("generate root ssh key")
    cmd('sudo ssh-keygen -b2048 -t rsa -N "" -f /root/.ssh/id_rsa', system=True)
    # For root the files are rewritten: tee is used without -a.
    for root_step in ('sudo cat /root/.ssh/id_rsa.pub | sudo tee /root/.ssh/authorized_keys',
                      'sudo ssh-keyscan -H localhost | sudo tee /root/.ssh/known_hosts'):
        cmd(root_step)
class GCE(object): def __init__(self): self.project = os.environ.get("SMC_PROJECT", "sage-math-inc") def instance_name(self, node, prefix, zone, devel=False): # the zone names have got annoyingly non-canonical... if prefix.startswith('smc'): zone = "-" + self.expand_zone(zone) elif prefix.startswith('compute'): zone = "-" + self.short_zone(zone) else: zone = '' return '%s%s%s%s' % (prefix, node, '-devel' if devel else '', zone) def snapshots(self, prefix, devel=False): w = [] usage = 0 if devel: p = 'devel-%s' % prefix else: p = prefix for x in cmd(['gcloud', 'compute', 'snapshots', 'list'], verbose=0).splitlines()[1:]: v = x.split() if len(v) > 0: if v[0].startswith(p): w.append(v[0]) usage += int(v[1]) w.sort() return w def newest_snapshot(self, prefix=''): return self.snapshots(prefix)[-1] def short_zone(self, zone): return zone.split('-')[0] def expand_zone(self, zone): # See https://cloud.google.com/compute/docs/zones # Haswell processors are much better than Ivy Bridge and Sandy Bridge. 
if zone == 'us': return 'us-central1-c' elif zone == 'eu' or zone == 'europe': return 'europe-west1-d' elif zone == 'asia': return 'asia-east1-c' # not Haswell else: return zone def _create_compute_server(self, node, zone='us-central1-c', machine_type='n1-highmem-4', network='default', projects_ssd=False, base_ssd=False, projects_size=150, devel=False, address=None, preemptible=False): zone = self.expand_zone(zone) name = self.instance_name(node=node, prefix='compute', zone=zone, devel=devel) log("creating root filesystem image") try: opts = [ 'gcloud', 'compute', '--project', self.project, 'disks', 'create', name, '--zone', zone, '--source-snapshot', self.newest_snapshot('compute') ] if base_ssd: opts.extend(['--type', 'pd-ssd']) cmd(opts) except Exception, mesg: if 'already exists' not in str(mesg): raise log("%s already exists", name) log("creating /dev/sdb persistent disk") disk_name = "%s-projects" % name try: opts = [ 'gcloud', 'compute', '--project', self.project, 'disks', 'create', disk_name, '--size', projects_size, '--zone', zone ] if projects_ssd: opts.extend(['--type', 'pd-ssd']) cmd(opts) except Exception, mesg: if 'already exists' not in str(mesg): raise