def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)
    # Block until cluster created
    opsc.waitForCluster(cname=args.clustername, pause=args.pause, trys=args.trys)

    clusters = opsc.session.get(
        "{url}/api/v2/lcm/clusters/".format(url=opsc.url)).json()
    for r in clusters['results']:
        if r['name'] == args.clustername:
            cid = r['id']

    opsc.waitForNodes(numnodes=args.clustersize, pause=args.pause, trys=args.trys)

    if args.dclevel:
        datacenters = opsc.session.get(
            "{url}/api/v2/lcm/datacenters/".format(url=opsc.url)).json()
        for r in datacenters['results']:
            dcid = r['id']
            print "Triggering install for DC, id = {i}".format(i=dcid)
            opsc.triggerInstall(None, dcid)
    else:
        print "Triggering install for cluster, id = {i}".format(i=cid)
        opsc.triggerInstall(cid, None)
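# A minimal sketch of what the triggerInstall helper might look like,
# assuming the LCM actions endpoint and a job-scope/resource-id payload;
# the field names here are an assumption, not lcm.OpsCenter's actual code,
# and it relies on the script's existing json import.
def triggerInstallSketch(opsc, cid, dcid):
    if dcid is not None:
        # install one datacenter at a time
        body = {"job-type": "install", "job-scope": "datacenter",
                "resource-id": dcid}
    else:
        # install the whole cluster in one job
        body = {"job-type": "install", "job-scope": "cluster",
                "resource-id": cid}
    return opsc.session.post(
        "{url}/api/v2/lcm/actions/install".format(url=opsc.url),
        data=json.dumps(body)).json()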
def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)
    # Block until cluster created
    opsc.waitForCluster(args.clustername, args.pause, args.trys)

    clusters = opsc.session.get("{url}/api/v1/lcm/clusters/".format(url=opsc.url)).json()
    for r in clusters['results']:
        if r['name'] == args.clustername:
            cid = r['id']

    # Check if the DC --this-- node should belong to exists, if not add DC
    if opsc.checkForDC(args.dcname):
        print "Datacenter {d} exists".format(d=args.dcname)
    else:
        print "Datacenter {n} doesn't exist, creating...".format(n=args.dcname)
        opsc.addDC(args.dcname, cid)

    # kludge, assuming only one cluster
    dcid = ""
    datacenters = opsc.session.get("{url}/api/v1/lcm/datacenters/".format(url=opsc.url)).json()
    for d in datacenters['results']:
        if d['name'] == args.dcname:
            dcid = d['id']

    # always add self to DC
    nodes = opsc.session.get("{url}/api/v1/lcm/datacenters/{dcid}/nodes/".format(url=opsc.url, dcid=dcid)).json()
    nodecount = nodes['count']
    # simple counting for node number hits a race condition... work around:
    #nodename = 'node'+str(nodecount)
    # aws metadata service instance-id
    #inst = requests.get("http://169.254.169.254/latest/meta-data/instance-id").content
    nodename = 'node-'+args.nodeid
    nodeconf = json.dumps({
        'name': nodename,
        "datacenter-id": dcid,
        "rack": args.rack,
        "ssh-management-address": args.pubip,
        "listen-address": args.privip,
        "rpc-address": "0.0.0.0",
        "broadcast-address": args.pubip,
        "broadcast-rpc-address": args.pubip})
    node = opsc.session.post("{url}/api/v1/lcm/nodes/".format(url=opsc.url), data=nodeconf).json()
    print "Added node '{n}', json:".format(n=nodename)
    lcm.pretty(node)

    nodes = opsc.session.get("{url}/api/v1/lcm/datacenters/{dcid}/nodes/".format(url=opsc.url, dcid=dcid)).json()
    nodecount = nodes['count']
    print "{n} nodes in datacenter {d}".format(n=nodecount, d=dcid)
    print "Exiting addNode..."
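# Plausible sketches of the checkForDC/addDC helpers this script assumes,
# mirroring the GET/POST patterns used above; the payload fields are an
# assumption about lcm.py, not its actual source.
def checkForDCSketch(opsc, dcname):
    datacenters = opsc.session.get(
        "{url}/api/v1/lcm/datacenters/".format(url=opsc.url)).json()
    # True if any existing datacenter already carries this name
    return any(d['name'] == dcname for d in datacenters['results'])

def addDCSketch(opsc, dcname, cid):
    dcconf = json.dumps({"name": dcname, "cluster-id": cid})
    return opsc.session.post(
        "{url}/api/v1/lcm/datacenters/".format(url=opsc.url),
        data=dcconf).json()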
def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    count = 0
    while True:
        count += 1
        if count > args.trys:
            print "Maximum attempts, exiting"
            exit()
        try:
            jobs = opsc.session.get(
                "{url}/api/v2/lcm/jobs/".format(url=opsc.url)).json()
        except requests.exceptions.Timeout as e:
            print "Request {c} to OpsC timed out after initial connection, exiting.".format(
                c=count)
            exit()
        except requests.exceptions.ConnectionError as e:
            print "Request {c} to OpsC refused after initial connection, exiting.".format(
                c=count)
            exit()
        lcm.pretty(jobs)
        if jobs['count'] == 0:
            print "No jobs found on try {c}, sleeping {p} sec...".format(
                c=count, p=args.pause)
            time.sleep(args.pause)
            continue
        if runningJob(jobs):
            print "Jobs running/pending on try {c}, sleeping {p} sec...".format(
                c=count, p=args.pause)
            time.sleep(args.pause)
            continue
        if (not runningJob(jobs)) and (jobs['count'] < args.num):
            print "Jobs found on try {c} but num {j} < {n}, sleeping {p} sec...".format(
                c=count, j=jobs['count'], n=args.num, p=args.pause)
            time.sleep(args.pause)
            continue
        if (not runningJob(jobs)) and (jobs['count'] >= args.num):
            print "No jobs running/pending and num >= {n} on try {c}, exiting".format(
                n=args.num, c=count)
            break
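# A minimal sketch of the runningJob helper the loop above relies on,
# assuming each entry in the LCM jobs listing carries a 'status' field;
# the exact status strings are inferred from the log messages, not from
# the OpsCenter API docs.
def runningJobSketch(jobs):
    for job in jobs['results']:
        if job['status'] in ('RUNNING', 'PENDING'):
            return True
    return False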
def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # get cluster id, assume 1 cluster
    clusterconf = opsc.session.get(
        "{url}/cluster-configs".format(url=opsc.url)).json()
    cid = clusterconf.keys()[0]

    # get all node configs
    nodes = opsc.session.get("{url}/{id}/nodes".format(url=opsc.url, id=cid)).json()

    # loop over configs, counting nodes in each dc
    datacenters = {}
    for n in nodes:
        if n['dc'] in datacenters:
            datacenters[n['dc']] += 1
        else:
            datacenters[n['dc']] = 1

    # reuse dict for post data in REST call
    # min(3, #) handles edge case where # of nodes < 3
    for d in datacenters:
        datacenters[d] = min(3, datacenters[d])

    # keyspaces to alter
    # leaving out LocalStrategy (system & system_schema) and EverywhereStrategy (dse_system & solr_admin)
    keyspaces = [
        "system_auth", "system_distributed", "system_traces", "dse_analytics",
        "dse_security", "dse_perf", "dse_leases", "cfs_archive",
        "spark_system", "cfs", "dsefs", "OpsCenter", "HiveMetaStore"
    ]
    postdata = {
        "strategy_class": "NetworkTopologyStrategy",
        "strategy_options": datacenters,
        "durable_writes": True
    }
    rawjson = json.dumps(postdata)

    # loop over keyspaces
    print "Looping over keyspaces: {k}".format(k=keyspaces)
    print "NOTE: No response indicates success"
    # keep track of non-success keyspaces to skip repairing
    skip = []
    for ks in keyspaces:
        print "Calling: PUT {url}/{id}/keyspaces/{ks} with {d}".format(
            url=opsc.url, id=cid, ks=ks, d=rawjson)
        response = opsc.session.put("{url}/{id}/keyspaces/{ks}".format(
            url=opsc.url, id=cid, ks=ks), data=rawjson).json()
        print "Response: "
        if response != None:
            # add to keyspaces to skip
            skip.append(ks)
            print "Non-success for keyspace: {ks}, excluding later...".format(
                ks=ks)
            lcm.pretty(response)

    print "Calling repair on all keyspaces/nodes:"
    print "Skipping keyspaces: {s}".format(s=skip)
    for ks in keyspaces:
        if ks in skip:
            print "Skipping keyspace {ks}".format(ks=ks)
            continue
        print "Repairing {ks}...".format(ks=ks)
        for node in nodes:
            nodeip = str(node['node_ip'])
            print "  ...on node {n}".format(n=nodeip)
            response = opsc.session.post(
                "{url}/{id}/ops/repair/{node}/{ks}".format(
                    url=opsc.url, id=cid, node=nodeip, ks=ks),
                data='{"is_sequential": false}').json()
            print "  ", response
            running = True
            count = 0
            while running:
                print "  Sleeping 2s after check {c}...".format(c=count)
                time.sleep(2)
                status = opsc.session.get("{url}/request/{r}/status".format(
                    url=opsc.url, r=response)).json()
                count += 1
                if status['state'] != u'running':
                    print "  Status of request {r} is: {s}".format(
                        r=response, s=status['state'])
                    running = False
                if count >= 15:
                    print "  Status 'running' after {c} checks, continuing".format(
                        c=count)
                    running = False
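# A small worked example of the replication payload built above, with
# hypothetical DC names: the RF for each datacenter is min(3, nodes-in-DC),
# so a 5-node DC gets RF 3 and a 2-node DC gets RF 2.
def exampleReplicationPayload():
    dcs = {"dc1": 5, "dc2": 2}  # node counts per DC
    rf = dict((d, min(3, n)) for d, n in dcs.items())
    # rf -> {"dc1": 3, "dc2": 2}
    return json.dumps({"strategy_class": "NetworkTopologyStrategy",
                       "strategy_options": rf,
                       "durable_writes": True})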
def main():
    parser = setupArgs()
    args = parser.parse_args()
    checkArgs(args)

    # Basic repo config
    dserepo = {
        "name": "DSE repo",
        "username": args.repouser,
        "password": args.repopw
    }
    if args.verbose:
        print "Default repo config:"
        tmp = dserepo.copy()
        tmp['password'] = "******"
        lcm.pretty(tmp)

    # If privkey passed read key content...
    if args.privkey != None:
        keypath = os.path.abspath(args.privkey)
        with open(keypath, 'r') as keyfile:
            privkey = keyfile.read()
        print "Will create cluster {c} on {u} with keypath {k}".format(
            c=args.clustername, u=args.opsc_ip, k=keypath)
        dsecred = {
            "become-mode": "sudo",
            "use-ssh-keys": True,
            "name": "DSE creds",
            "login-user": args.username,
            "ssh-private-key": privkey,
            "become-user": None
        }
    # ...otherwise use a pw
    else:
        print "Will create cluster {c} on {u} with password".format(
            c=args.clustername, u=args.opsc_ip)
        dsecred = {
            "become-mode": "sudo",
            "use-ssh-keys": False,
            "name": "DSE creds",
            "login-user": args.username,
            "login-password": args.password,
            "become-user": None
        }
    if args.becomepw:
        dsecred['become-password'] = args.password
    if args.verbose:
        print "Default creds:"
        tmp = dsecred.copy()
        if 'login-password' in tmp:
            tmp['login-password'] = "******"
        if 'become-password' in tmp:
            tmp['become-password'] = "******"
        if 'ssh-private-key' in tmp:
            tmp['ssh-private-key'] = "ZZZZZ"
        lcm.pretty(tmp)

    # Minimal config profile
    defaultconfig = {
        "name": "Default config",
        "datastax-version": args.dsever,
        "json": {
            'cassandra-yaml': {
                "authenticator": "com.datastax.bdp.cassandra.auth.DseAuthenticator",
                "num_tokens": 8,
                "allocate_tokens_for_local_replication_factor": 3,
                "endpoint_snitch": "org.apache.cassandra.locator.GossipingPropertyFileSnitch",
                "compaction_throughput_mb_per_sec": 64
            },
            "dse-yaml": {
                "authorization_options": {"enabled": True},
                "authentication_options": {"enabled": True},
                "dsefs_options": {"enabled": True}
            }
        }
    }

    # Since this isn't necessarily being called on the nodes where 'datapath'
    # exists, checking is pointless
    if args.datapath != None:
        print "--datapath {p} passed, setting root datapath in default config".format(
            p=args.datapath)
        defaultconfig["json"]["cassandra-yaml"]["data_file_directories"] = [
            os.path.join(args.datapath, "data")
        ]
        defaultconfig["json"]["cassandra-yaml"][
            "saved_caches_directory"] = os.path.join(args.datapath, "saved_caches")
        defaultconfig["json"]["cassandra-yaml"][
            "commitlog_directory"] = os.path.join(args.datapath, "commitlog")
        defaultconfig["json"]["dse-yaml"]["dsefs_options"][
            "work_dir"] = os.path.join(args.datapath, "dsefs")
        defaultconfig["json"]["dse-yaml"]["dsefs_options"][
            "data_directories"] = [{
                "dir": os.path.join(args.datapath, "dsefs/data")
            }]

    # if --aoss option passed, enable AOSS
    if args.aoss and args.dsever.startswith('6'):
        print "--aoss passed, adding enable AOSS to default config"
        defaultconfig["json"]["dse-yaml"]["alwayson_sql_options"] = {
            "enabled": True
        }
        defaultconfig["json"]["dse-yaml"]["resource_manager_options"] = {
            "worker_options": {
                "workpools": [{
                    "memory": "0.4",
                    "cores": "0.4",
                    "name": "alwayson_sql"
                }]
            }
        }
    elif args.aoss and args.dsever.startswith('5'):
        print "WARNING: --aoss passed and DSE version <6, ignoring --aoss"

    # if nojava option passed, disable java/jce
    if args.nojava:
        print "--nojava passed, adding disable java to default config"
        defaultconfig["json"]["java-setup"] = {}
        defaultconfig["json"]["java-setup"]["manage-java"] = False

    # Overrides all config profile logic above
    # Todo, read config json from a file or http endpoint
    if args.config != None:
        print "WARNING: --config passed, OVERRIDING ALL OTHER config arguments"
        print "WARNING: Failed install job possible, e.g. if config json data"
        print "WARNING: paths don't match existing disks/paths"
        defaultconfig = json.loads(args.config)
    if args.verbose:
        print "Default config profile:"
        lcm.pretty(defaultconfig)

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # Return config instead of bool?
    # This check is here to allow calling the script from node instances if
    # desired, i.e. the script may be called multiple times.
    # Cluster doesn't exist -> must be 1st node -> do setup
    c = opsc.checkForCluster(args.clustername)
    if not c:
        print "Cluster {n} doesn't exist, creating...".format(
            n=args.clustername)
        cred = opsc.addCred(json.dumps(dsecred))
        repo = opsc.addRepo(json.dumps(dserepo))
        conf = opsc.addConfig(json.dumps(defaultconfig))
        cid = opsc.addCluster(args.clustername, cred['id'], repo['id'],
                              conf['id'], args.dbpasswd)
    else:
        print "Cluster {n} exists, exiting...".format(n=args.clustername)
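# For reference, a minimal example of the JSON that --config accepts; it
# replaces defaultconfig wholesale, so it must carry the same top-level
# shape (name, datastax-version, json). Values below are purely illustrative.
exampleconfig = json.dumps({
    "name": "Override config",
    "datastax-version": "6.0.2",
    "json": {
        "cassandra-yaml": {"num_tokens": 8},
        "dse-yaml": {"dsefs_options": {"enabled": True}}
    }
})
# e.g. --config "$(cat myconfig.json)" on the command line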
def main():
    parser = setupArgs()
    args = parser.parse_args()

    print "Starting alterKeyspaces: {t}".format(t=time.ctime())
    print "Sleeping {s} sec before start...".format(s=args.delay)
    time.sleep(args.delay)

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # get cluster id, assume 1 cluster
    clusterconf = opsc.session.get(
        "{url}/cluster-configs".format(url=opsc.url)).json()
    if len(clusterconf.keys()) == 0:
        print "Error: no clusters, exiting."
        # exiting with 0 so as not to propagate the error up to deploy
        exit()
    if args.verbose:
        lcm.pretty(clusterconf)
    cid = clusterconf.keys()[0]

    # get all node configs
    nodes = opsc.session.get("{url}/{id}/nodes".format(url=opsc.url, id=cid)).json()
    if len(nodes) == 0:
        print "Error: no nodes, exiting."
        # exiting with 0 so as not to propagate the error up to deploy
        exit()
    if args.verbose:
        lcm.pretty(nodes)

    # loop over configs, counting nodes in each dc
    datacenters = {}
    for n in nodes:
        if n['dc'] in datacenters:
            datacenters[n['dc']] += 1
        else:
            datacenters[n['dc']] = 1

    # reuse dict for post data in REST call
    # min(3, #) handles edge case where # of nodes < 3
    for d in datacenters:
        datacenters[d] = min(3, datacenters[d])

    # keyspaces to alter
    # leaving out LocalStrategy (system & system_schema) and EverywhereStrategy (dse_system & solr_admin)
    keyspaces = {
        "system_auth", "system_distributed", "system_traces", "dse_analytics",
        "dse_security", "dse_perf", "dse_leases", "cfs_archive",
        "spark_system", "cfs", "dsefs", "OpsCenter", "HiveMetaStore"
    }
    postdata = {
        "strategy_class": "NetworkTopologyStrategy",
        "strategy_options": datacenters,
        "durable_writes": True
    }
    rawjson = json.dumps(postdata)

    # loop over keyspaces
    print "Looping over keyspaces: {k}".format(k=', '.join(keyspaces))
    print "NOTE: No response indicates success"
    # keep track of non-success keyspaces to skip repairing
    skip = set()
    for ks in keyspaces:
        print "Calling: PUT {url}/{id}/keyspaces/{ks} with {d}".format(
            url=opsc.url, id=cid, ks=ks, d=rawjson)
        response = opsc.session.put("{url}/{id}/keyspaces/{ks}".format(
            url=opsc.url, id=cid, ks=ks), data=rawjson).json()
        print "Response: {r}".format(r=response)
        if response != None:
            # add to keyspaces to skip
            skip.add(ks)
            print "Non-success for keyspace: {ks}, excluding later...".format(
                ks=ks)
            lcm.pretty(response)
    print "Skipping keyspaces: {s}".format(s=', '.join(skip))
    for ks in skip:
        keyspaces.discard(ks)

    # look for version on all nodes, in case agent is down on some
    # dummy version for edge case where no agents are reporting, then bail
    version = '0'
    for n in nodes:
        if 'dse' in n['node_version']:
            version = n['node_version']['dse']
    if version.startswith('0'):
        print "Error: no DSE version found, exiting."
        # exiting with 0 so as not to propagate the error up to deploy
        exit(0)

    if version.startswith('5'):
        if args.norepair:
            print "--norepair passed, skipping repair and exiting."
            exit(0)
        print "DSE version: {v}, calling repairs".format(v=version)
        print "Running repairs"
        runRepair(opsc, cid, nodes, keyspaces)
    else:
        print "DSE version: {v}".format(v=version)
        if args.nodesync:
            # Explicitly add dse_system/solr_admin, which aren't passed in because
            # they're EverywhereStrategy and therefore un-altered
            keyspaces.add("dse_system")
            keyspaces.add("solr_admin")
            # Explicitly skip the system_auth and OpsCenter keyspaces
            keyspaces.discard("OpsCenter")
            keyspaces.discard("system_auth")
            enableNodesync(opsc, cid, keyspaces)
            # Explicitly repair keyspaces system_auth and OpsCenter
            if args.norepair:
                print "--norepair passed, skipping repair and exiting."
                exit(0)
            runRepair(opsc, cid, nodes, {"system_auth", "OpsCenter"})
        else:
            if args.norepair:
                print "--norepair passed, skipping repair and exiting."
                exit(0)
            runRepair(opsc, cid, nodes, keyspaces)
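# A sketch of the runRepair helper called above, factored from the inline
# repair loop in the older alter-keyspaces script: POST one repair per
# node/keyspace, then poll the request status for up to 15 checks. It
# mirrors that code and assumes the same imports (time); it is not
# necessarily the repo's actual runRepair.
def runRepairSketch(opsc, cid, nodes, keyspaces):
    for ks in keyspaces:
        print "Repairing {ks}...".format(ks=ks)
        for node in nodes:
            nodeip = str(node['node_ip'])
            reqid = opsc.session.post(
                "{url}/{id}/ops/repair/{node}/{ks}".format(
                    url=opsc.url, id=cid, node=nodeip, ks=ks),
                data='{"is_sequential": false}').json()
            # poll until the request leaves the 'running' state or we give up
            for check in range(15):
                time.sleep(2)
                status = opsc.session.get("{url}/request/{r}/status".format(
                    url=opsc.url, r=reqid)).json()
                if status['state'] != u'running':
                    break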
def main():
    parser = setupArgs()
    args = parser.parse_args()
    checkArgs(args)

    # Basic repo config
    dserepo = json.dumps({
        "name": "DSE repo",
        "username": args.repouser,
        "password": args.repopw})

    # If privkey passed read key content...
    if args.privkey != None:
        keypath = os.path.abspath(args.privkey)
        with open(keypath, 'r') as keyfile:
            privkey = keyfile.read()
        print "Will create cluster {c} on {u} with keypath {k}".format(
            c=args.clustername, u=args.opsc_ip, k=keypath)
        dsecred = json.dumps({
            "become-mode": "sudo",
            "use-ssh-keys": True,
            "name": "DSE creds",
            "login-user": args.username,
            "ssh-private-key": privkey,
            "become-user": None})
    # ...otherwise use a pw
    else:
        print "Will create cluster {c} on {u} with password".format(
            c=args.clustername, u=args.opsc_ip)
        dsecred = json.dumps({
            "become-mode": "sudo",
            "use-ssh-keys": False,
            "name": "DSE creds",
            "login-user": args.username,
            "login-password": args.password,
            "become-user": None})

    # Minimal config profile
    # Todo, read config json from a file
    defaultconfig = {
        "name": "Default config",
        "datastax-version": args.dsever,
        "json": {
            'cassandra-yaml': {
                "authenticator": "com.datastax.bdp.cassandra.auth.DseAuthenticator",
                "num_tokens": 32,
                "endpoint_snitch": "GossipingPropertyFileSnitch"
            },
            "dse-yaml": {
                "authorization_options": {"enabled": True},
                "authentication_options": {"enabled": True}
            }
        }}

    # Since this isn't necessarily being called on the nodes where 'datapath'
    # exists, checking is pointless
    if args.datapath != None:
        defaultconfig["json"]["cassandra-yaml"]["data_file_directories"] = [os.path.join(args.datapath, "data")]
        defaultconfig["json"]["cassandra-yaml"]["saved_caches_directory"] = os.path.join(args.datapath, "saved_caches")
        defaultconfig["json"]["cassandra-yaml"]["commitlog_directory"] = os.path.join(args.datapath, "commitlog")
    defaultconfig = json.dumps(defaultconfig)

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # Return config instead of bool?
    # This check is here to allow calling the script from node instances if
    # desired, i.e. the script may be called multiple times.
    # Cluster doesn't exist -> must be 1st node -> do setup
    c = opsc.checkForCluster(args.clustername)
    if not c:
        print "Cluster {n} doesn't exist, creating...".format(n=args.clustername)
        cred = opsc.addCred(dsecred)
        repo = opsc.addRepo(dserepo)
        conf = opsc.addConfig(defaultconfig)
        cid = opsc.addCluster(args.clustername, cred['id'], repo['id'], conf['id'])
    else:
        print "Cluster {n} exists, exiting...".format(n=args.clustername)
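# Hypothetical invocation of this script; the flag names assume setupArgs
# maps each args.* attribute to a same-named option (the real names and the
# script filename live in the repo, not here):
#   python createCluster.py --opsc_ip 10.0.0.5 --clustername mycluster \
#       --username ubuntu --privkey ~/.ssh/id_rsa \
#       --repouser me@example.com --repopw '***' --dsever 5.1.10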