Beispiel #1
0
 def _gen_name( self, master_name,  prefix ):
     """
     Build a name made of ``prefix`` followed by a random five character
     suffix drawn from uppercase letters and digits.

     A fresh suffix is drawn for as long as wkr.get_ANWorker returns a
     truthy result for the (master_name, candidate name) pair, so the
     returned name is one the lookup did not find.
     """
     suffix_len = 5
     #every symbol appears suffix_len times in the pool, so sampling
     #without replacement can still repeat a character in the suffix
     pool = string.ascii_uppercase * suffix_len + string.digits * suffix_len
     while True:
         candidate = prefix + ''.join(random.sample( pool, suffix_len ))
         #a collision is extremely unlikely, but possible
         if not wkr.get_ANWorker( master_name=master_name, cluster_name=candidate ):
             return candidate
 def worker_id(self):
     """
     Return the worker id for this cluster, resolving it on first use.

     The id is cached in self._worker_id.  While it is still None the
     worker model is looked up by self.cluster_name; on a hit the id of
     the first record is stored and the command/response queues are
     registered for it through wkr_mdl.add_sqs_queues_ANWorker.

     If the id is still None afterwards (nothing found, or the lookup
     raised) the instance is flagged with self._terminated = True and
     self.terminate() is called.

     Returns the worker id, or None when it could not be resolved.

     NOTE(review): presumably exposed as a property (no decorator is
     visible here) -- confirm.
     """
     if self._worker_id is None:
         try:
             wm = wkr_mdl.get_ANWorker( cluster_name = self.cluster_name )
             if wm:
                 self._worker_id = wm[0]['worker_id']
                 wkr_mdl.add_sqs_queues_ANWorker( self._worker_id,
                         [self.command_q, self.response_q] )
         except Exception:
             #best effort: log and fall through to the termination path.
             #Exception instead of a bare except so that KeyboardInterrupt
             #and SystemExit are not swallowed
             self.logger.exception("Unable to get worker_model for %s",
                     self.cluster_name)
     if self._worker_id is None:
         try:
             #flag first so terminate() runs with the flag already set
             self._terminated = True
             self.terminate()
         except Exception:
             self.logger.exception("Inconsistent state")
     return self._worker_id
Beispiel #3
0
 def _terminate_single_worker( self, worker_id):
     """
     Terminate one worker and report the outcome as a (msg, status) pair.

     msg is a dict with 'status', 'data' and, on error, 'message';
     status is the numeric status code that goes with it.

     Outcomes:
       - lookup for worker_id comes back empty: 404, nothing is updated
       - worker status is wkr.CONFIG or wkr.NA: updated to wkr.TERMINATED,
         200
       - wkr.confirm_worker_running is truthy and there is an active
         master: updated to wkr.MARKED_FOR_TERMINATION and a 'terminate'
         message is written to the launcher queue, 200
       - wkr.confirm_worker_running is truthy but there is no active
         master: 409, nothing is updated
       - otherwise: updated to wkr.TERMINATED_WITH_ERROR, 200
     """
     worker = wkr.get_ANWorker( worker_id=worker_id )
     self.app.logger.info("%r", worker )
     if not worker:
         #nothing to terminate; indexing an empty result below would raise
         msg = {'status': 'error',
                 'data' : {'worker_id': worker_id},
                 'message' : 'Worker not found'
                 }
         status = 404
     elif worker['status'] in (wkr.CONFIG, wkr.NA):
         worker = wkr.update_ANWorker( worker_id,
                     status=wkr.TERMINATED)
         msg = {'status':'complete',
                 'data' : json_prep( worker )}
         status = 200
     elif wkr.confirm_worker_running( worker ):
         #we have an active cluster
         master = mstr.get_active_master()
         if master:
             launcher_message = {'action':'terminate',
                                 'worker_id': worker_id}
             launcher_config = sys_def_mdl.get_system_defaults(
                     setting_name = 'launcher_config', component='Master' )
             # NOTE(review): the region is hard-coded -- confirm this is
             # intended
             conn = boto.sqs.connect_to_region('us-east-1')
             lq = conn.create_queue( launcher_config['launcher_sqs_in'] )
             #the worker is marked before the message is written
             worker = wkr.update_ANWorker( worker_id,
                     status=wkr.MARKED_FOR_TERMINATION)
             mess = Message(body=json.dumps( launcher_message ))
             lq.write( mess )
             msg = {'status':'complete',
                     'data' : json_prep( worker ) }
             status = 200
         else:
             msg = {'status': 'error',
                     'data' : {'worker_id': worker_id},
                     'message' : 'Running Cluster without an active master'
                     }
             status = 409 #Conflict
     else:
         worker = wkr.update_ANWorker( worker_id,
                     status=wkr.TERMINATED_WITH_ERROR)
         msg = {'status':'complete',
                 'data' : json_prep( worker )}
         status = 200
     return (msg, status)
Beispiel #4
0
 def GET( self, request):
     """
     Answer a GET for one worker or for the list of active workers.

     With self.worker_id set, that worker is looked up and the answer is
     200 with the prepared record, or 404 when the lookup comes back
     empty.

     With self.worker_id unset, the active workers for a branch are
     listed and the answer is 200 with the prepared records, or 404 when
     there are none.

     Returns a (msg, status) tuple.
     """
     if self.worker_id is not None:
         #single worker
         found = wkr.get_ANWorker( worker_id=self.worker_id )
         if not found:
             return ( {'status': 'error',
                       'data' : {'worker_id' : self.worker_id},
                       'message' : 'Worker not found'
                       }, 404 )
         return ( {'status' : 'complete',
                   'data' : json_prep( found )
                   }, 200 )

     #return active workers
     # NOTE(review): a truthy 'branch' query argument leaves branch as None
     # instead of using the supplied value -- confirm this is intended
     branch = None
     if not request.args.get('branch'):
         defaults = sys_def_mdl.get_system_defaults('local_settings', 'Master')
         branch = defaults['branch']
     self.app.logger.info("GETting workers for the %s branch" % (
         branch ))
     prepared = [json_prep( entry )
                 for entry in wkr.get_active_workers(branch)]
     if not prepared:
         return ( {'status' : 'error',
                   'data' : [],
                   'message': 'No workers available'
                   }, 404 )
     return ( {'status' : 'complete',
               'data' : prepared
               }, 200 )
def _devify_plugins( plugins ):
    """Map a comma separated plugin list onto its develop variant."""
    dev_names = {'gpu-bootstrap': 'gpu-dev-bootstrap',
                 'data-bootstrap': 'data-dev-bootstrap'}
    names = ['dev-tgr']
    for raw in plugins.split(','):
        name = raw.strip()
        #skip blanks so an empty or trailing item adds no dangling entry
        if name:
            names.append( dev_names.get(name, name) )
    return ', '.join(names)


def config(worker_id):
    """
    Render the configuration for a worker as a plain text response.

    The worker's 'starcluster_config' settings are passed to the
    'sc-main.cfg' template; the result is concatenated with the rendered
    'sc-plugins.cfg' and 'sc-security-group.cfg' templates.  When the
    'local_settings' branch is 'develop' the plugin list is first
    rewritten by _devify_plugins ('dev-tgr' is prepended and the
    gpu/data bootstrap plugins are replaced by their dev counterparts).

    Returns a 404 plain text response when no worker is found for
    worker_id.

    example config
        {
        'cluster_name':'dummy-cluster',
        'aws_region':'us-east-1',
        'key_name': 'somekey',
        'key_location': '/home/sgeadmin/somekey.key',
        'cluster_size': 1,
        'node_instance_type': 'm1.xlarge',
        'node_image_id': 'ami-1234567',
        'iam_profile':'some-profile',
        'force_spot_master':True,
        'spot_bid':2.00,
        'plugins':'p1,p2,p3'
    }"""
    import masterdirac.models.worker as wrkr

    import masterdirac.models.systemdefaults as sys_def
    local_settings = sys_def.get_system_defaults('local_settings',
            'Master')
    worker_model = wrkr.get_ANWorker( worker_id = worker_id )
    if not worker_model:
        #without a worker there are no settings to render; bail out
        #instead of failing on an unbound config_settings below
        return Response( 'Worker not found', status=404,
            mimetype="text/plain" )
    config_settings = worker_model['starcluster_config']
    if local_settings['branch']=='develop':
        config_settings['plugins'] = _devify_plugins(
                config_settings['plugins'] )
    return Response( render_template('sc-main.cfg', **config_settings) +
        render_template('sc-plugins.cfg') +
        render_template('sc-security-group.cfg'), mimetype="text/plain" )
 def worker_model(self):
     """
     Look up and return the worker model for self.worker_id via
     wkr_mdl.get_ANWorker.
     """
     fetch = wkr_mdl.get_ANWorker
     return fetch( worker_id=self.worker_id )