Example #1
    def __create_dynamodb_stochss_table(self, ec2_access_key, ec2_secret_key):
        database = DynamoDB(ec2_access_key, ec2_secret_key)
        result = database.createtable(JobDatabaseConfig.TABLE_NAME)
        if result:
            logging.debug("creating table {0}".format(JobDatabaseConfig.TABLE_NAME))
        else:
            logging.error("FAILED on creating table {0}".format(JobDatabaseConfig.TABLE_NAME))
Example #2
File: cli.py Project: StochSS/stochss
    def __init__(self, cli_jobs_config):
        self.machines = cli_jobs_config["machines"]
        self.jobs = cli_jobs_config["jobs"]
        self.output_filename = cli_jobs_config["output_filename"]

        if cli_jobs_config["output_store"][
                "type"] not in self.SUPPORTED_OUTPUT_STORES:
            raise UnsupportedError("Output store {0} not supported !".format(
                cli_jobs_config["output_store"]["type"]))

        if cli_jobs_config["job_status_db_store"][
                "type"] not in self.SUPPORTED_JOB_STATUS_DB_STORES:
            raise UnsupportedError(
                "Job Status DB store {0} not supported !".format(
                    cli_jobs_config["job_status_db_store"]["type"]))

        if re.match('^amazon.*', cli_jobs_config["output_store"]["type"]) or \
                re.match('^amazon.*', cli_jobs_config["job_status_db_store"]["type"]):
            self.aws_credentials = get_aws_credentials()

        self.output_store_info = cli_jobs_config["output_store"]

        if self.output_store_info["type"] == "amazon_s3":
            trial = 0
            s3helper = S3Helper()
            while trial < 5:
                s3_uuid = uuid.uuid4()
                self.output_store_info['bucket_name'] = "{0}-{1}".format(
                    self.output_store_info['bucket_name_prefix'], s3_uuid)
                if s3helper.make_s3_bucket(
                        self.output_store_info['bucket_name']):
                    logging.info('bucket name = {0}'.format(
                        self.output_store_info['bucket_name']))
                    break
                else:
                    self.output_store_info['bucket_name'] = None
                trial += 1

            if self.output_store_info['bucket_name'] == None:
                logging.error("Could not create S3 bucket!")
                sys.exit(0)
        else:
            raise NotImplementedError("Only Amazon S3 is supported!")

        self.job_status_db_store_info = cli_jobs_config["job_status_db_store"]

        if self.job_status_db_store_info["type"] == "amazon_dynamodb":
            self.database = DynamoDB(
                secret_key=self.aws_credentials["AWS_SECRET_ACCESS_KEY"],
                access_key=self.aws_credentials["AWS_ACCESS_KEY_ID"])
        else:
            raise NotImplementedError("Only Amazon Dynamo DB is supported!")
Example #3
    def get_database(self, job):
        '''Get a database backend for this job's resource type.'''
        logging.debug("get_database() job.resource = {0}".format(job.resource))
        # Use cached handles if we can
        if job.resource in self.database_connections:
            logging.debug("get_database() returning cached connection to {0}".format(job.resource))
            return self.database_connections[job.resource]
        # Make a new connection
        if job.resource == self.EC2_CLOUD_RESOURCE:
            params = self.get_credentials()
            #os.environ["AWS_ACCESS_KEY_ID"] = params['AWS_ACCESS_KEY_ID']
            #os.environ["AWS_SECRET_ACCESS_KEY"] = params['AWS_SECRET_ACCESS_KEY']
            db = DynamoDB(access_key=params['EC2_ACCESS_KEY'],
                          secret_key=params['EC2_SECRET_KEY'])
            self.database_connections[job.resource] = db
            logging.debug("get_database() returning new connection to {0}".format(job.resource))
            return db
        elif job.resource == self.FLEX_CLOUD_RESOURCE:
            params = self.get_credentials()
            db = FlexDB(password=params['flex_db_password'],
                        ip=params['queue_head_ip'])
            self.database_connections[job.resource] = db
            logging.debug("get_database() returning new connection to {0}".format(job.resource))
            return db
        else:
            raise Exception("Unknown job.resource = '{0}'".format(job.resource))
Example #4
    def submit_cloud_task(self, params, agent_type=None, cost_replay=False, instance_type=None):

        logging.debug('submit_cloud_task() params =\n{}\n\n'.format(pprint.pformat(params)))

        if agent_type is None:
            if self.active_agent_type is not None:
                agent_type = self.active_agent_type
            else:
                self.isOneOrMoreComputeNodesRunning()
                if self.active_agent_type is not None:
                    agent_type = self.active_agent_type
                else:
                    raise Exception("No Cloud resources found")

        if agent_type not in JobConfig.SUPPORTED_AGENT_TYPES:
            raise Exception('Unsupported agent type {0}'.format(agent_type))

        credentials = self.get_credentials()

        if agent_type == AgentTypes.EC2:
            params['resource'] = self.EC2_CLOUD_RESOURCE
            params['bucketname'] = self.user_data.S3_bucket_name
            if 'EC2_ACCESS_KEY' not in credentials or credentials['EC2_ACCESS_KEY'] == '':
                raise Exception('EC2 Access Key is not valid!')
            if 'EC2_SECRET_KEY' not in credentials or credentials['EC2_SECRET_KEY'] == '':
                raise Exception('EC2 Secret Key is not valid!')
            ec2_access_key = credentials['EC2_ACCESS_KEY']
            ec2_secret_key = credentials['EC2_SECRET_KEY']
            logging.debug('ec2_access_key = {0}, ec2_secret_key = {1}'.format(ec2_access_key, ec2_secret_key))
            database = DynamoDB(ec2_access_key, ec2_secret_key)
            storage_agent = S3StorageAgent(bucket_name=self.user_data.S3_bucket_name,
                                           ec2_secret_key=ec2_secret_key,
                                           ec2_access_key=ec2_access_key)

        elif agent_type == AgentTypes.FLEX:
            params['resource'] = self.FLEX_CLOUD_RESOURCE
            params['bucketname'] = ''
            # if flex_credentials == None or 'flex_queue_head' not in flex_credentials \
            #         or 'flex_db_password' not in flex_credentials:
            #     raise Exception('Please pass valid Flex credentials!')
            database = FlexDB(ip=credentials['queue_head_ip'],
                              password=credentials['flex_db_password'])
            flex_queue_head_machine = self.user_data.get_flex_queue_head_machine()
            storage_agent = FlexStorageAgent(
                queue_head_ip=flex_queue_head_machine['ip'],
                queue_head_username=flex_queue_head_machine['username'],
                queue_head_keyfile=os.path.join(
                    '/home',
                    flex_queue_head_machine['username'],
                    FlexConfig.QUEUE_HEAD_KEY_DIR,
                    os.path.basename(flex_queue_head_machine['keyfile'])))
                # queue_head_keyfile=flex_queue_head_machine['keyfile'])
            ec2_access_key = None
            ec2_secret_key = None

        # If no explicit task id was passed in, generate one for the first run.
        if 'rerun_uuid' in params and params['rerun_uuid'] is not None:
            task_id = params['rerun_uuid']
        elif cost_replay:
            task_id = params['cost_analysis_uuid']
        else:
            task_id = str(uuid.uuid4())

        logging.debug('submit_cloud_task: task_id = {}'.format(task_id))

        result = helper.execute_cloud_task(params=params, agent_type=agent_type,
                                           ec2_access_key=ec2_access_key,
                                           ec2_secret_key=ec2_secret_key,
                                           task_id=task_id, instance_type=instance_type,
                                           cost_replay=cost_replay,
                                           database=database,
                                           storage_agent=storage_agent)

        return result
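A minimal call sketch, assuming a backend service instance whose stored credentials already contain EC2_ACCESS_KEY and EC2_SECRET_KEY; backendservice is a stand-in name, and the result keys mirror those read by the CLI code in Example #5 below.

# Hypothetical caller; 'backendservice' is a stand-in for the backend service object.
params = {'document': open('model.xml').read()}   # model XML, as prepared by the CLI's __submit_job
result = backendservice.submit_cloud_task(params=params, agent_type=AgentTypes.EC2)
if result['success']:
    print('submitted task {0}'.format(result['db_id']))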
Example #5
File: cli.py Project: StochSS/stochss
class BackendCli:
    SUPPORTED_OUTPUT_STORES = ["amazon_s3"]
    SUPPORTED_JOB_STATUS_DB_STORES = ["amazon_dynamodb"]
    AGENT_TYPE = AgentTypes.FLEX_CLI
    INSTANCE_TYPE = 'flexvm'

    def __init__(self, cli_jobs_config):
        self.machines = cli_jobs_config["machines"]
        self.jobs = cli_jobs_config["jobs"]
        self.output_filename = cli_jobs_config["output_filename"]

        if cli_jobs_config["output_store"][
                "type"] not in self.SUPPORTED_OUTPUT_STORES:
            raise UnsupportedError("Output store {0} not supported !".format(
                cli_jobs_config["output_store"]["type"]))

        if cli_jobs_config["job_status_db_store"][
                "type"] not in self.SUPPORTED_JOB_STATUS_DB_STORES:
            raise UnsupportedError(
                "Job Status DB store {0} not supported !".format(
                    cli_jobs_config["job_status_db_store"]["type"]))

        if re.match('^amazon.*', cli_jobs_config["output_store"]["type"]) or \
                re.match('^amazon.*', cli_jobs_config["job_status_db_store"]["type"]):
            self.aws_credentials = get_aws_credentials()

        self.output_store_info = cli_jobs_config["output_store"]

        if self.output_store_info["type"] == "amazon_s3":
            trial = 0
            s3helper = S3Helper()
            while trial < 5:
                s3_uuid = uuid.uuid4()
                self.output_store_info['bucket_name'] = "{0}-{1}".format(
                    self.output_store_info['bucket_name_prefix'], s3_uuid)
                if s3helper.make_s3_bucket(
                        self.output_store_info['bucket_name']):
                    logging.info('bucket name = {0}'.format(
                        self.output_store_info['bucket_name']))
                    break
                else:
                    self.output_store_info['bucket_name'] = None
                trial += 1

            if self.output_store_info['bucket_name'] == None:
                logging.error("Could not create S3 bucket!")
                sys.exit(0)
        else:
            raise NotImplementedError("Only Amazon S3 is supported!")

        self.job_status_db_store_info = cli_jobs_config["job_status_db_store"]

        if self.job_status_db_store_info["type"] == "amazon_dynamodb":
            self.database = DynamoDB(
                secret_key=self.aws_credentials["AWS_SECRET_ACCESS_KEY"],
                access_key=self.aws_credentials["AWS_ACCESS_KEY_ID"])
        else:
            raise NotImplementedError("Only Amazon Dynamo DB is supported!")

    def __wait_for_jobs(self, task_id_job_map, task_ids):
        finished_tasks = []
        while True:
            if len(task_ids) == 0:
                break

            time.sleep(5)
            tasks = self.database.describetask(
                taskids=task_ids,
                tablename=self.job_status_db_store_info['table_name'])
            for task_id in tasks.keys():
                task = tasks[task_id]
                job_index = task_id_job_map[task_id]

                if task['status'] == 'finished':
                    logging.info("Job #{0} finished.".format(job_index))
                    logging.info("Status = \n{0}".format(pprint.pformat(task)))
                    finished_tasks.append({
                        'job_index': job_index,
                        'job_status': task
                    })
                    task_ids.remove(task_id)

        return finished_tasks

    def __submit_job(self, job_index, job):
        logging.info("Preparing for  Job #{0}...".format(job_index))
        with open(job['model_file_path']) as xml_file:
            model_xml_doc = xml_file.read()

        params = job["params"]
        params['document'] = model_xml_doc
        params['bucketname'] = self.output_store_info['bucket_name']

        task_id = str(uuid.uuid4())

        result = helper.execute_cloud_task(
            params=params,
            agent_type=self.AGENT_TYPE,
            ec2_access_key=self.aws_credentials["AWS_ACCESS_KEY_ID"],
            ec2_secret_key=self.aws_credentials["AWS_SECRET_ACCESS_KEY"],
            task_id=task_id,
            instance_type=self.INSTANCE_TYPE,
            cost_replay=False,
            database=self.database)
        if result["success"]:
            logging.info("Job #{0} successfully submitted to backend.".format(
                job_index))
        else:
            logging.info(
                "Failed to submit Job #{0} to backend.".format(job_index))
        logging.debug("result = {0}".format(pprint.pformat(result)))
        return result

    def __launch_jobs(self):
        task_ids = []
        task_id_job_map = {}
        for job_index, job in enumerate(self.jobs):
            result = self.__submit_job(job_index, job)

            task_id = result["db_id"]
            task_ids.append(task_id)
            task_id_job_map[task_id] = job_index

        return task_id_job_map, task_ids

    def run(self):
        self.database.createtable(self.job_status_db_store_info['table_name'])

        if self.prepare_machines():
            task_id_job_map, task_ids = self.__launch_jobs()
            finished_tasks = self.__wait_for_jobs(task_id_job_map, task_ids)

            with open(self.output_filename, 'w') as f:
                f.write(pprint.pformat(finished_tasks))

            logging.info('All jobs finished!')

        else:
            logging.error("Failed to prepare machines!")

    def __get_preparing_commands(self):
        # These are commutative commands (their relative order does not matter).

        commands = []
        commands.append('#!/bin/bash')

        commands.append(
            'echo export AWS_ACCESS_KEY_ID={0} >> ~/.bashrc'.format(
                str(self.aws_credentials['AWS_ACCESS_KEY_ID'])))
        commands.append(
            'echo export AWS_SECRET_ACCESS_KEY={0} >> ~/.bashrc'.format(
                self.aws_credentials['AWS_SECRET_ACCESS_KEY']))

        commands.append('echo export STOCHKIT_HOME={0} >> ~/.bashrc'.format(
            "/home/ubuntu/stochss/StochKit/"))
        commands.append('echo export STOCHKIT_ODE={0} >> ~/.bashrc'.format(
            "/home/ubuntu/stochss/ode/"))

        commands.append('echo export C_FORCE_ROOT=1 >> ~/.bashrc')
        commands.append('echo export R_LIBS={0} >> ~/.bashrc'.format(
            "/home/ubuntu/stochss/stochoptim/library"))

        commands.append('source ~/.bashrc')

        return commands

    def __configure_celery(self, queue_head):
        commands = []
        commands.append('source ~/.bashrc')
        commands.append('export AWS_ACCESS_KEY_ID={0}'.format(
            str(self.aws_credentials['AWS_ACCESS_KEY_ID'])))
        commands.append('export AWS_SECRET_ACCESS_KEY={0}'.format(
            str(self.aws_credentials['AWS_SECRET_ACCESS_KEY'])))

        for machine in self.machines:
            logging.info(
                "Starting celery on {ip}".format(ip=machine["public_ip"]))
            success = helper.start_celery_on_vm(
                instance_type=self.INSTANCE_TYPE,
                ip=machine["public_ip"],
                username=machine["username"],
                key_file=machine["keyfile"],
                prepend_commands=commands,
                agent_type=self.AGENT_TYPE)
            if success != 0:
                raise Exception("Failure to start celery on {0}".format(
                    machine["public_ip"]))

        # get all instance types and configure celeryconfig.py locally
        instance_types = [self.INSTANCE_TYPE]
        helper.config_celery_queues(agent_type=self.AGENT_TYPE,
                                    instance_types=instance_types)

    def __get_queue_head_machine_info(self):
        queue_head = None
        for machine in self.machines:
            if machine["type"] == "queue-head":
                if queue_head is not None:
                    raise InvalidConfigurationError(
                        "There can be only one queue head!")
                else:
                    queue_head = machine
            elif machine["type"] == "worker":
                pass
            else:
                raise InvalidConfigurationError(
                    "Invalid machine type : {0} !".format(machine["type"]))

        if queue_head is None:
            raise InvalidConfigurationError("Need at least one queue head!")

        return queue_head

    def __run_prepare_script_on_vm(self, machine):
        run_script_commands = [
            "chmod +x ~/setup_script.sh", "bash ~/setup_script.sh"
        ]

        run_script_command = ";".join(run_script_commands)
        remote_command = "ssh -o 'UserKnownHostsFile=/dev/null' -o 'StrictHostKeyChecking=no' -i {key_file} {user}@{ip} \"{cmd}\"".format(
            key_file=machine["keyfile"],
            user=machine["username"],
            ip=machine["public_ip"],
            cmd=run_script_command)

        logging.info("Remote command: {0}".format(run_script_command))

        success = os.system(remote_command)
        return success

    def __copy_prepare_script_to_vm(self, machine):
        script_commands = self.__get_preparing_commands()

        if machine['type'] == 'queue-head':
            rabbitmq_commands = []
            rabbitmq_commands.append('sudo rabbitmqctl add_user stochss ucsb')
            rabbitmq_commands.append(
                'sudo rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"')

            logging.info("Adding RabbitMQ commands for {0}...".format(
                machine['public_ip']))
            script_command_string = '\n'.join(script_commands +
                                              rabbitmq_commands)

        else:
            script_command_string = '\n'.join(script_commands)

        logging.debug("command = \n{0}".format(script_command_string))

        bash_script_filename = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "setup_script.sh"))
        with open(bash_script_filename, 'w') as script_file:
            script_file.write(script_command_string)

        logging.debug("script =\n\n{0}\n\n".format(script_command_string))
        remote_copy_command = \
            "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i {key_file} {bash_script_filename} {user}@{ip}:~/setup_script.sh".format(
                key_file=machine["keyfile"],
                user=machine["username"],
                ip=machine["public_ip"],
                bash_script_filename=bash_script_filename)

        logging.info("Remote copy command: {0}".format(remote_copy_command))

        success = os.system(remote_copy_command)
        return success

    def prepare_machines(self):
        logging.info(
            "prepare_machines: inside method with machine_info : \n%s",
            pprint.pformat(self.machines))

        queue_head = self.__get_queue_head_machine_info()

        # push queue head to be the first node to be prepared
        self.machines.remove(queue_head)
        self.machines.insert(0, queue_head)

        logging.info("queue head = \n{0}".format(pprint.pformat(queue_head)))

        try:
            logging.info("Preparing environment on remote machines...")
            for machine in self.machines:
                logging.info(
                    "For machine {ip}".format(ip=machine['public_ip']))

                success = self.__copy_prepare_script_to_vm(machine)

                if success != 0:
                    raise Exception(
                        "Remote copy command failed on {ip}!".format(
                            ip=machine['public_ip']))

                success = self.__run_prepare_script_on_vm(machine)

                if success != 0:
                    raise Exception("Remote command failed on {ip}!".format(
                        ip=machine['public_ip']))

            helper.update_celery_config_with_queue_head_ip(
                queue_head_ip=queue_head["public_ip"],
                agent_type=self.AGENT_TYPE)
            logging.info(
                "Updated celery config with queue head ip: {0}".format(
                    queue_head["public_ip"]))

            self.__configure_celery(queue_head)

            return True

        except Exception as e:
            traceback.print_exc()
            logging.error(
                "prepare_machines : exiting method with error : {0}".format(
                    str(e)))
            return False
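Putting the class together, a minimal driver might look like the sketch below. Loading the config from a JSON file is an assumption for illustration; BackendCli itself only requires a dict shaped like the one shown after Example #2.

# Hypothetical driver script; the JSON config file name is a placeholder.
import json
import logging

logging.basicConfig(level=logging.INFO)

with open('cli_jobs_config.json') as f:
    cli_jobs_config = json.load(f)

cli = BackendCli(cli_jobs_config)
cli.run()   # creates the job-status table, prepares the machines, submits jobs and waits for them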