def __init__(self, n):
    """Start ``n`` EC2 worker instances and copy the job files to them.

    The steps, in order:

    1. Create the "mr_keypair" EC2 key pair and save its material locally.
    2. Launch ``n`` instances of the image and wait until every one of
       them reports the "running" state.
    3. Write the list of all workers' private DNS names to
       "cluster_description.mr" and hand it to ``self.distribute_public``.
    4. Write each worker's own ``[index, private_dns_name]`` pair to
       "my_details.mr" and pass it to ``self.send`` for that index.
    5. Distribute the map/reduce scripts, the key pair and the local ssh
       config file.

    Args:
        n: Number of worker instances to launch; stored as ``self.size``.
    """
    # set the size attribute
    self.size = n

    # set up an EC2 connection, and grab an Ubuntu 8.04 image
    # (http://alestic.com)
    connection = boto.connect_ec2()
    image = connection.get_image("ami-1c5db975")

    # create a keypair to use with the image, save to disk, and set
    # permissions so ssh will be happy
    self.keypair = connection.create_key_pair("mr_keypair")
    mr_lib.write_file(self.keypair.material, mr_lib.mr_keypair_filename())
    os.system("chmod 600 " + mr_lib.mr_keypair_filename())

    # tell EC2 to start the instances running, set the self.workers
    # attribute to the corresponding reservation, and wait for all the
    # workers to start running
    self.workers = image.run(n, n, "mr_keypair")
    for instance in self.workers.instances:
        instance.update()
        while instance.state != u"running":
            instance.update()
            time.sleep(5)

    # Delay before we start distributing files, so all instances are
    # running properly.
    time.sleep(10)

    # distribute a list of all the private DNS names.  The loop variable
    # must be the name used in the expression: the previous spelling
    # iterated with a different name and therefore repeated the last
    # worker's address n times.
    private_ip_list = [
        instance.private_dns_name for instance in self.workers.instances
    ]
    mr_lib.write_pickle(private_ip_list, "cluster_description.mr")
    self.distribute_public("cluster_description.mr")

    # send every worker its own index and private DNS name.
    # NOTE(review): assumes the reservation holds exactly n instances
    # (image.run was called with n as both counts) -- confirm.
    for j, instance in enumerate(self.workers.instances):
        mr_lib.write_pickle([j, instance.private_dns_name], "my_details.mr")
        self.send("my_details.mr", j)

    # distribute the files necessary to run map and mapreduce jobs
    self.distribute_public("map.py")
    self.distribute_public("map_combine.py")
    self.distribute_public("reduce.py")
    self.distribute_public("mr_lib.py")

    # Distribute the ssh keypairs and config file.  expanduser() resolves
    # the home directory even when $HOME is unset, where concatenating
    # os.environ.get("HOME") would fail on None.
    ssh_config = os.path.expanduser("~/.ssh/config")
    for instance in self.workers.instances:
        remote = "root@" + instance.public_dns_name
        mr_lib.scp(mr_lib.mr_keypair_filename(),
                   remote + ":.ssh/id_rsa-mr_keypair")
        mr_lib.ssh(remote, "chmod 600 /root/.ssh/id_rsa-mr_keypair")
        mr_lib.scp(ssh_config, remote + ":.ssh/config")
def shutdown(self):
    """Stop the workers and discard the key pair.

    Calls ``stop_all()`` on the reservation held in ``self.workers``,
    deletes the key pair held in ``self.keypair``, and then removes the
    local key pair file with a shell ``rm``.
    """
    reservation = self.workers
    reservation.stop_all()

    key_pair = self.keypair
    key_pair.delete()

    # remove the local copy of the key material last
    remove_command = "rm " + mr_lib.mr_keypair_filename()
    os.system(remove_command)