def get_dict_value(self, dict_name, key):
    """Look up ``key`` in a pickled dictionary stored on one of the workers.

    The worker is selected as ``hash(key) % self.size``.  The file
    ``dict_name`` is copied from ``root@<worker public DNS>:<dict_name>`` to
    a local file of the same name, read with ``mr_lib.read_pickle``, and the
    local copy is deleted again before returning.

    Parameters:
        dict_name: name of the pickled dictionary file (used both as the
            remote path and as the temporary local path).
        key: key to look up; it also determines which worker is queried.

    Returns:
        The value stored under ``key``.

    Raises:
        KeyError: if ``key`` is not present in the fetched dictionary.
    """
    worker = self.workers.instances[hash(key) % self.size]
    remote_path = "root@" + worker.public_dns_name + ":" + dict_name
    mr_lib.scp(remote_path, dict_name)
    try:
        # NOTE(review): read_pickle presumably unpickles the fetched file;
        # that is only safe as long as the workers are trusted -- confirm.
        value = mr_lib.read_pickle(dict_name)[key]
    finally:
        # Always clean up the local copy, even when reading fails or the key
        # is missing.  The existence check keeps a failed copy from masking
        # the original error with an OSError raised by os.remove.
        if os.path.exists(dict_name):
            os.remove(dict_name)
    return value
def wait_until_task_done(self, flag_name):
    """Poll the workers until all of them report ``flag_name`` as set.

    Each round starts with a two second pause.  Every worker's ``flags.mr``
    is then copied to the local ``flags.mr`` and the entry ``flag_name`` of
    ``mr_lib.get_flags()`` is checked; a status line is printed per worker.
    The method returns after the first round in which no worker was found
    unfinished.

    Parameters:
        flag_name: key looked up in the flags returned by
            ``mr_lib.get_flags()``.
    """
    while True:
        time.sleep(2)
        all_finished = True
        for worker in self.workers.instances:
            host = worker.public_dns_name
            mr_lib.scp("root@" + host + ":flags.mr", "flags.mr")
            if mr_lib.get_flags()[flag_name]:
                print(host + " done.")
            else:
                print(host + " not done.")
                # Keep going so that a status line is printed for every
                # worker in this round.
                all_finished = False
        if all_finished:
            break
def __init__(self, n):
    """Start ``n`` EC2 worker instances and prepare them for jobs.

    Steps, in order:
      1. create the "mr_keypair" key pair and save it locally (mode 600);
      2. launch ``n`` instances and wait until each one is running;
      3. send every worker the list of all private DNS names
         ("cluster_description.mr") and its own index and private DNS name
         ("my_details.mr");
      4. copy the map/reduce scripts and ``mr_lib.py`` to every worker;
      5. copy the key pair and the local ssh config to every worker.

    Parameters:
        n: number of worker instances to launch; stored as ``self.size``.
    """
    # set the size attribute
    self.size = n

    # set up an EC2 connection, and grab an Ubuntu 8.04 image
    # (http://alestic.com)
    connection = boto.connect_ec2()
    image = connection.get_image("ami-1c5db975")

    # create a keypair to use with the image, save to disk, and set
    # permissions so ssh will be happy
    keypair_path = mr_lib.mr_keypair_filename()
    self.keypair = connection.create_key_pair("mr_keypair")
    mr_lib.write_file(self.keypair.material, keypair_path)
    os.system("chmod 600 " + keypair_path)

    # tell EC2 to start the instances running, set the self.workers
    # attribute to the corresponding reservation, and wait for all the
    # workers to start running
    self.workers = image.run(n, n, "mr_keypair")
    for instance in self.workers.instances:
        instance.update()
        while instance.state != u"running":
            # Sleep before polling again, so no time is wasted sleeping
            # after the instance has already been seen running.
            time.sleep(5)
            instance.update()

    # Delay before we start distributing files, so all instances are
    # running properly.
    time.sleep(10)

    # distribute a list of all the private ip addresses.  The comprehension
    # must read its own loop variable: reading a name left over from the
    # loop above would repeat the last worker's address n times.
    private_ip_list = [instance.private_dns_name
                       for instance in self.workers.instances]
    mr_lib.write_pickle(private_ip_list, "cluster_description.mr")
    self.distribute_public("cluster_description.mr")

    # tell each worker its own index and private address
    for j, instance in enumerate(self.workers.instances):
        mr_lib.write_pickle([j, instance.private_dns_name], "my_details.mr")
        self.send("my_details.mr", j)

    # distribute the files necessary to run map and mapreduce jobs
    for script in ("map.py", "map_combine.py", "reduce.py", "mr_lib.py"):
        self.distribute_public(script)

    # Distribute the ssh keypairs and config file.  expanduser also works
    # when the HOME environment variable is not set.
    ssh_config_path = os.path.expanduser("~/.ssh/config")
    for instance in self.workers.instances:
        login = "root@" + instance.public_dns_name
        mr_lib.scp(keypair_path, login + ":.ssh/id_rsa-mr_keypair")
        mr_lib.ssh(login, "chmod 600 /root/.ssh/id_rsa-mr_keypair")
        mr_lib.scp(ssh_config_path, login + ":.ssh/config")
def distribute_public(self, filename):
    """Copy the local file ``filename`` to every worker.

    The file is sent with ``mr_lib.scp`` to
    ``root@<worker public DNS>:<filename>`` for each instance in
    ``self.workers.instances``, one worker after another.

    Parameters:
        filename: local path of the file; the same string is used as the
            remote path.
    """
    for worker in self.workers.instances:
        destination = "".join(("root@", worker.public_dns_name, ":", filename))
        mr_lib.scp(filename, destination)
def send(self, filename, worker_number):
    """Copy the local file ``filename`` to a single worker.

    Parameters:
        filename: local path of the file; the same string is used as the
            remote path.
        worker_number: index into ``self.workers.instances`` selecting the
            receiving worker.
    """
    worker = self.workers.instances[worker_number]
    destination = "".join(("root@", worker.public_dns_name, ":", filename))
    mr_lib.scp(filename, destination)