Exemple #1
0
 def get_dict_value(self, dict_name, key):
     """Return the value stored under ``key`` in the distributed dict ``dict_name``.

     The worker that owns the key is chosen as ``hash(key) % self.size``.  The
     file named ``dict_name`` on that worker is copied into the local working
     directory with ``mr_lib.scp``, loaded with ``mr_lib.read_pickle``, and the
     local copy is deleted again before this method returns.

     Raises ``KeyError`` if ``key`` is not present in the fetched dictionary.
     """
     # NOTE(review): this relies on hash(key) matching whatever placed the key
     # on a worker -- presumably the same hash in the worker-side code; confirm.
     instance = self.workers.instances[hash(key) % self.size]
     instance_filename = "root@" + instance.public_dns_name + ":" + dict_name
     mr_lib.scp(instance_filename, dict_name)
     try:
         return mr_lib.read_pickle(dict_name)[key]
     finally:
         # Always clean up the temporary local copy, even when unpickling or
         # the key lookup fails; skip the removal if the copy never arrived
         # so the original error is not masked by an OSError.
         if os.path.exists(dict_name):
             os.remove(dict_name)
Exemple #2
0
 def wait_until_task_done(self, flag_name):
     """Block until every worker reports the flag ``flag_name`` as true.

     Each polling round starts with a 2 second sleep, then fetches ``flags.mr``
     from every worker into the local ``flags.mr`` and checks ``flag_name`` in
     the result of ``mr_lib.get_flags()`` (presumably read from that local
     file -- confirm in mr_lib).  A status line is printed per worker.  The
     method returns once a full round finds no worker still pending.
     """
     while True:
         time.sleep(2)
         pending = 0
         # Every worker is polled each round, even after one is found
         # unfinished, so the status output always covers the whole cluster.
         for worker in self.workers.instances:
             host = worker.public_dns_name
             mr_lib.scp("root@" + host + ":flags.mr", "flags.mr")
             if mr_lib.get_flags()[flag_name]:
                 print(host + " done.")
             else:
                 print(host + " not done.")
                 pending += 1
         if pending == 0:
             return
Exemple #3
0
    def __init__(self, n):
        """Start ``n`` EC2 worker instances and prepare them for map/reduce jobs.

        In order, this:

        1. creates the ``mr_keypair`` EC2 key pair and saves it locally;
        2. launches exactly ``n`` instances and waits until each is running;
        3. sends every worker the list of all private DNS names
           (``cluster_description.mr``) and its own index and private DNS
           name (``my_details.mr``);
        4. distributes the map/reduce scripts and ``mr_lib.py``;
        5. copies the key pair and the local ssh config to every worker.

        Sets ``self.size``, ``self.keypair`` and ``self.workers``.
        """
        # set the size attribute
        self.size = n

        # set up an EC2 connection, and grab an Ubuntu 8.04 image (http://alestic.com)
        connection = boto.connect_ec2()
        image = connection.get_image("ami-1c5db975")

        # create a keypair to use with the image, save to disk, and set permissions
        # so ssh will be happy
        self.keypair = connection.create_key_pair("mr_keypair")
        keypair_filename = mr_lib.mr_keypair_filename()
        mr_lib.write_file(self.keypair.material, keypair_filename)
        # Kept as a shell call: the key path may rely on shell expansion.
        os.system("chmod 600 " + keypair_filename)

        # tell EC2 to start the instances running, set the self.workers attribute to the
        # corresponding reservation, and wait for all the workers to start running
        self.workers = image.run(n, n, "mr_keypair")
        for instance in self.workers.instances:
            instance.update()
            while instance.state != u"running":
                # Sleep first, then refresh, so the loop condition always sees
                # the most recent state and exits without an extra delay.
                time.sleep(5)
                instance.update()

        # Delay before we start distributing files, so all instances are running properly.
        time.sleep(10)

        # distribute a list of all the private ip addresses
        # (the loop variable must be the one used in the expression; otherwise
        # every entry would be the last instance seen by the loop above)
        private_ip_list = [instance.private_dns_name
                           for instance in self.workers.instances]
        mr_lib.write_pickle(private_ip_list, "cluster_description.mr")
        self.distribute_public("cluster_description.mr")

        # tell each worker its own index and private DNS name
        for j, instance in enumerate(self.workers.instances):
            mr_lib.write_pickle([j, instance.private_dns_name], "my_details.mr")
            self.send("my_details.mr", j)

        # distribute the files necessary to run map and mapreduce jobs
        self.distribute_public("map.py")
        self.distribute_public("map_combine.py")
        self.distribute_public("reduce.py")
        self.distribute_public("mr_lib.py")

        # Distribute the ssh keypairs and config file
        # expanduser falls back to the password database when HOME is unset,
        # where concatenating os.environ.get("HOME") would raise TypeError.
        ssh_config = os.path.expanduser("~/.ssh/config")
        for instance in self.workers.instances:
            remote = "root@" + instance.public_dns_name
            mr_lib.scp(keypair_filename, remote + ":.ssh/id_rsa-mr_keypair")
            mr_lib.ssh(remote, "chmod 600 /root/.ssh/id_rsa-mr_keypair")
            mr_lib.scp(ssh_config, remote + ":.ssh/config")
Exemple #4
0
 def distribute_public(self, filename):
     """Copy the local file ``filename`` to every worker instance.

     The file is sent with ``mr_lib.scp`` to ``root@<public DNS name>`` under
     the same relative name, one worker after another.
     """
     for worker in self.workers.instances:
         mr_lib.scp(filename, "root@" + worker.public_dns_name + ":" + filename)
Exemple #5
0
 def send(self, filename, worker_number):
     """Copy the local file ``filename`` to a single worker.

     ``worker_number`` indexes into ``self.workers.instances``; the file is
     sent with ``mr_lib.scp`` to ``root@<public DNS name>`` under the same
     relative name.
     """
     host = self.workers.instances[worker_number].public_dns_name
     destination = "root@" + host + ":" + filename
     mr_lib.scp(filename, destination)