Beispiel #1
0
    def create(filename, partitions, partitioner,
               fail_silently=False, tar_filename=None):
        """
        Split a node file into sorted partitions and archive them.

        Every line of the input is deserialized with
        TabSeparatedNodeSerializer and assigned to a partition by calling
        partitioner on the node's address.  Each partition is written,
        sorted by address, to a scratch file named 'part-NNNNN'.  The
        scratch files are packed into a gzip-compressed tar archive and
        then removed together with the scratch directory that held them.

        filename: input filename
        partitions: the number of partitions to create
        partitioner: a function from key to partition number
        fail_silently: when set, will swallow an IOError encountered when
                       opening the input file and return None instead
        tar_filename: The filename used to create the partition archive;
                      a fresh temporary file is used when not given

        Returns the archive filename, or None when the input file could
        not be opened and fail_silently is set.
        """
        def make_filename(partition):
            return 'part-%05d' % partition

        # One independent bucket per partition
        nodes = [[] for _ in range(partitions)]

        # Open the file, swallow if flag set
        try:
            stream = open(filename)
        except IOError:
            if not fail_silently:
                raise
            return None

        # For each line in the file, save it to its proper partition
        with stream:
            for line in stream:
                node = TabSeparatedNodeSerializer.deserialize(line)
                nodes[partitioner(node.address)].append(node)

        # Create a scratch directory for the uncompressed partitions
        directory = tempfile.mkdtemp()
        scratch_paths = []
        try:
            # For each partition, write its dataset
            for partition, partition_nodes in enumerate(nodes):
                path = os.path.join(directory, make_filename(partition))
                # Recorded before opening so a partially written file is
                # still cleaned up if the write fails
                scratch_paths.append(path)
                with open(path, "w") as output:
                    for node in sorted(partition_nodes,
                                       key=lambda n: n.address):
                        output.write(
                            TabSeparatedNodeSerializer.serialize(node) + '\n')

            # mkstemp creates the file atomically, so unlike mktemp the
            # name cannot be claimed by someone else before we open it
            if not tar_filename:
                handle, tar_filename = tempfile.mkstemp(prefix='partition',
                                                        suffix='.tar.gz')
                os.close(handle)

            # Compress the resulting partitions
            tar = tarfile.open(tar_filename, "w:gz")
            try:
                # Explicit loop: map() is lazy on Python 3 and would add
                # nothing to the archive
                for partition, path in enumerate(scratch_paths):
                    tar.add(path, make_filename(partition))
            finally:
                tar.close()
        finally:
            # Remove the scratch files and the directory that held them,
            # whether or not the archive was produced
            for path in scratch_paths:
                if os.path.exists(path):
                    os.remove(path)
            os.rmdir(directory)

        return tar_filename
    def partition_filename(self):
        """
        Gets the partition filename associated with this reducer.

        The filename is resolved once through
        PartitionUtilities.get_partition_filename and then remembered on
        the instance as _partition_filename; later calls return the
        remembered value.
        """
        # Fast path: an earlier call already resolved the filename
        if self.__dict__.get("_partition_filename") is not None:
            return self._partition_filename

        def key_to_partition(key):
            return self.partition(key)

        def line_to_address(line):
            return TabSeparatedNodeSerializer.deserialize(line).address

        self._partition_filename = PartitionUtilities.get_partition_filename(
            self.current_partition, self.options.partitions,
            key_to_partition, line_to_address)
        return self._partition_filename
Beispiel #3
0
    def partition_filename(self):
        """
        Gets the partition filename associated with this reducer.

        On first use the lookup is delegated to
        PartitionUtilities.get_partition_filename and the result is stored
        on the instance as _partition_filename, so subsequent calls reuse it.
        """
        unresolved = ("_partition_filename" not in vars(self)
                      or self._partition_filename is None)

        if unresolved:
            resolve = PartitionUtilities.get_partition_filename

            def partition_for(key):
                return self.partition(key)

            def address_for(line):
                node = TabSeparatedNodeSerializer.deserialize(line)
                return node.address

            self._partition_filename = resolve(
                self.current_partition,
                self.options.partitions,
                partition_for,
                address_for)

        return self._partition_filename
Beispiel #4
0
    def execute(filename, network, nodes_to_infect, hit_list_size):
        """
        Create a new network with the given filename.

        The hosts produced by CreateVulnerableHosts.execute are serialized
        one per line into a temporary file, a CreateHitLists job is run
        over that file, and the job's streamed output is written to
        filename.  The temporary file is removed afterwards, whether or
        not the job succeeded.

        filename: output filename
        network: the network address space under consideration
        nodes_to_infect: the number of nodes to mark initially-infected
        hit_list_size: the initial hit-list size for infected nodes
        """
        # Created outside the try block so the cleanup below never runs
        # against a temporary file that was not actually created
        hosts_file = tempfile.NamedTemporaryFile('w', delete=False)
        try:
            # Create our list of vulnerable nodes
            with hosts_file:
                for host in CreateVulnerableHosts.execute(network,
                                                          nodes_to_infect):
                    hosts_file.write(
                        TabSeparatedNodeSerializer.serialize(host) + '\n')

            # Then run a map/reduce job that marks some nodes as infected
            job = CreateHitLists(
                args=['--size', str(hit_list_size), hosts_file.name])
            with job.make_runner() as runner:
                runner.run()
                with open(filename, 'w') as output:
                    # writelines consumes the stream eagerly; map() is
                    # lazy on Python 3 and would write nothing
                    output.writelines(runner.stream_output())
        finally:
            os.remove(hosts_file.name)
Beispiel #5
0
    def execute(filename, network, nodes_to_infect, hit_list_size):
        """
        Create a new network with the given filename.

        The hosts produced by CreateVulnerableHosts.execute are serialized
        one per line into a temporary file, a CreateHitLists job is run
        over that file, and the job's streamed output is written to
        filename.  The temporary file is removed afterwards, whether or
        not the job succeeded.

        filename: output filename
        network: the network address space under consideration
        nodes_to_infect: the number of nodes to mark initially-infected
        hit_list_size: the initial hit-list size for infected nodes
        """
        # Created outside the try block so the cleanup below never runs
        # against a temporary file that was not actually created
        hosts_file = tempfile.NamedTemporaryFile('w', delete=False)
        try:
            # Create our list of vulnerable nodes
            with hosts_file:
                for host in CreateVulnerableHosts.execute(
                        network, nodes_to_infect):
                    hosts_file.write(
                        TabSeparatedNodeSerializer.serialize(host) + '\n')

            # Then run a map/reduce job that marks some nodes as infected
            job = CreateHitLists(
                args=['--size', str(hit_list_size), hosts_file.name])
            with job.make_runner() as runner:
                runner.run()
                with open(filename, 'w') as output:
                    # writelines consumes the stream eagerly; map() is
                    # lazy on Python 3 and would write nothing
                    output.writelines(runner.stream_output())
        finally:
            os.remove(hosts_file.name)