Example #1
0
 def __init__(self):
     """Record the source/destination directory names and make sure the
     destination directory exists, creating it through the filesystem
     helper when it is missing."""
     self.sourceDir = 'imagesbackup'
     self.destDir = 'imagesdir'
     self.filesystem = Filesystem()
     dest_present = self.filesystem.checkDirExists(self.destDir)
     if not dest_present:
         self.filesystem.makeDir(self.destDir)
     # Re-check after the optional makeDir call: the directory must exist now.
     assert self.filesystem.checkDirExists(self.destDir), (
         self.destDir + ' does not exist')
Example #2
0
    def get_filesystem(self):
        """ Build a filesystem object for this session.

        The object is constructed from a copy of this session's REST
        interface. Does not use a request.

        :return: Filesystem object linked to this session.
        """
        interface_copy = self.rest_interface.get_copy()
        return Filesystem(interface_copy)
Example #3
0
 def __init__(self, mountpoint, bootstrap, host_port):
     """Join the network through `bootstrap` and mount the filesystem.

     Steps, in order: create the local storage directory with a shell
     `mkdir -p`, open a listening socket on `host_port`, send a one-time
     'join' message to `bootstrap`, build a Filesystem from `bootstrap`,
     start a daemon thread running `listen` on the socket, register
     `shutdown(bootstrap)` to run at interpreter exit, and finally
     construct FUSE for `mountpoint` with foreground=True.
     """
     os.system("mkdir -p %s" % syscalls.LOCALSTORAGE_DIR)
     listener_sock = listening_socket(host_port)
     join_reply = one_time_message(bootstrap, method='join', port=host_port)
     fs = Filesystem(bootstrap)
     # Daemon thread: it must not keep the process alive on exit.
     listener = Thread(target=listen, args=(listener_sock, ))
     listener.daemon = True
     listener.start()
     atexit.register(shutdown, bootstrap)
     mounted = FUSE(fs, mountpoint, foreground=True)
Example #4
0
    def __init__(self, logger, gen, use_dfs=False, dfs_conf=None):
        """
        Set up a new WorkQueue
        @param logger a logger object
        @param gen a generator
        @param use_dfs boolean indicating whether to use a DFS or not
        @param dfs_conf a dictionary containing the necessary parameters for
                        the DFS Master initialization. It is only read when
                        use_dfs is true.
        """
        self.logger = logger
        self.generator = gen
        self.dead_queue = []
        self.last_tag = 0
        self.use_dfs = use_dfs

        # Nothing else to prepare when the DFS is not requested
        if not use_dfs:
            return

        if not DFS_AVAILABLE:
            raise Exception("You need to install fsdfs in order to use the"
                            " distributed mode. Otherwise just toggle it "
                            "off from the configuration file")

        self.datadir = dfs_conf['datadir']
        self.fs = Filesystem(dfs_conf)
Example #5
0
def main():
    """
    Walk through the basic Filesystem / _File operations, printing a message
    before each step: create directories, write a file, save it under two
    paths, delete both copies and finally delete /usr recursively.

    The messages use the parenthesised single-argument print form, which
    produces the same output under Python 2 and is also valid Python 3
    (the bare print statement is a SyntaxError there).
    """
    print('Import Filesystem and _File')
    # Imported here, after the first message, exactly as the demo intends
    from filesystem import Filesystem
    from myfile import _File

    print('Creating filesystem')
    fs = Filesystem()
    print('Creating directory /usr')
    fs.mkdir('/usr')
    print('Creating directory /usr/local/bin')
    fs.mkdir('/usr/local/bin')
    print('Creating file')
    _file = _File()
    print('Writing to file')
    _file.write(0, 'blah blah blah')
    print('Saving file to /usr/local/bin/file')
    fs.write(_file, '/usr/local/bin/file')
    print('Saving file to /file')
    fs.write(_file, '/file')
    print('Deleting /usr/local/bin/file')
    fs.delete('/usr/local/bin/file')
    print('Deleting /file')
    fs.delete('/file')
    print('Deleting /usr recursively')
    fs.delete_recursive('/usr')
    print('All done')
Example #6
0
    def __init__(self, fconf, master_id, worker_id):
        """
        Configure the worker and then serve requests until told to quit.
        Note that the constructor itself runs the message loop: it only
        returns after a MSG_QUIT message has been received.

        @param fconf path of the JSON configuration file
        @param master_id identifier of the master (converted with int())
        @param worker_id identifier of this worker (converted with int())
        @raise Exception if 'dfs-enabled' is set in the configuration while
                         DFS_AVAILABLE is false
        """
        super(Worker, self).__init__("Worker")

        finished = False
        self.comm = MPI.COMM_WORLD.Get_parent()

        # Here we also need to handle the configuration file somehow.
        # The file is read inside a context manager so that the handle is
        # closed right away instead of being leaked.
        with open(fconf) as conf_file:
            self.conf = conf = json.load(conf_file)
        self.use_dfs = use_dfs = conf['dfs-enabled']

        self.datadir = self.conf['datadir']
        self.input_prefix = self.conf['input-prefix']
        self.output_prefix = self.conf['output-prefix']

        if use_dfs and not DFS_AVAILABLE:
            raise Exception("You need to install fsdfs in order to use the" \
                            " distributed mode. Otherwise just toggle it "  \
                            "off from the configuration file")
        elif use_dfs:
            dconf = conf['dfs-conf']

            # Every worker gets its own port: start port + worker id
            host = '%s:%d' % (conf['dfs-host'],
                              conf['dfs-startport'] + int(worker_id))

            self.datadir = os.path.join(
                self.datadir,
                'master-{:06d}'.format(int(master_id)),
                'worker-{:06d}'.format(int(worker_id))
            )
            self.info("Creating directory structure in %s" % self.datadir)

            # Always start from an empty per-worker directory
            if os.path.exists(self.datadir):
                shutil.rmtree(self.datadir)

            os.makedirs(self.datadir)
            os.makedirs(os.path.join(self.datadir, self.input_prefix))
            os.makedirs(os.path.join(self.datadir, self.output_prefix))

            dconf['host'] = host
            dconf['datadir'] = self.datadir

            self.info('Starting DFS client on %s' % host)

            self.fs = Filesystem(dconf)
            self.fs.start()

        # Here we need somehow to override the default scheme in case of DFS
        conf['datadir'] = self.datadir

        self.master_id = int(master_id)
        self.worker_id = int(worker_id)

        self.mapper = self.extract_cls(conf['map-module'], 'Mapper')(conf)
        self.reducer = self.extract_cls(conf['reduce-module'], 'Reducer')(conf)

        # We provide a VFS abstraction
        self.mapper.setup(self)
        self.reducer.setup(self)

        while not finished:
            # Announce availability to rank 0 and wait for its answer
            self.comm.send(Message(MSG_AVAILABLE, 0, None), dest=0)
            msg = self.comm.recv()

            if msg.command == MSG_COMPUTE_MAP:
                info, result = self.mapper.execute(msg.result)

                msg = Message(MSG_FINISHED_MAP, msg.tag, result)
                msg.info = info

                # NOTE(review): presumably info is (bytes, seconds), which
                # would make this MiB/s -- confirm against Mapper.execute
                self.info("Map performance: %.2f" % \
                           (info[0] / (1024 ** 2 * info[1])))

                self.comm.send(msg, dest=0)

            elif msg.command == MSG_COMPUTE_REDUCE:
                info, result = self.reducer.execute(msg.result)

                msg = Message(MSG_FINISHED_REDUCE, msg.tag, result)
                msg.info = info

                self.info("Reduce performance: %.2f" % \
                          (info[0] / (1024 ** 2 * info[1])))

                self.comm.send(msg, dest=0)

            elif msg.command == MSG_SLEEP:
                time.sleep(msg.result)

            elif msg.command == MSG_QUIT:
                finished = True

        if self.use_dfs:
            self.info("Stopping DFS client")
            self.fs.stop()
            self.info("Stopped")
Example #7
0
class Worker(Logger):
    """
    Worker process driven over MPI. It repeatedly announces itself as
    available on the parent communicator, executes the map / reduce requests
    it receives and, when the DFS is enabled, moves files through a
    Filesystem client.
    """
    def __init__(self, fconf, master_id, worker_id):
        """
        Configure the worker and then serve requests until told to quit.
        Note that the constructor itself runs the message loop: it only
        returns after a MSG_QUIT message has been received.

        @param fconf path of the JSON configuration file
        @param master_id identifier of the master (converted with int())
        @param worker_id identifier of this worker (converted with int())
        @raise Exception if 'dfs-enabled' is set in the configuration while
                         DFS_AVAILABLE is false
        """
        super(Worker, self).__init__("Worker")

        finished = False
        self.comm = MPI.COMM_WORLD.Get_parent()

        # Here we also need to handle the configuration file somehow.
        # The file is read inside a context manager so that the handle is
        # closed right away instead of being leaked.
        with open(fconf) as conf_file:
            self.conf = conf = json.load(conf_file)
        self.use_dfs = use_dfs = conf['dfs-enabled']

        self.datadir = self.conf['datadir']
        self.input_prefix = self.conf['input-prefix']
        self.output_prefix = self.conf['output-prefix']

        if use_dfs and not DFS_AVAILABLE:
            raise Exception("You need to install fsdfs in order to use the" \
                            " distributed mode. Otherwise just toggle it "  \
                            "off from the configuration file")
        elif use_dfs:
            dconf = conf['dfs-conf']

            # Every worker gets its own port: start port + worker id
            host = '%s:%d' % (conf['dfs-host'],
                              conf['dfs-startport'] + int(worker_id))

            self.datadir = os.path.join(
                self.datadir,
                'master-{:06d}'.format(int(master_id)),
                'worker-{:06d}'.format(int(worker_id))
            )
            self.info("Creating directory structure in %s" % self.datadir)

            # Always start from an empty per-worker directory
            if os.path.exists(self.datadir):
                shutil.rmtree(self.datadir)

            os.makedirs(self.datadir)
            os.makedirs(os.path.join(self.datadir, self.input_prefix))
            os.makedirs(os.path.join(self.datadir, self.output_prefix))

            dconf['host'] = host
            dconf['datadir'] = self.datadir

            self.info('Starting DFS client on %s' % host)

            self.fs = Filesystem(dconf)
            self.fs.start()

        # Here we need somehow to override the default scheme in case of DFS
        conf['datadir'] = self.datadir

        self.master_id = int(master_id)
        self.worker_id = int(worker_id)

        self.mapper = self.extract_cls(conf['map-module'], 'Mapper')(conf)
        self.reducer = self.extract_cls(conf['reduce-module'], 'Reducer')(conf)

        # We provide a VFS abstraction
        self.mapper.setup(self)
        self.reducer.setup(self)

        while not finished:
            # Announce availability to rank 0 and wait for its answer
            self.comm.send(Message(MSG_AVAILABLE, 0, None), dest=0)
            msg = self.comm.recv()

            if msg.command == MSG_COMPUTE_MAP:
                info, result = self.mapper.execute(msg.result)

                msg = Message(MSG_FINISHED_MAP, msg.tag, result)
                msg.info = info

                # NOTE(review): presumably info is (bytes, seconds), which
                # would make this MiB/s -- confirm against Mapper.execute
                self.info("Map performance: %.2f" % \
                           (info[0] / (1024 ** 2 * info[1])))

                self.comm.send(msg, dest=0)

            elif msg.command == MSG_COMPUTE_REDUCE:
                info, result = self.reducer.execute(msg.result)

                msg = Message(MSG_FINISHED_REDUCE, msg.tag, result)
                msg.info = info

                self.info("Reduce performance: %.2f" % \
                          (info[0] / (1024 ** 2 * info[1])))

                self.comm.send(msg, dest=0)

            elif msg.command == MSG_SLEEP:
                time.sleep(msg.result)

            elif msg.command == MSG_QUIT:
                finished = True

        if self.use_dfs:
            self.info("Stopping DFS client")
            self.fs.stop()
            self.info("Stopped")

    def extract_cls(self, mname, fname):
        """
        Load a module and fetch one attribute from it
        @param mname the module name handed to load_module
        @param fname the attribute (class) name to look up in the module
        @return the attribute called fname of the loaded module
        """
        module = load_module(mname)
        return getattr(module, fname)

    def _download_with_retry(self, fname):
        """
        Keep calling self.fs.downloadFile(fname) until it returns a true
        value, waiting 2 seconds between attempts.
        @param fname the DFS name of the file to download
        """
        # Imported locally so the helper does not depend on how the module
        # header exposes the sleep function.
        import time

        while True:
            try:
                if self.fs.downloadFile(fname):
                    return
            except Exception:
                # Exception rather than a bare except: KeyboardInterrupt and
                # SystemExit must be able to break out of the retry loop.
                self.info("Failed to download %s. Retrying in 2 sec" % fname)

            # Also reached after a false return value, so that a failing
            # download never turns into a busy loop.
            time.sleep(2)

    def pull_remote_files(self, reduce_idx, file_ids):
        """
        Pull a set of files from the global DFS. Files already present in
        the local data directory are skipped. Does nothing when the DFS is
        disabled.
        @param reduce_idx the reducer ID
        @param file_ids an iterable object containing integers (they will be
                        casted to int())
        """
        if not self.use_dfs:
            return

        for fileid in file_ids:
            fname = "output-r{:06d}-p{:018d}".format(reduce_idx, int(fileid))
            fname = os.path.join(self.output_prefix, fname)

            full_path = os.path.join(self.datadir, fname)
            self.info("Checking %s" % full_path)

            if os.path.exists(full_path):
                self.info("Skipping %s. It is already present" % fname)
                continue

            self.info("Worker worker_id=%d is downloading file '%s'" % \
                      (self.worker_id, fname))

            self._download_with_retry(fname)

    def pull_remote_file(self, inp):
        """
        Pull a file from the global DFS (only when the DFS is enabled)
        @param inp a tuple in the form (file name, file id)
        @return a tuple (file name joined to the local data directory,
                file id)
        """
        filename, fileid = inp

        if self.use_dfs:
            self.info("Worker worker_id=%d is downloading file '%s'" % \
                      (self.worker_id, filename))
            # The failure message is built from `filename`; the previous
            # code referenced an undefined `fname` and died with NameError
            # on the first failed download.
            self._download_with_retry(filename)

        return (os.path.join(self.datadir, filename), fileid)

    def push_local_file(self, fname, push=False):
        """
        Push a local file into the global DFS. Does nothing when the DFS is
        disabled.
        @param fname the file to import, relative to the output prefix
        @param push if True the file will be pushed on the master.
        """
        if not self.use_dfs:
            return

        fname = os.path.join(self.output_prefix, fname)
        self.info("Pushing file '%s' into global DFS" % fname)
        self.fs.importFile(os.path.join(self.datadir, fname), fname)

        if push:
            ret = self.fs.pushFile(fname)
            self.info("Pushing returned %s" % str(ret))
Example #8
0
 def __init__(self):
     """Run Filesystem's initializer on this instance; nothing else is set
     up here. (Filesystem is presumably the parent class -- the class
     header is not visible from this snippet, confirm.)"""
     Filesystem.__init__(self)
Example #9
0
class WorkQueue(object):
    """
    Combines a generator with a fallback queue behind a single retrieval
    interface. Items are taken from the generator first; the queue of pushed
    items is only consulted once the generator is exhausted.
    """
    def __init__(self, logger, gen, use_dfs=False, dfs_conf=None):
        """
        Set up a new WorkQueue
        @param logger a logger object
        @param gen a generator
        @param use_dfs boolean indicating whether to use a DFS or not
        @param dfs_conf a dictionary containing the necessary parameters for
                        the DFS Master initialization. It is only read when
                        use_dfs is true.
        """
        self.logger = logger
        self.generator = gen
        self.dead_queue = []
        self.last_tag = 0
        self.use_dfs = use_dfs

        # Nothing else to prepare when the DFS is not requested
        if not use_dfs:
            return

        if not DFS_AVAILABLE:
            raise Exception("You need to install fsdfs in order to use the"
                            " distributed mode. Otherwise just toggle it "
                            "off from the configuration file")

        self.datadir = dfs_conf['datadir']
        self.fs = Filesystem(dfs_conf)

    def push(self, item):
        """
        Append an item to the dead_queue. None is rejected because next()
        uses None to signal that nothing could be retrieved.
        @param item the item you want to push
        """
        if item is not None:
            self.dead_queue.append(item)
            return

        raise ValueError("Cannot push None in the queue")

    def next(self):
        """
        Retrieve the next value from the WorkQueue. The tag counter is
        advanced on every call, including the ones that return None.
        @return an object or None if the retrieve is not possible
        """
        self.last_tag += 1

        try:
            # The generator yields tuples (path to file, file id)
            path, file_id = self.generator.next()

            if self.use_dfs:
                self.logger.info("Publishing file '%s' from '%s'" %
                                 (path, self.datadir))

                local_path = os.path.join(self.datadir, path)
                self.fs.importFile(local_path, path)

            return WorkerStatus(TYPE_MAP, self.last_tag, (path, file_id))

        except StopIteration:
            # Generator exhausted: fall back on the pushed items, if any
            if not self.dead_queue:
                return None

            recycled = self.dead_queue.pop(0)
            return WorkerStatus(TYPE_MAP, self.last_tag, recycled)

    def pop(self):
        """
        Same as calling next()
        @return an object
        """
        return self.next()
Example #10
0
class Reset:
    """Fills the destination image directory from the backup directory and
    empties it again, doing all file operations through a Filesystem
    helper."""

    def __init__(self):
        """Record the directory names and make sure the destination
        directory exists, creating it when it is missing."""
        self.sourceDir = 'imagesbackup'
        self.destDir = 'imagesdir'
        self.filesystem = Filesystem()
        dest_present = self.filesystem.checkDirExists(self.destDir)
        if not dest_present:
            self.filesystem.makeDir(self.destDir)
        # Re-check after the optional makeDir call
        assert self.filesystem.checkDirExists(self.destDir), (
            self.destDir + ' does not exist')

    def fill(self):
        """Copy every JPEG listed in the source directory into the
        destination directory. The listing is kept in self.sourceImages."""
        source_present = self.filesystem.checkDirExists(self.sourceDir)
        assert source_present, self.sourceDir + ' does not exist'
        self.sourceImages = self.filesystem.listJpegs(self.sourceDir)
        for name in self.sourceImages:
            origin = [self.sourceDir, name]
            target = [self.destDir, name]
            self.filesystem.copy(origin, target)

    def empty(self, path='', removeDir=True):
        """Remove the JPEGs listed in `path` (the destination directory when
        `path` is the empty string) and, if `removeDir` is true, the
        directory itself. Does nothing when the directory does not exist.
        The listing is kept in self.destImages."""
        chosen = self.destDir if path == '' else path
        chosen = self.filesystem.joinPath(chosen)
        if self.filesystem.checkDirExists(chosen):
            self.destImages = self.filesystem.listJpegs(chosen)
            for name in self.destImages:
                self.filesystem.removeFile([chosen, name])
            if removeDir:
                self.filesystem.removeDir(chosen)