Exemple #1
0
    def test_list(self):
        """Check the entries ``DataIOHDFS.list`` reports for the test path.

        Two empty temporary files are created locally and passed, together
        with the HDFS path, to ``copy_to_local``.  Every entry yielded by
        ``list`` must then carry one of the two local paths at index 6.
        """
        io = DataIOHDFS()
        hdfs_path = '/user/test'

        tempdir = tempfile.mkdtemp()
        # NamedTemporaryFile(delete=False) leaves the file on disk but closes
        # its descriptor when the ``with`` block exits; mkstemp() hands back
        # an open descriptor that was previously never closed.
        with tempfile.NamedTemporaryFile(dir=tempdir, delete=False) as tmp1:
            fullpath1 = tmp1.name
        with tempfile.NamedTemporaryFile(dir=tempdir, delete=False) as tmp2:
            fullpath2 = tmp2.name

        # NOTE(review): the sibling tests stage files on HDFS with
        # copy_from_local(); copy_to_local() here looks like it may be the
        # wrong direction -- confirm against DataIOHDFS before changing.
        io.copy_to_local(fullpath1, hdfs_path)
        io.copy_to_local(fullpath2, hdfs_path)

        # NOTE(review): this loop passes vacuously when list() yields
        # nothing -- consider asserting on the number of entries as well.
        expected = (fullpath1, fullpath2)
        for entry in io.list(hdfs_path):
            self.assertIn(entry[6], expected)
Exemple #2
0
    def test_list(self):
        """Check the entries ``DataIOHDFS.list`` reports for the test path.

        Two empty temporary files are created locally and passed, together
        with the HDFS path, to ``copy_to_local``.  Every entry yielded by
        ``list`` must then carry one of the two local paths at index 6.
        """
        io = DataIOHDFS()
        hdfs_path = '/user/test'

        tempdir = tempfile.mkdtemp()
        # NamedTemporaryFile(delete=False) leaves the file on disk but closes
        # its descriptor when the ``with`` block exits; mkstemp() hands back
        # an open descriptor that was previously never closed.
        with tempfile.NamedTemporaryFile(dir=tempdir, delete=False) as tmp1:
            fullpath1 = tmp1.name
        with tempfile.NamedTemporaryFile(dir=tempdir, delete=False) as tmp2:
            fullpath2 = tmp2.name

        # NOTE(review): the sibling tests stage files on HDFS with
        # copy_from_local(); copy_to_local() here looks like it may be the
        # wrong direction -- confirm against DataIOHDFS before changing.
        io.copy_to_local(fullpath1, hdfs_path)
        io.copy_to_local(fullpath2, hdfs_path)

        # NOTE(review): this loop passes vacuously when list() yields
        # nothing -- consider asserting on the number of entries as well.
        expected = (fullpath1, fullpath2)
        for entry in io.list(hdfs_path):
            self.assertIn(entry[6], expected)
Exemple #3
0
    def test_copy_to_local(self):
        """Stage a file on HDFS, copy it back, and check it exists locally.

        An empty temporary file is pushed to the HDFS test path with
        ``copy_from_local``; ``copy_to_local`` is then asked to write to a
        fresh local path, which must exist afterwards.
        """
        io = DataIOHDFS()

        # Create a file on HDFS
        upload_dir = tempfile.mkdtemp()
        handle, upload_path = tempfile.mkstemp(dir=upload_dir)
        # mkstemp() returns an open OS-level descriptor that we own.
        os.close(handle)
        hdfs_path = '/user/test'
        io.copy_from_local(upload_path, hdfs_path)

        # Copy back to the filesystem.  The destination must NOT exist
        # beforehand: a path pre-created by mkstemp() would make the
        # existence check below pass even if copy_to_local() did nothing.
        download_dir = tempfile.mkdtemp()
        download_path = os.path.join(download_dir, 'copied')
        io.copy_to_local(download_path, hdfs_path)

        self.assertTrue(os.path.exists(download_path))
Exemple #4
0
    def test_copy_from_local(self):
        """Push a local file to HDFS and check it shows up in the listing.

        An empty temporary file is uploaded with ``copy_from_local``; the
        test passes when the string form of at least one entry returned by
        ``list`` equals the file's base name.
        """
        io = DataIOHDFS()

        # Create a file on HDFS
        tempdir = tempfile.mkdtemp()
        handle, fullpath = tempfile.mkstemp(dir=tempdir)
        # mkstemp() returns an open OS-level descriptor that we own.
        os.close(handle)
        hdfs_path = '/user/test'
        io.copy_from_local(fullpath, hdfs_path)
        filename = os.path.basename(fullpath)

        # Check the hdfs path
        self.assertTrue(
            any(str(item) == filename for item in io.list(hdfs_path)))
Exemple #5
0
    def test_copy_from_local(self):
        """Push a local file to HDFS and check it shows up in the listing.

        An empty temporary file is uploaded with ``copy_from_local``; the
        test passes when the string form of at least one entry returned by
        ``list`` equals the file's base name.
        """
        io = DataIOHDFS()

        # Create a file on HDFS
        tempdir = tempfile.mkdtemp()
        handle, fullpath = tempfile.mkstemp(dir=tempdir)
        # mkstemp() returns an open OS-level descriptor that we own.
        os.close(handle)
        hdfs_path = '/user/test'
        io.copy_from_local(fullpath, hdfs_path)
        filename = os.path.basename(fullpath)

        # Check the hdfs path
        self.assertTrue(
            any(str(item) == filename for item in io.list(hdfs_path)))
Exemple #6
0
    def test_copy_to_local(self):
        """Stage a file on HDFS, copy it back, and check it exists locally.

        An empty temporary file is pushed to the HDFS test path with
        ``copy_from_local``; ``copy_to_local`` is then asked to write to a
        fresh local path, which must exist afterwards.
        """
        io = DataIOHDFS()

        # Create a file on HDFS
        upload_dir = tempfile.mkdtemp()
        handle, upload_path = tempfile.mkstemp(dir=upload_dir)
        # mkstemp() returns an open OS-level descriptor that we own.
        os.close(handle)
        hdfs_path = '/user/test'
        io.copy_from_local(upload_path, hdfs_path)

        # Copy back to the filesystem.  The destination must NOT exist
        # beforehand: a path pre-created by mkstemp() would make the
        # existence check below pass even if copy_to_local() did nothing.
        download_dir = tempfile.mkdtemp()
        download_path = os.path.join(download_dir, 'copied')
        io.copy_to_local(download_path, hdfs_path)

        self.assertTrue(os.path.exists(download_path))
Exemple #7
0
def ingest():
    """
    Handles form data ingestion.

    Appends the submitted ``phraseInput`` and ``labelInput`` form fields to
    HDFS_BUFFER_FILE as a single ``<phraseInput>:<labelInput>`` line.  Once
    the buffer file has reached MAX_BUFFER_SIZE bytes it is copied to
    HDFS_STAGE and removed locally.  Always redirects to the ``home``
    endpoint.
    """
    # NOTE(review): these locals used to be named the other way round
    # (``label`` held phraseInput, ``text`` held labelInput).  The order of
    # the fields written to the buffer is unchanged -- confirm that
    # "phrase:label" is the layout downstream consumers expect.
    phrase = request.form['phraseInput']
    label = request.form['labelInput']

    with open(HDFS_BUFFER_FILE, 'a') as f:
        # TODO - use control char as separator
        f.write(str(phrase) + ':' + str(label) + '\n')

    # Flush the file to HDFS_STAGE if it exceeds MAX_BUFFER_SIZE.  This runs
    # only after the ``with`` block has closed (and therefore flushed) the
    # buffer file: checking, copying and deleting it while the handle was
    # still open could miss the record just written and then lose it when
    # the file was removed.
    #   TODO - This should be done in a separate thread there'll
    #   TODO - also need to be locking on the file
    if os.stat(HDFS_BUFFER_FILE).st_size >= MAX_BUFFER_SIZE:
        DataIOHDFS().copy_from_local(HDFS_BUFFER_FILE, HDFS_STAGE)
        os.remove(HDFS_BUFFER_FILE)

    return redirect(url_for('home'))