def test_list(self):
    """Check that every entry listed under the HDFS test path is one of ours.

    Creates two empty local temp files, passes each through
    ``copy_to_local`` together with ``/user/test``, then asserts that
    field 6 of every item returned by ``io.list`` equals one of the two
    local file paths. The local temp directory is removed afterwards.
    """
    # Function-scope imports so this test does not depend on which
    # modules the surrounding file happens to import.
    import os
    import shutil

    io = DataIOHDFS()
    hdfs_path = '/user/test'
    tempdir = tempfile.mkdtemp()
    try:
        # mkstemp returns an already-open OS-level descriptor; close it
        # right away so the test does not leak file descriptors.
        handle1, fullpath1 = tempfile.mkstemp(dir=tempdir)
        os.close(handle1)
        handle2, fullpath2 = tempfile.mkstemp(dir=tempdir)
        os.close(handle2)

        # NOTE(review): the sibling tests use copy_from_local to put a
        # file on HDFS; copy_to_local here looks like it may be the wrong
        # direction - confirm against DataIOHDFS before changing.
        io.copy_to_local(fullpath1, hdfs_path)
        io.copy_to_local(fullpath2, hdfs_path)

        # NOTE(review): passes vacuously when the listing is empty.
        # Presumably index 6 of a listing entry is its path - TODO confirm.
        expected_paths = (fullpath1, fullpath2)
        for entry in io.list(hdfs_path):
            self.assertTrue(entry[6] in expected_paths)
    finally:
        # Best-effort cleanup; never mask the real test outcome.
        shutil.rmtree(tempdir, ignore_errors=True)
def test_copy_to_local(self):
    """Round-trip a temp file through HDFS and check the local target exists.

    Uploads an empty temp file to ``/user/test`` with ``copy_from_local``,
    then calls ``copy_to_local`` with a second temp file path and asserts
    that this path exists. Both local temp directories are removed
    afterwards.
    """
    import shutil  # function-scope: only needed for cleanup here

    io = DataIOHDFS()
    hdfs_path = '/user/test'
    created_dirs = []
    try:
        # Create a file on HDFS
        upload_dir = tempfile.mkdtemp()
        created_dirs.append(upload_dir)
        handle, upload_path = tempfile.mkstemp(dir=upload_dir)
        # mkstemp hands back an open descriptor; close it to avoid a leak.
        os.close(handle)
        io.copy_from_local(upload_path, hdfs_path)

        # Copy back to the filesystem
        download_dir = tempfile.mkdtemp()
        created_dirs.append(download_dir)
        handle, fullpath = tempfile.mkstemp(dir=download_dir)
        os.close(handle)
        io.copy_to_local(fullpath, hdfs_path)

        # NOTE(review): mkstemp has already created ``fullpath``, so this
        # assertion holds even if copy_to_local did nothing. A stronger
        # check needs knowledge of DataIOHDFS semantics - TODO confirm.
        self.assertTrue(os.path.exists(fullpath))
    finally:
        # Best-effort cleanup of every directory this test created.
        for path in created_dirs:
            shutil.rmtree(path, ignore_errors=True)
def test_copy_from_local(self):
    """Upload a temp file and check its name shows up in the HDFS listing.

    Copies an empty local temp file to ``/user/test`` with
    ``copy_from_local`` and asserts that at least one item returned by
    ``io.list`` has a string form equal to the file's base name. The local
    temp directory is removed afterwards.
    """
    import shutil  # function-scope: only needed for cleanup here

    io = DataIOHDFS()
    hdfs_path = '/user/test'
    tempdir = tempfile.mkdtemp()
    try:
        # Create a file on HDFS
        handle, fullpath = tempfile.mkstemp(dir=tempdir)
        # mkstemp hands back an open descriptor; close it to avoid a leak.
        os.close(handle)
        io.copy_from_local(fullpath, hdfs_path)
        filename = os.path.basename(fullpath)

        # Check the hdfs path
        # NOTE(review): test_list indexes listing items with [6], while
        # this compares str(item) to the bare file name - one of the two
        # probably does not match what io.list yields. TODO confirm.
        exists_in_hdfs = any(
            str(item) == filename for item in io.list(hdfs_path)
        )
        self.assertTrue(exists_in_hdfs)
    finally:
        # Best-effort cleanup; never mask the real test outcome.
        shutil.rmtree(tempdir, ignore_errors=True)
def ingest():
    """Append the submitted form fields to the local buffer, then go home.

    Reads ``phraseInput`` and ``labelInput`` from the posted form and
    appends them as a single ``<phraseInput>:<labelInput>`` line to
    HDFS_BUFFER_FILE. Once that file has reached MAX_BUFFER_SIZE bytes it
    is copied to HDFS_STAGE through DataIOHDFS and the local copy is
    removed. The response is always a redirect to the ``home`` endpoint.
    """
    phrase_value = request.form['phraseInput']
    label_value = request.form['labelInput']
    # The 'modelInput' form field is not consumed yet.

    with open(HDFS_BUFFER_FILE, 'a') as buffer_file:
        # TODO - use control char as separator
        record = ':'.join((str(phrase_value), str(label_value))) + '\n'
        buffer_file.write(record)

    # Flush the buffer to HDFS_STAGE once it is big enough.
    # TODO - move this into a separate thread; that will also require
    # TODO - locking on the buffer file.
    buffer_size = os.path.getsize(HDFS_BUFFER_FILE)
    if buffer_size >= MAX_BUFFER_SIZE:
        hdfs_io = DataIOHDFS()
        hdfs_io.copy_from_local(HDFS_BUFFER_FILE, HDFS_STAGE)
        os.remove(HDFS_BUFFER_FILE)

    home_url = url_for('home')
    return redirect(home_url)