def test_delete_dir(self):
    """Copy this test's directory to HDFS and verify recursive delete removes it.

    Fix over original: cleanup now runs in a ``finally`` block (consistent with
    the sibling tests) so the temporary HDFS directory does not leak when an
    assertion fails; also corrects the grammar of the first assertion message.
    """
    local = LocalFS(os.path.dirname(os.path.realpath(__file__)))
    hdfs_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    try:
        local.copy_to_hdfs(hdfs_file.path)
        self.assertTrue(hdfs_file.exists(), "Target HDFS dir does not exist")
        hdfs_file.delete(recursive=True)
        self.assertFalse(hdfs_file.exists(), "Target HDFS dir was not deleted")
    finally:
        # Best-effort cleanup in case an assertion above failed before delete().
        if hdfs_file.exists():
            hdfs_file.delete(recursive=True)
def test_file_size(self):
    """Upload this test file to HDFS and check HDFS.size() equals the local size."""
    source = LocalFS(os.path.realpath(__file__))
    target = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    try:
        source.copy_to_hdfs(target.path)
        self.assertTrue(target.exists(), "Local file was not copied to HDFS")
        self.assertEqual(target.size(), source.size())
    finally:
        # Always remove the uploaded temp file, even on assertion failure.
        target.delete()
def test_dir_size(self):
    """Upload a fixture directory to HDFS and check HDFS.size() equals the local size."""
    fixture_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               "resources", "test_dir_size")
    source = LocalFS(fixture_dir)
    target = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    try:
        source.copy_to_hdfs(target.path)
        self.assertTrue(target.exists(), "Local file was not copied to HDFS")
        self.assertEqual(target.size(), source.size())
    finally:
        # Recursive delete: the target is a directory tree on HDFS.
        target.delete(recursive=True)
def test_merge(self):
    """Upload a fixture dir to HDFS, merge its files back to one local file, verify it exists."""
    resources_root = os.path.dirname(os.path.realpath(__file__))
    source = LocalFS(os.path.join(resources_root, "resources", "test_merge"))
    target = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    merged = LocalFS(os.path.join(resources_root, "resources", "merged.txt"))
    try:
        source.copy_to_hdfs(target.path)
        self.assertTrue(target.exists(), "Local file was not copied to HDFS")
        target.merge(merged.path)
        self.assertTrue(merged.exists(), "merged file was not copied to local fs")
    finally:
        # Clean up both the HDFS directory and the locally merged output.
        target.delete_directory()
        merged.delete()
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# See the NOTICE file and the LICENSE file distributed with this work
# for additional information regarding copyright ownership and licensing.
#
import os

from merlin.fs.hdfs import HDFS
from merlin.fs.localfs import LocalFS

if __name__ == "__main__":
    # Stage the SCD active-snapshot CSV into a working directory on HDFS.
    base_dir = HDFS(os.path.join('/tmp', 'scd.active'))
    base_dir.create_directory()
    snapshot = LocalFS(
        os.path.join(os.path.dirname(__file__), 'resources', 'scd.active.csv'))
    snapshot.copy_to_hdfs(base_dir.path)
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# See the NOTICE file and the LICENSE file distributed with this work
# for additional information regarding copyright ownership and licensing.
#
import os

from merlin.fs.hdfs import HDFS
from merlin.fs.localfs import LocalFS

if __name__ == "__main__":
    # Stage the SCD active-snapshot CSV into a working directory on HDFS.
    working_dir = HDFS(os.path.join('/tmp', 'scd.active'))
    working_dir.create_directory()
    active_snapshot = LocalFS(
        os.path.join(os.path.dirname(__file__), 'resources', 'scd.active.csv'))
    active_snapshot.copy_to_hdfs(working_dir.path)
ftp.create(make_dir=True)
# Upload the three dated sample files to the FTP directory.
for resource in ("resources/file_12.11.2014_.txt",
                 "resources/file_13.11.2014_.txt",
                 "resources/file_14.11.2014_.txt"):
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__), resource))
# Create the dated raw directories on HDFS and copy the matching local file into each.
for raw_path, resource in (("{0}/raw/12.11.2014".format(BASE_DIR), 'resources/file_12.11.2014_.txt'),
                           ("{0}/raw/13.11.2014".format(BASE_DIR), 'resources/file_13.11.2014_.txt')):
    hdfs_file = HDFS(raw_path)
    hdfs_file.create(directory=True)
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__), resource))
    local_file.copy_to_hdfs(hdfs_path=raw_path)
# Recreate an empty local directory 'tmp' in folder 'resources'.
local_file = LocalFS(path=os.path.join(os.path.dirname(__file__), 'resources/tmp'))
if local_file.exists():
    local_file.delete_directory()
local_file.create(directory=True)
# create HIVE external table with partition
# Start from a clean FTP directory: drop any leftover tree, then recreate it.
if ftp.exists():
    ftp.delete(recursive=True)
ftp.create(make_dir=True)
# Upload the three dated sample files to the FTP directory.
for resource in ("resources/file_12.11.2014_.txt",
                 "resources/file_13.11.2014_.txt",
                 "resources/file_14.11.2014_.txt"):
    ftp.upload(local_path=os.path.join(os.path.dirname(__file__), resource))
# Create the dated raw directories on HDFS and copy the matching local file into each.
for raw_path, resource in (("{0}/raw/12.11.2014".format(BASE_DIR), 'resources/file_12.11.2014_.txt'),
                           ("{0}/raw/13.11.2014".format(BASE_DIR), 'resources/file_13.11.2014_.txt')):
    hdfs_file = HDFS(raw_path)
    hdfs_file.create(directory=True)
    local_file = LocalFS(path=os.path.join(os.path.dirname(__file__), resource))
    local_file.copy_to_hdfs(hdfs_path=raw_path)
# Recreate an empty local directory 'tmp' in folder 'resources'.
local_file = LocalFS(path=os.path.join(os.path.dirname(__file__), 'resources/tmp'))
if local_file.exists():
    local_file.delete_directory()
local_file.create(directory=True)
# create HIVE external table with partition