def test_copy_files(): cluster = pseudo_hdfs4.shared_cluster() try: c = make_logged_in_client() user = User.objects.get(username='******') prefix = '/tmp/test_copy_files' if cluster.fs.exists(prefix): cluster.fs.rmtree(prefix) # Jars in various locations deployment_dir = '%s/workspace' % prefix external_deployment_dir = '%s/deployment' % prefix jar_1 = '%s/udf1.jar' % prefix jar_2 = '%s/lib/udf2.jar' % prefix jar_3 = '%s/udf3.jar' % deployment_dir jar_4 = '%s/lib/udf4.jar' % deployment_dir # Doesn't move jar_5 = 'udf5.jar' jar_6 = 'lib/udf6.jar' # Doesn't move cluster.fs.mkdir(prefix) cluster.fs.create(jar_1) cluster.fs.create(jar_2) cluster.fs.create(jar_3) cluster.fs.create(jar_4) cluster.fs.create(deployment_dir + '/' + jar_5) cluster.fs.create(deployment_dir + '/' + jar_6) class MockJob(): XML_FILE_NAME = 'workflow.xml' def __init__(self): self.deployment_dir = deployment_dir self.nodes = [ Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}), Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}), Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}), Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}), # Workspace relative paths Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}), Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}}) ] submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt) submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'}) submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'}) assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir) assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir) # All sources still there assert_true(cluster.fs.exists(jar_1)) assert_true(cluster.fs.exists(jar_2)) assert_true(cluster.fs.exists(jar_3)) assert_true(cluster.fs.exists(jar_4)) assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5)) assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6)) # Lib deployment_dir = deployment_dir + '/lib' external_deployment_dir = external_deployment_dir + '/lib' list_dir_workspace = cluster.fs.listdir(deployment_dir) list_dir_deployement = cluster.fs.listdir(external_deployment_dir) # All destinations there assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'), list_dir_workspace) assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf5.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf6.jar'), list_dir_deployement) stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar') stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar') stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar') stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar') stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar') stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar') submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'}) assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId']) assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId']) assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId']) assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId']) assert_not_equal(stats_udf5['fileId'], cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId']) assert_equal(stats_udf6['fileId'], cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId']) # Test _create_file() submission._create_file(deployment_dir, 'test.txt', data='Test data') assert_true(cluster.fs.exists(deployment_dir + '/test.txt'), list_dir_workspace) finally: try: cluster.fs.rmtree(prefix) except: LOG.exception('failed to remove %s' % prefix)
def test_copy_files(): cluster = pseudo_hdfs4.shared_cluster() try: c = make_logged_in_client() user = User.objects.get(username='******') ensure_home_directory(cluster.fs, user) prefix = '/tmp/test_copy_files' if cluster.fs.exists(prefix): cluster.fs.rmtree(prefix) # Jars in various locations deployment_dir = '%s/workspace' % prefix external_deployment_dir = '%s/deployment' % prefix jar_1 = '%s/udf1.jar' % prefix jar_2 = '%s/lib/udf2.jar' % prefix jar_3 = '%s/udf3.jar' % deployment_dir jar_4 = '%s/lib/udf4.jar' % deployment_dir # Doesn't move jar_5 = 'udf5.jar' jar_6 = 'lib/udf6.jar' # Doesn't move cluster.fs.mkdir(prefix) cluster.fs.create(jar_1) cluster.fs.create(jar_2) cluster.fs.create(jar_3) cluster.fs.create(jar_4) cluster.fs.create(deployment_dir + '/' + jar_5) cluster.fs.create(deployment_dir + '/' + jar_6) class MockJob(object): XML_FILE_NAME = 'workflow.xml' def __init__(self): self.deployment_dir = deployment_dir self.nodes = [ Node({ 'id': '1', 'type': 'mapreduce', 'properties': { 'jar_path': jar_1 } }), Node({ 'id': '2', 'type': 'mapreduce', 'properties': { 'jar_path': jar_2 } }), Node({ 'id': '3', 'type': 'java', 'properties': { 'jar_path': jar_3 } }), Node({ 'id': '4', 'type': 'java', 'properties': { 'jar_path': jar_4 } }), # Workspace relative paths Node({ 'id': '5', 'type': 'java', 'properties': { 'jar_path': jar_5 } }), Node({ 'id': '6', 'type': 'java', 'properties': { 'jar_path': jar_6 } }) ] submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt) submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'}) submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'}) assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir) assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir) # All sources still there assert_true(cluster.fs.exists(jar_1)) assert_true(cluster.fs.exists(jar_2)) assert_true(cluster.fs.exists(jar_3)) assert_true(cluster.fs.exists(jar_4)) assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5)) assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6)) # Lib deployment_dir = deployment_dir + '/lib' external_deployment_dir = external_deployment_dir + '/lib' if USE_LIBPATH_FOR_JARS.get(): assert_true(jar_1 in submission.properties['oozie.libpath']) assert_true(jar_2 in submission.properties['oozie.libpath']) assert_true(jar_3 in submission.properties['oozie.libpath']) assert_true(jar_4 in submission.properties['oozie.libpath']) print(deployment_dir + '/' + jar_5) assert_true((deployment_dir + '/' + jar_5) in submission.properties['oozie.libpath'], submission.properties['oozie.libpath']) assert_true((deployment_dir + '/' + jar_6) in submission.properties['oozie.libpath'], submission.properties['oozie.libpath']) else: list_dir_workspace = cluster.fs.listdir(deployment_dir) list_dir_deployement = cluster.fs.listdir(external_deployment_dir) # All destinations there assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'), list_dir_workspace) assert_true( cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement) assert_true( cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement) assert_true( cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement) assert_true( cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement) assert_true( cluster.fs.exists(external_deployment_dir + '/udf5.jar'), list_dir_deployement) assert_true( cluster.fs.exists(external_deployment_dir + '/udf6.jar'), list_dir_deployement) stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar') stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar') stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar') stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar') stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar') stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar') submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'}) assert_not_equal( stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId']) assert_not_equal( stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId']) assert_not_equal( stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId']) assert_equal( stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId']) assert_not_equal( stats_udf5['fileId'], cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId']) assert_equal( stats_udf6['fileId'], cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId']) # Test _create_file() submission._create_file(deployment_dir, 'test.txt', data='Test data') assert_true(cluster.fs.exists(deployment_dir + '/test.txt'), list_dir_workspace) finally: try: cluster.fs.rmtree(prefix) except: LOG.exception('failed to remove %s' % prefix)
def test_copy_files(): cluster = pseudo_hdfs4.shared_cluster() try: c = make_logged_in_client() user = User.objects.get(username='******') prefix = '/tmp/test_copy_files' if cluster.fs.exists(prefix): cluster.fs.rmtree(prefix) # Jars in various locations deployment_dir = '%s/workspace' % prefix external_deployment_dir = '%s/deployment' % prefix jar_1 = '%s/udf1.jar' % prefix jar_2 = '%s/lib/udf2.jar' % prefix jar_3 = '%s/udf3.jar' % deployment_dir jar_4 = '%s/lib/udf4.jar' % deployment_dir # Never move cluster.fs.mkdir(prefix) cluster.fs.create(jar_1) cluster.fs.create(jar_2) cluster.fs.create(jar_3) cluster.fs.create(jar_4) class MockNode(): def __init__(self, jar_path): self.jar_path = jar_path class MockJob(): XML_FILE_NAME = 'workflow.xml' def __init__(self): self.node_list = [ MockNode(jar_1), MockNode(jar_2), MockNode(jar_3), MockNode(jar_4), ] submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt) submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'}) submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'}) assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir) assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir) # All sources still there assert_true(cluster.fs.exists(jar_1)) assert_true(cluster.fs.exists(jar_2)) assert_true(cluster.fs.exists(jar_3)) assert_true(cluster.fs.exists(jar_4)) deployment_dir = deployment_dir + '/lib' external_deployment_dir = external_deployment_dir + '/lib' list_dir_workspace = cluster.fs.listdir(deployment_dir) list_dir_deployement = cluster.fs.listdir(external_deployment_dir) # All destinations there assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace) assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace) assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement) assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement) stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar') stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar') stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar') stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar') submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'}) assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId']) assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId']) assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId']) assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId']) finally: try: cluster.fs.rmtree(prefix) except: pass