def setUp(self):
  self.client = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
  self.client_not_me = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)

  self.user = User.objects.get(username="******")
  self.user_not_me = User.objects.get(username="******")

  # Beware: Monkey patch HS2API Mock API
  if not hasattr(notebook.connectors.hiveserver2, 'original_HS2Api'):  # Could not monkey patch base.get_api
    notebook.connectors.hiveserver2.original_HS2Api = notebook.connectors.hiveserver2.HS2Api
  notebook.connectors.hiveserver2.HS2Api = MockedApi

  originalCluster.get_hdfs()
  self.original_fs = originalCluster.FS_CACHE["default"]
  originalCluster.FS_CACHE["default"] = MockFs()

  grant_access("test", "default", "notebook")
  grant_access("test", "default", "beeswax")
  grant_access("not_perm_user", "default", "notebook")
  grant_access("not_perm_user", "default", "beeswax")
  add_permission('test', 'has_adls', permname='adls_access', appname='filebrowser')
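# A setUp() that monkey patches HS2Api and FS_CACHE should undo both patches
# once the test finishes. The tearDown() below is a minimal sketch, not part of
# the original snippet; it assumes the same MockedApi/MockFs fixtures as above.
def tearDown(self):
  # Restore the real HS2Api saved by setUp()
  if hasattr(notebook.connectors.hiveserver2, 'original_HS2Api'):
    notebook.connectors.hiveserver2.HS2Api = notebook.connectors.hiveserver2.original_HS2Api
  # Put the real filesystem back into the cluster cache
  originalCluster.FS_CACHE["default"] = self.original_fs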
def test_update_properties():
  finish = []
  finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  try:
    properties = {
      'user.name': 'hue',
      'test.1': 'http://localhost/test?test1=test&test2=test'
    }

    final_properties = properties.copy()
    submission = Submission(None, properties=properties, oozie_id='test')
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    cluster.clear_caches()
    fs = cluster.get_hdfs()
    jt = cluster.get_next_ha_mrcluster()[1]
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jtaddress',
      'nameNode': fs.fs_defaultfs
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
    finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
    cluster.clear_caches()
    fs = cluster.get_hdfs()
    jt = cluster.get_next_ha_mrcluster()[1]
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jobtracker',
      'nameNode': 'namenode'
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)
  finally:
    cluster.clear_caches()
    for reset in finish:
      reset()
def test_update_properties(self):
  finish = []
  finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  try:
    properties = {
      'user.name': 'hue',
      'test.1': 'http://localhost/test?test1=test&test2=test',
      'nameNode': 'hdfs://curacao:8020',
      'jobTracker': 'jtaddress',
      'security_enabled': False
    }

    final_properties = properties.copy()
    submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    cluster.clear_caches()
    fs = cluster.get_hdfs()
    jt = cluster.get_next_ha_mrcluster()[1]
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jtaddress',
      'nameNode': fs.fs_defaultfs
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
    finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
    cluster.clear_caches()
    fs = cluster.get_hdfs()
    jt = cluster.get_next_ha_mrcluster()[1]
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jobtracker',
      'nameNode': 'namenode'
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)
  finally:
    cluster.clear_caches()
    for reset in finish:
      reset()
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = paths.get_thirdparty_root("sample_data")
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  sample_user = install_sample_user()
  management.call_command('loaddata', 'initial_pig_examples.json', verbosity=2)
  Document.objects.sync()

  if USE_NEW_EDITOR.get():
    # Get or create sample user directories
    home_dir = Directory.objects.get_home_directory(sample_user)
    examples_dir, created = Directory.objects.get_or_create(
      parent_directory=home_dir,
      owner=sample_user,
      name=Document2.EXAMPLES_DIR)

    try:
      # Don't overwrite
      doc = Document.objects.get(object_id=1100713)
      doc2 = Document2.objects.get(owner=sample_user, name=doc.name, type='link-pigscript')
      # If document exists but has been trashed, recover from Trash
      if doc2.parent_directory != examples_dir:
        doc2.parent_directory = examples_dir
        doc2.save()
    except Document.DoesNotExist:
      LOG.warn('Sample pig script document not found.')
    except Document2.DoesNotExist:
      if doc.content_object:
        data = doc.content_object.dict
        data.update({'content_type': doc.content_type.model, 'object_id': doc.object_id})
        data = json.dumps(data)

        doc2 = Document2.objects.create(
          owner=sample_user,
          parent_directory=examples_dir,
          name=doc.name,
          type='link-pigscript',
          description=doc.description,
          data=data)
        LOG.info('Successfully installed sample link to pig script: %s' % (doc2.name,))

    # Share with default group
    examples_dir.share(sample_user, Document2Permission.READ_PERM, groups=[get_default_user_group()])
def handle_noargs(self, **options): self.user = install_sample_user() self.fs = cluster.get_hdfs() LOG.info(_("Creating sample directory '%s' in HDFS") % REMOTE_SAMPLE_DIR.get()) create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()]) remote_dir = REMOTE_SAMPLE_DIR.get() # Copy examples binaries for name in os.listdir(LOCAL_SAMPLE_DIR.get()): local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name) remote_data_dir = self.fs.join(remote_dir, name) LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % { 'local_dir': local_dir, 'remote_data_dir': remote_data_dir}) self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir) # Copy sample data local_dir = LOCAL_SAMPLE_DATA_DIR.get() remote_data_dir = self.fs.join(remote_dir, 'data') LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % { 'local_dir': local_dir, 'remote_data_dir': remote_data_dir}) self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir) # Load jobs LOG.info(_("Installing examples...")) if ENABLE_V2.get(): management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2) self.install_examples() Document.objects.sync()
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  remote_dir = create_directories(fs)

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = LOCAL_SAMPLE_DATA_DIR.get()
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  USERNAME = '******'
  try:
    sample_user = User.objects.get(username=USERNAME)
  except User.DoesNotExist:
    sample_user = User.objects.create(username=USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
  management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  remote_dir = create_directories(fs)

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = LOCAL_SAMPLE_DATA_DIR.get()
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  sample, created = User.objects.get_or_create(username='******')
  management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)

  from oozie.models import Job
  Job.objects.filter(owner__id=1100713).update(owner=sample)  # 11OOZIE
def config_validator(user):
  # dbms is dependent on beeswax.conf (this file),
  # so import in the method to avoid a circular dependency
  from beeswax.server import dbms

  res = []

  try:
    if not 'test' in sys.argv:  # Avoid tests hanging
      server = dbms.get(user)
      server.get_databases()
  except:
    res.append((NICE_NAME, _("The application won't work without a running HiveServer2.")))

  try:
    from hadoop import cluster
    warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
    fs = cluster.get_hdfs()
    fs.stats(warehouse)
  except Exception:
    return [(NICE_NAME, _('Failed to access Hive warehouse: %s') % warehouse)]

  return res
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = paths.get_thirdparty_root("sample_data")
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  install_sample_user()
  management.call_command('loaddata', 'initial_pig_examples.json', verbosity=2)
  Document.objects.sync()
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = LOCAL_SAMPLE_DATA_DIR.get()
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  USERNAME = '******'
  try:
    sample_user = User.objects.get(username=USERNAME)
  except User.DoesNotExist:
    sample_user = User.objects.create(username=USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
  management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)
def config_validator(user):
  # dbms is dependent on beeswax.conf (this file),
  # so import in the method to avoid a circular dependency
  from beeswax.server import dbms

  res = []

  try:
    if not 'test' in sys.argv:  # Avoid tests hanging
      server = dbms.get(user)
      server.get_databases()
  except:
    msg = "The application won't work without a running HiveServer2."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))

  try:
    from hadoop import cluster
    warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
    fs = cluster.get_hdfs()
    fs.stats(warehouse)
  except Exception:
    msg = 'Failed to access Hive warehouse: %s'
    LOG.exception(msg % warehouse)
    return [(NICE_NAME, _(msg) % warehouse)]

  return res
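# A hedged sketch of how config_validator() above might be driven from a
# check-config style entry point. The 'hue' admin username and the helper name
# are assumptions for illustration, not part of the original snippet.
def run_hive_checks():
  from django.contrib.auth.models import User
  admin = User.objects.get(username='hue')  # hypothetical admin account
  for app_name, message in config_validator(admin):
    LOG.error('%s: %s' % (app_name, message))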
def process_view(self, request, view_func, view_args, view_kwargs):
  """
  Sets request.fs and request.jt on every request to point to the
  configured filesystem.
  """
  request.fs_ref = request.REQUEST.get('fs', view_kwargs.get('fs', 'default'))
  if "fs" in view_kwargs:
    del view_kwargs["fs"]
  try:
    request.fs = cluster.get_hdfs(request.fs_ref)
  except KeyError:
    raise KeyError(_('Cannot find HDFS called "%(fs_ref)s".') % {'fs_ref': request.fs_ref})

  if request.user.is_authenticated():
    if request.fs is not None:
      request.fs.setuser(request.user.username)

    request.jt = cluster.get_default_mrcluster()
    if request.jt is not None:
      request.jt.setuser(request.user.username)
  else:
    request.jt = None
def process_view(self, request, view_func, view_args, view_kwargs):
  """
  Sets request.fs and request.jt on every request to point to the
  configured filesystem.
  """
  request.fs_ref = request.REQUEST.get('fs', view_kwargs.get('fs', 'default'))
  if "fs" in view_kwargs:
    del view_kwargs["fs"]
  try:
    request.fs = cluster.get_hdfs(request.fs_ref)
  except KeyError:
    raise KeyError(_('Cannot find HDFS called "%(fs_ref)s".') % {'fs_ref': request.fs_ref})

  if request.user.is_authenticated():
    if request.fs is not None:
      request.fs.setuser(request.user.username)

    request.jt = cluster.get_default_mrcluster()  # Deprecated, only there for MR1
    if request.jt is not None:
      request.jt.setuser(request.user.username)
  else:
    request.jt = None
def get_children_data(ensemble, namespace, read_only=True):
  hdfs = cluster.get_hdfs()
  if hdfs is None:
    raise PopupException(_('No [hdfs] configured in hue.ini.'))

  if hdfs.security_enabled:
    sasl_server_principal = PRINCIPAL_NAME.get()
  else:
    sasl_server_principal = None

  zk = KazooClient(hosts=ensemble, read_only=read_only, sasl_server_principal=sasl_server_principal)
  zk.start()

  children_data = []
  children = zk.get_children(namespace)
  for node in children:
    data, stat = zk.get("%s/%s" % (namespace, node))
    children_data.append(data)

  zk.stop()

  return children_data
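# Hypothetical caller for get_children_data() above, e.g. to inspect the
# children of an Oozie znode. The ensemble address, namespace, and helper name
# are illustrative assumptions.
def print_namespace(ensemble='localhost:2181', namespace='/oozie'):
  for data in get_children_data(ensemble, namespace, read_only=True):
    LOG.info('znode payload: %s' % data)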
def test_filebrowser(self):
  sys.stdout.write("Checking HDFS access\n")
  fs = cluster.get_hdfs()
  try:
    _do_newfile_save(fs, "/tmp/smoke_fb.test", "Test", "utf-8")
    fs.remove("/tmp/smoke_fb.test")
  except Exception, ex:
    sys.stderr.write("[Hdfs/WebHdfs] Exception: %s \n" % ex)
def handle_noargs(self, **options):
  remote_fs = cluster.get_hdfs()
  if hasattr(remote_fs, "setuser"):
    remote_fs.setuser("hue", ["supergroup"])
  logging.info("Using remote fs: %s" % str(remote_fs))

  # Copy over examples/ and script_templates/ directories
  for dirname in ("examples", "script_templates"):
    local_dir = os.path.join(jobsub.conf.LOCAL_DATA_DIR.get(), dirname)
    remote_dir = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), dirname)
    copy_dir(local_dir, remote_fs, remote_dir)

  # Copy over sample data
  copy_dir(jobsub.conf.SAMPLE_DATA_DIR.get(),
           remote_fs,
           posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "sample_data"))

  # Also copy over Hadoop examples and streaming jars
  local_src = hadoop.conf.HADOOP_EXAMPLES_JAR.get()
  if local_src is None:
    raise Exception('Failed to locate the Hadoop example jar')
  remote_dst = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "examples", "hadoop-examples.jar")
  copy_file(local_src, remote_fs, remote_dst)

  # Write out the models too
  fixture_path = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "example_data.xml")
  examples = django.core.serializers.deserialize("xml", open(fixture_path))
  sample_user = None
  sample_job_designs = []
  for example in examples:
    if isinstance(example.object, User):
      sample_user = example
    elif isinstance(example.object, jobsub.models.JobDesign):
      sample_job_designs.append(example)
    else:
      raise Exception("Unexpected fixture type.")

  if sample_user is None:
    raise Exception("Expected sample user fixture.")

  # Create the sample user if it doesn't exist
  try:
    sample_user.object = User.objects.get(username=sample_user.object.username)
  except User.DoesNotExist:
    sample_user.object.pk = None
    sample_user.object.id = None
    sample_user.save()

  for j in sample_job_designs:
    j.object.id = None
    j.object.pk = None
    j.object.owner_id = sample_user.object.id
    j.save()

  # Upon success, write to the database
  try:
    entry = jobsub.models.CheckForSetup.objects.get(id=1)
  except jobsub.models.CheckForSetup.DoesNotExist:
    entry = jobsub.models.CheckForSetup(id=1)
  entry.setup_run = True
  entry.save()
def handle_noargs(self, **options):
  remote_fs = cluster.get_hdfs()
  if hasattr(remote_fs, "setuser"):
    remote_fs.setuser(remote_fs.superuser)
  LOG.info("Using remote fs: %s" % str(remote_fs))

  # Copy over examples/ and script_templates/ directories
  for dirname in ("examples", "script_templates"):
    local_dir = os.path.join(jobsub.conf.LOCAL_DATA_DIR.get(), dirname)
    remote_dir = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), dirname)
    copy_dir(local_dir, remote_fs, remote_dir)

  # Copy over sample data
  copy_dir(jobsub.conf.SAMPLE_DATA_DIR.get(),
           remote_fs,
           posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "sample_data"))

  # Also copy over Hadoop examples and streaming jars
  local_src = hadoop.conf.HADOOP_EXAMPLES_JAR.get()
  if local_src is None:
    raise Exception('Failed to locate the Hadoop example jar')
  remote_dst = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "examples", "hadoop-examples.jar")
  copy_file(local_src, remote_fs, remote_dst)

  # Write out the models too
  fixture_path = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "example_data.xml")
  examples = django.core.serializers.deserialize("xml", open(fixture_path))
  sample_user = None
  sample_job_designs = []
  for example in examples:
    if isinstance(example.object, User):
      sample_user = example
    elif isinstance(example.object, jobsub.models.JobDesign):
      sample_job_designs.append(example)
    else:
      raise Exception("Unexpected fixture type.")

  if sample_user is None:
    raise Exception("Expected sample user fixture.")

  # Create the sample user if it doesn't exist
  try:
    sample_user.object = User.objects.get(username=sample_user.object.username)
  except User.DoesNotExist:
    sample_user.object.pk = None
    sample_user.object.id = None
    sample_user.save()

  for j in sample_job_designs:
    j.object.id = None
    j.object.pk = None
    j.object.owner_id = sample_user.object.id
    j.save()

  # Upon success, write to the database
  try:
    entry = jobsub.models.CheckForSetup.objects.get(id=1)
  except jobsub.models.CheckForSetup.DoesNotExist:
    entry = jobsub.models.CheckForSetup(id=1)
  entry.setup_run = True
  entry.save()
def setUp(self):
  self.client = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
  self.client_not_me = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)

  self.user = User.objects.get(username="******")
  self.user_not_me = User.objects.get(username="******")

  # Beware: Monkey patch HS2API Mock API
  if not hasattr(notebook.connectors.hiveserver2, 'original_HS2Api'):  # Could not monkey patch base.get_api
    notebook.connectors.hiveserver2.original_HS2Api = notebook.connectors.hiveserver2.HS2Api
  notebook.connectors.hiveserver2.HS2Api = MockedApi

  originalCluster.get_hdfs()
  self.original_fs = originalCluster.FS_CACHE["default"]
  originalCluster.FS_CACHE["default"] = MockFs()

  grant_access("test", "default", "notebook")
  grant_access("not_perm_user", "default", "notebook")
def handle(self, *args, **options):
  fs = cluster.get_hdfs()
  create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()
  sample_user = install_sample_user()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = paths.get_thirdparty_root("sample_data")
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir, remote_data_dir)

  # Initialize doc2, whether editor script or link
  doc2 = None

  # Install editor pig script without doc1 link
  LOG.info("Using Hue 4, will install pig editor sample.")
  doc2 = self.install_pig_script(sample_user)

  if USE_NEW_EDITOR.get():
    # Get or create sample user directories
    LOG.info("Creating sample user directories.")

    home_dir = Directory.objects.get_home_directory(sample_user)
    examples_dir, created = Directory.objects.get_or_create(
      parent_directory=home_dir,
      owner=sample_user,
      name=Document2.EXAMPLES_DIR)

    # If document exists but has been trashed, recover from Trash
    if doc2 and doc2.parent_directory != examples_dir:
      doc2.parent_directory = examples_dir
      doc2.save()

    # Share with default group
    examples_dir.share(sample_user, Document2Permission.READ_PERM, groups=[get_default_user_group()])
def __init__(self): self.fs = cluster.get_hdfs("default") # Assumes /tmp exists and is 1777 self.fs_prefix = get_fs_prefix(self.fs) LOG.info("Using %s as FS root" % self.fs_prefix) # Might need more self.fs.do_as_user("test", self.fs.create_home_dir, "/user/test") self.fs.do_as_user("hue", self.fs.create_home_dir, "/user/hue")
def test_end_to_end(self):
  if not is_live_cluster():
    raise SkipTest()

  fs = cluster.get_hdfs()
  collection_name = "test_collection"
  indexer = Indexer("test", fs)
  input_loc = "/tmp/test.csv"

  # upload the test file to hdfs
  fs.create(input_loc, data=IndexerTest.simpleCSVString, overwrite=True)

  # open a filestream for the file on hdfs
  stream = fs.open(input_loc)

  # guess the format of the file
  file_type_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})

  field_types = indexer.guess_field_types({"file": {"stream": stream, "name": "test.csv"}, "format": file_type_format})

  format_ = field_types.copy()
  format_['format'] = file_type_format

  # find a field name available to use for the record's uuid
  unique_field = indexer.get_unique_field(format_)
  is_unique_generated = indexer.is_unique_generated(format_)

  # generate morphline
  morphline = indexer.generate_morphline_config(collection_name, format_, unique_field)

  schema_fields = indexer.get_kept_field_list(format_['columns'])
  if is_unique_generated:
    schema_fields += [{"name": unique_field, "type": "string"}]

  # create the collection from the specified fields
  collection_manager = CollectionManagerController("test")
  if collection_manager.collection_exists(collection_name):
    collection_manager.delete_collection(collection_name, None)
  collection_manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

  # index the file
  indexer.run_morphline(collection_name, morphline, input_loc)
def _upload_to_hdfs(self, django_user, local_filepath, hdfs_root_destination, filename=None):
  fs = cluster.get_hdfs()

  if filename is None:
    filename = self.name
  hdfs_destination = '%s/%s' % (hdfs_root_destination, filename)

  LOG.info('Uploading local data %s to HDFS path "%s"' % (self.name, hdfs_destination))
  fs.do_as_user(django_user, fs.copyFromLocal, local_filepath, hdfs_destination)

  return hdfs_destination
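# Minimal usage sketch for _upload_to_hdfs() above, assuming `sample` is an
# instance of the enclosing sample-table class; the username, local fixture
# path, and HDFS root are made up for illustration.
def upload_sample_fixture(sample):
  hdfs_path = sample._upload_to_hdfs('hue', '/tmp/web_logs.csv', '/user/hue', filename='web_logs.csv')
  LOG.info('Sample uploaded to %s' % hdfs_path)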
def __init__(self):
  self.fs = cluster.get_hdfs('default')
  # Assumes /tmp exists and is 1777
  self.jt = None  # Deprecated
  self.fs_prefix = get_fs_prefix(self.fs)
  LOG.info('Using %s as FS root' % self.fs_prefix)

  # Might need more
  self.fs.do_as_user('test', self.fs.create_home_dir, '/user/test')
  self.fs.do_as_user('hue', self.fs.create_home_dir, '/user/hue')
def load(self, django_user):
  """
  Upload data to HDFS home of user then load (aka move) it into the Hive table
  (in the Hive metastore in HDFS).
  """
  LOAD_HQL = \
    """
    LOAD DATA INPATH '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
    """

  fs = cluster.get_hdfs()

  if self.app_name == 'impala':
    # Because Impala does not have impersonation on by default, we use a public destination for the upload.
    from impala.conf import IMPERSONATION_ENABLED
    if not IMPERSONATION_ENABLED.get():
      tmp_public = '/tmp/public_hue_examples'
      fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
      hdfs_root_destination = tmp_public
  else:
    hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)

  hdfs_destination = os.path.join(hdfs_root_destination, self.name)

  LOG.info('Uploading local data %s to HDFS table "%s"' % (self.name, hdfs_destination))
  fs.do_as_user(django_user, fs.copyFromLocal, self._contents_file, hdfs_destination)

  LOG.info('Loading data into table "%s"' % (self.name,))
  hql = LOAD_HQL % {'tablename': self.name, 'filename': hdfs_destination}
  query = hql_query(hql)

  try:
    results = dbms.get(django_user, self.query_server).execute_and_wait(query)
    if not results:
      msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
      LOG.error(msg)
      raise InstallException(msg)
  except QueryServerException, ex:
    msg = _('Error loading table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
    LOG.error(msg)
    raise InstallException(msg)
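# Note on the LOAD_HQL template in load() above: LOAD DATA INPATH moves the
# uploaded HDFS file into the table's warehouse directory rather than copying
# it, which is why the data is staged under the user's home directory (or a
# public /tmp path for Impala) first. With illustrative values, the generated
# statement looks like:
#
#   LOAD DATA INPATH '/user/hue/web_logs' OVERWRITE INTO TABLE web_logs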
def get_configuration_statements(self):
  configuration = []

  for f in self.file_resources:
    if not urlparse.urlsplit(f['path']).scheme:
      scheme = get_hdfs().fs_defaultfs
    else:
      scheme = ''
    configuration.append(render_to_string("hql_resource.mako", dict(type=f['type'], path=f['path'], scheme=scheme)))

  for f in self.functions:
    configuration.append(render_to_string("hql_function.mako", f))

  return configuration
def handle_noargs(self, **options): self.user = install_sample_user() self.fs = cluster.get_hdfs() LOG.info(_("Creating sample directory '%s' in HDFS") % REMOTE_SAMPLE_DIR.get()) create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()]) remote_dir = REMOTE_SAMPLE_DIR.get() # Copy examples binaries for name in os.listdir(LOCAL_SAMPLE_DIR.get()): local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name) remote_data_dir = self.fs.join(remote_dir, name) LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % { 'local_dir': local_dir, 'remote_data_dir': remote_data_dir}) self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir) # Copy sample data local_dir = LOCAL_SAMPLE_DATA_DIR.get() remote_data_dir = self.fs.join(remote_dir, 'data') LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % { 'local_dir': local_dir, 'remote_data_dir': remote_data_dir}) self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir) # Load jobs LOG.info(_("Installing examples...")) if ENABLE_V2.get(): management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2) # Get or create sample user directories home_dir = Directory.objects.get_home_directory(self.user) examples_dir, created = Directory.objects.get_or_create( parent_directory=home_dir, owner=self.user, name=Document2.EXAMPLES_DIR ) # Share oozie examples with default group oozie_examples = Document2.objects.filter( type__in=['oozie-workflow2', 'oozie-coordinator2', 'oozie-bundle2'], owner=self.user, parent_directory=None ) oozie_examples.update(parent_directory=examples_dir) examples_dir.share(self.user, Document2Permission.READ_PERM, groups=[get_default_user_group()]) self.install_examples() Document.objects.sync()
def install_sample_user():
  """
  Setup the de-activated sample user with a certain id. Do not create a user profile.
  """
  try:
    user = auth_models.User.objects.get(username=SAMPLE_USERNAME)
  except auth_models.User.DoesNotExist:
    user = auth_models.User.objects.create(username=SAMPLE_USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
    LOG.info('Installed a user called "%s"' % (SAMPLE_USERNAME,))

  fs = cluster.get_hdfs()
  fs.do_as_user(SAMPLE_USERNAME, fs.create_home_dir)

  return user
def get_configuration_statements(self):
  configuration = []

  for f in self.file_resources:
    if not urlparse.urlsplit(f['path']).scheme:
      scheme = get_hdfs().fs_defaultfs
    else:
      scheme = ''
    configuration.append('ADD %(type)s %(scheme)s%(path)s' % {'type': f['type'], 'path': f['path'], 'scheme': scheme})

  for f in self.functions:
    configuration.append("CREATE TEMPORARY FUNCTION %(name)s AS '%(class_name)s'" % {'name': f['name'], 'class_name': f['class_name']})

  return configuration
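# Worked example for get_configuration_statements() above, with made-up
# resources and a made-up default FS URI:
#
#   self.file_resources = [{'type': 'jar', 'path': '/user/hue/my_udf.jar'}]
#   self.functions = [{'name': 'my_lower', 'class_name': 'org.hue.udf.MyLower'}]
#
# produces, when fs_defaultfs is hdfs://namenode:8020:
#
#   ADD jar hdfs://namenode:8020/user/hue/my_udf.jar
#   CREATE TEMPORARY FUNCTION my_lower AS 'org.hue.udf.MyLower'
#
# A path that already carries a scheme (e.g. s3a://bucket/my_udf.jar) is kept
# as-is, since the scheme prefix is left empty in that branch.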
def install(self, django_user):
  if has_concurrency_support() and not self.is_transactional:
    LOG.info('Skipping table %s as non transactional' % self.name)
    return

  if not (has_concurrency_support() and self.is_transactional) and not cluster.get_hdfs():
    raise PopupException('Requiring a File System to load its data')

  self.create(django_user)

  if self.partition_files:
    for partition_spec, filepath in list(self.partition_files.items()):
      self.load_partition(django_user, partition_spec, filepath, columns=self.columns)
  else:
    self.load(django_user)

  return True
def get_configuration_statements(self):
  configuration = []

  for f in self.file_resources:
    if not urlparse.urlsplit(f['path']).scheme:
      scheme = get_hdfs().fs_defaultfs
    else:
      scheme = ''
    configuration.append('ADD %(type)s %(scheme)s%(path)s' % {'type': f['type'].upper(), 'path': f['path'], 'scheme': scheme})

  for f in self.functions:
    configuration.append("CREATE TEMPORARY FUNCTION %(name)s AS '%(class_name)s'" % {'name': f['name'], 'class_name': f['class_name']})

  return configuration
def handle_noargs(self, **options):
  self.user = install_sample_user()
  self.fs = cluster.get_hdfs()
  self.searcher = controller.CollectionManagerController(self.user)

  LOG.info(_("Installing twitter collection"))
  path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_twitter_demo/index_data.csv'))
  self._setup_collection_from_csv({
    'name': 'twitter_demo',
    'fields': self._parse_fields(path),
    'uniqueKeyField': 'id',
    'df': 'text'
  }, path)
  LOG.info(_("Twitter collection successfully installed"))

  LOG.info(_("Installing yelp collection"))
  path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_yelp_demo/index_data.csv'))
  self._setup_collection_from_csv({
    'name': 'yelp_demo',
    'fields': self._parse_fields(path),
    'uniqueKeyField': 'id',
    'df': 'text'
  }, path)
  LOG.info(_("Yelp collection successfully installed"))

  LOG.info(_("Installing jobs collection"))
  path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_jobs_demo/index_data.csv'))
  self._setup_collection_from_csv({
    'name': 'jobs_demo',
    'fields': self._parse_fields(path),
    'uniqueKeyField': 'id',
    'df': 'description'
  }, path)
  LOG.info(_("Jobs collection successfully installed"))

  LOG.info(_("Installing logs collection"))
  path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_log_analytics_demo/index_data.csv'))
  self._setup_collection_from_csv({
    'name': 'log_analytics_demo',
    'fields': self._parse_fields(path, fieldtypes={
      'region_code': 'string',
      'referer': 'string'
    }),
    'uniqueKeyField': 'id',
    'df': 'record'
  }, path)
  LOG.info(_("Logs collection successfully installed"))
def get_configuration_statements(self):
  configuration = []

  for f in self.file_resources:
    if not urlparse.urlsplit(f['path']).scheme:
      scheme = get_hdfs().fs_defaultfs
    else:
      scheme = ''
    configuration.append(render_to_string("hql_resource.mako", dict(type=f['type'], path=f['path'], scheme=scheme)))

  for f in self.functions:
    configuration.append(render_to_string("hql_function.mako", f))

  return configuration
def __init__(self, hosts=None, read_only=True):
  self.hosts = hosts if hosts else ENSEMBLE.get()
  self.read_only = read_only

  hdfs = cluster.get_hdfs()
  if hdfs is None:
    raise ZookeeperConfigurationException('No [hdfs] configured in hue.ini.')

  if hdfs.security_enabled:
    self.sasl_server_principal = PRINCIPAL_NAME.get()
  else:
    self.sasl_server_principal = None

  self.zk = KazooClient(hosts=self.hosts,
                        read_only=self.read_only,
                        sasl_server_principal=self.sasl_server_principal)
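# Hedged usage of the ZooKeeper wrapper above, assuming the enclosing class is
# named ZookeeperClient (the snippet only shows its __init__); the ensemble
# address and znode path are illustrative.
client = ZookeeperClient(hosts='localhost:2181', read_only=True)
client.zk.start()
try:
  children = client.zk.get_children('/')  # top-level znodes
finally:
  client.zk.stop()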
def handle(self, *args, **options):
  fs = cluster.get_hdfs()
  create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()
  sample_user = install_sample_user()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = paths.get_thirdparty_root("sample_data")
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir, remote_data_dir)

  # Initialize doc2, whether editor script or link
  doc2 = None

  # Install editor pig script without doc1 link
  LOG.info("Using Hue 4, will install pig editor sample.")
  doc2 = self.install_pig_script(sample_user)

  if USE_NEW_EDITOR.get():
    # Get or create sample user directories
    LOG.info("Creating sample user directories.")

    home_dir = Directory.objects.get_home_directory(sample_user)
    examples_dir, created = Directory.objects.get_or_create(
      parent_directory=home_dir,
      owner=sample_user,
      name=Document2.EXAMPLES_DIR)

    # If document exists but has been trashed, recover from Trash
    if doc2 and doc2.parent_directory != examples_dir:
      doc2.parent_directory = examples_dir
      doc2.save()

    # Share with default group
    examples_dir.share(sample_user, Document2Permission.READ_PERM, groups=[get_default_user_group()])
def handle(self, *args, **options):
  fs = cluster.get_hdfs()
  fs.setuser(fs.DEFAULT_USER)
  if not fs.exists(UDF_PATH):
    fs.mkdir(UDF_PATH, 0777)

  for f in args:
    file_name = os.path.split(f)[-1]
    path = fs.join(UDF_PATH, file_name)
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, f, path)
    UDF.objects.create(url=path, file_name=file_name, owner=User.objects.get(id=1))

  if not args:
    for f in fs.listdir(UDF_PATH):
      try:
        UDF.objects.get(file_name=f)
      except UDF.DoesNotExist:
        path = fs.join(UDF_PATH, f)
        UDF.objects.create(url=path, file_name=f, owner=User.objects.get(id=1))
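# Hedged invocation sketch for the UDF upload command above, assuming it is
# registered under the (hypothetical) name 'upload_udfs'; the jar path is
# illustrative:
#
#   build/env/bin/hue upload_udfs /tmp/my_udf.jar
#
# or programmatically:
#
#   from django.core import management
#   management.call_command('upload_udfs', '/tmp/my_udf.jar')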
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  old_caches = clear_sys_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
  )

  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
    restore_sys_caches(old_caches)
def _get_hdfs_root_destination(self, django_user, subdir=None):
  fs = cluster.get_hdfs()

  if self.app_name == 'impala':
    # Because Impala does not have impersonation on by default, we use a public destination for the upload.
    from impala.conf import IMPERSONATION_ENABLED
    if not IMPERSONATION_ENABLED.get():
      tmp_public = '/tmp/public_hue_examples'
      if subdir:
        tmp_public += '/%s' % subdir
      fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
      hdfs_root_destination = tmp_public
  else:
    hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)
    if subdir:
      hdfs_root_destination += '/%s' % subdir
      fs.do_as_user(django_user, fs.mkdir, hdfs_root_destination, '0777')

  return hdfs_root_destination
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  cluster.clear_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
  )

  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
def install_sample_user():
  """
  Setup the de-activated sample user with a certain id. Do not create a user profile.
  """
  try:
    user = auth_models.User.objects.get(username=SAMPLE_USERNAME)
  except auth_models.User.DoesNotExist:
    user = auth_models.User.objects.create(username=SAMPLE_USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
    LOG.info('Installed a user called "%s"' % (SAMPLE_USERNAME,))

  fs = cluster.get_hdfs()
  fs.do_as_user(SAMPLE_USERNAME, fs.create_home_dir)

  return user
def load(self, django_user):
  """
  Upload data to HDFS home of user then load (aka move) it into the Hive table
  (in the Hive metastore in HDFS).
  """
  LOAD_HQL = \
    """
    LOAD DATA INPATH '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
    """

  fs = cluster.get_hdfs()

  if self.app_name == 'impala':
    # Because Impala does not have impersonation on by default, we use a public destination for the upload.
    from impala.conf import IMPERSONATION_ENABLED
    if not IMPERSONATION_ENABLED.get():
      tmp_public = '/tmp/public_hue_examples'
      fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
      hdfs_root_destination = tmp_public
  else:
    hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)

  hdfs_destination = os.path.join(hdfs_root_destination, self.name)

  LOG.info('Uploading local data %s to HDFS table "%s"' % (self.name, hdfs_destination))
  fs.do_as_user(django_user, fs.copyFromLocal, self._contents_file, hdfs_destination)

  LOG.info('Loading data into table "%s"' % (self.name,))
  hql = LOAD_HQL % {'tablename': self.name, 'filename': hdfs_destination}
  query = hql_query(hql)

  try:
    results = dbms.get(django_user, self.query_server).execute_and_wait(query)
    if not results:
      msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
      LOG.error(msg)
      raise InstallException(msg)
  except QueryServerException, ex:
    msg = _('Error loading table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
    LOG.error(msg)
    raise InstallException(msg)
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  remote_dir = create_data_dir(fs)

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = posixpath.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = posixpath.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    copy_dir(fs, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = LOCAL_SAMPLE_DATA_DIR.get()
  remote_data_dir = posixpath.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  copy_dir(fs, local_dir, remote_data_dir)

  # Load jobs
  management.call_command('loaddata', 'apps/oozie/src/oozie/fixtures/initial_data.json', verbosity=2)
def _get_hdfs_root_destination(self, django_user, subdir=None):
  fs = cluster.get_hdfs()
  hdfs_root_destination = None
  can_impersonate_hdfs = False

  if self.app_name == 'impala':
    # Impala can support impersonation, so use home instead of a public destination for the upload
    from impala.conf import IMPERSONATION_ENABLED
    can_impersonate_hdfs = IMPERSONATION_ENABLED.get()

  if can_impersonate_hdfs:
    hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)
    if subdir:
      hdfs_root_destination += '/%s' % subdir
      fs.do_as_user(django_user, fs.mkdir, hdfs_root_destination, '0777')
  else:
    tmp_public = '/tmp/public_hue_examples'
    if subdir:
      tmp_public += '/%s' % subdir
    fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
    hdfs_root_destination = tmp_public

  return hdfs_root_destination
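# Usage sketch for _get_hdfs_root_destination() above; the caller username and
# subdir are illustrative. When Impala impersonation is on, the destination
# resolves to the user's home (optionally with a 0777 subdir); otherwise it
# falls back to the shared /tmp/public_hue_examples staging area:
#
#   root = self._get_hdfs_root_destination('hue', subdir='web_logs')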
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = paths.get_thirdparty_root("sample_data")
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  install_sample_user()
  management.call_command('loaddata', 'initial_pig_examples.json', verbosity=2)
  Document.objects.sync()
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  sample_user = CreateSandboxUserCommand().handle_noargs()
  fs.setuser(sample_user)

  create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.copyFromLocal(local_dir, remote_data_dir)

  # Copy sample data
  local_dir = LOCAL_SAMPLE_DATA_DIR.get()
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.copyFromLocal(local_dir, remote_data_dir)

  # Load jobs
  management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)
def test_end_to_end(self):
  fs = cluster.get_hdfs()
  collection_name = "test_collection"
  indexer = Indexer("test", fs)
  input_loc = "/tmp/test.csv"

  # upload the test file to hdfs
  fs.create(input_loc, data=IndexerTest.simpleCSVString, overwrite=True)

  # open a filestream for the file on hdfs
  stream = fs.open(input_loc)

  # guess the format of the file
  file_type_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})

  field_types = indexer.guess_field_types({"file": {"stream": stream, "name": "test.csv"}, "format": file_type_format})

  format_ = field_types.copy()
  format_['format'] = file_type_format

  # find a field name available to use for the record's uuid
  unique_field = indexer.get_uuid_name(format_)

  # generate morphline
  morphline = indexer.generate_morphline_config(collection_name, format_, unique_field)

  schema_fields = [{"name": unique_field, "type": "string"}] + indexer.get_kept_field_list(format_['columns'])

  # create the collection from the specified fields
  collection_manager = CollectionManagerController("test")
  if collection_manager.collection_exists(collection_name):
    collection_manager.delete_collection(collection_name, None)
  collection_manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

  # index the file
  indexer.run_morphline(collection_name, morphline, input_loc)
def handle_noargs(self, **options):
  fs = cluster.get_hdfs()
  remote_dir = create_directories(fs)

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = LOCAL_SAMPLE_DATA_DIR.get()
  remote_data_dir = fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  sample, created = User.objects.get_or_create(username='******')
  management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)

  from oozie.models import Job
  Job.objects.filter(owner__id=1100713).update(owner=sample)  # 11OOZIE
def handle_noargs(self, **options): self.user = install_sample_user() self.fs = cluster.get_hdfs() LOG.info(_("Creating sample directory '%s' in HDFS") % REMOTE_SAMPLE_DIR.get()) create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()]) remote_dir = REMOTE_SAMPLE_DIR.get() # Copy examples binaries for name in os.listdir(LOCAL_SAMPLE_DIR.get()): local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name) remote_data_dir = self.fs.join(remote_dir, name) LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % { 'local_dir': local_dir, 'remote_data_dir': remote_data_dir}) self.fs.do_as_user(self.user.username, self.fs.copyFromLocal, local_dir, remote_data_dir) # Copy sample data local_dir = LOCAL_SAMPLE_DATA_DIR.get() remote_data_dir = self.fs.join(remote_dir, 'data') LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % { 'local_dir': local_dir, 'remote_data_dir': remote_data_dir}) self.fs.do_as_user(self.user.username, self.fs.copyFromLocal, local_dir, remote_data_dir) # Get or create sample user directories home_dir = Directory.objects.get_home_directory(self.user) examples_dir, created = Directory.objects.get_or_create( parent_directory=home_dir, owner=self.user, name=Document2.EXAMPLES_DIR ) # Load jobs LOG.info(_("Installing examples...")) if ENABLE_V2.get(): management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2) if IS_HUE_4.get(): # Install editor oozie examples without doc1 link LOG.info("Using Hue 4, will install oozie editor samples.") example_jobs = [] example_jobs.append(self._install_mapreduce_example()) example_jobs.append(self._install_java_example()) example_jobs.append(self._install_spark_example()) example_jobs.append(self._install_pyspark_example()) # If documents exist but have been trashed, recover from Trash for doc in example_jobs: if doc is not None and doc.parent_directory != examples_dir: doc.parent_directory = examples_dir doc.save() elif USE_NEW_EDITOR.get(): # Install as link-workflow doc2 to old Job Designs docs = Document.objects.get_docs(self.user, Workflow).filter(owner=self.user) for doc in docs: if doc.content_object: data = doc.content_object.data_dict data.update({'content_type': doc.content_type.model, 'object_id': doc.object_id}) data = json.dumps(data) # Don't overwrite doc2, created = Document2.objects.get_or_create( owner=self.user, parent_directory=examples_dir, name=doc.name, type='link-workflow', description=doc.description, data=data ) LOG.info('Successfully installed sample link to jobsub: %s' % (doc2.name,)) # Share oozie examples with default group oozie_examples = Document2.objects.filter( type__in=['oozie-workflow2', 'oozie-coordinator2', 'oozie-bundle2'], owner=self.user, parent_directory=None ) oozie_examples.update(parent_directory=examples_dir) examples_dir.share(self.user, Document2Permission.READ_PERM, groups=[get_default_user_group()]) if not IS_HUE_4.get(): self.install_examples() Document.objects.sync()
def test_update_properties(self):
  finish = []
  finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  try:
    properties = {
      'user.name': 'hue',
      'test.1': 'http://localhost/test?test1=test&test2=test',
      'nameNode': 'hdfs://curacao:8020',
      'jobTracker': 'jtaddress',
      'security_enabled': False
    }

    final_properties = properties.copy()
    submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    clear_sys_caches()
    fs = cluster.get_hdfs()
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jtaddress',
      'nameNode': fs.fs_defaultfs
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=None)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
    finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
    clear_sys_caches()
    fs = cluster.get_hdfs()
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jobtracker',
      'nameNode': 'namenode'
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=None)
    assert_equal(properties, submission.properties)
  finally:
    clear_sys_caches()
    for reset in finish:
      reset()
def install_sample_user():
  """
  Setup the de-activated sample user with a certain id. Do not create a user profile.
  """
  # Moved to avoid circular import with is_admin
  from desktop.models import SAMPLE_USER_ID, SAMPLE_USER_INSTALL

  user = None

  try:
    if User.objects.filter(id=SAMPLE_USER_ID).exists():
      user = User.objects.get(id=SAMPLE_USER_ID)
      LOG.info('Sample user found with username "%s" and User ID: %s' % (user.username, user.id))
    elif User.objects.filter(username=SAMPLE_USER_INSTALL).exists():
      user = User.objects.get(username=SAMPLE_USER_INSTALL)
      LOG.info('Sample user found: %s' % user.username)
    else:
      user, created = User.objects.get_or_create(
        username=SAMPLE_USER_INSTALL,
        password='******',
        is_active=False,
        is_superuser=False,
        id=SAMPLE_USER_ID,
        pk=SAMPLE_USER_ID)

      if created:
        LOG.info('Installed a user called "%s"' % SAMPLE_USER_INSTALL)

    if user.username != SAMPLE_USER_INSTALL:
      LOG.warn('Sample user does not have username "%s", will attempt to modify the username.' % SAMPLE_USER_INSTALL)
      with transaction.atomic():
        user = User.objects.get(id=SAMPLE_USER_ID)
        user.username = SAMPLE_USER_INSTALL
        user.save()
  except Exception as ex:
    LOG.exception('Failed to get or create sample user')

  # If sample user doesn't belong to default group, add to default group
  default_group = get_default_user_group()
  if user is not None and default_group is not None and default_group not in user.groups.all():
    user.groups.add(default_group)
    user.save()

  fs = cluster.get_hdfs()
  # If home directory doesn't exist for sample user, create it
  try:
    if not fs.do_as_user(SAMPLE_USER_INSTALL, fs.get_home_dir):
      fs.do_as_user(SAMPLE_USER_INSTALL, fs.create_home_dir)
      LOG.info('Created home directory for user: %s' % SAMPLE_USER_INSTALL)
    else:
      LOG.info('Home directory already exists for user: %s' % SAMPLE_USER_INSTALL)
  except Exception as ex:
    LOG.exception('Failed to create home directory for user %s: %s' % (SAMPLE_USER_INSTALL, str(ex)))

  return user
def handle_noargs(self, **options):
  self.user = install_sample_user()
  self.fs = cluster.get_hdfs()

  LOG.info(_("Creating sample directory '%s' in HDFS") % REMOTE_SAMPLE_DIR.get())
  create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()])
  remote_dir = REMOTE_SAMPLE_DIR.get()

  # Copy examples binaries
  for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
    local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name)
    remote_data_dir = self.fs.join(remote_dir, name)
    LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
        'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir)

  # Copy sample data
  local_dir = LOCAL_SAMPLE_DATA_DIR.get()
  remote_data_dir = self.fs.join(remote_dir, 'data')
  LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
      'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
  self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir)

  # Load jobs
  LOG.info(_("Installing examples..."))
  if ENABLE_V2.get():
    management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)

  # Get or create sample user directories
  home_dir = Directory.objects.get_home_directory(self.user)
  examples_dir, created = Directory.objects.get_or_create(
    parent_directory=home_dir,
    owner=self.user,
    name=Document2.EXAMPLES_DIR)

  # Share oozie examples with default group
  oozie_examples = Document2.objects.filter(
    type__in=['oozie-workflow2', 'oozie-coordinator2', 'oozie-bundle2'],
    owner=self.user,
    parent_directory=None)
  oozie_examples.update(parent_directory=examples_dir)
  examples_dir.share(self.user, Document2Permission.READ_PERM, groups=[get_default_user_group()])

  self.install_examples()

  Document.objects.sync()
LOG.info("HuePermissions: %d added, %d updated, %d up to date, %d stale" % (len(added), updated, uptodate, available - len(added) - updated - uptodate)) models.signals.post_syncdb.connect(update_app_permissions) models.signals.post_syncdb.connect(get_default_user_group) def install_sample_user(): """ Setup the de-activated sample user with a certain id. Do not create a user profile. """ try: user = auth_models.User.objects.get(username=SAMPLE_USERNAME) except auth_models.User.DoesNotExist: try: user = auth_models.User.objects.create(username=SAMPLE_USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713) LOG.info('Installed a user called "%s"' % (SAMPLE_USERNAME,)) except Exception, e: LOG.info('Sample user race condition: %s' % e) user = auth_models.User.objects.get(username=SAMPLE_USERNAME) LOG.info('Sample user race condition, got: %s' % user) fs = cluster.get_hdfs() fs.do_as_user(SAMPLE_USERNAME, fs.create_home_dir) return user
def install_sample_user(django_user=None):
  """
  Setup the de-activated sample user with a certain id. Do not create a user profile.
  """
  from desktop.models import SAMPLE_USER_ID, get_sample_user_install
  from hadoop import cluster

  user = None
  django_username = get_sample_user_install(django_user)

  if ENABLE_ORGANIZATIONS.get():
    lookup = {'email': django_username}
    django_username_short = django_user.username_short
  else:
    lookup = {'username': django_username}
    django_username_short = django_username

  try:
    if User.objects.filter(id=SAMPLE_USER_ID).exists() and not ENABLE_ORGANIZATIONS.get():
      user = User.objects.get(id=SAMPLE_USER_ID)
      LOG.info('Sample user found with username "%s" and User ID: %s' % (user.username, user.id))
    elif User.objects.filter(**lookup).exists():
      user = User.objects.get(**lookup)
      LOG.info('Sample user found: %s' % lookup)
    else:
      user_attributes = lookup.copy()
      if ENABLE_ORGANIZATIONS.get():
        user_attributes['organization'] = get_organization(email=django_username)
      else:
        user_attributes['id'] = SAMPLE_USER_ID

      user_attributes.update({
        'password': '******',
        'is_active': False,
        'is_superuser': False,
      })
      user, created = User.objects.get_or_create(**user_attributes)

      if created:
        LOG.info('Installed a user "%s"' % lookup)

    if user.username != django_username and not ENABLE_ORGANIZATIONS.get():
      LOG.warn('Sample user does not have username "%s", will attempt to modify the username.' % django_username)
      with transaction.atomic():
        user = User.objects.get(id=SAMPLE_USER_ID)
        user.username = django_username
        user.save()
  except:
    LOG.exception('Failed to get or create sample user')

  # If sample user doesn't belong to default group, add to default group
  default_group = get_default_user_group(user=user)
  if user is not None and default_group is not None and default_group not in user.groups.all():
    user.groups.add(default_group)
    user.save()

  # If home directory doesn't exist for sample user, create it
  fs = cluster.get_hdfs()
  try:
    if not fs:
      LOG.info('No fs configured, skipping home directory creation for user: %s' % django_username_short)
    elif not fs.do_as_user(django_username_short, fs.get_home_dir):
      fs.do_as_user(django_username_short, fs.create_home_dir)
      LOG.info('Created home directory for user: %s' % django_username_short)
    else:
      LOG.info('Home directory already exists for user: %s' % django_username)
  except Exception as ex:
    LOG.exception('Failed to create home directory for user %s: %s' % (django_username, str(ex)))

  return user
res = []

try:
  try:
    if not 'test' in sys.argv:  # Avoid tests hanging
      server = dbms.get(user)
      server.get_databases()
  except StructuredThriftTransportException, e:
    if 'Error validating the login' in str(e):
      msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
      LOG.exception(msg)
      res.append((NICE_NAME, _(msg)))
    else:
      raise e
except Exception, e:
  msg = "The application won't work without a running HiveServer2."
  LOG.exception(msg)
  res.append((NICE_NAME, _(msg)))

try:
  from hadoop import cluster
  warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
  fs = cluster.get_hdfs()
  fs.stats(warehouse)
except Exception:
  msg = 'Failed to access Hive warehouse: %s'
  LOG.exception(msg % warehouse)
  return [(NICE_NAME, _(msg) % warehouse)]

return res