Example #1
    def test_convert_pig_script(self):
        attrs = {
            "user": self.user,
            "id": 1000,
            "name": "Test",
            "script": 'A = LOAD "$data"; STORE A INTO "$output";',
            "parameters": [],
            "resources": [],
            "hadoopProperties": [],
        }
        pig_script = create_or_update_script(**attrs)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(Document2.objects.filter(owner=self.user, type="link-pigscript").exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type="link-pigscript")

            # Verify absolute_url
            response = self.client.get(doc2.get_absolute_url())
            assert_equal(200, response.status_code)
        finally:
            pig_script.delete()
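Most of the examples on this page follow the same shape: create a legacy Document (a saved query, Pig script, or Oozie workflow), assert that no converted Document2 exists yet, run the converter, then assert on the converted result. The core call is the same everywhere; `user` below is whichever Django User owns the documents (`self.user` in the tests, `request.user` in the views):

    converter = DocumentConverter(user)
    converter.convert()  # converts every legacy document owned by this user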
Example #2
    def test_convert_hive_query_with_invalid_name(self):
        sql = "SELECT * FROM sample_07"
        settings = [
            {"key": "hive.exec.scratchdir", "value": "/tmp/mydir"},
            {"key": "hive.querylog.location", "value": "/tmp/doc2"},
        ]
        file_resources = [{"type": "jar", "path": "/tmp/doc2/test.jar"}]
        functions = [{"name": "myUpper", "class_name": "org.hue.udf.MyUpper"}]
        design = hql_query(sql, database="etl", settings=settings, file_resources=file_resources, functions=functions)

        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING["hql"],
            owner=self.user,
            data=design.dumps(),
            name="Test / Hive query",
            desc="Test Hive query",
        )
        doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

        try:
            # Test that corresponding doc2 is created after convert
            assert_equal(0, Document2.objects.filter(owner=self.user, type="query-hive").count())

            converter = DocumentConverter(self.user)
            converter.convert()

            assert_equal(1, Document2.objects.filter(owner=self.user, type="query-hive").count())

            doc2 = Document2.objects.get(owner=self.user, type="query-hive", is_history=False)
            # Verify Document2 name is stripped of invalid chars
            assert_equal("Test  Hive query", doc2.data_dict["name"])
        finally:
            query.delete()
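Note how the two Hive-name tests differ: here the converter strips the invalid '/' from the document name ('Test / Hive query' becomes 'Test  Hive query'), while in Example #11 the name is preserved and the '/' instead shows up percent-escaped in the document path ('/Test%20/%20Hive%20query'); presumably these are two revisions of the same sanitization logic.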
Example #3
  def test_import_project(self):
    # Test that when importing a Document that is tagged with a project, we create a directory with that tag name and
    # place the document within it
    sql = 'SELECT * FROM sample_07'
    design = hql_query(sql)

    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['impala'],
        owner=self.user,
        data=design.dumps(),
        name='Impala query',
        desc='Test Impala query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)
    default_tag = DocumentTag.objects.get_default_tag(self.user)
    custom_tag = DocumentTag.objects.create_tag(self.user, 'sample_07')
    doc.add_tag(default_tag)
    doc.add_tag(custom_tag)

    try:
      converter = DocumentConverter(self.user)
      converter.convert()

      # Should have a directory named after custom tag
      assert_true(Directory.objects.filter(owner=self.user, name=custom_tag.tag, parent_directory=self.home_dir).exists())

      # But ignore reserved tags (default)
      assert_false(Directory.objects.filter(owner=self.user, name=default_tag.tag, parent_directory=self.home_dir).exists())

      # Document should exist under custom directory
      project_dir = Directory.objects.get(owner=self.user, name=custom_tag.tag, parent_directory=self.home_dir)
      assert_true(Document2.objects.filter(owner=self.user, name='Impala query', parent_directory=project_dir).exists())
    finally:
      query.delete()
Example #4
  def test_convert_pig_script(self):
    attrs = {
      'user': self.user,
      'id': 1000,
      'name': 'Test',
      'script': 'A = LOAD "$data"; STORE A INTO "$output";',
      'parameters': [],
      'resources': [],
      'hadoopProperties': []
    }
    pig_script = create_or_update_script(**attrs)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='link-pigscript').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='link-pigscript')

      # Verify absolute_url
      response = self.client.get(doc2.get_absolute_url())
      assert_equal(200, response.status_code)
    finally:
      pig_script.delete()
Example #5
  def test_convert_mapreduce(self):
    wf = Workflow.objects.new_workflow(self.user)
    wf.save()
    Workflow.objects.initialize(wf)
    Link.objects.filter(parent__workflow=wf).delete()
    action = add_node(wf, 'action-name-1', 'mapreduce', [wf.start], {
      'description': 'Test MR job design',
      'files': '[]',
      'jar_path': '/user/hue/oozie/examples/lib/hadoop-examples.jar',
      'job_properties': '[{"name": "sleep.job.map.sleep.time", "value": "5"}, {"name": "sleep.job.reduce.sleep.time", "value": "10"}]',
      'prepares': '[{"value":"${output}","type":"delete"},{"value":"/test","type":"mkdir"}]',
      'archives': '[]',
    })
    Link(parent=action, child=wf.end, name="ok").save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=wf.doc.get().id)
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-mapreduce').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-mapreduce')

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal('/user/hue/oozie/examples/lib/hadoop-examples.jar', doc2.data_dict['snippets'][0]['properties']['app_jar'])
      assert_equal(['sleep.job.map.sleep.time=5', 'sleep.job.reduce.sleep.time=10'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
    finally:
      wf.delete()
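The assertions above show that the converter flattens the action's JSON job_properties into plain 'name=value' strings on the snippet's hadoopProperties list; the Java and shell conversions below (Examples #17, #18 and #22) are checked against the same flattened form.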
Example #6
    def handle_noargs(self, **options):
        print 'Starting document conversions...\n'
        try:
            with transaction.atomic():
                users = User.objects.all()
                logging.info("Starting document conversions for %d users" %
                             len(users))
                for index, user in enumerate(users):
                    logging.info(
                        "Starting document conversion for user %d: %s" %
                        (index, user.username))

                    start_time = time.time()
                    converter = DocumentConverter(user)
                    converter.convert()
                    logging.info(
                        "Document conversions for user:%s took %.3f seconds" %
                        (user.username, time.time() - start_time))

                    if converter.failed_docs:
                        print >> sys.stderr, 'Failed to import %d document(s) for user: %s - %s' % (
                            len(converter.failed_docs), user.username,
                            ([doc.id for doc in converter.failed_docs]))
        except Exception, e:
            logging.exception("Failed to execute the document conversions.")
Example #7
    def test_convert_pig_script(self):
        attrs = {
            'user': self.user,
            'id': 1000,
            'name': 'Test',
            'script': 'A = LOAD "$data"; STORE A INTO "$output";',
            'parameters': [],
            'resources': [],
            'hadoopProperties': []
        }
        pig_script = create_or_update_script(**attrs)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='link-pigscript').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user,
                                         type='link-pigscript')

            # Verify absolute_url
            response = self.client.get(doc2.get_absolute_url())
            assert_equal(200, response.status_code)
        finally:
            pig_script.delete()
Example #8
    def handle(self, *args, **options):
        print('Starting document conversions...\n')
        try:
            with transaction.atomic():
                users = User.objects.all()
                logging.info("Starting document conversions for %d users" %
                             len(users))
                for index, user in enumerate(users):
                    logging.info(
                        "Starting document conversion for user %d: %s" %
                        (index, user.username))

                    start_time = time.time()
                    converter = DocumentConverter(user)
                    converter.convert()
                    logging.info(
                        "Document conversions for user:%s took %.3f seconds" %
                        (user.username, time.time() - start_time))

                    if converter.failed_doc_ids:
                        print(
                            'Failed to import %d document(s) for user: %s - %s'
                            % (len(converter.failed_doc_ids), user.username,
                               converter.failed_doc_ids),
                            file=sys.stderr)
        except Exception as e:
            logging.exception("Failed to execute the document conversions.")

        print('Finished running document conversions.\n')
Example #9
    def test_convert_rdbms_query(self):
        sql = 'SELECT * FROM auth_user'
        data = {
            'query': {
                'query': sql,
                'server': 'sqlite',
                'type': 'rdbms',
                'database': 'desktop/desktop.db'
            },
            'VERSION': '0.0.1'
        }
        data_json = json.dumps(data)
        design = SQLdesign.loads(data_json)
        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING['rdbms'],
            owner=self.user,
            data=design.dumps(),
            name='SQLite query',
            desc='Test SQLite query')
        doc = Document.objects.link(query,
                                    owner=query.owner,
                                    extra=query.type,
                                    name=query.name,
                                    description=query.desc)

        # Setting doc.last_modified to older date
        Document.objects.filter(
            id=doc.id).update(last_modified=datetime.strptime(
                '2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
        doc = Document.objects.get(id=doc.id)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='query-sqlite').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type='query-sqlite')

            # Verify Document2 attributes
            assert_equal(doc.name, doc2.data_dict['name'])
            assert_equal(doc.description, doc2.data_dict['description'])
            assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'),
                         doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))

            # Verify session type
            assert_equal('sqlite', doc2.data_dict['sessions'][0]['type'])

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
        finally:
            query.delete()
Example #10
    def test_convert_impala_query(self):
        sql = 'SELECT * FROM sample_07'
        settings = [{
            'key': 'EXPLAIN_LEVEL',
            'value': '2'
        }, {
            'key': 'ABORT_ON_ERROR',
            'value': '1'
        }]
        design = hql_query(sql, database='etl', settings=settings)

        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING['impala'],
            owner=self.user,
            data=design.dumps(),
            name='Impala query',
            desc='Test Impala query')
        doc = Document.objects.link(query,
                                    owner=query.owner,
                                    extra=query.type,
                                    name=query.name,
                                    description=query.desc)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='query-impala').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type='query-impala')

            # Verify Document2 attributes
            assert_equal(doc.name, doc2.data_dict['name'])
            assert_equal(doc.description, doc2.data_dict['description'])

            # Verify session type
            assert_equal('impala', doc2.data_dict['sessions'][0]['type'])

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
            assert_equal('etl', doc2.data_dict['snippets'][0]['database'])

            # Verify snippet properties
            assert_equal(
                settings,
                doc2.data_dict['snippets'][0]['properties']['settings'])

            # Verify default properties
            assert_true(doc2.data_dict['isSaved'])
        finally:
            query.delete()
Example #11
    def test_convert_hive_query_with_special_chars(self):
        sql = 'SELECT * FROM sample_07'
        settings = [{
            'key': 'hive.exec.scratchdir',
            'value': '/tmp/mydir'
        }, {
            'key': 'hive.querylog.location',
            'value': '/tmp/doc2'
        }]
        file_resources = [{'type': 'jar', 'path': '/tmp/doc2/test.jar'}]
        functions = [{'name': 'myUpper', 'class_name': 'org.hue.udf.MyUpper'}]
        design = hql_query(sql,
                           database='etl',
                           settings=settings,
                           file_resources=file_resources,
                           functions=functions)

        query = SavedQuery.objects.create(type=SavedQuery.TYPES_MAPPING['hql'],
                                          owner=self.user,
                                          data=design.dumps(),
                                          name='Test / Hive query',
                                          desc='Test Hive query')
        doc = Document.objects.link(query,
                                    owner=query.owner,
                                    extra=query.type,
                                    name=query.name,
                                    description=query.desc)

        try:
            # Test that corresponding doc2 is created after convert
            assert_equal(
                0,
                Document2.objects.filter(owner=self.user,
                                         type='query-hive').count())

            converter = DocumentConverter(self.user)
            converter.convert()

            assert_equal(
                1,
                Document2.objects.filter(owner=self.user,
                                         type='query-hive').count())

            doc2 = Document2.objects.get(owner=self.user,
                                         type='query-hive',
                                         is_history=False)

            # Verify name is maintained
            assert_equal('Test / Hive query', doc2.name)

            # Verify Document2 path is stripped of invalid chars
            assert_equal('/Test%20/%20Hive%20query', doc2.path)
        finally:
            query.delete()
Example #12
  def test_convert_impala_query(self):
    sql = 'SELECT * FROM sample_07'
    settings = [
        {'key': 'EXPLAIN_LEVEL', 'value': '2'},
        {'key': 'ABORT_ON_ERROR', 'value': '1'}
    ]
    design = hql_query(sql, database='etl', settings=settings)

    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['impala'],
        owner=self.user,
        data=design.dumps(),
        name='Impala query',
        desc='Test Impala query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

    # Setting doc.last_modified to older date
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-impala').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-impala')

      # Verify Document2 attributes
      assert_equal(doc.name, doc2.data_dict['name'])
      assert_equal(doc.description, doc2.data_dict['description'])
      assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))

      # Verify session type
      assert_equal('impala', doc2.data_dict['sessions'][0]['type'])

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
      assert_equal('etl', doc2.data_dict['snippets'][0]['database'])

      # Verify snippet properties
      assert_equal(settings, doc2.data_dict['snippets'][0]['properties']['settings'])

      # Verify default properties
      assert_true(doc2.data_dict['isSaved'])
    finally:
      query.delete()
Example #13
  def test_convert_hive_query(self):
    sql = 'SELECT * FROM sample_07'
    settings = [
        {'key': 'hive.exec.scratchdir', 'value': '/tmp/mydir'},
        {'key': 'hive.querylog.location', 'value': '/tmp/doc2'}
    ]
    file_resources = [{'type': 'jar', 'path': '/tmp/doc2/test.jar'}]
    functions = [{'name': 'myUpper', 'class_name': 'org.hue.udf.MyUpper'}]
    design = hql_query(sql, database='etl', settings=settings, file_resources=file_resources, functions=functions)

    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['hql'],
        owner=self.user,
        data=design.dumps(),
        name='Hive query',
        desc='Test Hive query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-hive').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-hive')

      # Verify Document2 attributes
      assert_equal(doc.name, doc2.data_dict['name'])
      assert_equal(doc.description, doc2.data_dict['description'])

      # Verify session type
      assert_equal('hive', doc2.data_dict['sessions'][0]['type'])

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
      assert_equal('etl', doc2.data_dict['snippets'][0]['database'])

      # Verify snippet properties
      assert_equal(settings, doc2.data_dict['snippets'][0]['properties']['settings'])
      assert_equal(file_resources, doc2.data_dict['snippets'][0]['properties']['files'])
      assert_equal(functions, doc2.data_dict['snippets'][0]['properties']['functions'])

      # Verify default properties
      assert_true(doc2.data_dict['isSaved'])
    finally:
      query.delete()
Example #14
  def test_convert_pig_script(self):
    attrs = {
      'user': self.user,
      'id': 1000,
      'name': 'Test',
      'script': 'A = LOAD "$data"; STORE A INTO "$output";',
      'hadoopProperties': [
        {u'name': u'mapred.job.queue.name', u'value': u'pig'},
        {u'name': u'mapreduce.task.profile', u'value': u'true'}
      ],
      'parameters': [
        {u'name': u'input', u'value': u'/user/test/data'},
        {u'name': u'verbose', u'value': u'true'}
      ],
      'resources': [
        {u'type': u'file', u'value': u'/user/test/test.txt'},
        {u'type': u'archive', u'value': u'/user/test/test.jar'}
      ],
    }
    pig_script = create_or_update_script(**attrs)
    pig_script.save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=pig_script.doc.get().id)
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      if IS_HUE_4.get():
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='query-pig').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='query-pig')

        # Verify snippet values
        assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
        assert_equal(attrs['script'], doc2.data_dict['snippets'][0]['statement'], doc2.data_dict)
        assert_equal(attrs['script'], doc2.data_dict['snippets'][0]['statement_raw'])
        assert_equal(['mapred.job.queue.name=pig', 'mapreduce.task.profile=true'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
        assert_equal(['input=/user/test/data', 'verbose=true'], doc2.data_dict['snippets'][0]['properties']['parameters'])
        assert_equal(['/user/test/test.txt', '/user/test/test.jar'], doc2.data_dict['snippets'][0]['properties']['resources'])
      else:
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='link-pigscript').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='link-pigscript')

        # Verify absolute_url
        response = self.client.get(doc2.get_absolute_url())
        assert_equal(200, response.status_code)
        assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))
    finally:
      pig_script.delete()
Example #15
  def test_convert_rdbms_query(self):
    sql = 'SELECT * FROM auth_user'
    data = {
        'query': {
            'query': sql,
            'server': 'sqlite',
            'type': 'rdbms',
            'database': 'desktop/desktop.db'
        },
        'VERSION': '0.0.1'
    }
    data_json = json.dumps(data)
    design = SQLdesign.loads(data_json)
    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['rdbms'],
        owner=self.user,
        data=design.dumps(),
        name='SQLite query',
        desc='Test SQLite query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

    # Setting doc.last_modified to older date
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-sqlite').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-sqlite')

      # Verify Document2 attributes
      assert_equal(doc.name, doc2.data_dict['name'])
      assert_equal(doc.description, doc2.data_dict['description'])
      assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))

      # Verify session type
      assert_equal('sqlite', doc2.data_dict['sessions'][0]['type'])

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
    finally:
      query.delete()
Example #16
  def handle_noargs(self, **options):
    self.stdout.write('Starting document conversions...\n')
    try:
      with transaction.atomic():
        users = User.objects.all()
        logging.info("Starting document conversions for %d users" % len(users))
        for index, user in enumerate(users):
          logging.info("Starting document conversion for user %d: %s" % (index, user.username))

          start_time = time.time()
          converter = DocumentConverter(user)
          converter.convert()
          logging.info("Document conversions for user:%s took %.3f seconds" % (user.username, time.time() - start_time))
    except Exception, e:
      logging.exception("Failed to execute the document conversions.")
Example #17
  def test_convert_java(self):
    wf = Workflow.objects.new_workflow(self.user)
    wf.save()
    Workflow.objects.initialize(wf)
    Link.objects.filter(parent__workflow=wf).delete()
    action = add_node(wf, 'action-name-1', 'java', [wf.start], {
      'name': 'MyTeragen',
      "description": "Generate N number of records",
      "main_class": "org.apache.hadoop.examples.terasort.TeraGen",
      "args": "1000 ${output_dir}/teragen",
      "files": '["my_file","my_file2"]',
      "job_xml": "",
      "java_opts": "-Dexample-property=natty",
      "jar_path": "/user/hue/oozie/workspaces/lib/hadoop-examples.jar",
      'job_properties': '[{"name": "mapred.job.queue.name", "value": "test"}]',
      "prepares": '[{"value":"/test","type":"mkdir"}]',
      "archives": '[{"dummy":"","name":"my_archive"},{"dummy":"","name":"my_archive2"}]',
      "capture_output": True,
    })
    Link(parent=action, child=wf.end, name="ok").save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=wf.doc.get().id)
    Document.objects.filter(id=doc.id).update(
      last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-java').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-java')

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal('/user/hue/oozie/workspaces/lib/hadoop-examples.jar', doc2.data_dict['snippets'][0]['properties']['app_jar'])
      assert_equal('org.apache.hadoop.examples.terasort.TeraGen', doc2.data_dict['snippets'][0]['properties']['class'])
      assert_equal('1000 ${output_dir}/teragen', doc2.data_dict['snippets'][0]['properties']['args'])
      assert_equal('-Dexample-property=natty', doc2.data_dict['snippets'][0]['properties']['java_opts'])
      assert_equal(['mapred.job.queue.name=test'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
      assert_equal(['my_archive', 'my_archive2'], doc2.data_dict['snippets'][0]['properties']['archives'])
      assert_equal([{'type': 'file', 'path': 'my_file'}, {'type': 'file', 'path': 'my_file2'}], doc2.data_dict['snippets'][0]['properties']['files'])
      assert_equal(True, doc2.data_dict['snippets'][0]['properties']['capture_output'])
    finally:
      wf.delete()
Example #18
  def test_convert_shell(self):
    wf = Workflow.objects.new_workflow(self.user)
    wf.save()
    Workflow.objects.initialize(wf)
    Link.objects.filter(parent__workflow=wf).delete()
    action = add_node(wf, 'action-name-1', 'shell', [wf.start], {
      u'job_xml': 'my-job.xml',
      u'files': '["hello.py"]',
      u'name': 'Shell',
      u'job_properties': '[{"name": "mapred.job.queue.name", "value": "test"}]',
      u'capture_output': 'on',
      u'command': 'hello.py',
      u'archives': '[{"dummy": "", "name": "test.zip"}]',
      u'prepares': '[]',
      u'params': '[{"type": "argument", "value": "baz"}, {"type": "env-var", "value": "foo=bar"}]',
      u'description': 'Execute a Python script printing its arguments'
    })
    Link(parent=action, child=wf.end, name="ok").save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=wf.doc.get().id)
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      if IS_HUE_4.get():
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='query-shell').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='query-shell')

        # Verify snippet values
        assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
        assert_equal('hello.py', doc2.data_dict['snippets'][0]['properties']['command_path'])
        assert_equal(['baz'], doc2.data_dict['snippets'][0]['properties']['arguments'])
        assert_equal(['foo=bar'], doc2.data_dict['snippets'][0]['properties']['env_var'])
        assert_equal(['mapred.job.queue.name=test'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
        assert_equal(['test.zip'], doc2.data_dict['snippets'][0]['properties']['archives'])
        assert_equal([{'type': 'file', 'path': 'hello.py'}], doc2.data_dict['snippets'][0]['properties']['files'])
        assert_equal(True, doc2.data_dict['snippets'][0]['properties']['capture_output'])
      else:
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='link-workflow').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='link-workflow')

        # Verify absolute_url
        response = self.client.get(doc2.get_absolute_url())
        assert_equal(200, response.status_code)
        assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))
    finally:
      wf.delete()
Example #19
    def test_convert_mapreduce(self):
        wf = Workflow.objects.new_workflow(self.user)
        wf.save()
        Workflow.objects.initialize(wf)
        Link.objects.filter(parent__workflow=wf).delete()
        action = add_node(
            wf, 'action-name-1', 'mapreduce', [wf.start], {
                'description': 'Test MR job design',
                'files': '[]',
                'jar_path': '/user/hue/oozie/examples/lib/hadoop-examples.jar',
                'job_properties':
                '[{"name": "sleep.job.map.sleep.time", "value": "5"}, {"name": "sleep.job.reduce.sleep.time", "value": "10"}]',
                'prepares':
                '[{"value":"${output}","type":"delete"},{"value":"/test","type":"mkdir"}]',
                'archives': '[]',
            })
        Link(parent=action, child=wf.end, name="ok").save()

        # Setting doc.last_modified to older date
        doc = Document.objects.get(id=wf.doc.get().id)
        Document.objects.filter(
            id=doc.id).update(last_modified=datetime.strptime(
                '2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
        doc = Document.objects.get(id=doc.id)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='query-mapreduce').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user,
                                         type='query-mapreduce')

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(
                '/user/hue/oozie/examples/lib/hadoop-examples.jar',
                doc2.data_dict['snippets'][0]['properties']['app_jar'])
            assert_equal([
                'sleep.job.map.sleep.time=5', 'sleep.job.reduce.sleep.time=10'
            ], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
        finally:
            wf.delete()
Example #20
  def test_convert_pig_script(self):
    attrs = {
      'user': self.user,
      'id': 1000,
      'name': 'Test',
      'script': 'A = LOAD "$data"; STORE A INTO "$output";',
      'hadoopProperties': [
        {u'name': u'mapred.job.queue.name', u'value': u'pig'},
        {u'name': u'mapreduce.task.profile', u'value': u'true'}
      ],
      'parameters': [
        {u'name': u'input', u'value': u'/user/test/data'},
        {u'name': u'verbose', u'value': u'true'}
      ],
      'resources': [
        {u'type': u'file', u'value': u'/user/test/test.txt'},
        {u'type': u'archive', u'value': u'/user/test/test.jar'}
      ],
    }
    pig_script = create_or_update_script(**attrs)
    pig_script.save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=pig_script.doc.get().id)
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-pig').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-pig')

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal(attrs['script'], doc2.data_dict['snippets'][0]['statement'], doc2.data_dict)
      assert_equal(attrs['script'], doc2.data_dict['snippets'][0]['statement_raw'])
      assert_equal(['mapred.job.queue.name=pig', 'mapreduce.task.profile=true'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
      assert_equal(['input=/user/test/data', 'verbose=true'], doc2.data_dict['snippets'][0]['properties']['parameters'])
      assert_equal(['/user/test/test.txt', '/user/test/test.jar'], doc2.data_dict['snippets'][0]['properties']['resources'])
    finally:
      pig_script.delete()
Example #21
  def handle(self, *args, **options):
    print 'Starting document conversions...\n'
    try:
      with transaction.atomic():
        users = User.objects.all()
        logging.info("Starting document conversions for %d users" % len(users))
        for index, user in enumerate(users):
          logging.info("Starting document conversion for user %d: %s" % (index, user.username))

          start_time = time.time()
          converter = DocumentConverter(user)
          converter.convert()
          logging.info("Document conversions for user:%s took %.3f seconds" % (user.username, time.time() - start_time))

          if converter.failed_doc_ids:
            print >> sys.stderr, 'Failed to import %d document(s) for user: %s - %s' % (len(converter.failed_doc_ids), user.username, converter.failed_doc_ids)
    except Exception, e:
      logging.exception("Failed to execute the document conversions.")
Example #22
  def test_convert_shell(self):
    wf = Workflow.objects.new_workflow(self.user)
    wf.save()
    Workflow.objects.initialize(wf)
    Link.objects.filter(parent__workflow=wf).delete()
    action = add_node(wf, 'action-name-1', 'shell', [wf.start], {
      u'job_xml': 'my-job.xml',
      u'files': '["hello.py"]',
      u'name': 'Shell',
      u'job_properties': '[{"name": "mapred.job.queue.name", "value": "test"}]',
      u'capture_output': True,
      u'command': 'hello.py',
      u'archives': '[{"dummy": "", "name": "test.zip"}]',
      u'prepares': '[]',
      u'params': '[{"type": "argument", "value": "baz"}, {"type": "env-var", "value": "foo=bar"}]',
      u'description': 'Execute a Python script printing its arguments'
    })
    Link(parent=action, child=wf.end, name="ok").save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=wf.doc.get().id)
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-shell').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-shell')

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal('hello.py', doc2.data_dict['snippets'][0]['properties']['command_path'])
      assert_equal(['baz'], doc2.data_dict['snippets'][0]['properties']['arguments'])
      assert_equal(['foo=bar'], doc2.data_dict['snippets'][0]['properties']['env_var'])
      assert_equal(['mapred.job.queue.name=test'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
      assert_equal(['test.zip'], doc2.data_dict['snippets'][0]['properties']['archives'])
      assert_equal([{'type': 'file', 'path': 'hello.py'}], doc2.data_dict['snippets'][0]['properties']['files'])
      assert_equal(True, doc2.data_dict['snippets'][0]['properties']['capture_output'])
    finally:
      wf.delete()
Example #23
    def test_convert_impala_query(self):
        sql = "SELECT * FROM sample_07"
        settings = [{"key": "EXPLAIN_LEVEL", "value": "2"}, {"key": "ABORT_ON_ERROR", "value": "1"}]
        design = hql_query(sql, database="etl", settings=settings)

        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING["impala"],
            owner=self.user,
            data=design.dumps(),
            name="Impala query",
            desc="Test Impala query",
        )
        doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(Document2.objects.filter(owner=self.user, type="query-impala").exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type="query-impala")

            # Verify Document2 attributes
            assert_equal(doc.name, doc2.data_dict["name"])
            assert_equal(doc.description, doc2.data_dict["description"])

            # Verify session type
            assert_equal("impala", doc2.data_dict["sessions"][0]["type"])

            # Verify snippet values
            assert_equal("ready", doc2.data_dict["snippets"][0]["status"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement_raw"])
            assert_equal("etl", doc2.data_dict["snippets"][0]["database"])

            # Verify snippet properties
            assert_equal(settings, doc2.data_dict["snippets"][0]["properties"]["settings"])

            # Verify default properties
            assert_true(doc2.data_dict["isSaved"])
        finally:
            query.delete()
Example #24
  def test_convert_impala_query(self):
    sql = 'SELECT * FROM sample_07'
    settings = [
        {'key': 'EXPLAIN_LEVEL', 'value': '2'},
        {'key': 'ABORT_ON_ERROR', 'value': '1'}
    ]
    design = hql_query(sql, settings=settings)

    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['impala'],
        owner=self.user,
        data=design.dumps(),
        name='Impala query',
        desc='Test Impala query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

    try:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-impala').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-impala')

      # Verify Document2 attributes
      assert_equal(doc.name, doc2.data_dict['name'])
      assert_equal(doc.description, doc2.data_dict['description'])

      # Verify session type
      assert_equal('impala', doc2.data_dict['sessions'][0]['type'])

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])

      # Verify snippet properties
      assert_equal(settings, doc2.data_dict['snippets'][0]['properties']['settings'])
    finally:
      query.delete()
Example #25
    def test_import_permissions(self):
        make_logged_in_client(username="******",
                              groupname="default",
                              recreate=True,
                              is_superuser=False)

        other_user = User.objects.get(username="******")
        test_group = get_default_user_group()

        # Test that when importing a Document with permissions, the corresponding permissions are created for the Doc2
        sql = 'SELECT * FROM sample_07'
        design = hql_query(sql)

        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING['impala'],
            owner=self.user,
            data=design.dumps(),
            name='Impala query',
            desc='Test Impala query')
        doc = Document.objects.link(query,
                                    owner=query.owner,
                                    extra=query.type,
                                    name=query.name,
                                    description=query.desc)
        read_perm = DocumentPermission.objects.create(doc=doc, perms='read')
        read_perm.users.add(other_user)
        read_perm.groups.add(test_group)
        write_perm = DocumentPermission.objects.create(doc=doc, perms='write')
        write_perm.users.add(other_user)

        try:
            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, name=query.name)
            # Test that doc2 has same read permissions
            assert_true(other_user in doc2.get_permission('read').users.all())
            assert_true(test_group in doc2.get_permission('read').groups.all())
            # Test that doc2 has same write permissions
            assert_true(other_user in doc2.get_permission('write').users.all())
        finally:
            query.delete()
Example #26
    def test_convert_rdbms_query(self):
        sql = "SELECT * FROM auth_user"
        data = {
            "query": {"query": sql, "server": "sqlite", "type": "rdbms", "database": "desktop/desktop.db"},
            "VERSION": "0.0.1",
        }
        data_json = json.dumps(data)
        design = SQLdesign.loads(data_json)
        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING["rdbms"],
            owner=self.user,
            data=design.dumps(),
            name="SQLite query",
            desc="Test SQLite query",
        )
        doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(Document2.objects.filter(owner=self.user, type="query-sqlite").exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type="query-sqlite")

            # Verify Document2 attributes
            assert_equal(doc.name, doc2.data_dict["name"])
            assert_equal(doc.description, doc2.data_dict["description"])

            # Verify session type
            assert_equal("sqlite", doc2.data_dict["sessions"][0]["type"])

            # Verify snippet values
            assert_equal("ready", doc2.data_dict["snippets"][0]["status"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement_raw"])
        finally:
            query.delete()
Example #27
    def test_convert_pig_script(self):
        attrs = {
            'user': self.user,
            'id': 1000,
            'name': 'Test',
            'script': 'A = LOAD "$data"; STORE A INTO "$output";',
            'parameters': [],
            'resources': [],
            'hadoopProperties': []
        }
        pig_script = create_or_update_script(**attrs)

        # Setting doc.last_modified to older date
        doc = Document.objects.get(id=pig_script.doc.get().id)
        Document.objects.filter(
            id=doc.id).update(last_modified=datetime.strptime(
                '2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
        doc = Document.objects.get(id=doc.id)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='link-pigscript').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user,
                                         type='link-pigscript')

            # Verify absolute_url
            response = self.client.get(doc2.get_absolute_url())
            assert_equal(200, response.status_code)
            assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'),
                         doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))
        finally:
            pig_script.delete()
Example #28
  def test_convert_hive_query_with_special_chars(self):
    sql = 'SELECT * FROM sample_07'
    settings = [
      {'key': 'hive.exec.scratchdir', 'value': '/tmp/mydir'},
      {'key': 'hive.querylog.location', 'value': '/tmp/doc2'}
    ]
    file_resources = [{'type': 'jar', 'path': '/tmp/doc2/test.jar'}]
    functions = [{'name': 'myUpper', 'class_name': 'org.hue.udf.MyUpper'}]
    design = hql_query(sql, database='etl', settings=settings, file_resources=file_resources, functions=functions)

    query = SavedQuery.objects.create(
      type=SavedQuery.TYPES_MAPPING['hql'],
      owner=self.user,
      data=design.dumps(),
      name='Test / Hive query',
      desc='Test Hive query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

    try:
      # Test that corresponding doc2 is created after convert
      assert_equal(0, Document2.objects.filter(owner=self.user, type='query-hive').count())

      converter = DocumentConverter(self.user)
      converter.convert()

      assert_equal(1, Document2.objects.filter(owner=self.user, type='query-hive').count())

      doc2 = Document2.objects.get(owner=self.user, type='query-hive', is_history=False)

      # Verify name is maintained
      assert_equal('Test / Hive query', doc2.name)

      # Verify Document2 path is stripped of invalid chars
      assert_equal('/Test%20/%20Hive%20query', doc2.path)
    finally:
      query.delete()
Example #29
  def test_import_permissions(self):
    make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)

    other_user = User.objects.get(username="******")
    test_group = get_default_user_group()

    # Test that when importing a Document with permissions, the corresponding permissions are created for the Doc2
    sql = 'SELECT * FROM sample_07'
    design = hql_query(sql)

    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['impala'],
        owner=self.user,
        data=design.dumps(),
        name='Impala query',
        desc='Test Impala query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)
    read_perm = DocumentPermission.objects.create(doc=doc, perms='read')
    read_perm.users.add(other_user)
    read_perm.groups.add(test_group)
    write_perm = DocumentPermission.objects.create(doc=doc, perms='write')
    write_perm.users.add(other_user)

    try:
      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, name=query.name)
      # Test that doc2 has same read permissions
      assert_true(other_user in doc2.get_permission('read').users.all())
      assert_true(test_group in doc2.get_permission('read').groups.all())
      # Test that doc2 has same write permissions
      assert_true(other_user in doc2.get_permission('write').users.all())
    finally:
      query.delete()
Example #30
def home2(request, is_embeddable=False):
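  # Best-effort migration: convert this user's legacy documents when the page
  # loads, logging any failure instead of raising it.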
  try:
    converter = DocumentConverter(request.user)
    converter.convert()
  except Exception, e:
    LOG.warning("Failed to convert and import documents: %s" % e)
Example #31
def home2(request):
  try:
    converter = DocumentConverter(request.user)
    converter.convert()
  except Exception, e:
    LOG.warning("Failed to convert and import documents: %s" % e)
Example #32
  def test_convert_hive_query(self):
    sql = 'SELECT * FROM sample_07'
    settings = [
        {'key': 'hive.exec.scratchdir', 'value': '/tmp/mydir'},
        {'key': 'hive.querylog.location', 'value': '/tmp/doc2'}
    ]
    file_resources = [{'type': 'jar', 'path': '/tmp/doc2/test.jar'}]
    functions = [{'name': 'myUpper', 'class_name': 'org.hue.udf.MyUpper'}]
    design = hql_query(sql, database='etl', settings=settings, file_resources=file_resources, functions=functions)

    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['hql'],
        owner=self.user,
        data=design.dumps(),
        name='Hive query',
        desc='Test Hive query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

    # Setting doc.last_modified to older date
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    query2 = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['hql'],
        owner=self.user,
        data=design.dumps(),
        name='Hive query history',
        desc='Test Hive query history',
        is_auto=True
    )
    doch = Document.objects.link(query2, owner=query2.owner, extra=query2.type, name=query2.name, description=query2.desc)
    doch.add_to_history()

    try:
      # Test that corresponding doc2 is created after convert
      assert_equal(0, Document2.objects.filter(owner=self.user, type='query-hive').count())

      converter = DocumentConverter(self.user)
      converter.convert()

      assert_equal(2, Document2.objects.filter(owner=self.user, type='query-hive').count())

      #
      # Query
      #
      doc2 = Document2.objects.get(owner=self.user, type='query-hive', is_history=False)

      # Verify Document2 attributes
      assert_equal(doc.name, doc2.data_dict['name'])
      assert_equal(doc.description, doc2.data_dict['description'])

      # Verify session type
      assert_equal('hive', doc2.data_dict['sessions'][0]['type'])

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
      assert_equal('etl', doc2.data_dict['snippets'][0]['database'])

      # Verify snippet properties
      assert_equal(settings, doc2.data_dict['snippets'][0]['properties']['settings'])
      assert_equal(file_resources, doc2.data_dict['snippets'][0]['properties']['files'])
      assert_equal(functions, doc2.data_dict['snippets'][0]['properties']['functions'])

      # Verify default properties
      assert_true(doc2.data_dict['isSaved'])
      assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))

      #
      # Query History
      #
      doc2 = Document2.objects.get(owner=self.user, type='query-hive', is_history=True)

      # Verify Document2 attributes
      assert_equal(doch.name, doc2.data_dict['name'])
      assert_equal(doch.description, doc2.data_dict['description'])
      assert_equal(doch.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))

      # Verify session type
      assert_false(doc2.data_dict['sessions'])

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
      assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
      assert_equal('etl', doc2.data_dict['snippets'][0]['database'])

      # Verify snippet properties
      assert_equal(settings, doc2.data_dict['snippets'][0]['properties']['settings'])
      assert_equal(file_resources, doc2.data_dict['snippets'][0]['properties']['files'])
      assert_equal(functions, doc2.data_dict['snippets'][0]['properties']['functions'])

      # Verify default properties
      assert_false(doc2.data_dict['isSaved'])


      #
      # Check that we don't re-import again
      #
      converter = DocumentConverter(self.user)
      converter.convert()

      assert_equal(2, Document2.objects.filter(owner=self.user, type='query-hive').count())
    finally:
      query.delete()
      query2.delete()
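Example #32 above also demonstrates that the conversion is idempotent: running DocumentConverter.convert() a second time leaves the Document2 count unchanged, which is what makes it safe for views like home2 (Examples #30 and #31) to trigger the conversion on every request.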
Example #33
    def test_convert_java(self):
        wf = Workflow.objects.new_workflow(self.user)
        wf.save()
        Workflow.objects.initialize(wf)
        Link.objects.filter(parent__workflow=wf).delete()
        action = add_node(
            wf, 'action-name-1', 'java', [wf.start], {
                'name': 'MyTeragen',
                "description": "Generate N number of records",
                "main_class": "org.apache.hadoop.examples.terasort.TeraGen",
                "args": "1000 ${output_dir}/teragen",
                "files": '["my_file","my_file2"]',
                "job_xml": "",
                "java_opts": "-Dexample-property=natty",
                "jar_path":
                "/user/hue/oozie/workspaces/lib/hadoop-examples.jar",
                'job_properties':
                '[{"name": "mapred.job.queue.name", "value": "test"}]',
                "prepares": '[{"value":"/test","type":"mkdir"}]',
                "archives":
                '[{"dummy":"","name":"my_archive"},{"dummy":"","name":"my_archive2"}]',
                "capture_output": True,
            })
        Link(parent=action, child=wf.end, name="ok").save()

        # Setting doc.last_modified to older date
        doc = Document.objects.get(id=wf.doc.get().id)
        Document.objects.filter(
            id=doc.id).update(last_modified=datetime.strptime(
                '2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
        doc = Document.objects.get(id=doc.id)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='query-java').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type='query-java')

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(
                '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
                doc2.data_dict['snippets'][0]['properties']['app_jar'])
            assert_equal('org.apache.hadoop.examples.terasort.TeraGen',
                         doc2.data_dict['snippets'][0]['properties']['class'])
            assert_equal('1000 ${output_dir}/teragen',
                         doc2.data_dict['snippets'][0]['properties']['args'])
            assert_equal(
                '-Dexample-property=natty',
                doc2.data_dict['snippets'][0]['properties']['java_opts'])
            assert_equal([
                'mapred.job.queue.name=test'
            ], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
            assert_equal(
                ['my_archive', 'my_archive2'],
                doc2.data_dict['snippets'][0]['properties']['archives'])
            assert_equal([{
                'type': 'file',
                'path': 'my_file'
            }, {
                'type': 'file',
                'path': 'my_file2'
            }], doc2.data_dict['snippets'][0]['properties']['files'])
            assert_equal(
                True,
                doc2.data_dict['snippets'][0]['properties']['capture_output'])
        finally:
            wf.delete()
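
The assertions in test_convert_java imply a mapping from the workflow action's JSON-encoded fields to typed snippet properties. A sketch of that transformation, written only to make the expected values explicit; the converter's actual implementation may differ, and java_action_to_snippet_properties is a hypothetical name:

import json


def java_action_to_snippet_properties(node):
    # 'node' holds the raw values passed to add_node() above, where files,
    # job_properties and archives are JSON-encoded strings.
    return {
        'app_jar': node['jar_path'],
        'class': node['main_class'],
        'args': node['args'],
        'java_opts': node['java_opts'],
        'hadoopProperties': ['%s=%s' % (p['name'], p['value'])
                             for p in json.loads(node['job_properties'])],
        'archives': [a['name'] for a in json.loads(node['archives'])],
        'files': [{'type': 'file', 'path': f} for f in json.loads(node['files'])],
        'capture_output': node['capture_output'],
    }

Feeding the dict from the add_node() call above through this sketch yields exactly the values asserted against doc2.data_dict['snippets'][0]['properties'].
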
Example #34
    def test_convert_hive_query(self):
        sql = 'SELECT * FROM sample_07'
        settings = [{
            'key': 'hive.exec.scratchdir',
            'value': '/tmp/mydir'
        }, {
            'key': 'hive.querylog.location',
            'value': '/tmp/doc2'
        }]
        file_resources = [{'type': 'jar', 'path': '/tmp/doc2/test.jar'}]
        functions = [{'name': 'myUpper', 'class_name': 'org.hue.udf.MyUpper'}]
        design = hql_query(sql,
                           settings=settings,
                           file_resources=file_resources,
                           functions=functions)

        query = SavedQuery.objects.create(type=SavedQuery.TYPES_MAPPING['hql'],
                                          owner=self.user,
                                          data=design.dumps(),
                                          name='Hive query',
                                          desc='Test Hive query')
        doc = Document.objects.link(query,
                                    owner=query.owner,
                                    extra=query.type,
                                    name=query.name,
                                    description=query.desc)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='query-hive').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type='query-hive')

            # Verify Document2 attributes
            assert_equal(doc.name, doc2.data_dict['name'])
            assert_equal(doc.description, doc2.data_dict['description'])

            # Verify session type
            assert_equal('hive', doc2.data_dict['sessions'][0]['type'])

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])

            # Verify snippet properties
            assert_equal(
                settings,
                doc2.data_dict['snippets'][0]['properties']['settings'])
            assert_equal(file_resources,
                         doc2.data_dict['snippets'][0]['properties']['files'])
            assert_equal(
                functions,
                doc2.data_dict['snippets'][0]['properties']['functions'])
        finally:
            query.delete()
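
Read together, the assertions in this test describe the shape of the notebook document that conversion should produce. A condensed sketch of that data_dict, limited to the fields asserted above (unasserted fields are omitted, and real sessions and snippets carry more keys):

expected_data_dict = {
    'name': 'Hive query',
    'description': 'Test Hive query',
    'sessions': [{'type': 'hive'}],
    'snippets': [{
        'status': 'ready',
        'statement': 'SELECT * FROM sample_07',
        'statement_raw': 'SELECT * FROM sample_07',
        'properties': {
            'settings': [{'key': 'hive.exec.scratchdir', 'value': '/tmp/mydir'},
                         {'key': 'hive.querylog.location', 'value': '/tmp/doc2'}],
            'files': [{'type': 'jar', 'path': '/tmp/doc2/test.jar'}],
            'functions': [{'name': 'myUpper', 'class_name': 'org.hue.udf.MyUpper'}],
        },
    }],
}
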
Example #35
  def test_is_trashed_migration(self):

    # Skipping to prevent failing tests in TestOozieSubmissions
    raise SkipTest

    start_migration = '0024_auto__add_field_document2_is_managed'
    mid_migration = '0025_auto__add_field_document2_is_trashed'
    end_migration = '0026_change_is_trashed_default_to_false'
    APP = 'desktop'

    # Make sure the migration history is up to date by fake-applying all migrations
    management.call_command('migrate', APP, fake=True, verbosity=0)

    dir = Directory.objects.create(name='test_dir', owner=self.user, parent_directory=self.home_dir)
    query = Document2.objects.create(name='query1.sql', type='query-hive', owner=self.user, data={}, parent_directory=dir)
    trashed_query = Document2.objects.create(name='query2.sql', type='query-hive', owner=self.user, data={}, parent_directory=dir)
    trashed_query.trash()

    try:
      assert_false(dir.is_trashed)
      assert_false(query.is_trashed)
      assert_true(trashed_query.is_trashed)

      # Reverse migrate to 0025
      management.call_command('migrate', APP, mid_migration, verbosity=0)

      dir = Document2.objects.get(uuid=dir.uuid)
      query = Document2.objects.get(uuid=query.uuid)
      trashed_query = Document2.objects.get(uuid=trashed_query.uuid)
      assert_false(dir.is_trashed)
      assert_false(query.is_trashed)
      assert_true(trashed_query.is_trashed)

      # Reverse migrate to 0024. Deletes 'is_trashed' field from desktop_documents2
      management.call_command('migrate', APP, start_migration, verbosity=0)

      assert_raises(OperationalError, Document2.objects.get, uuid=dir.uuid)
      assert_raises(OperationalError, Document2.objects.get, uuid=query.uuid)
      assert_raises(OperationalError, Document2.objects.get, uuid=trashed_query.uuid)

      # Forward migrate to 0025
      management.call_command('migrate', APP, mid_migration, verbosity=0)
      dir = Document2.objects.get(uuid=dir.uuid)
      query = Document2.objects.get(uuid=query.uuid)
      trashed_query = Document2.objects.get(uuid=trashed_query.uuid)
      assert_true(dir.is_trashed is None)
      assert_true(query.is_trashed is None)
      assert_true(trashed_query.is_trashed is None)

      # Forward migrate to 0026
      management.call_command('migrate', APP, end_migration, verbosity=0)
      dir = Document2.objects.get(uuid=dir.uuid)
      query = Document2.objects.get(uuid=query.uuid)
      trashed_query = Document2.objects.get(uuid=trashed_query.uuid)
      assert_true(dir.is_trashed is None)
      assert_true(query.is_trashed is None)
      assert_true(trashed_query.is_trashed is None)

      # New Documents should have is_trashed=False
      query1 = Document2.objects.create(name='new_query.sql', type='query-hive', owner=self.user, data={}, parent_directory=dir)
      assert_true(query1.is_trashed is False)

      # Converter sets is_trashed=True for currently trashed docs
      converter = DocumentConverter(self.user)
      converter.convert()
      trashed_query = Document2.objects.get(uuid=trashed_query.uuid)
      dir = Document2.objects.get(uuid=dir.uuid)
      query = Document2.objects.get(uuid=query.uuid)
      assert_true(trashed_query.is_trashed)
      assert_true(dir.is_trashed is False)
      assert_true(query.is_trashed is False)
    finally:
      # Delete docs
      dir.delete()
      query.delete()
      query1.delete()
      trashed_query.delete()
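
The repeated call_command invocations in this migration test suggest a one-line helper; a sketch, assuming the same named-migration scheme the test targets (migrate_to is a hypothetical name):

from django.core import management


def migrate_to(app, migration_name):
    # Roll the app forward or backward to the named migration.
    management.call_command('migrate', app, migration_name, verbosity=0)


# e.g. migrate_to('desktop', '0025_auto__add_field_document2_is_trashed')
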
Example #36
    def test_convert_hive_query(self):
        sql = 'SELECT * FROM sample_07'
        settings = [{
            'key': 'hive.exec.scratchdir',
            'value': '/tmp/mydir'
        }, {
            'key': 'hive.querylog.location',
            'value': '/tmp/doc2'
        }]
        file_resources = [{'type': 'jar', 'path': '/tmp/doc2/test.jar'}]
        functions = [{'name': 'myUpper', 'class_name': 'org.hue.udf.MyUpper'}]
        design = hql_query(sql,
                           database='etl',
                           settings=settings,
                           file_resources=file_resources,
                           functions=functions)

        query = SavedQuery.objects.create(type=SavedQuery.TYPES_MAPPING['hql'],
                                          owner=self.user,
                                          data=design.dumps(),
                                          name='Hive query',
                                          desc='Test Hive query')
        doc = Document.objects.link(query,
                                    owner=query.owner,
                                    extra=query.type,
                                    name=query.name,
                                    description=query.desc)

        query2 = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING['hql'],
            owner=self.user,
            data=design.dumps(),
            name='Hive query history',
            desc='Test Hive query history',
            is_auto=True)
        doch = Document.objects.link(query2,
                                     owner=query2.owner,
                                     extra=query2.type,
                                     name=query2.name,
                                     description=query2.desc)
        doch.add_to_history()

        try:
            # Test that corresponding doc2 is created after convert
            assert_equal(
                0,
                Document2.objects.filter(owner=self.user,
                                         type='query-hive').count())

            converter = DocumentConverter(self.user)
            converter.convert()

            assert_equal(
                2,
                Document2.objects.filter(owner=self.user,
                                         type='query-hive').count())

            #
            # Query
            #
            doc2 = Document2.objects.get(owner=self.user,
                                         type='query-hive',
                                         is_history=False)

            # Verify Document2 attributes
            assert_equal(doc.name, doc2.data_dict['name'])
            assert_equal(doc.description, doc2.data_dict['description'])

            # Verify session type
            assert_equal('hive', doc2.data_dict['sessions'][0]['type'])

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
            assert_equal('etl', doc2.data_dict['snippets'][0]['database'])

            # Verify snippet properties
            assert_equal(
                settings,
                doc2.data_dict['snippets'][0]['properties']['settings'])
            assert_equal(file_resources,
                         doc2.data_dict['snippets'][0]['properties']['files'])
            assert_equal(
                functions,
                doc2.data_dict['snippets'][0]['properties']['functions'])

            # Verify default properties
            assert_true(doc2.data_dict['isSaved'])
            assert_false(doc.last_modified == doc2.last_modified)

            #
            # Query History
            #
            doc2 = Document2.objects.get(owner=self.user,
                                         type='query-hive',
                                         is_history=True)

            # Verify Document2 attributes
            assert_equal(doch.name, doc2.data_dict['name'])
            assert_equal(doch.description, doc2.data_dict['description'])
            assert_equal(doch.last_modified, doc2.last_modified)

            # Verify session type
            assert_equal('hive', doc2.data_dict['sessions'][0]['type'])

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement'])
            assert_equal(sql, doc2.data_dict['snippets'][0]['statement_raw'])
            assert_equal('etl', doc2.data_dict['snippets'][0]['database'])

            # Verify snippet properties
            assert_equal(
                settings,
                doc2.data_dict['snippets'][0]['properties']['settings'])
            assert_equal(file_resources,
                         doc2.data_dict['snippets'][0]['properties']['files'])
            assert_equal(
                functions,
                doc2.data_dict['snippets'][0]['properties']['functions'])

            # Verify default properties
            assert_false(doc2.data_dict['isSaved'])

            #
            # Check that we don't re-import
            #
            converter = DocumentConverter(self.user)
            converter.convert()

            assert_equal(
                2,
                Document2.objects.filter(owner=self.user,
                                         type='query-hive').count())
        finally:
            query.delete()
            query2.delete()
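
Note how the saved query and its history twin are fetched by is_history and then checked for opposite isSaved flags. A sketch of that invariant as a helper, assuming the same imports as above (assert_saved_history_split is a hypothetical name):

from nose.tools import assert_true, assert_false

from desktop.models import Document2


def assert_saved_history_split(user, doc_type='query-hive'):
    # The saved document and its history twin must carry opposite isSaved flags.
    saved = Document2.objects.get(owner=user, type=doc_type, is_history=False)
    history = Document2.objects.get(owner=user, type=doc_type, is_history=True)
    assert_true(saved.data_dict['isSaved'])
    assert_false(history.data_dict['isSaved'])
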
Example #37
    def test_convert_hive_query(self):
        sql = "SELECT * FROM sample_07"
        settings = [
            {"key": "hive.exec.scratchdir", "value": "/tmp/mydir"},
            {"key": "hive.querylog.location", "value": "/tmp/doc2"},
        ]
        file_resources = [{"type": "jar", "path": "/tmp/doc2/test.jar"}]
        functions = [{"name": "myUpper", "class_name": "org.hue.udf.MyUpper"}]
        design = hql_query(sql, database="etl", settings=settings, file_resources=file_resources, functions=functions)

        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING["hql"],
            owner=self.user,
            data=design.dumps(),
            name="Hive query",
            desc="Test Hive query",
        )
        doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

        # Set doc.last_modified to an older date
        Document.objects.filter(id=doc.id).update(
            last_modified=datetime.strptime("2000-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ")
        )
        doc = Document.objects.get(id=doc.id)

        query2 = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING["hql"],
            owner=self.user,
            data=design.dumps(),
            name="Hive query history",
            desc="Test Hive query history",
            is_auto=True,
        )
        doch = Document.objects.link(
            query2, owner=query2.owner, extra=query2.type, name=query2.name, description=query2.desc
        )
        doch.add_to_history()

        try:
            # Test that corresponding doc2 is created after convert
            assert_equal(0, Document2.objects.filter(owner=self.user, type="query-hive").count())

            converter = DocumentConverter(self.user)
            converter.convert()

            assert_equal(2, Document2.objects.filter(owner=self.user, type="query-hive").count())

            #
            # Query
            #
            doc2 = Document2.objects.get(owner=self.user, type="query-hive", is_history=False)

            # Verify Document2 attributes
            assert_equal(doc.name, doc2.data_dict["name"])
            assert_equal(doc.description, doc2.data_dict["description"])

            # Verify session type
            assert_equal("hive", doc2.data_dict["sessions"][0]["type"])

            # Verify snippet values
            assert_equal("ready", doc2.data_dict["snippets"][0]["status"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement_raw"])
            assert_equal("etl", doc2.data_dict["snippets"][0]["database"])

            # Verify snippet properties
            assert_equal(settings, doc2.data_dict["snippets"][0]["properties"]["settings"])
            assert_equal(file_resources, doc2.data_dict["snippets"][0]["properties"]["files"])
            assert_equal(functions, doc2.data_dict["snippets"][0]["properties"]["functions"])

            # Verify default properties
            assert_true(doc2.data_dict["isSaved"])
            assert_false(
                doc.last_modified.strftime("%Y-%m-%dT%H:%M:%S") == doc2.last_modified.strftime("%Y-%m-%dT%H:%M:%S")
            )

            #
            # Query History
            #
            doc2 = Document2.objects.get(owner=self.user, type="query-hive", is_history=True)

            # Verify Document2 attributes
            assert_equal(doch.name, doc2.data_dict["name"])
            assert_equal(doch.description, doc2.data_dict["description"])
            assert_equal(
                doch.last_modified.strftime("%Y-%m-%dT%H:%M:%S"), doc2.last_modified.strftime("%Y-%m-%dT%H:%M:%S")
            )

            # Verify session type
            assert_equal("hive", doc2.data_dict["sessions"][0]["type"])

            # Verify snippet values
            assert_equal("ready", doc2.data_dict["snippets"][0]["status"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement"])
            assert_equal(sql, doc2.data_dict["snippets"][0]["statement_raw"])
            assert_equal("etl", doc2.data_dict["snippets"][0]["database"])

            # Verify snippet properties
            assert_equal(settings, doc2.data_dict["snippets"][0]["properties"]["settings"])
            assert_equal(file_resources, doc2.data_dict["snippets"][0]["properties"]["files"])
            assert_equal(functions, doc2.data_dict["snippets"][0]["properties"]["functions"])

            # Verify default properties
            assert_false(doc2.data_dict["isSaved"])

            #
            # Check that we don't re-import
            #
            converter = DocumentConverter(self.user)
            converter.convert()

            assert_equal(2, Document2.objects.filter(owner=self.user, type="query-hive").count())
        finally:
            query.delete()
            query2.delete()
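
Example #37 differs from #36 mainly in its timestamp checks: the saved document, back-dated to 2000-01-01, must come out of conversion with a new last_modified, while the history document's timestamp must survive to the second. A sketch of that second-resolution comparison, with same_to_the_second as a hypothetical helper name:

def same_to_the_second(a, b):
    # Compare two datetimes at second resolution, discarding microseconds.
    fmt = '%Y-%m-%dT%H:%M:%S'
    return a.strftime(fmt) == b.strftime(fmt)


# Saved copy: conversion is expected to touch the back-dated timestamp.
# assert_false(same_to_the_second(doc.last_modified, doc2.last_modified))
# History copy: conversion is expected to preserve the timestamp.
# assert_true(same_to_the_second(doch.last_modified, doc2.last_modified))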