コード例 #1
0
ファイル: test_utils.py プロジェクト: zimiao552147572/hue
def test_get_ensemble():
    """Exercise field_values_from_separated_file with non-ASCII and bad binary input."""

    def first_field_a(stream):
        # Parse the tab-separated stream and return 'fieldA' of its first row.
        rows = list(
            field_values_from_separated_file(stream,
                                             delimiter='\t',
                                             quote_character='"'))
        return rows[0]['fieldA']

    # Non ascii
    accented = string_io('fieldA\nrel=""nofollow"">Twitter for Péché')
    assert_equal(u'rel=""nofollow"">Twitter for Péché',
                 first_field_a(accented))

    trademark = string_io('fieldA\nrel=""nofollow"">Twitter for BlackBerry®')
    assert_equal(u'rel=""nofollow"">Twitter for BlackBerry®',
                 first_field_a(trademark))

    # Bad binary
    raw = b'fieldA\naaa\x80\x02\x03'
    if sys.version_info[0] > 2:
        # On Python 3 the bytes are converted with errors='ignore' before
        # being wrapped in a stream.
        broken = string_io(force_unicode(raw, errors='ignore'))
    else:
        broken = string_io(raw)
    assert_equal(u'aaa\x02\x03', first_field_a(broken))
コード例 #2
0
    def test_compare_to_xxd(self):
        """
    Feed random text to the system xxd and to our xxd, then compare outputs.

    The test is currently skipped unconditionally, so everything after the
    first ``raise SkipTest`` is unreachable and kept for reference.

    It's conceivable that this isn't portable: the system xxd may have
    different default options.
    """
        # Skip as blocking CI and low usage feature
        raise SkipTest
        try:
            subprocess.check_output('type xxd', shell=True)
        except subprocess.CalledProcessError:
            LOG.warning('xxd not found')
            raise SkipTest
        # /dev/random tends to hang on Linux, so we use python instead.
        # It's inefficient, but it's not terrible.
        payload = "".join(
            [chr(random.getrandbits(8)) for _ in range(LENGTH)])
        proc = Popen(["xxd"], shell=True, stdin=PIPE, stdout=PIPE, close_fds=True)
        # communicate() returns (stdout, stderr) of the child process.
        reference_output, reference_errors = proc.communicate(payload)
        self.assertFalse(reference_errors)

        ours = string_io()
        xxd.main(string_io(payload), ours)
        self._verify_content(reference_output, ours.getvalue())
コード例 #3
0
ファイル: thread_util_test.py プロジェクト: ziq211/hue
def test_dump_traceback():
    """A helper thread's header appears only when all threads are dumped."""
    started = threading.Event()
    stopped = threading.Event()

    class ParkedThread(threading.Thread):
        # Signals that it is running, then waits until the test releases it.
        def run(self):
            started.set()
            stopped.wait(10.0)
            assert_true(stopped.is_set())

    worker = ParkedThread(name='thread_util_test thread')
    worker.start()
    header = 'Thread thread_util_test thread %s' % str(worker.ident)

    def dumped(**options):
        # Run dump_traceback into a fresh buffer and return what it wrote.
        buf = string_io()
        dump_traceback(file=buf, **options)
        return buf.getvalue()

    try:
        started.wait(10.0)
        assert_true(started.is_set())

        assert_true(header in dumped())
        assert_true(header not in dumped(all_threads=False))
    finally:
        # Always release and reap the helper thread, even on failure.
        stopped.set()
        worker.join()
コード例 #4
0
ファイル: thrift_sasl.py プロジェクト: ymping/hue
 def __init__(self, sasl_client_factory, mechanism, trans):
     """
 @param sasl_client_factory: a callable that returns a new sasl.Client object
 @param mechanism: the SASL mechanism (e.g. "GSSAPI", "PLAIN")
 @param trans: the underlying transport over which to communicate.
 """
     self._trans = trans
     self.sasl_client_factory = sasl_client_factory
     self.sasl = None
     self.mechanism = mechanism
     self.__wbuf = string_io()
     self.__rbuf = string_io()
     self.opened = False
     self.encode = None
コード例 #5
0
def dump_traceback(file=sys.stderr, all_threads=True):
    """Print a stack trace for the current thread or for every live thread.

    Each trace is written to ``file`` and also logged at DEBUG level.

    :param file: writable text stream receiving the traces. The default is
        the ``sys.stderr`` object that existed when this function was defined.
    :param all_threads: when true, dump every thread returned by
        ``threading.enumerate()``; otherwise only the calling thread.
    """
    current_thread = threading.current_thread()

    if all_threads:
        threads = threading.enumerate()
    else:
        threads = [current_thread]

    # Take one snapshot of the frames instead of rebuilding the mapping for
    # every thread; all traces then describe the same instant.
    frames = sys._current_frames()
    hostname = socket.gethostname()

    for thread in threads:
        frame = frames.get(thread.ident)
        if frame is None:
            # The thread has no frame in the snapshot (e.g. it finished after
            # enumerate()); skip it rather than fail the dump with KeyError.
            continue

        name = "Current thread" if thread is current_thread else "Thread"

        trace_buffer = string_io()
        print("%s: %s %s %s (most recent call last):" %
              (hostname, name, thread.name, thread.ident),
              file=trace_buffer)
        traceback.print_stack(frame, file=trace_buffer)

        trace = trace_buffer.getvalue()
        print(trace, file=file)
        logging.debug(trace)
コード例 #6
0
ファイル: tasks.py プロジェクト: xuejunshuang/hue
def get_log(notebook,
            snippet,
            startFrom=None,
            size=None,
            postdict=None,
            user_id=None):
    """Return the stored log text of the download task for ``notebook``.

    An empty string is returned while the task is SUBMITTED or has not
    reached PROGRESS, when it is in an exception state, or when the result
    cache setting is enabled. When ``startFrom`` is set, that many leading
    lines of the log are skipped.

    Raises QueryExpired when the task state is PENDING.
    """
    state = download_to_file.AsyncResult(notebook['uuid']).state

    if state == states.PENDING:
        raise QueryExpired()

    not_running_yet = (state == 'SUBMITTED'
                       or states.state(state) < states.state('PROGRESS'))
    if not_running_yet or state in states.EXCEPTION_STATES:
        return ''

    if TASK_SERVER.RESULT_CACHE.get():
        return ''

    if not startFrom:
        with storage.open(_log_key(notebook), 'r') as f:
            return f.read()

    output = string_io()
    with storage.open(_log_key(notebook), 'r') as f:
        for line_number, line in enumerate(f, 1):
            # Lines are numbered from 1; keep only those past the offset.
            if line_number > startFrom:
                output.write(line)
    return output.getvalue()
コード例 #7
0
  def _fixup(self):
    """
    Normalise the raw fields of a workflow action:
      - startTime / endTime go through parse_timestamp when set
      - retries becomes an int when set
      - conf_dict is parsed from the XML in conf ({} when conf is empty
        or parsing fails)
    """
    super(WorkflowAction, self)._fixup()

    for attr in ('startTime', 'endTime'):
      raw = getattr(self, attr)
      if raw:
        setattr(self, attr, parse_timestamp(raw))
    if self.retries:
      self.retries = int(self.retries)

    if not self.conf:
      self.conf_dict = {}
      return

    conf_data = i18n.smart_str(self.conf)
    if not isinstance(conf_data, bytes):
      conf_data = conf_data.encode('utf-8')
    xml = string_io(conf_data)
    try:
      self.conf_dict = hadoop.confparse.ConfParse(xml)
    except Exception as e:
      # A broken configuration is logged and replaced by an empty dict.
      LOG.error('Failed to parse XML configuration for Workflow action %s: %s' % (self.name, e))
      self.conf_dict = {}
コード例 #8
0
  def _fixup(self):
    """
    Normalise the raw fields of a coordinator action:
      - createdTime / nominalTime / lastModifiedTime go through
        parse_timestamp when set
      - conf_dict is parsed from the XML in runConf ({} when runConf is empty)
      - title is built from actionNumber and the formatted nominalTime
    """
    super(CoordinatorAction, self)._fixup()

    for attr in ('createdTime', 'nominalTime', 'lastModifiedTime'):
      raw = getattr(self, attr)
      if raw:
        setattr(self, attr, parse_timestamp(raw))

    if not self.runConf:
      self.conf_dict = {}
    else:
      conf_data = i18n.smart_str(self.runConf)
      if not isinstance(conf_data, bytes):
        conf_data = conf_data.encode('utf-8')
      self.conf_dict = hadoop.confparse.ConfParse(string_io(conf_data))

    self.title = ' %s-%s'% (self.actionNumber, format_time(self.nominalTime))
コード例 #9
0
    def _guess_from_file_stream(cls, file_stream):
        """Build an instance from the first sample whose dialect can be guessed.

        Each sample from ``cls._get_sample`` is tried in turn; failures are
        logged and the next sample is tried. If none works, an instance
        with default settings is returned.
        """
        for sample_data, _ in cls._get_sample(file_stream):
            try:
                # Only use first few lines for guessing. Greatly improves performance of CSV library.
                head = ''.join(
                    itertools.islice(string_io(sample_data),
                                     IMPORT_PEEK_NLINES))
                dialect, has_header = cls._guess_dialect(head)

                return cls(delimiter=dialect.delimiter,
                           line_terminator=dialect.lineterminator,
                           quote_char=dialect.quotechar,
                           has_header=has_header,
                           sample=sample_data)
            except Exception:
                LOG.exception('Warning, cannot read the file format.')

        # Guess dialect failed, fall back to defaults:
        return cls()
コード例 #10
0
    def _fixup(self):
        """
    Normalise the raw fields:
      - startTime / endTime go through parse_timestamp when set
      - actions are expanded into Action objects
      - conf_dict is parsed from the XML in conf ({} when conf is None)
    """
        for attr in ('startTime', 'endTime'):
            raw = getattr(self, attr)
            if raw:
                setattr(self, attr, parse_timestamp(raw))

        expanded = []
        for act_dict in self.actions:
            expanded.append(Action.create(self.ACTION, act_dict))
        self.actions = expanded

        if self.conf is None:
            self.conf_dict = {}
            return

        conf_data = i18n.smart_str(self.conf)
        if not isinstance(conf_data, bytes):
            conf_data = conf_data.encode('utf-8')
        self.conf_dict = hadoop.confparse.ConfParse(string_io(conf_data))
コード例 #11
0
ファイル: api2.py プロジェクト: sandredd/hue-1
def export_documents(request):
    """Export the selected documents together with their dependencies.

    The ``documents`` request parameter (GET first, otherwise POST) holds a
    JSON list of document ids. The ``format`` GET parameter selects the
    output:
      - ``json``: the dump is returned through a JsonResponse
      - ``zip``: an archive with ``hue.json`` plus one text file per notebook
      - anything else: the dump goes through ``make_response`` as 'json'
    """
    from io import BytesIO  # local import: only the zip branch needs it

    if request.GET.get('documents'):
        selection = json.loads(request.GET.get('documents'))
    else:
        selection = json.loads(request.POST.get('documents'))

    # Only export documents the user has permissions to read
    docs = Document2.objects.documents(user=request.user, perms='both', include_history=True, include_trashed=True).\
      filter(id__in=selection).order_by('-id')

    # Add any dependencies to the set of exported documents
    export_doc_set = _get_dependencies(docs)

    # For directories, add any children docs to the set of exported documents
    export_doc_set.update(_get_dependencies(docs, deps_mode=False))

    # Get PKs of documents to export
    doc_ids = [doc.pk for doc in export_doc_set]
    num_docs = len(doc_ids)

    if len(selection) == 1 and num_docs >= len(selection) and docs[0].name:
        filename = docs[0].name
    else:
        filename = 'hue-documents-%s-(%s)' % (
            datetime.today().strftime('%Y-%m-%d'), num_docs)

    f = string_io()

    if doc_ids:
        doc_ids = ','.join(map(str, doc_ids))
        management.call_command('dumpdata',
                                'desktop.Document2',
                                primary_keys=doc_ids,
                                indent=2,
                                use_natural_foreign_keys=True,
                                verbosity=2,
                                stdout=f)

    export_format = request.GET.get('format')
    dump = f.getvalue()

    if export_format == 'json':
        return JsonResponse(dump, safe=False)
    elif export_format == 'zip':
        # Build the archive in its own binary buffer. Writing it into `f`
        # would place the zip bytes after the JSON text already stored
        # there, and fails outright when `f` is a text-only buffer.
        archive = BytesIO()
        with zipfile.ZipFile(archive, 'w') as zfile:
            zfile.writestr("hue.json", dump)
            for doc in docs:
                if doc.type == 'notebook':
                    try:
                        from spark.models import Notebook
                        zfile.writestr("notebook-%s-%s.txt" % (doc.name, doc.id),
                                       smart_str(Notebook(document=doc).get_str()))
                    except Exception as e:
                        # Best effort: a notebook that cannot be rendered
                        # is logged and left out of the archive.
                        LOG.exception(e)
        payload = archive.getvalue()

        response = HttpResponse(content_type="application/zip")
        response["Content-Length"] = len(payload)
        # Keep the extension inside the quoted filename.
        response['Content-Disposition'] = 'attachment; filename="%s.zip"' % filename
        response.write(payload)
        return response
    else:
        return make_response(dump, 'json', filename)
コード例 #12
0
ファイル: morphline_tests.py プロジェクト: sandredd/hue-1
  def test_guess_format_invalid_csv_format(self):
    """Each invalid format setting must make guess_field_types return no columns."""
    indexer = MorphlineIndexer("test", solr_client=self.solr_client)
    stream = string_io(TestIndexer.simpleCSVString)

    def source():
      # A fresh description of the same underlying stream for every call.
      return {"stream": stream, "name": "test.csv"}

    broken_settings = (
      ("fieldSeparator", "invalid separator"),
      ("recordSeparator", "invalid separator"),
      ("quoteChar", "invalid quoteChar"),
    )

    for attempt, (setting, bad_value) in enumerate(broken_settings):
      if attempt:
        # Rewind before every guess except the first, where the stream is new.
        stream.seek(0)
      guessed_format = indexer.guess_format({'file': source()})

      guessed_format[setting] = bad_value

      fields = indexer.guess_field_types({"file": source(), "format": guessed_format})['columns']
      assert_equal(fields, [])
コード例 #13
0
ファイル: http_client.py プロジェクト: zimiao552147572/hue
    def flush(self):
        """POST the buffered data and replace the write buffer with a new one."""
        pending = self._wbuf.getvalue()
        self._wbuf = string_io()

        # POST
        root = Resource(self._client)
        self._root = root
        self._data = root.post('', data=pending, headers=self._headers)
コード例 #14
0
ファイル: morphline_tests.py プロジェクト: ziq211/hue
    def test_guess_csv_format(self):
        """The guessed format and field names/types match the simple CSV fixtures."""
        stream = string_io(TestIndexer.simpleCSVString)
        indexer = MorphlineIndexer("test", solr_client=self.solr_client)

        guessed_format = indexer.guess_format(
            {'file': {"stream": stream, "name": "test.csv"}})

        guess_request = {
            "file": {"stream": stream, "name": "test.csv"},
            "format": guessed_format,
        }
        fields = indexer.guess_field_types(guess_request)['columns']

        # test format
        assert_equal(self.simpleCSVFormat, guessed_format)

        # test fields
        for expected, actual in zip(self.simpleCSVFields, fields):
            assert_equal(expected["name"], actual["name"])
            assert_equal(expected["type"], actual["type"])
コード例 #15
0
 def readlines(fileobj, encoding):
     """Peek at the start of ``fileobj`` and return ``(data, rows)``.

     ``data`` is what ``fileobj.read(IMPORT_PEEK_SIZE)`` returned and
     ``rows`` iterates over at most IMPORT_PEEK_NLINES CSV rows of it.
     ``(None, None)`` is returned on a UnicodeError. ``encoding`` is not
     used here.
     """
     try:
         data = fileobj.read(IMPORT_PEEK_SIZE)
         rows = csv.reader(string_io(data))
         peeked = itertools.islice(rows, IMPORT_PEEK_NLINES)
     except UnicodeError:
         return None, None
     return data, peeked
コード例 #16
0
ファイル: file_format.py プロジェクト: zimiao552147572/hue
 def readlines(fileobj, encoding):
   """Peek at the start of ``fileobj`` and return ``(data, rows)``.

   The peeked data is decoded as UTF-8 when it is not already a ``str``;
   ``rows`` iterates over at most IMPORT_PEEK_NLINES CSV rows of it.
   ``(None, None)`` is returned on a UnicodeError. ``encoding`` is not
   used here.
   """
   try:
     data = fileobj.read(IMPORT_PEEK_SIZE)
     if not isinstance(data, str):
       data = data.decode('utf-8')
     rows = csv.reader(string_io(data))
     peeked = itertools.islice(rows, IMPORT_PEEK_NLINES)
   except UnicodeError:
     return None, None
   return data, peeked
コード例 #17
0
ファイル: thrift_sasl.py プロジェクト: ymping/hue
 def cstringio_refill(self, prefix, reqlen):
     """Read frames until at least ``reqlen`` items are buffered.

     The new read buffer starts with ``prefix`` followed by the content of
     every frame read; it is stored on the instance and returned.
     """
     # self.__rbuf will already be empty here because fastbinary doesn't
     # ask for a refill until the previous buffer is empty.  Therefore,
     # we can start reading new frames immediately.
     collected = prefix
     while len(collected) < reqlen:
         self._read_frame()
         collected += self.__rbuf.getvalue()
     refilled = string_io(collected)
     self.__rbuf = refilled
     return refilled
コード例 #18
0
def threads(request):
  """Dumps out server threads. Useful for debugging."""
  buf = string_io()
  dump_traceback(file=buf)
  text = buf.getvalue()

  if not request.is_ajax():
    context = {'text': text, 'is_embeddable': request.GET.get('is_embeddable', False)}
    return render("threads.mako", request, context)
  # AJAX callers get the raw dump as plain text.
  return HttpResponse(text, content_type="text/plain")
コード例 #19
0
 def _get_sample_reader(self, sample):
     if self.line_terminator != '\n':
         sample = sample.replace('\n', '\\n')
         return csv.reader(sample.split(self.line_terminator),
                           delimiter=self.delimiter,
                           quotechar=self.quote_char)
     else:
         return csv.reader(string_io(sample),
                           delimiter=self.delimiter,
                           quotechar=self.quote_char)
コード例 #20
0
ファイル: export_csvxls_tests.py プロジェクト: e11it/hue-1
def _read_xls_sheet_data(response):
  """Load the workbook in ``response.content`` and return the active sheet's cell values row by row."""
  workbook_bytes = string_io()
  workbook_bytes.write(bytes(response.content))

  sheet = load_workbook(filename=workbook_bytes, read_only=True).active

  rows = []
  for row in sheet.rows:
    values = []
    for cell in row:
      # Falsy cells are passed through unchanged instead of being dereferenced.
      values.append(cell.value if cell else cell)
    rows.append(values)
  return rows
コード例 #21
0
ファイル: file_format.py プロジェクト: zimiao552147572/hue
 def readlines(fileobj, encoding):
   """Peek at the start of a gzip-compressed ``fileobj`` and return ``(data, rows)``.

   ``data`` is the decompressed text of at most IMPORT_PEEK_SIZE bytes and
   ``rows`` iterates over at most IMPORT_PEEK_NLINES CSV rows of it.
   ``(None, None)`` is returned when the stream cannot be read (IOError)
   or decoded (UnicodeError). ``encoding`` is not used here.
   """
   gz = gzip.GzipFile(fileobj=fileobj, mode='rb')
   try:
     data = gz.read(IMPORT_PEEK_SIZE)
   except IOError:
     return None, None
   try:
     # GzipFile.read() yields bytes; csv.reader needs text, so decode
     # first. Decoding failures are reported as (None, None) below.
     if not isinstance(data, str):
       data = data.decode('utf-8')
     return data, itertools.islice(csv.reader(string_io(data)), IMPORT_PEEK_NLINES)
   except UnicodeError:
     return None, None
コード例 #22
0
    def _parse(self, data):
        """ Parse the output from the 'mntr' 4letter word command """
        h = string_io(data)

        result = {}
        for line in h.readlines():
            try:
                key, value = self._parse_line(line)
                result[key] = value
            except ValueError:
                pass  # ignore broken lines

        return result
コード例 #23
0
    def _parse_stat(self, data):
        """ Parse the output from the 'stat' 4letter word command """

        result = {}
        if not data:
            return result
        h = string_io(data)

        version = h.readline()
        if version:
            result['zk_version'] = version[version.index(':') + 1:].strip()

        # skip all lines until we find the empty one
        while h.readline().strip():
            pass

        for line in h.readlines():
            m = re.match('Latency min/avg/max: (\d+)/(\d+)/(\d+)', line)
            if m is not None:
                result['zk_min_latency'] = int(m.group(1))
                result['zk_avg_latency'] = int(m.group(2))
                result['zk_max_latency'] = int(m.group(3))
                continue

            m = re.match('Received: (\d+)', line)
            if m is not None:
                result['zk_packets_received'] = int(m.group(1))
                continue

            m = re.match('Sent: (\d+)', line)
            if m is not None:
                result['zk_packets_sent'] = int(m.group(1))
                continue

            m = re.match('Outstanding: (\d+)', line)
            if m is not None:
                result['zk_outstanding_requests'] = int(m.group(1))
                continue

            m = re.match('Mode: (.*)', line)
            if m is not None:
                result['zk_server_state'] = m.group(1)
                continue

            m = re.match('Node count: (\d+)', line)
            if m is not None:
                result['zk_znode_count'] = int(m.group(1))
                continue

        return result
コード例 #24
0
ファイル: conf_test.py プロジェクト: ziq211/hue
 def setup_class(cls):
     """Declare the test configuration tree and bind it to CONF_ONE and CONF_TWO."""
     logging.basicConfig(level=logging.DEBUG)

     # Template for each user-named entry under [clusters].
     cluster = ConfigSection(
         help="Details about a cluster - one section for each.",
         members=dict(
             HOST=Config("host", help="Hostname for the NN", required=True),
             PORT=Config("port",
                         help="Thrift port for the NN",
                         type=int,
                         default=10090)))

     some_section = ConfigSection(
         "some_section",
         private=True,
         members=dict(BAZ=Config("baz", default="baz_default")))

     members = dict(
         FOO=Config("foo", help="A vanilla configuration param", type=int),
         BAR=Config("bar", default=456, help="Config with default", type=int),
         REQ=Config("req", required=True, help="A required config", type=int),
         OPT_NOT_THERE=Config("blahblah"),
         REQ_NOT_THERE=Config("blah", required=True, help="Another required"),
         PRIVATE_CONFIG=Config("dontseeme", private=True),
         DYNAMIC_DEF=Config("dynamic_default",
                            dynamic_default=my_dynamic_default,
                            type=int),
         SOME_SECTION=some_section,
         LIST=Config("list", type=list),
         CLUSTERS=UnspecifiedConfigSection(
             "clusters",
             help="Details about your Hadoop cluster(s)",
             each=cluster))

     cls.conf = ConfigSection(members=members)
     raw_confs = [
         configobj.ConfigObj(infile=string_io(text))
         for text in (cls.CONF_ONE, cls.CONF_TWO)
     ]
     cls.conf = cls.conf.bind(load_confs(raw_confs), prefix='')
コード例 #25
0
ファイル: test_utils.py プロジェクト: ziq211/hue
def test_get_ensemble():
    """Exercise field_values_from_separated_file with non-ASCII and bad binary input."""
    cases = (
        # Non ascii
        ('fieldA\nrel=""nofollow"">Twitter for Péché',
         u'rel=""nofollow"">Twitter for Péché'),
        ('fieldA\nrel=""nofollow"">Twitter for BlackBerry®',
         u'rel=""nofollow"">Twitter for BlackBerry®'),
        # Bad binary
        ('fieldA\naaa\x80\x02\x03', u'aaa\x02\x03'),
    )

    for raw, expected in cases:
        data = string_io(raw)
        result = list(
            field_values_from_separated_file(data,
                                             delimiter='\t',
                                             quote_character='"'))
        # Only the 'fieldA' value of the first parsed row is checked.
        assert_equal(expected, result[0]['fieldA'])
コード例 #26
0
ファイル: analyze_test.py プロジェクト: ziq211/hue
 def test_performance(self):
   """Profile pre_process + run on the profile and require at most 1000 ms of wall time."""
   profiler = cProfile.Profile()
   profiler.enable()
   started_ms = time.time()*1000.0
   self.analyze.pre_process(self.profile)
   self.analyze.run(self.profile)
   finished_ms = time.time()*1000.0
   elapsed_ms = finished_ms - started_ms
   profiler.disable()

   # Log the cumulative-time profile for diagnosis.
   report = string_io()
   pstats.Stats(profiler, stream=report).sort_stats('cumulative').print_stats()
   LOG.info(report.getvalue())
   assert_true(elapsed_ms <= 1000)
コード例 #27
0
def config_gen(dic):
    """
  config_gen(dic) -> xml for Oozie workflow configuration

  Every item of dic becomes one <property> element inside <configuration>.
  """
    property_template = "<property>\n  <name>%s</name>\n  <value><![CDATA[%s]]></value>\n</property>\n"

    sio = string_io()
    print('<?xml version="1.0" encoding="UTF-8"?>', file=sio)
    print("<configuration>", file=sio)
    # if dic's key contains <,>,& then it will be escaped and if dic's value contains ']]>' then ']]>' will be stripped
    for k, v in dic.items():
        if isinstance(v, basestring):
            v = v.replace(']]>', '')
        print(property_template % (escape(k), v), file=sio)
    print("</configuration>", file=sio)
    return sio.getvalue()
コード例 #28
0
    def _fixup(self):
        """
    Normalise the raw fields of a bundle action:
      - type is fixed to 'coord-action' and name mirrors coordJobName
      - conf_dict is parsed from the XML in conf ({} when conf is empty)
    """
        super(BundleAction, self)._fixup()

        self.type = 'coord-action'
        self.name = self.coordJobName

        if not self.conf:
            self.conf_dict = {}
            return

        self.conf_dict = hadoop.confparse.ConfParse(
            string_io(i18n.smart_str(self.conf)))
コード例 #29
0
ファイル: thrift_sasl.py プロジェクト: ymping/hue
 def _read_frame(self):
     header = self._trans.readAll(4)
     (length, ) = struct.unpack(">I", header)
     if self.encode:
         # If the frames are encoded (i.e. you're using a QOP of auth-int or
         # auth-conf), then make sure to include the header in the bytes you send to
         # sasl.decode()
         encoded = header + self._trans.readAll(length)
         success, decoded = self.sasl.decode(encoded)
         if not success:
             raise TTransportException(type=TTransportException.UNKNOWN,
                                       message=self.sasl.getError())
     else:
         # If the frames are not encoded, just pass it through
         decoded = self._trans.readAll(length)
     self.__rbuf = string_io(decoded)
コード例 #30
0
ファイル: conf_test.py プロジェクト: ziq211/hue
    def test_print_help(self):
        """print_help output omits the private key and matches the expected text."""
        out = string_io()
        self.conf.print_help(out=out, skip_header=True)
        out = out.getvalue().strip()
        assert_false("dontseeme" in out)
        # The (?m) flag must lead the pattern: a global inline flag placed
        # anywhere else is rejected by Python 3.11+. The substitution strips
        # the four-space indent this literal carries in the source.
        assert_equals(
            re.sub(
                "(?m)^    ", "", """
    Key: bar (optional)
      Default: 456
      Config with default

    Key: blah (required)
      Another required

    Key: blahblah (optional)
      [no help text provided]

    [clusters]
      Details about your Hadoop cluster(s)

      Consists of some number of sections like:
      [<user specified name>]
        Details about a cluster - one section for each.

        Key: host (required)
          Hostname for the NN

        Key: port (optional)
          Default: 10090
          Thrift port for the NN

    Key: dynamic_default (optional)
      Dynamic default: Calculates a sum
      [no help text provided]

    Key: foo (optional)
      A vanilla configuration param

    Key: list (optional)
      [no help text provided]

    Key: req (required)
      A required config
    """).strip(), out)