def test_get_ensemble():
  # Non-ASCII
  data = string_io('fieldA\nrel=""nofollow"">Twitter for Péché')
  result = list(field_values_from_separated_file(data, delimiter='\t', quote_character='"'))
  assert_equal(u'rel=""nofollow"">Twitter for Péché', result[0]['fieldA'])

  data = string_io('fieldA\nrel=""nofollow"">Twitter for BlackBerry®')
  result = list(field_values_from_separated_file(data, delimiter='\t', quote_character='"'))
  assert_equal(u'rel=""nofollow"">Twitter for BlackBerry®', result[0]['fieldA'])

  # Bad binary: the 0x80 byte is not valid UTF-8 and should be dropped
  test_str = b'fieldA\naaa\x80\x02\x03'
  if sys.version_info[0] > 2:
    data = string_io(force_unicode(test_str, errors='ignore'))
  else:
    data = string_io(test_str)
  result = list(field_values_from_separated_file(data, delimiter='\t', quote_character='"'))
  assert_equal(u'aaa\x02\x03', result[0]['fieldA'])

def test_compare_to_xxd(self):
  """
  Runs xxd on some random text and compares the output with our xxd.

  It's conceivable that this isn't portable: xxd may have different
  default options.

  To be honest, this test was written after the code was working.
  I verified it using a temporary file and a side-by-side diff tool (vimdiff).
  """
  # Skipped: it blocks CI and covers a low-usage feature
  raise SkipTest

  try:
    subprocess.check_output('type xxd', shell=True)
  except subprocess.CalledProcessError:
    LOG.warning('xxd not found')
    raise SkipTest

  # /dev/random tends to hang on Linux, so we use python instead.
  # It's inefficient, but it's not terrible.
  random_text = "".join(chr(random.getrandbits(8)) for _ in range(LENGTH))

  p = Popen(["xxd"], shell=True, stdin=PIPE, stdout=PIPE, close_fds=True)
  (stdout, stderr) = p.communicate(random_text)
  self.assertFalse(stderr)

  output = string_io()
  xxd.main(string_io(random_text), output)
  self._verify_content(stdout, output.getvalue())

def test_dump_traceback():
  started = threading.Event()
  stopped = threading.Event()

  class Thread(threading.Thread):
    def run(self):
      started.set()
      stopped.wait(10.0)
      assert_true(stopped.is_set())

  thread = Thread(name='thread_util_test thread')
  thread.start()

  thread_ident = str(thread.ident)
  header = 'Thread thread_util_test thread %s' % thread_ident
  try:
    started.wait(10.0)
    assert_true(started.is_set())

    out = string_io()
    dump_traceback(file=out)
    assert_true(header in out.getvalue())

    out = string_io()
    dump_traceback(file=out, all_threads=False)
    assert_true(header not in out.getvalue())
  finally:
    stopped.set()
    thread.join()

def __init__(self, sasl_client_factory, mechanism, trans):
  """
  @param sasl_client_factory: a callable that returns a new sasl.Client object
  @param mechanism: the SASL mechanism (e.g. "GSSAPI", "PLAIN")
  @param trans: the underlying transport over which to communicate
  """
  self._trans = trans
  self.sasl_client_factory = sasl_client_factory
  self.sasl = None
  self.mechanism = mechanism
  self.__wbuf = string_io()
  self.__rbuf = string_io()
  self.opened = False
  self.encode = None

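# Usage sketch for the constructor above. Hedged: the enclosing class name and
# the python-sasl client API are assumptions, not confirmed by this snippet.
#
#   import sasl
#
#   def sasl_factory():
#     client = sasl.Client()
#     client.setAttr('host', 'hive.example.com')
#     client.setAttr('service', 'hive')
#     client.init()
#     return client
#
#   transport = TSaslClientTransport(sasl_factory, 'GSSAPI', underlying_transport)
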
def dump_traceback(file=sys.stderr, all_threads=True):
  """Print the stacktrace of the current thread, or of every thread if all_threads is True."""
  current_thread = threading.current_thread()
  if all_threads:
    threads = threading.enumerate()
  else:
    threads = [current_thread]

  for thread in threads:
    if thread == current_thread:
      name = "Current thread"
    else:
      name = "Thread"

    trace_buffer = string_io()
    print("%s: %s %s %s (most recent call last):" % (socket.gethostname(), name, thread.name, thread.ident), file=trace_buffer)
    frame = sys._current_frames()[thread.ident]
    traceback.print_stack(frame, file=trace_buffer)

    print(trace_buffer.getvalue(), file=file)
    logging.debug(trace_buffer.getvalue())

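# Usage sketch: capture the dump in an in-memory buffer instead of writing to
# stderr (the same pattern the `threads` view and test_dump_traceback use):
#
#   out = string_io()
#   dump_traceback(file=out)
#   logging.info(out.getvalue())
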
def get_log(notebook, snippet, startFrom=None, size=None, postdict=None, user_id=None):
  result = download_to_file.AsyncResult(notebook['uuid'])
  state = result.state

  if state == states.PENDING:
    raise QueryExpired()
  elif state == 'SUBMITTED' or states.state(state) < states.state('PROGRESS'):
    return ''
  elif state in states.EXCEPTION_STATES:
    return ''

  if TASK_SERVER.RESULT_CACHE.get():
    return ''
  else:
    if not startFrom:
      with storage.open(_log_key(notebook), 'r') as f:
        return f.read()
    else:
      # Skip the first `startFrom` lines and return the rest
      count = 0
      output = string_io()
      with storage.open(_log_key(notebook), 'r') as f:
        for line in f:
          count += 1
          if count <= startFrom:
            continue
          output.write(line)
      return output.getvalue()

def _fixup(self):
  """
  Fixup:
    - time fields as struct_time
    - config dict
  """
  super(WorkflowAction, self)._fixup()

  if self.startTime:
    self.startTime = parse_timestamp(self.startTime)
  if self.endTime:
    self.endTime = parse_timestamp(self.endTime)
  if self.retries:
    self.retries = int(self.retries)

  if self.conf:
    conf_data = i18n.smart_str(self.conf)
    if not isinstance(conf_data, bytes):
      conf_data = conf_data.encode('utf-8')
    xml = string_io(conf_data)
    try:
      self.conf_dict = hadoop.confparse.ConfParse(xml)
    except Exception as e:
      LOG.error('Failed to parse XML configuration for Workflow action %s: %s' % (self.name, e))
      self.conf_dict = {}
  else:
    self.conf_dict = {}

def _fixup(self):
  """
  Fixup:
    - time fields as struct_time
    - config dict
  """
  super(CoordinatorAction, self)._fixup()

  if self.createdTime:
    self.createdTime = parse_timestamp(self.createdTime)
  if self.nominalTime:
    self.nominalTime = parse_timestamp(self.nominalTime)
  if self.lastModifiedTime:
    self.lastModifiedTime = parse_timestamp(self.lastModifiedTime)

  if self.runConf:
    conf_data = i18n.smart_str(self.runConf)
    if not isinstance(conf_data, bytes):
      conf_data = conf_data.encode('utf-8')
    xml = string_io(conf_data)
    self.conf_dict = hadoop.confparse.ConfParse(xml)
  else:
    self.conf_dict = {}

  self.title = ' %s-%s' % (self.actionNumber, format_time(self.nominalTime))

def _guess_from_file_stream(cls, file_stream):
  for sample_data, sample_lines in cls._get_sample(file_stream):
    try:
      lines = itertools.islice(string_io(sample_data), IMPORT_PEEK_NLINES)
      sample_data_lines = ''
      for line in lines:
        sample_data_lines += line

      # Only use first few lines for guessing. Greatly improves performance of CSV library.
      dialect, has_header = cls._guess_dialect(sample_data_lines)
      delimiter = dialect.delimiter
      line_terminator = dialect.lineterminator
      quote_char = dialect.quotechar

      return cls(**{
        "delimiter": delimiter,
        "line_terminator": line_terminator,
        "quote_char": quote_char,
        "has_header": has_header,
        "sample": sample_data
      })
    except Exception:
      LOG.exception('Warning, cannot read the file format.')

  # Guessing the dialect failed, fall back to defaults:
  return cls()

def _fixup(self):
  """
  Fixup fields:
    - expand actions
    - time fields are struct_time
    - run is integer
    - configuration dict
    - log
    - definition
  """
  if self.startTime:
    self.startTime = parse_timestamp(self.startTime)
  if self.endTime:
    self.endTime = parse_timestamp(self.endTime)

  self.actions = [Action.create(self.ACTION, act_dict) for act_dict in self.actions]

  if self.conf is not None:
    conf_data = i18n.smart_str(self.conf)
    if not isinstance(conf_data, bytes):
      conf_data = conf_data.encode('utf-8')
    xml = string_io(conf_data)
    self.conf_dict = hadoop.confparse.ConfParse(xml)
  else:
    self.conf_dict = {}

def export_documents(request):
  if request.GET.get('documents'):
    selection = json.loads(request.GET.get('documents'))
  else:
    selection = json.loads(request.POST.get('documents'))

  # Only export documents the user has permission to read
  docs = Document2.objects.documents(user=request.user, perms='both', include_history=True, include_trashed=True) \
      .filter(id__in=selection).order_by('-id')

  # Add any dependencies to the set of exported documents
  export_doc_set = _get_dependencies(docs)

  # For directories, add any children docs to the set of exported documents
  export_doc_set.update(_get_dependencies(docs, deps_mode=False))

  # Get PKs of documents to export
  doc_ids = [doc.pk for doc in export_doc_set]
  num_docs = len(doc_ids)

  if len(selection) == 1 and num_docs >= len(selection) and docs[0].name:
    filename = docs[0].name
  else:
    filename = 'hue-documents-%s-(%s)' % (datetime.today().strftime('%Y-%m-%d'), num_docs)

  f = string_io()

  if doc_ids:
    doc_ids = ','.join(map(str, doc_ids))
    management.call_command('dumpdata', 'desktop.Document2', primary_keys=doc_ids, indent=2, use_natural_foreign_keys=True, verbosity=2, stdout=f)

  if request.GET.get('format') == 'json':
    return JsonResponse(f.getvalue(), safe=False)
  elif request.GET.get('format') == 'zip':
    zfile = zipfile.ZipFile(f, 'w')
    zfile.writestr("hue.json", f.getvalue())
    for doc in docs:
      if doc.type == 'notebook':
        try:
          from spark.models import Notebook
          zfile.writestr("notebook-%s-%s.txt" % (doc.name, doc.id), smart_str(Notebook(document=doc).get_str()))
        except Exception as e:
          LOG.exception(e)
    zfile.close()
    response = HttpResponse(content_type="application/zip")
    response["Content-Length"] = len(f.getvalue())
    response['Content-Disposition'] = 'attachment; filename="%s.zip"' % filename
    response.write(f.getvalue())
    return response
  else:
    return make_response(f.getvalue(), 'json', filename)

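# Usage sketch (hypothetical URL; the route registration is not part of this
# snippet):
#
#   GET /desktop/api2/doc/export?documents=[123,456]&format=zip
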
def test_guess_format_invalid_csv_format(self):
  indexer = MorphlineIndexer("test", solr_client=self.solr_client)
  stream = string_io(TestIndexer.simpleCSVString)

  guessed_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})
  guessed_format["fieldSeparator"] = "invalid separator"
  fields = indexer.guess_field_types({"file": {"stream": stream, "name": "test.csv"}, "format": guessed_format})['columns']
  assert_equal(fields, [])

  stream.seek(0)
  guessed_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})
  guessed_format["recordSeparator"] = "invalid separator"
  fields = indexer.guess_field_types({"file": {"stream": stream, "name": "test.csv"}, "format": guessed_format})['columns']
  assert_equal(fields, [])

  stream.seek(0)
  guessed_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})
  guessed_format["quoteChar"] = "invalid quoteChar"
  fields = indexer.guess_field_types({"file": {"stream": stream, "name": "test.csv"}, "format": guessed_format})['columns']
  assert_equal(fields, [])

def flush(self):
  data = self._wbuf.getvalue()
  self._wbuf = string_io()

  # POST the buffered request body in one shot
  self._root = Resource(self._client)
  self._data = self._root.post('', data=data, headers=self._headers)

def test_guess_csv_format(self):
  stream = string_io(TestIndexer.simpleCSVString)
  indexer = MorphlineIndexer("test", solr_client=self.solr_client)

  guessed_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})
  fields = indexer.guess_field_types({"file": {"stream": stream, "name": "test.csv"}, "format": guessed_format})['columns']

  # test format
  expected_format = self.simpleCSVFormat
  assert_equal(expected_format, guessed_format)

  # test fields
  expected_fields = self.simpleCSVFields
  for expected, actual in zip(expected_fields, fields):
    for key in ("name", "type"):
      assert_equal(expected[key], actual[key])

def readlines(fileobj, encoding):
  try:
    data = fileobj.read(IMPORT_PEEK_SIZE)
    if not isinstance(data, str):
      data = data.decode('utf-8')
    return data, itertools.islice(csv.reader(string_io(data)), IMPORT_PEEK_NLINES)
  except UnicodeError:
    return None, None

def cstringio_refill(self, prefix, reqlen):
  # self.__rbuf will already be empty here because fastbinary doesn't
  # ask for a refill until the previous buffer is empty. Therefore,
  # we can start reading new frames immediately.
  while len(prefix) < reqlen:
    self._read_frame()
    prefix += self.__rbuf.getvalue()
  self.__rbuf = string_io(prefix)
  return self.__rbuf

def threads(request):
  """Dumps out server threads. Useful for debugging."""
  out = string_io()
  dump_traceback(file=out)

  if request.is_ajax():
    return HttpResponse(out.getvalue(), content_type="text/plain")
  else:
    return render("threads.mako", request, {'text': out.getvalue(), 'is_embeddable': request.GET.get('is_embeddable', False)})

def _get_sample_reader(self, sample):
  if self.line_terminator != '\n':
    # Escape embedded newlines so they survive the split on the custom terminator
    sample = sample.replace('\n', '\\n')
    return csv.reader(sample.split(self.line_terminator), delimiter=self.delimiter, quotechar=self.quote_char)
  else:
    return csv.reader(string_io(sample), delimiter=self.delimiter, quotechar=self.quote_char)

def _read_xls_sheet_data(response):
  content = bytes(response.content)

  # load_workbook() needs a binary file-like object; under Python 3 a
  # text-mode buffer from string_io() would reject these bytes
  data = io.BytesIO(content)  # assumes `import io` at module level

  wb = load_workbook(filename=data, read_only=True)
  ws = wb.active

  return [[cell.value if cell else cell for cell in row] for row in ws.rows]

def readlines(fileobj, encoding):
  gz = gzip.GzipFile(fileobj=fileobj, mode='rb')
  try:
    data = gz.read(IMPORT_PEEK_SIZE)
  except IOError:
    return None, None

  try:
    # gz.read() returns bytes; decode before handing off to csv.reader,
    # mirroring the plain-text readlines() above
    if not isinstance(data, str):
      data = data.decode('utf-8')
    return data, itertools.islice(csv.reader(string_io(data)), IMPORT_PEEK_NLINES)
  except UnicodeError:
    return None, None

def _parse(self, data):
  """Parse the output of the 'mntr' four-letter-word command."""
  h = string_io(data)

  result = {}
  for line in h.readlines():
    try:
      key, value = self._parse_line(line)
      result[key] = value
    except ValueError:
      pass  # ignore broken lines

  return result

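# Illustrative 'mntr' payload (abridged; the real output is one tab-separated
# "key<TAB>value" pair per line, and the exact keys depend on the ZooKeeper
# version). How values are typed is up to _parse_line(), which is not shown here:
#
#   zk_version          3.4.6-1569965
#   zk_avg_latency      0
#   zk_packets_received 70
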
def _parse_stat(self, data):
  """Parse the output of the 'stat' four-letter-word command."""
  result = {}
  if not data:
    return result

  h = string_io(data)

  version = h.readline()
  if version:
    result['zk_version'] = version[version.index(':') + 1:].strip()

  # skip all lines until we find the empty one
  while h.readline().strip():
    pass

  for line in h.readlines():
    m = re.match(r'Latency min/avg/max: (\d+)/(\d+)/(\d+)', line)
    if m is not None:
      result['zk_min_latency'] = int(m.group(1))
      result['zk_avg_latency'] = int(m.group(2))
      result['zk_max_latency'] = int(m.group(3))
      continue

    m = re.match(r'Received: (\d+)', line)
    if m is not None:
      result['zk_packets_received'] = int(m.group(1))
      continue

    m = re.match(r'Sent: (\d+)', line)
    if m is not None:
      result['zk_packets_sent'] = int(m.group(1))
      continue

    m = re.match(r'Outstanding: (\d+)', line)
    if m is not None:
      result['zk_outstanding_requests'] = int(m.group(1))
      continue

    m = re.match(r'Mode: (.*)', line)
    if m is not None:
      result['zk_server_state'] = m.group(1)
      continue

    m = re.match(r'Node count: (\d+)', line)
    if m is not None:
      result['zk_znode_count'] = int(m.group(1))
      continue

  return result

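# Illustrative 'stat' response this parser handles (the client connection
# lines sit between the version line and the blank line, and are skipped):
#
#   Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
#   Clients:
#    /127.0.0.1:50621[1](queued=0,recved=1,sent=0)
#
#   Latency min/avg/max: 0/5/21
#   Received: 70
#   Sent: 69
#   Outstanding: 0
#   Mode: standalone
#   Node count: 4
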
def setup_class(cls):
  logging.basicConfig(level=logging.DEBUG)

  cls.conf = ConfigSection(members=dict(
    FOO=Config("foo", help="A vanilla configuration param", type=int),
    BAR=Config("bar", default=456, help="Config with default", type=int),
    REQ=Config("req", required=True, help="A required config", type=int),
    OPT_NOT_THERE=Config("blahblah"),
    REQ_NOT_THERE=Config("blah", required=True, help="Another required"),
    PRIVATE_CONFIG=Config("dontseeme", private=True),
    DYNAMIC_DEF=Config("dynamic_default", dynamic_default=my_dynamic_default, type=int),
    SOME_SECTION=ConfigSection("some_section", private=True, members=dict(
      BAZ=Config("baz", default="baz_default"))),
    LIST=Config("list", type=list),
    CLUSTERS=UnspecifiedConfigSection(
      "clusters",
      help="Details about your Hadoop cluster(s)",
      each=ConfigSection(
        help="Details about a cluster - one section for each.",
        members=dict(
          HOST=Config("host", help="Hostname for the NN", required=True),
          PORT=Config("port", help="Thrift port for the NN", type=int, default=10090))))))

  cls.conf = cls.conf.bind(
    load_confs([
      configobj.ConfigObj(infile=string_io(cls.CONF_ONE)),
      configobj.ConfigObj(infile=string_io(cls.CONF_TWO))
    ]),
    prefix='')

def test_performance(self):
  pr = cProfile.Profile()
  pr.enable()

  ts1 = time.time() * 1000.0
  self.analyze.pre_process(self.profile)
  result = self.analyze.run(self.profile)
  ts2 = time.time() * 1000.0
  dts = ts2 - ts1

  pr.disable()

  s = string_io()
  sortby = 'cumulative'
  ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
  ps.print_stats()
  LOG.info(s.getvalue())

  assert_true(dts <= 1000)

def config_gen(dic):
  """
  config_gen(dic) -> xml for Oozie workflow configuration
  """
  sio = string_io()
  print('<?xml version="1.0" encoding="UTF-8"?>', file=sio)
  print("<configuration>", file=sio)
  # If a key contains <, > or & it is escaped; if a value contains ']]>' that
  # token is stripped, since it would terminate the CDATA section early.
  for k, v in dic.items():
    print("<property>\n <name>%s</name>\n <value><![CDATA[%s]]></value>\n</property>\n"
          % (escape(k), v.replace(']]>', '') if isinstance(v, basestring) else v), file=sio)
  print("</configuration>", file=sio)
  sio.flush()
  sio.seek(0)
  return sio.read()

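# Usage sketch: given the print format above, a one-entry dict renders to:
#
#   >>> config_gen({'mapred.job.queue.name': 'default'})
#   <?xml version="1.0" encoding="UTF-8"?>
#   <configuration>
#   <property>
#    <name>mapred.job.queue.name</name>
#    <value><![CDATA[default]]></value>
#   </property>
#
#   </configuration>
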
def _fixup(self):
  """
  Fixup:
    - time fields as struct_time
    - config dict
  """
  super(BundleAction, self)._fixup()

  self.type = 'coord-action'
  self.name = self.coordJobName

  if self.conf:
    # Mirror the other _fixup() methods above: ConfParse expects bytes
    conf_data = i18n.smart_str(self.conf)
    if not isinstance(conf_data, bytes):
      conf_data = conf_data.encode('utf-8')
    xml = string_io(conf_data)
    self.conf_dict = hadoop.confparse.ConfParse(xml)
  else:
    self.conf_dict = {}

def _read_frame(self):
  header = self._trans.readAll(4)
  (length,) = struct.unpack(">I", header)
  if self.encode:
    # If the frames are encoded (i.e. you're using a QOP of auth-int or
    # auth-conf), then make sure to include the header in the bytes you send to
    # sasl.decode()
    encoded = header + self._trans.readAll(length)
    success, decoded = self.sasl.decode(encoded)
    if not success:
      raise TTransportException(type=TTransportException.UNKNOWN, message=self.sasl.getError())
  else:
    # If the frames are not encoded, just pass it through
    decoded = self._trans.readAll(length)
  self.__rbuf = string_io(decoded)

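# Frame layout sketch: each frame on the wire is a 4-byte big-endian length
# prefix followed by `length` payload bytes, e.g.:
#
#   >>> struct.pack(">I", 5) + b"hello"
#   b'\x00\x00\x00\x05hello'
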
def test_print_help(self):
  out = string_io()
  self.conf.print_help(out=out, skip_header=True)
  out = out.getvalue().strip()

  assert_false("dontseeme" in out)
  assert_equals(re.sub("(?m)^ ", "", """
 Key: bar (optional)
   Default: 456
   Config with default

 Key: blah (required)
   Another required

 Key: blahblah (optional)
   [no help text provided]

 [clusters]
   Details about your Hadoop cluster(s)

   Consists of some number of sections like:

   [<user specified name>]
     Details about a cluster - one section for each.

     Key: host (required)
       Hostname for the NN

     Key: port (optional)
       Default: 10090
       Thrift port for the NN

 Key: dynamic_default (optional)
   Dynamic default: Calculates a sum
   [no help text provided]

 Key: foo (optional)
   A vanilla configuration param

 Key: list (optional)
   [no help text provided]

 Key: req (required)
   A required config
""").strip(), out)