def test_ampstoxml():
    """Round-trip a one-row amplitude table through ampsToXML/xmlToAmps.

    The input frame and the re-parsed frame (restricted to the input's
    columns) are printed so they can be compared.  The wide-display options
    are applied only while printing and are restored afterwards.
    """
    data = {'netid': ['sisfrance'],
            'code': [1],
            'name': ['m1'],
            'loc': ['somewhere'],
            'lat': [32.7],
            'lon': [-145.6],
            'dist': [23.6],
            'source': ['sisfrance'],
            'insttype': ['OBSERVED'],
            'commtype': ['UNK'],
            'intensity': [4.7]}
    df = pd.DataFrame(data=data)
    ret = Retriever(os.path.expanduser('~'), os.path.expanduser('~'))
    # 'time' used to appear twice in this literal; the later value silently
    # won, so only that value is kept (in the key's original position).
    eventinfo = {'id': 'usp0007m27',
                 'time': datetime(1996, 7, 15, 0, 13, 28),
                 'lat': 46.015,
                 'lon': 5.977,
                 'depth': 5.0,
                 'mag': 4.5,
                 'location': 'France',
                 'network': 'us'}
    ret.setEventInfo(eventinfo)
    xmlstr = ret.ampsToXML(amps=df)
    df2 = ret.xmlToAmps(xmlstr)
    # Scope the display tweaks so they do not leak into other tests.
    with pd.option_context('display.max_columns', 500,
                           'display.expand_frame_repr', False):
        print(df)
        print()
        print(df2[df.columns])
def setup_class(cls):
    """Populate ``cls.data`` with named sample frames and list their keys."""
    # Index-related keyword arguments shared by every mkdf() call below.
    axes = dict(c_idx_type='s', r_idx_type='i',
                c_idx_names=[None], r_idx_names=[None])

    def coin(*args):
        return randint(2)

    cls.data = frames = {}
    frames['string'] = mkdf(5, 3, **axes)
    frames['int'] = mkdf(5, 3, data_gen_f=coin, **axes)
    frames['float'] = mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
                           **axes)
    frames['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                                 'b': np.arange(1, 6),
                                 'c': list('abcde')})

    # Cells one character wider than "max_colwidth" (GH8305)
    _cw = get_option('display.max_colwidth') + 1
    frames['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
                              **axes)

    # One row more than "max_rows" (GH-5346)
    row_limit = get_option('display.max_rows')
    frames['longdf'] = mkdf(row_limit + 1, 3, data_gen_f=coin, **axes)

    # Non-ascii text (GH9263)
    frames['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
                                       'es': 'en español'.split()})

    # Unicode round trip (GH 13747, GH 12529)
    frames['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                                   'b': ['øπ∆˚¬', 'œ∑´®']})
    cls.data_types = list(frames.keys())
def check_main():
    """Report whether the session should be treated as interactive.

    True when ``__main__`` has no ``__file__`` attribute; otherwise (or when
    ``__main__`` cannot be imported) the ``mode.sim_interactive`` option
    decides.
    """
    try:
        import __main__ as main
    except ModuleNotFoundError:
        return get_option('mode.sim_interactive')
    if not hasattr(main, '__file__'):
        return True
    return get_option('mode.sim_interactive')
def _f(*args, **kwargs):
    """Call the wrapped ``f`` and restore the row-display options afterwards.

    ``display.max_rows`` and ``display.max_info_rows`` are captured before the
    call and written back in a ``finally`` block, so they are restored even
    when ``f`` raises.
    """
    saved = [(name, pd.get_option(name))
             for name in ("display.max_rows", "display.max_info_rows")]
    try:
        return f(*args, **kwargs)
    finally:
        for name, value in saved:
            pd.set_option(name, value)
def setUpClass(cls):
    """Run the parent set-up, then fill ``cls.data`` with sample frames."""
    super(TestClipboard, cls).setUpClass()
    # Index-related keyword arguments shared by every mkdf() call below.
    index_kwargs = {'c_idx_type': 's', 'r_idx_type': 'i',
                    'c_idx_names': [None], 'r_idx_names': [None]}

    def random_bit(*args):
        return randint(2)

    cls.data = samples = {}
    samples['string'] = mkdf(5, 3, **index_kwargs)
    samples['int'] = mkdf(5, 3, data_gen_f=random_bit, **index_kwargs)
    samples['float'] = mkdf(5, 3,
                            data_gen_f=lambda r, c: float(r) + 0.01,
                            **index_kwargs)
    samples['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                                  'b': np.arange(1, 6),
                                  'c': list('abcde')})

    # Cells one character wider than "max_colwidth" (GH8305)
    _cw = get_option('display.max_colwidth') + 1
    samples['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
                               **index_kwargs)

    # One row more than "max_rows" (GH-5346)
    limit = get_option('display.max_rows')
    samples['longdf'] = mkdf(limit + 1, 3, data_gen_f=random_bit,
                             **index_kwargs)

    # Non-ascii text (GH9263)
    samples['nonascii'] = pd.DataFrame({'en': 'in English'.split(),
                                        'es': 'en español'.split()})
    cls.data_types = list(samples.keys())
def _repr_html_(self):
    """Return notebook HTML for the frame, or None for a multi-level index."""
    if self.index.nlevels > 1:
        return None
    display_opts = {
        'max_rows': pd.get_option("display.max_rows"),
        'max_cols': pd.get_option("display.max_columns"),
        'show_dimensions': pd.get_option("display.show_dimensions"),
    }
    return self.to_html(notebook=True, **display_opts)
def test_use_bottleneck():
    """Toggle the ``use_bottleneck`` option on and off and check it sticks.

    Only runs the checks when bottleneck is installed.  The option is put
    back to ``use_bn`` in a ``finally`` block so a failing assertion does not
    leave it modified for later tests.
    """
    if nanops._BOTTLENECK_INSTALLED:
        try:
            pd.set_option('use_bottleneck', True)
            assert pd.get_option('use_bottleneck')

            pd.set_option('use_bottleneck', False)
            assert not pd.get_option('use_bottleneck')
        finally:
            pd.set_option('use_bottleneck', use_bn)
def main():
    """
    Time monitoring application.

    Parses the command line (work file, optional from/to dates, optional
    display width), derives the reporting period, and prints the monitoring
    result with enlarged pandas display limits.  The display options are
    restored before returning.
    """
    parser = argparse.ArgumentParser(description='Time monitoring application')
    parser.add_argument('work', help='work file')
    parser.add_argument('-f', '--from', help='date from (including)')
    parser.add_argument('-t', '--toex', help='date to (excluding)')
    parser.add_argument('-w', '--width', help='display width (columns)')
    args = parser.parse_args()
    # 'from' is a keyword, so it cannot be read as a plain attribute.
    args.f = args.__dict__['from']
    args.t = args.__dict__['toex']
    args.w = args.__dict__['width']

    today = datetime.date.today()
    if args.f and args.t:
        date_from = parse(args.f).date()
        date_toex = parse(args.t).date()
        if date_toex <= date_from:
            print("Warning: to-date must be strictly greater than from-date")
            return
    elif args.f and not args.t:
        date_from = parse(args.f).date()
        date_toex = date_from + relativedelta(months=1)
    elif not args.f and args.t:
        date_toex = parse(args.t).date()
        date_from = date_toex - relativedelta(months=1)
    else:
        # Default period: the month before the current one.
        date_toex = datetime.date(today.year, today.month, 1)
        date_from = date_toex - relativedelta(months=1)

    # Capture the current values before the try block so the finally clause
    # never refers to a name that was not bound.
    saved_options = [(name, pd.get_option(name))
                     for name in ('display.max_rows', 'display.max_columns',
                                  'display.height', 'display.width')]
    try:
        pd.set_option('display.max_rows', 1000)
        pd.set_option('display.max_columns', 1000)
        pd.set_option('display.height', 1000)
        if args.w:
            pd.set_option('display.width', int(args.w))

        # The context manager closes the file; if open() itself fails there
        # is nothing to close (the old finally block called close() on an
        # unbound name in that case).
        with open(args.work, 'rU') as f:
            monitoring = Monitoring(date_from, date_toex)
            monitoring.process(f)
            monitoring.print_result()
    except IOError:
        sys.stderr.write('Problem reading: ' + args.work)
    finally:
        for name, value in saved_options:
            pd.set_option(name, value)
def get_engine(engine):
    """
    Return our implementation.

    ``'auto'`` is first resolved through the ``io.parquet.engine`` option; if
    it is still ``'auto'``, pyarrow and then fastparquet are tried in that
    order.

    Raises
    ------
    ValueError
        If ``engine`` is not a recognised name, or if ``'auto'`` was requested
        and neither implementation could be imported.
    """
    if engine == 'auto':
        engine = get_option('io.parquet.engine')

    if engine == 'auto':
        # try engines in this order
        for impl in (PyArrowImpl, FastParquetImpl):
            try:
                return impl()
            except ImportError:
                pass
        # Previously this fell through to the generic "engine must be one
        # of ..." error, which hid the real cause.  The exception type is
        # unchanged so existing handlers keep working.
        raise ValueError("Unable to find a usable engine; "
                         "tried using: 'pyarrow', 'fastparquet'.\n"
                         "pyarrow or fastparquet is required for parquet "
                         "support")

    if engine not in ['pyarrow', 'fastparquet']:
        raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

    if engine == 'pyarrow':
        return PyArrowImpl()
    elif engine == 'fastparquet':
        return FastParquetImpl()
def _get_head_tail(self):
    """Return a pandas frame holding the rows needed for display.

    When the frame has at most ``display.max_rows`` rows (or that option is
    None, i.e. no limit) every row is converted.  Otherwise only the leading
    and trailing rows are gathered before converting.
    """
    if self._empty:
        return pd.DataFrame()
    self.update_size()
    maxrows = pd.get_option('display.max_rows')

    # None means "no row limit"; comparing or dividing it would raise.
    if maxrows is None or self._size <= maxrows:
        newdf = aku.DataFrame()
        for col in self._columns:
            if isinstance(self[col], ak.Categorical):
                newdf[col] = self[col].categories[self[col].codes]
            else:
                newdf[col] = self[col]
        return newdf.to_pandas(retain_index=True)

    # Being 1 above the threshold causes the pandas formatter to split the
    # data frame vertically.
    head_rows = list(range(maxrows // 2 + 1))
    tail_rows = list(range(self._size - (maxrows // 2), self._size))
    idx = ak.array(head_rows + tail_rows)
    newdf = aku.DataFrame()
    # The first entry of _columns is skipped here; 'index' is added last.
    for col in self._columns[1:]:
        if isinstance(self[col], ak.Categorical):
            newdf[col] = self[col].categories[self[col].codes[idx]]
        else:
            newdf[col] = self[col][idx]
    newdf['index'] = self['index'][idx]
    return newdf.to_pandas(retain_index=True)
def read_clipboard(**kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    If unspecified, `sep` defaults to '\s+'

    Returns
    -------
    parsed : DataFrame
    """
    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        # Raw string: '\s' is a regex class, not a string escape.
        kwargs['sep'] = r'\s+'
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except Exception:
            # Best effort: keep the text as returned by the clipboard.
            # (A bare ``except`` would also swallow KeyboardInterrupt.)
            pass
    return read_table(StringIO(text), **kwargs)
def get_engine(engine: str) -> "BaseImpl":
    """Return the parquet implementation object for ``engine``.

    ``"auto"`` is resolved through the ``io.parquet.engine`` option and, if
    still ``"auto"``, by trying pyarrow and then fastparquet.
    """
    if engine == "auto":
        engine = get_option("io.parquet.engine")

    if engine == "auto":
        # First implementation that can be constructed wins.
        for candidate in (PyArrowImpl, FastParquetImpl):
            try:
                return candidate()
            except ImportError:
                continue
        raise ImportError(
            "Unable to find a usable engine; "
            "tried using: 'pyarrow', 'fastparquet'.\n"
            "pyarrow or fastparquet is required for parquet support")

    if engine == "pyarrow":
        return PyArrowImpl()
    if engine == "fastparquet":
        return FastParquetImpl()
    raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
def produce_summary_txt(df, outfname):
    """Write the summary of ``df`` to ``outfname`` as plain text.

    The summary frame comes from ``produce_summary_df``.  It is rendered with
    ``display.max_colwidth`` raised to 10000; the option is restored even if
    rendering fails.
    """
    summary = produce_summary_df(df)
    with pd.option_context("display.max_colwidth", 10000):
        text = summary.to_string()
    # Render first, then write, so a rendering failure does not leave a
    # truncated output file behind.
    with open(outfname, 'w') as f:
        f.write(text)
def _repr_html_(self):
    """Return notebook HTML for a copy of the frame.

    Returns None when the index has more than one level.  If the columns
    have no name, the copy's columns are named after the index (falling back
    to 'index') before rendering.
    """
    frame = self.copy()
    if frame.index.nlevels > 1:
        return None
    if frame.columns.name is None:
        frame.columns.name = frame.index.name or 'index'
    return frame.to_html(
        max_rows=pd.get_option("display.max_rows"),
        max_cols=pd.get_option("display.max_columns"),
        show_dimensions=pd.get_option("display.show_dimensions"),
        notebook=True,
    )
def print_table(table, name=None, fmt=None):
    """
    Pretty print a pandas DataFrame.

    Uses HTML output if running inside Jupyter Notebook, otherwise
    formatted text output.

    Parameters
    ----------
    table : pd.Series or pd.DataFrame
        Table to pretty-print.  A Series is wrapped in a DataFrame first.
    name : str, optional
        Table name to display in upper left corner.
    fmt : str, optional
        Formatter to use for displaying table elements.
        E.g. '{0:.2f}%' for displaying 100 as '100.00%'.
        Restores original setting after displaying.
    """
    if isinstance(table, pd.Series):
        table = pd.DataFrame(table)

    if isinstance(table, pd.DataFrame):
        table.columns.name = name

    prev_option = pd.get_option('display.float_format')
    if fmt is not None:
        pd.set_option('display.float_format', lambda x: fmt.format(x))

    try:
        display(table)
    finally:
        # Restore even when display() raises, so the formatter does not leak.
        if fmt is not None:
            pd.set_option('display.float_format', prev_option)
def get_engine(engine: str) -> BaseImpl:
    """Return the parquet implementation object for ``engine``."""
    if engine == "auto":
        engine = get_option("io.parquet.engine")

    if engine == "auto":
        # Try each implementation in turn, remembering why it failed.
        failures = []
        for impl in (PyArrowImpl, FastParquetImpl):
            try:
                return impl()
            except ImportError as err:
                failures.append("\n - " + str(err))
        error_msgs = "".join(failures)

        raise ImportError(
            "Unable to find a usable engine; "
            "tried using: 'pyarrow', 'fastparquet'.\n"
            "A suitable version of "
            "pyarrow or fastparquet is required for parquet "
            "support.\n"
            "Trying to import the above resulted in these errors:"
            f"{error_msgs}"
        )

    if engine == "pyarrow":
        return PyArrowImpl()
    if engine == "fastparquet":
        return FastParquetImpl()

    raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
def test_backend_is_correct(monkeypatch, restore_backend):
    """Selecting the dummy module by name makes it the plotting backend."""
    backend_name = "pandas_dummy_backend"
    monkeypatch.setitem(sys.modules, backend_name, dummy_backend)

    pandas.set_option("plotting.backend", backend_name)
    assert pandas.get_option("plotting.backend") == backend_name

    resolved = pandas.plotting._core._get_plot_backend("pandas_dummy_backend")
    assert resolved is dummy_backend
def __init__(self, data_frame):
    """Keep ``data_frame`` and prepare a copy with its index as columns.

    Sets ``div_id`` (a fresh UUID string), ``df_copy`` (the copy with the
    index moved into leading column(s)), ``column_types`` (one dict per
    column with its 'field' and, when the dtype kind is found in
    ``np.typecodes``, its 'type') and ``precision`` (display precision - 1).
    """
    self.data_frame = data_frame
    self.div_id = str(uuid.uuid4())

    self.df_copy = data_frame.copy()
    # isinstance also accepts subclasses, and pd.MultiIndex is the public
    # name (the pd.core.index module no longer exists in current pandas).
    if isinstance(self.df_copy.index, pd.MultiIndex):
        self.df_copy.reset_index(inplace=True)
    else:
        self.df_copy.insert(0, self.df_copy.index.name, self.df_copy.index)

    # Drop the aggregate "All*" groups so each kind maps to a specific one.
    tc = {key: codes for key, codes in np.typecodes.items()
          if "All" not in key}

    self.column_types = []
    # .items() replaces .iteritems(), which was removed from pandas.
    for col_name, dtype in self.df_copy.dtypes.items():
        column_type = {'field': col_name}
        for type_name, type_codes in tc.items():
            if dtype.kind in type_codes:
                column_type['type'] = type_name
                break
        self.column_types.append(column_type)

    self.precision = pd.get_option('display.precision') - 1
def main(comparison_fn):
    """Write the HTML comparison report to ``comparison_fn``.

    The page starts with ``header``, then a list of links (one per template
    group from ``find_templates()``), then one comparison table per group.
    Column truncation is disabled while the tables are rendered and the
    previous ``display.max_colwidth`` is restored afterwards.
    """
    def title_from_fns(fns):
        return ' + '.join(fns)

    old_width = pd.get_option('display.max_colwidth')
    try:
        # None is the supported "no limit" value.
        pd.set_option('display.max_colwidth', None)
    except ValueError:
        # Older pandas only accepts integers and uses -1 for "no limit".
        pd.set_option('display.max_colwidth', -1)
    try:
        with open(comparison_fn, 'wt') as f:
            print(header, file=f)

            for idx, fns in enumerate(find_templates()):
                title = title_from_fns(fns)
                # The anchor is now closed before the list item ends.
                print(f'<li><a href="#{idx}">{title}</a></li>', file=f)

            for idx, fns in enumerate(find_templates()):
                df = compare(fns)
                html_comparison = df.to_html(
                    formatters=[html_formatter] * len(versions),
                    escape=False)
                title = title_from_fns(fns)
                print(f'<h1><a name={idx}>{title}</a></h1>', file=f)
                print(html_comparison, file=f)

            print('</body>', file=f)
    finally:
        pd.set_option('display.max_colwidth', old_width)
def test_api_for_categorical(any_string_method, any_string_dtype, request):
    """``.str`` on a categorical gives the same result as on object dtype."""
    # https://github.com/pandas-dev/pandas/issues/10661
    pyarrow_backed = any_string_dtype == "string[pyarrow]" or (
        any_string_dtype == "string"
        and get_option("string_storage") == "pyarrow")
    if pyarrow_backed:
        # unsupported operand type(s) for +: 'ArrowStringArray' and 'str'
        request.node.add_marker(
            pytest.mark.xfail(raises=TypeError, reason="Not Implemented"))

    s = Series(list("aabb"), dtype=any_string_dtype)
    s = s + " " + s
    categorical = s.astype("category")
    assert isinstance(categorical.str, strings.StringMethods)

    method_name, args, kwargs = any_string_method

    result = getattr(categorical.str, method_name)(*args, **kwargs)
    expected = getattr(s.astype("object").str, method_name)(*args, **kwargs)

    if isinstance(result, DataFrame):
        tm.assert_frame_equal(result, expected)
        return
    if isinstance(result, Series):
        tm.assert_series_equal(result, expected)
        return
    # str.cat(others=None) returns string, for example
    assert result == expected
def __init__(self, data_frame, remote_js=False):
    """Keep ``data_frame`` and prepare a copy with its index as columns.

    Sets ``remote_js``, ``div_id`` (a fresh UUID string), ``df_copy`` (the
    copy with the index moved into leading column(s)), ``column_types`` (one
    dict per column with its 'field' and, when the dtype kind is found in
    ``np.typecodes``, its 'type') and ``precision`` (display precision - 1).
    """
    self.data_frame = data_frame
    self.remote_js = remote_js
    self.div_id = str(uuid.uuid4())

    self.df_copy = data_frame.copy()
    # isinstance also accepts subclasses, and pd.MultiIndex is the public
    # name (the pd.core.index module no longer exists in current pandas).
    if isinstance(self.df_copy.index, pd.MultiIndex):
        self.df_copy.reset_index(inplace=True)
    else:
        self.df_copy.insert(0, self.df_copy.index.name, self.df_copy.index)

    # Drop the aggregate "All*" groups so each kind maps to a specific one.
    tc = {key: codes for key, codes in np.typecodes.items()
          if "All" not in key}

    self.column_types = []
    # .items() replaces .iteritems(), which was removed from pandas.
    for col_name, dtype in self.df_copy.dtypes.items():
        column_type = {'field': col_name}
        for type_name, type_codes in tc.items():
            if dtype.kind in type_codes:
                column_type['type'] = type_name
                break
        self.column_types.append(column_type)

    self.precision = pd.get_option('display.precision') - 1
def _read_obs(self, stns_ids=None):
    """Parse and combine observations for the selected stations.

    Parameters
    ----------
    stns_ids : optional
        Labels used to select rows of ``self.stns`` via ``.loc``.  When None,
        all of ``self.stns`` is used.

    Returns
    -------
    The concatenated observations, indexed by (station_id, elem, time) and
    sorted on that index.
    """
    # Saw extreme decreased performance due to garbage collection when
    # pandas ran checks for a chained assignment. Turn off this check
    # temporarily; option_context restores it even on error.
    with pd.option_context("mode.chained_assignment", None):
        if stns_ids is None:
            stns_obs = self.stns
        else:
            stns_obs = self.stns.loc[stns_ids]

        obs = [self._parse_stn_obs(a_id, elem)
               for elem, a_id in itertools.product(self.elems,
                                                   stns_obs.station_id)]
        obs = pd.concat(obs, ignore_index=True)

    obs = obs.set_index(["station_id", "elem", "time"])
    # sort_index(level=...) replaces DataFrame.sortlevel, which was removed
    # from pandas.
    obs = obs.sort_index(level=0, sort_remaining=True)

    return obs
def _predict(args, cell):
    """Run a local prediction and display the result table as HTML.

    ``args`` supplies 'headers' and 'image_columns' (comma-separated
    strings), 'prediction_data', 'model', 'cloud' and 'no_show_image'.
    ``cell`` is not used.
    """
    headers = args['headers'].split(',')
    img_cols = args['image_columns'].split(',') if args['image_columns'] else []
    data = args['prediction_data']

    df = _local_predict.get_prediction_results(
        args['model'], data, headers, img_cols=img_cols, cloud=args['cloud'],
        show_image=not args['no_show_image'])

    def _show_img(img_bytes):
        return '<img src="data:image/png;base64,' + img_bytes + '" />'

    def _truncate_text(text):
        return (text[:37] + '...') if isinstance(text, six.string_types) and len(text) > 40 else text

    # Truncate text explicitly here because column-width truncation is
    # switched off below. This applies to images too, but images are
    # overridden with "_show_img()" afterwards.
    # ``object`` is what the removed ``np.object`` alias referred to.
    formatters = {x: _truncate_text for x in df.columns if df[x].dtype == object}
    if not args['no_show_image'] and img_cols:
        formatters.update({x + '_image': _show_img for x in img_cols})

    # Disable column-width truncation so images can be displayed.
    old_width = pd.get_option('display.max_colwidth')
    try:
        # None is the supported "no limit" value.
        pd.set_option('display.max_colwidth', None)
    except ValueError:
        # Older pandas only accepts integers and uses -1 for "no limit".
        pd.set_option('display.max_colwidth', -1)
    try:
        IPython.display.display(IPython.display.HTML(
            df.to_html(formatters=formatters, escape=False, index=False)))
    finally:
        pd.set_option('display.max_colwidth', old_width)
def print_full(df):
    '''
    Print a pandas data frame completely.

    All rows and columns are shown, lines are up to 2000 characters wide,
    floats use the '{:20,.2f}' format and cell text is not truncated.  The
    display options are restored afterwards, including when printing raises.

    Raises
    ------
    ValueError
        If ``df`` is not a pandas DataFrame.
    '''
    if not isinstance(df, pd.core.frame.DataFrame):
        raise ValueError('df should be a pandas.core.frame.DataFrame')

    with pd.option_context(
            'display.max_rows', len(df),
            'display.max_columns', None,
            'display.width', 2000,
            'display.float_format', '{:20,.2f}'.format,
            'display.max_colwidth', None):
        print(df)
def print_table(table, name=None, fmt=None):
    """Pretty print a pandas DataFrame.

    Uses HTML output if running inside Jupyter Notebook, otherwise
    formatted text output.

    Parameters
    ----------
    table : pandas.Series or pandas.DataFrame
        Table to pretty-print.  A Series is wrapped in a DataFrame first.
    name : str, optional
        Table name to display in upper left corner.
    fmt : str, optional
        Formatter to use for displaying table elements.
        E.g. '{0:.2f}%' for displaying 100 as '100.00%'.
        Restores original setting after displaying.
    """
    if isinstance(table, pd.Series):
        table = pd.DataFrame(table)

    if fmt is not None:
        prev_option = pd.get_option('display.float_format')
        pd.set_option('display.float_format', lambda x: fmt.format(x))

    try:
        if name is not None:
            table.columns.name = name

        display(table)
    finally:
        # Restore even when naming or display fails, so the temporary
        # formatter does not leak into later output.
        if fmt is not None:
            pd.set_option('display.float_format', prev_option)
def get_engine(engine):
    """Return the parquet implementation object for ``engine``."""
    if engine == 'auto':
        engine = get_option('io.parquet.engine')

    if engine == 'auto':
        # First implementation that can be constructed wins.
        for impl in (PyArrowImpl, FastParquetImpl):
            try:
                return impl()
            except ImportError:
                continue

        raise ImportError("Unable to find a usable engine; "
                          "tried using: 'pyarrow', 'fastparquet'.\n"
                          "pyarrow or fastparquet is required for parquet "
                          "support")

    if engine == 'pyarrow':
        return PyArrowImpl()
    if engine == 'fastparquet':
        return FastParquetImpl()
    raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
def report(master_df):
    """Write ``report.html`` under ``_HTML`` and return its path.

    A link column is added to ``master_df`` (in place), the rows are sorted
    by predicted label and index, and the selected columns are rendered as
    an unescaped HTML table after ``meta_text`` and a title line.
    """
    _set_image_paths(master_df)
    master_df[_LINK] = master_df[_PREDICT_LABELS].map(
        lambda s: '<a href="report_label_{}.html" target="new"/>'
                  'Image of test result "Success"</a>'.format(s))
    master_df = master_df.sort_values(by=[_PREDICT_LABELS, _INDEX])
    master_df = master_df.drop(_INDEX, axis=1)
    master_df = master_df.loc[:, [
        _IMAGE, _SIGN_MEANING_PREDICT, _OUT_RANGES, _LINK
    ]]
    master_df = master_df.rename(
        columns={
            _IMAGE: 'Image of test result "Inconsistent"',
            _SIGN_MEANING_PREDICT: 'Target DNN prediction result',
            _OUT_RANGES: 'Acceptable prediction result'
        })

    report_path = Path(_HTML).joinpath('report.html')

    # Output HTML with column-width truncation disabled.
    old_colwidth = pd.get_option('display.max_colwidth')
    try:
        # None is the supported "no limit" value.
        pd.set_option('display.max_colwidth', None)
    except ValueError:
        # Older pandas only accepts integers and uses -1 for "no limit".
        pd.set_option('display.max_colwidth', -1)
    try:
        table = master_df.to_html(escape=False, index=False, justify='center')
        # create html
        with open(str(report_path), "w") as f:
            f.write(meta_text)
            f.write('<title>Result list(Failure only)</title>\n')
            f.write(table)
    finally:
        # Restore even when rendering or writing fails.
        pd.set_option('display.max_colwidth', old_colwidth)

    return report_path
def _repr_html_(self):
    '''
    Create an HTML representation for IPython

    Returns
    -------
    string
        HTML representation of CASResults object
    None
        When running in a Qt console or when HTML notebook output is
        disabled in pandas.

    '''
    if pdcom.in_qtconsole():
        return None

    # Exact option key; the dotted spelling used before only resolved
    # because pandas treats option names as regular expressions.
    if not pd.get_option('display.notebook_repr_html'):
        return None

    output = []

    for i, (key, item) in enumerate(six.iteritems(self)):
        # Every section after the first is preceded by a rule.
        if i:
            sfmt = '<div class="cas-results-key"><hr/><b>&#167; %s</b></div>'
        else:
            sfmt = '<div class="cas-results-key"><b>&#167; %s</b></div>'
        # One-element tuples keep tuple keys/items from being unpacked by %.
        output.append(sfmt % (key,))
        output.append('<div class="cas-results-body">')
        res = item._repr_html_() if hasattr(item, '_repr_html_') else None
        if res is None:
            # No HTML available: fall back to the item's text form rather
            # than rendering the literal "None".
            output.append('<div>%s</div>' % (item,))
        else:
            output.append(res)
        output.append('</div>')

    output.append('<div class="cas-output-area"></div>')

    # Defaults keep a missing attribute from raising AttributeError.
    performance = getattr(self, 'performance', None)
    if performance:
        stats = []
        elapsed_time = getattr(performance, 'elapsed_time', None)
        if elapsed_time:
            stats.append('<span class="cas-elapsed">elapsed %.3gs</span>' %
                         elapsed_time)
        cpu_user_time = getattr(performance, 'cpu_user_time', None)
        if cpu_user_time:
            stats.append('<span class="cas-user">user %.3gs</span>' %
                         cpu_user_time)
        cpu_system_time = getattr(performance, 'cpu_system_time', None)
        if cpu_system_time:
            stats.append('<span class="cas-sys">sys %.3gs</span>' %
                         cpu_system_time)
        memory = getattr(performance, 'memory', None)
        if memory:
            stats.append('<span class="cas-memory">mem %.3gMB</span>' %
                         (memory / 1048576.0))
        if stats:
            output.append(
                '<p class="cas-results-performance"><small>%s</small></p>' %
                ' &#183; '.join(stats))

    return '\n'.join(output)
def _get_image(x):
    """displayhook function for PNG data

    Returns the base64 text of ``x`` and enlarges the pandas display limits
    so that the encoded image is not truncated when shown in a frame.
    """
    s = b64encode(x).decode('ascii')
    pd.set_option('display.max_columns', len(s) + 1000)
    pd.set_option('display.max_rows', len(s) + 1000)
    max_colwidth = pd.get_option("display.max_colwidth")
    # None means "no limit": nothing to enlarge, and comparing an int with
    # None would raise TypeError.
    if max_colwidth is not None and len(s) + 100 > max_colwidth:
        pd.set_option("display.max_colwidth", len(s) + 1000)
    return s
def with_plotting_backend(request):
    """Switch pandas to the requested plotting backend for one test.

    The backend name comes from the ``backend_name`` config option; the
    previous backend is put back after the ``yield``.
    """
    option_name = "plotting.backend"
    previous = pd.get_option(option_name)
    requested = request.config.getoption("backend_name")
    pd.set_option(option_name, requested)
    yield
    try:
        pd.set_option(option_name, previous)
    except ImportError:
        pass  # matplotlib is not installed.
def get_past_scans():
    """Return the contents of ``past_scans.csv`` as an HTML table.

    Cell text is escaped and not truncated; the table carries the
    'table table-striped' classes and omits the index.  The previous
    ``display.max_colwidth`` is restored even if reading or rendering fails.
    """
    old_width = pd.get_option('display.max_colwidth')
    try:
        # None is the supported "no limit" value.
        pd.set_option('display.max_colwidth', None)
    except ValueError:
        # Older pandas only accepts integers and uses -1 for "no limit".
        pd.set_option('display.max_colwidth', -1)
    try:
        return pd.read_csv('past_scans.csv').to_html(
            escape=True, classes='table table-striped', index=False)
    finally:
        pd.set_option('display.max_colwidth', old_width)
def setvalue(self, value, silent=False):
    """Store ``value`` in the pandas option or, failing that, on the object.

    When ``_use_pd`` is set, the pandas option named by joining ``_items``
    with dots is updated if it differs from ``value``; otherwise the value is
    kept in ``_val``.  ``silent`` is accepted but not used.
    """
    if not self._use_pd:
        self._val = value
        return
    import pandas as pd
    option_name = '.'.join(self._items)
    if value != pd.get_option(option_name):
        pd.set_option(option_name, value)
def setvalue(self, value):
    """Store ``value`` in the pandas option or, failing that, on the object.

    When ``_use_pd`` is set, the pandas option named by joining ``_items``
    with dots is updated if it differs from ``value``; otherwise the value is
    kept in ``_val``.
    """
    if not self._use_pd:
        self._val = value
        return
    import pandas as pd
    dotted_name = '.'.join(self._items)
    current = pd.get_option(dotted_name)
    if value != current:
        pd.set_option(dotted_name, value)
def repr_DataFrame(df, _):
    """Render ``df`` as text using the limits stored on this function.

    ``max_rows`` and ``max_cols`` are read from attributes of
    ``repr_DataFrame`` itself; the second argument is ignored.
    """
    from pandas import get_option

    render_kwargs = {
        'max_rows': repr_DataFrame.max_rows,
        'max_cols': repr_DataFrame.max_cols,
        'show_dimensions': get_option("display.show_dimensions"),
    }
    return df.to_string(**render_kwargs)
def setPandasWide():
    """Set pandas' display width to the terminal width reported by stty.

    The terminal size and the resulting pandas width are reported through
    ``deport``.  If ``stty size`` does not yield two fields (for example when
    there is no terminal), the pandas width is left unchanged.
    """
    # allow full terminal output
    pipe = os.popen('stty size', 'r')
    try:
        fields = pipe.read().split()
    finally:
        # The pipe was previously never closed.
        pipe.close()

    if len(fields) != 2:
        # Unpacking an empty result used to raise ValueError here.
        deport("stty rows x cols", "unavailable")
        return

    termheight = int(fields[0])
    termwidth = int(fields[1])
    deport("stty rows x cols", '{0:d} x {1:d}'.format(termheight, termwidth))

    pandas.set_option('display.width', termwidth)  # default width is 80
    pandaswidth = pandas.get_option('display.width')
    deport("pandas reporting width", pandaswidth)
def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame

    Raises
    ------
    NotImplementedError
        If an ``encoding`` other than utf-8 is requested.
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            # NOTE(review): 'encoding' was popped above, so this lookup
            # always falls back to display.encoding - confirm intent.
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding')))
        except Exception:
            # Best effort: keep the text as returned by the clipboard.
            # (A bare ``except`` would also swallow KeyboardInterrupt.)
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the 10 first lines, if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = '\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = r'\s+'

    return read_table(StringIO(text), sep=sep, **kwargs)
def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame

    Raises
    ------
    NotImplementedError
        If an ``encoding`` other than utf-8 is requested.
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            # NOTE(review): 'encoding' was popped above, so this lookup
            # always falls back to display.encoding - confirm intent.
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except Exception:
            # Best effort: keep the text as returned by the clipboard.
            # (A bare ``except`` would also swallow KeyboardInterrupt.)
            pass

    # Excel copies into clipboard with \t separation
    # inspect no more than the 10 first lines, if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count('\t') for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        # A literal tab character: the raw form r'\t' is a two-character
        # regular expression rather than a single-character delimiter.
        sep = '\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = r'\s+'

    return read_table(StringIO(text), sep=sep, **kwargs)
def getvalue(self, silent=False):
    """Return the option's value.

    With ``_use_pd`` set, the pandas option named by joining ``_items`` with
    dots is returned; if that lookup fails, ``_use_pd`` is cleared and None
    is returned.  Without ``_use_pd`` the stored ``_val`` is returned.
    ``silent`` is accepted but not used.
    """
    if not self._use_pd:
        return self._val
    import pandas as pd
    try:
        return pd.get_option('.'.join(self._items))
    except (KeyError, LookupError, AttributeError):
        self._use_pd = False
    return None
def test_pandas_context_warn(temp_url):
    """Running the flow in 'warn' mode emits one SettingWithCopyWarning.

    Also checks that the task reports 'warn' and that the global
    ``mode.chained_assignment`` option is unchanged afterwards.
    """
    initial_mode = pd.get_option('mode.chained_assignment')

    with Flow('test_pandas_context_warn') as flow:
        spy = PandasModeSpy(pandas_chained_assignment='warn')
        spy()

    with warnings.catch_warnings(record=True) as w, \
            prefect.context(caches={}):
        flow_state = flow.run()

    # Keep only the SettingWithCopyWarning warnings, there could be others
    copy_warning = pandas.core.common.SettingWithCopyWarning
    relevant = [entry for entry in w if entry.category == copy_warning]
    assert len(relevant) == 1

    task_states = list(flow_state.result.values())
    result = task_states[0].result
    assert result == 'warn'
    assert pd.get_option('mode.chained_assignment') == initial_mode
def repr_pandas_Series(series, _):
    """Render ``series`` as text using the row limit stored on this function.

    ``max_rows`` is read from an attribute of ``repr_pandas_Series`` itself;
    the second argument is ignored.
    """
    from pandas import get_option

    render_kwargs = dict(
        max_rows=repr_pandas_Series.max_rows,
        name=series.name,
        dtype=series.dtype,
        length=get_option("display.show_dimensions"),
    )
    return series.to_string(**render_kwargs)
def get_train_test_sets(train_file, test_file):
    """Load the train and test CSV files, indexed by their 'id' column.

    As a side effect, pandas is configured to print all rows and columns.

    Returns
    -------
    tuple
        ``(train_set, test_set, test_id)`` where ``test_id`` is the test
        set's 'id' column, taken before it became the index (kept for the
        submission).
    """
    # print all rows and columns when needed
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)

    # load into pd.DataFrame
    train_set = pd.read_csv(train_file)
    test_set = pd.read_csv(test_file)

    # extract test_set id Series, for submission
    test_id = test_set.loc[:, 'id']

    # use id as index
    train_set.set_index('id', inplace=True)
    test_set.set_index('id', inplace=True)

    return (train_set, test_set, test_id)
def test_edit_multi_index_df():
    """Edit one cell of a multi-index frame through the widget."""
    df_multi = create_multi_index_df()
    view = QgridWidget(df=df_multi)
    old_val = df_multi.loc[('bar', 'two'), 1]

    # The old value is rounded to one digit less than the display precision.
    digits = pd.get_option('display.precision') - 1
    rounded_old_val = round(old_val, digits)

    check_edit_success(view, 1, 1,
                       old_val, rounded_old_val,
                       3.45678, 3.45678)
def _get_image(x):
    """displayhook function for PIL Images, rendered as PNG

    Saves ``x`` as PNG into an in-memory buffer, base64-encodes it and widens
    ``display.max_colwidth`` when needed so the text is not truncated.
    """
    # Bind the name the body actually uses; a plain ``import pandas`` left
    # ``pd`` dependent on a module-level import.
    import pandas as pd

    # NOTE(review): on Python 3 a text-mode StringIO cannot receive PNG
    # bytes - confirm which StringIO this module imports.
    sio = StringIO()
    x.save(sio, format='PNG')
    s = b64encode(sio.getvalue())
    max_colwidth = pd.get_option("display.max_colwidth")
    # None means "no limit": nothing to widen, and comparing an int with
    # None would raise TypeError.
    if max_colwidth is not None and len(s) + 100 > max_colwidth:
        pd.set_option("display.max_colwidth", len(s) + 100)
    return s
def show(self,
         info: bool = False,
         all_columns: bool = False,
         all_rows: bool = False,
         max_columns: int = None,
         max_rows: int = None,
         precision: int = None):
    """
    Display the info and details of data.

    The display options are reset to their previous state after printing,
    including when printing raises, so options edited here are not
    inherited by later output.

    :param info: True to also print the info summary, shape, column names
        and memory usage
    :param all_columns: True to show every column (overrides max_columns)
    :param all_rows: True to show every row (overrides max_rows)
    :param max_columns: the max columns of display
    :param max_rows: the max rows of display
    :param precision: a fast adjustment to the precision of display
    :return None
    """
    if info:
        self.info(memory_usage=False)
        print(
            '\n\n - Shape: {}\n - Index: {}\n - Memory usage: {:.3f} MB\n'.
            format(self.shape, ", ".join(self.columns),
                   self.memory_usage().sum() / 1024**2))

    saved = [(name, pd.get_option(name))
             for name in ('display.max_rows', 'display.max_columns',
                          'display.precision')]
    try:
        if max_columns:
            pd.set_option('display.max_columns', max_columns)
        if max_rows:
            pd.set_option('display.max_rows', max_rows)
        if all_columns:
            pd.set_option('display.max_columns', None)
        if all_rows:
            pd.set_option('display.max_rows', None)
        if precision:
            pd.set_option('display.precision', precision)
        print(self)
    finally:
        for name, value in saved:
            pd.set_option(name, value)
def _repr_html_(self):
    '''
    Return a html representation for a particular DataFrame

    Returns the escaped ``info()`` text in a ``<pre>`` block when the frame
    asks for an info-style repr, the post-processed HTML table when notebook
    HTML output is enabled, and None otherwise.
    '''
    # Calling a private DataFrame method here, so protect it.
    if getattr(self, '_info_repr', lambda: False)():
        buf = six.StringIO('')
        self.info(buf=buf)
        # need to escape the <class>, should be the first line.
        # (The replacement text had degraded to the raw characters, which
        # made these two calls no-ops.)
        val = buf.getvalue().replace('<', r'&lt;', 1)
        val = val.replace('>', r'&gt;', 1)
        return '<pre>' + val + '</pre>'

    kwargs = {}
    if get_option('display.apply_formats'):
        kwargs['formatters'] = self._get_formatters()
        kwargs['na_rep'] = '.'

    if pd.get_option('display.notebook_repr_html'):
        try:
            kwargs['min_rows'] = pd.get_option('display.min_rows')
        except Exception:
            # Best effort: if the option cannot be read, leave it unset.
            pass
        max_rows = pd.get_option('display.max_rows')
        max_cols = pd.get_option('display.max_columns')
        show_dimensions = pd.get_option('display.show_dimensions')

        formatter = pdfmt.DataFrameFormatter(
            self, max_rows=max_rows, max_cols=max_cols,
            show_dimensions=show_dimensions, **kwargs)

        # NOTE: Patch for bug in pandas DataFrameFormatter when using
        #       formatters on a DataFrame that is truncated in the console.
        formatter.columns = formatter.tr_frame.columns

        html = formatter.to_html(**notebook_opts)
        if html is None:
            # Some formatter versions write into a buffer instead of
            # returning the markup.
            if getattr(formatter, 'buf', None) is not None:
                html = formatter.buf.getvalue()
            else:
                return None

        return self._post_process_html(html)

    return None
def df(request):
    """Return the sample frame selected by ``request.param``.

    Raises ValueError for an unrecognised parameter.
    """
    data_type = request.param
    # Index-related keyword arguments shared by every mkdf() call below.
    axes = dict(c_idx_type='s', r_idx_type='i',
                c_idx_names=[None], r_idx_names=[None])

    if data_type == 'delims':
        return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
                             'b': ['hi\'j', 'k\'\'lm']})
    if data_type == 'utf8':
        return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
                             'b': ['øπ∆˚¬', 'œ∑´®']})
    if data_type == 'utf16':
        return pd.DataFrame({'a': ['\U0001f44d\U0001f44d',
                                   '\U0001f44d\U0001f44d'],
                             'b': ['abc', 'def']})
    if data_type == 'string':
        return mkdf(5, 3, **axes)
    if data_type == 'long':
        # One row more than the display limit.
        max_rows = get_option('display.max_rows')
        return mkdf(max_rows + 1, 3,
                    data_gen_f=lambda *args: randint(2), **axes)
    if data_type == 'nonascii':
        return pd.DataFrame({'en': 'in English'.split(),
                             'es': 'en español'.split()})
    if data_type == 'colwidth':
        # Cells one character wider than the column-width limit.
        _cw = get_option('display.max_colwidth') + 1
        return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw, **axes)
    if data_type == 'mixed':
        return DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
                          'b': np.arange(1, 6),
                          'c': list('abcde')})
    if data_type == 'float':
        return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01, **axes)
    if data_type == 'int':
        return mkdf(5, 3, data_gen_f=lambda *args: randint(2), **axes)
    raise ValueError
def render(self, df, encoding, output):
    """Display ``df`` as an HTML table with its index column hidden.

    Inside the ``output`` context, two HTML fragments are sent through
    ``self.display.html``: first a ``<style>`` rule set, then the table
    produced by ``df.to_html`` using the current pandas display limits.
    ``encoding`` is accepted but not used here.
    """
    # This will hide the index column for pandas df.
    hide_index_css = """ <style> table.dataframe.hideme thead th:first-child { display: none; } table.dataframe.hideme tbody th { display: none; } </style> """

    with output:
        limits = {
            "max_rows": pd.get_option("display.max_rows"),
            "max_cols": pd.get_option("display.max_columns"),
            "show_dimensions": pd.get_option("display.show_dimensions"),
        }
        self.display.html(hide_index_css)
        # The "hideme" class is what the CSS rules above select on.
        table_markup = df.to_html(notebook=True, classes="hideme", **limits)
        self.display.html(table_markup)
def _get_image(x):
    """displayhook function for PIL Images, rendered as PNG

    Saves ``x`` as PNG into memory and returns the base64 text of those
    bytes.  As a side effect the global pandas options
    ``display.max_columns`` and ``display.max_rows`` are set to the encoded
    length plus 1000, and ``display.max_colwidth`` is raised to the same
    value when it is smaller than the encoded length plus 100.
    """
    png_buffer = BytesIO()
    x.save(png_buffer, format='PNG')
    encoded = b64encode(png_buffer.getvalue()).decode('ascii')

    roomy = len(encoded) + 1000
    for option in ('display.max_columns', 'display.max_rows'):
        pd.set_option(option, roomy)

    # Only widen the column limit when the payload would not fit already.
    if pd.get_option("display.max_colwidth") < len(encoded) + 100:
        pd.set_option("display.max_colwidth", roomy)
    return encoded
def _get_image(x):
    """displayhook function for PIL Images, rendered as PNG

    Saves ``x`` as PNG into an in-memory *binary* buffer and returns the
    base64 encoding of those bytes as ASCII text.  If the encoded text plus
    100 characters of slack exceeds the pandas ``display.max_colwidth``
    option, that global option is raised to fit.
    """
    # Imported locally so the function does not rely on module-level names.
    from io import BytesIO

    import pandas as pd

    # PNG data is binary: a text buffer cannot receive it on Python 3.
    bio = BytesIO()
    x.save(bio, format="PNG")
    # b64encode yields bytes; decode so the result is text.
    s = b64encode(bio.getvalue()).decode("ascii")
    if len(s) + 100 > pd.get_option("display.max_colwidth"):
        pd.set_option("display.max_colwidth", len(s) + 100)
    return s
def getvalue(self):
    """Return the option value, preferring the pandas option when enabled.

    While ``self._use_pd`` is true, the dotted key built from
    ``self._items`` is looked up with ``pandas.get_option``.  If pandas does
    not know that key, pandas redirection is switched off for this object
    and the locally stored ``self._val`` is returned instead.
    """
    if self._use_pd:
        import pandas as pd

        try:
            return pd.get_option('.'.join(self._items))
        except KeyError:
            # pandas' OptionError is KeyError-compatible, so this catches it
            # without importing it from a version-specific module path.
            self._use_pd = False
    # Reached when redirection is off, or was just turned off above.
    return self._val
def _get_image(x):
    """displayhook function for PIL Images, rendered as PNG

    Returns the base64 text of ``x`` saved as PNG.  Also adjusts global
    pandas display options: ``display.max_columns`` and ``display.max_rows``
    are always set to the encoded length plus 1000, and
    ``display.max_colwidth`` is set to that value when the encoded length
    plus 100 exceeds its current setting.
    """
    import pandas as pd

    stream = BytesIO()
    x.save(stream, format="PNG")
    png_bytes = stream.getvalue()
    text = b64encode(png_bytes).decode("ascii")

    size = len(text)
    pd.set_option("display.max_columns", size + 1000)
    pd.set_option("display.max_rows", size + 1000)

    needs_wider_columns = size + 100 > pd.get_option("display.max_colwidth")
    if needs_wider_columns:
        pd.set_option("display.max_colwidth", size + 1000)
    return text
def _repr_html_(self):
    """Return an HTML representation of the frame.

    When ``self._info_repr()`` is true, the ``info()`` summary is returned
    inside ``<pre>`` with ``&``, ``<`` and ``>`` escaped, so the leading
    ``<class ...>`` line is shown as text rather than parsed as a tag.

    Otherwise the frame is rendered with ``to_html`` using the current
    pandas ``display.max_rows`` / ``display.max_columns`` limits, wrapped in
    a ``<div>``, with the ``dataframe `` class prefix stripped and an ``id``
    attribute added to the ``<table>`` tag.
    """
    if self._info_repr():
        buf = StringIO()
        self.info(buf=buf)
        # '&' must be replaced first so the entities added next stay intact.
        summary = (buf.getvalue()
                   .replace('&', '&amp;')
                   .replace('<', '&lt;')
                   .replace('>', '&gt;'))
        return '<pre>' + summary + '</pre>'

    max_rows = pd.get_option("display.max_rows")
    max_cols = pd.get_option("display.max_columns")
    html = self.to_html(max_rows=max_rows, max_cols=max_cols,
                        show_dimensions=False,
                        classes='table table-bordered table-striped')

    text = '<div>\n' + html + '\n</div>'
    text = text.replace('dataframe ', '')
    # NOTE(review): `div_id` is not defined in this function; it is expected
    # to come from the enclosing scope.
    text = text.replace('<table ', '<table id="{}" '.format(div_id))
    return text
def get_console_size():
    """Return console size as tuple = (width, height).

    Returns (None,None) in non-interactive session.

    Each component is the user's ``display.width`` / ``display.height``
    option when that is truthy, otherwise a fallback that depends on the
    kind of session:

    * interactive IPython frontend (cannot detect terminal size): the
      default values of the two options;
    * interactive plain terminal: the detected terminal size;
    * non-interactive script: ``None`` -- the caller needs to deal with it.
    """
    from pandas import get_option
    from pandas.core import common as com

    width_option = get_option('display.width')
    # deprecated.
    height_option = get_option('display.height', silent=True)

    # A user setting of None for width/height signals auto-detection, which
    # only has something to detect in an interactive shell terminal.
    if not com.in_interactive_session():
        fallback_width = fallback_height = None
    elif com.in_ipython_frontend():
        # sane defaults for interactive non-shell terminal
        # match default for width,height in config_init
        from pandas.core.config import get_default_val
        fallback_width = get_default_val('display.width')
        fallback_height = get_default_val('display.height')
    else:
        # pure terminal
        fallback_width, fallback_height = get_terminal_size()

    width = width_option or fallback_width
    height = height_option or fallback_height
    return (width, height)
def test_edit_multi_index_df():
    """Edit one cell of a multi-index frame shown in a QgridWidget.

    The first index level is renamed to 'first' before the widget is built.
    The cell at ``('bar', 'two')``, column ``1`` supplies the "old" value,
    which is also passed rounded to one digit fewer than pandas'
    ``display.precision``.
    """
    frame = create_multi_index_df()
    frame.index.set_names('first', level=0, inplace=True)
    widget = QgridWidget(df=frame)

    original = frame.loc[('bar', 'two'), 1]
    digits = pd.get_option('display.precision') - 1
    replacement = 3.45678

    # NOTE(review): argument meanings are defined by check_edit_success,
    # which is not visible here -- presumably (widget, row, column,
    # old value, old value as displayed, new value, new value as stored).
    check_edit_success(widget, 1, 1,
                       original, round(original, digits),
                       replacement, replacement)
def hover_table(self):
    """
    Return a html representation for a particular DataFrame.
    Mainly for IPython notebook.

    Returns the ``info()`` summary inside ``<pre>`` (HTML-escaped) when
    ``self._info_repr()`` is true; otherwise, if the pandas option
    ``display.notebook_repr_html`` is on, the ``to_html`` table wrapped in a
    scrollable ``<div>`` with the ``dataframe `` class prefix stripped.
    Returns None when that option is off.

    Raises NotImplementedError when running inside a QtConsole.
    """
    # qtconsole doesn't report it's line width, and also
    # behaves badly when outputting an HTML table
    # that doesn't fit the window, so disable it.
    if com.in_qtconsole():
        raise NotImplementedError('HTML output is disabled in QtConsole')

    if self._info_repr():
        buf = StringIO(u(""))
        self.info(buf=buf)
        # Escape the summary so its leading "<class ...>" line (and any
        # other markup characters) is displayed as text inside <pre>.
        # '&' must be replaced first.
        summary = (buf.getvalue()
                   .replace('&', '&amp;')
                   .replace('<', '&lt;')
                   .replace('>', '&gt;'))
        return '<pre>' + summary + '</pre>'

    if pd.get_option("display.notebook_repr_html"):
        max_rows = pd.get_option("display.max_rows")
        max_cols = pd.get_option("display.max_columns")
        html = self.to_html(max_rows=max_rows, max_cols=max_cols,
                            show_dimensions=False,
                            classes='table table-hover')

        # Wide/long tables scroll inside the div instead of being summarised.
        text = '<div style="max-height:1000px; max-width:900px;overflow:auto;">\n'
        text += html
        text += '\n</div>'
        text = text.replace('dataframe ', '')
        return text
    else:
        return None