def save(self):
    """
    Create the csv writer, push the data through it and release any handle
    that was opened here.

    Three kinds of destination are handled:

    * zip output (``path_or_buf`` is a ``ZipFile``, or it has no ``write``
      attribute and ``compression == 'zip'``): rows are collected in an
      in-memory ``StringIO`` and handed over in one piece afterwards;
    * any other object exposing ``write``: rows go straight to that object,
      which is not closed here;
    * anything else: a handle is requested from ``_get_handle`` and closed,
      together with the extra handles it returns, once writing is done.

    A ``RuntimeWarning`` is issued when ``compression`` is set although
    ``path_or_buf`` exposes ``write``.
    """
    # GH21227 internal compression is not used when file-like passed.
    if self.compression and hasattr(self.path_or_buf, 'write'):
        warnings.warn("compression has no effect when passing file-like "
                      "object as input.", RuntimeWarning, stacklevel=2)

    # when zip compression is called.
    if isinstance(self.path_or_buf, ZipFile):
        zip_output = True
    else:
        zip_output = (not hasattr(self.path_or_buf, 'write')
                      and self.compression == 'zip')

    must_close = False
    if zip_output:
        # zipfile doesn't support writing string to archive, so the csv text
        # is buffered first and dumped into the zip handle afterwards.
        # GH21241, GH21118
        stream = StringIO()
    elif hasattr(self.path_or_buf, 'write'):
        stream = self.path_or_buf
    else:
        stream, handles = _get_handle(self.path_or_buf, self.mode,
                                      encoding=self.encoding,
                                      compression=self.compression)
        must_close = True

    try:
        options = {
            'lineterminator': self.line_terminator,
            'delimiter': self.sep,
            'quoting': self.quoting,
            'doublequote': self.doublequote,
            'escapechar': self.escapechar,
            'quotechar': self.quotechar,
        }
        if self.encoding == 'ascii':
            self.writer = csvlib.writer(stream, **options)
        else:
            self.writer = UnicodeWriter(stream, encoding=self.encoding,
                                        **options)

        self._save()
    finally:
        if zip_output:
            # GH17778 handles zip compression separately.
            payload = stream.getvalue()
            if hasattr(self.path_or_buf, 'write'):
                self.path_or_buf.write(payload)
            else:
                stream, handles = _get_handle(self.path_or_buf, self.mode,
                                              encoding=self.encoding,
                                              compression=self.compression)
                stream.write(payload)
                must_close = True
        if must_close:
            # ``handles`` is bound on every route that sets ``must_close``.
            stream.close()
            for extra in handles:
                extra.close()
def _coef_table(self):
    """
    Render the coefficient table as fixed-width text.

    The first line holds the column titles.  One row follows for every
    entry of ``self._cols`` with the name in parentheses, the mean beta,
    its standard error, the t-statistic and the two bounds
    ``mean_beta -/+ 1.96 * std_beta``.  Before rows 5, 10, 15, ...
    (0-based) a newline followed by ``common.banner("")`` is inserted.
    When ``self._nw_lags_beta`` is not ``None`` a closing note naming the
    lag count is appended.

    Returns
    -------
    str
    """
    pieces = [
        "%13s %13s %13s %13s %13s %13s\n"
        % ("Variable", "Beta", "Std Err", "t-stat", "CI 2.5%", "CI 97.5%")
    ]
    row_format = "%13s %13.4f %13.4f %13.2f %13.4f %13.4f\n"

    for position, name in enumerate(self._cols):
        # Separator in front of every fifth row, never before the first.
        if position > 0 and position % 5 == 0:
            pieces.append("\n" + common.banner(""))

        estimates = self._results
        mean_beta = estimates["mean_beta"][position]
        std_beta = estimates["std_beta"][position]
        t_stat = estimates["t_stat"][position]
        lower = mean_beta - 1.96 * std_beta
        upper = mean_beta + 1.96 * std_beta

        row = "(%s)" % name, mean_beta, std_beta, t_stat, lower, upper
        pieces.append(row_format % row)

    lags = self._nw_lags_beta
    if lags is not None:
        pieces.append("\n")
        pieces.append("*** The Std Err, t-stat are Newey-West "
                      "adjusted with Lags %5d\n" % lags)

    return "".join(pieces)
def _coef_table(self):
    """
    Build the fixed-width text table of the estimated betas.

    Output layout: a title line, then one line per entry of ``self._cols``
    (name in parentheses, mean beta, standard error, t-statistic and the
    bounds ``mean_beta -/+ 1.96 * std_beta``).  A newline plus
    ``common.banner('')`` precedes every fifth row except the first.  If
    ``self._nw_lags_beta`` is not ``None`` a trailing note with the lag
    count is added.

    Returns
    -------
    str
    """
    out = StringIO()
    emit = out.write

    titles = ('Variable', 'Beta', 'Std Err', 't-stat', 'CI 2.5%', 'CI 97.5%')
    emit('%13s %13s %13s %13s %13s %13s\n' % titles)

    for idx, name in enumerate(self._cols):
        if idx and idx % 5 == 0:
            emit('\n' + common.banner(''))

        beta = self._results['mean_beta'][idx]
        err = self._results['std_beta'][idx]
        t_stat = self._results['t_stat'][idx]
        # Half-width of the interval printed in the last two columns.
        margin = 1.96 * err

        emit('%13s %13.4f %13.4f %13.2f %13.4f %13.4f\n'
             % ('(%s)' % name, beta, err, t_stat,
                beta - margin, beta + margin))

    if self._nw_lags_beta is not None:
        emit('\n')
        emit('*** The Std Err, t-stat are Newey-West '
             'adjusted with Lags %5d\n' % self._nw_lags_beta)

    return out.getvalue()
def _read_one_data(self, ftppath, params):
    """
    Fetch one index file and parse it with ``read_csv``.

    Paths matching ``_ZIP_RE`` are delegated to ``self._read_zipfile`` and
    paths matching ``_GZ_RE`` to ``self._read_gzfile``.  Any other path is
    retrieved line by line over ``self._sec_ftp_session`` with a ``RETR``
    command and collected in a ``StringIO`` rewound to its start.  The
    result is passed through ``self._remove_header`` and read as a
    ``|``-delimited table with columns ``_COLUMNS``; the ``filename``
    column is then mapped through ``self._fix_old_file_paths``.

    ``params`` is accepted but not used in this method.

    Raises
    ------
    RemoteDataError
        If the FTP retrieval raises ``EOFError``.
    """
    if re.search(_ZIP_RE, ftppath) is not None:
        raw_index = self._read_zipfile(ftppath)
    elif re.search(_GZ_RE, ftppath) is not None:
        raw_index = self._read_gzfile(ftppath)
    else:
        received = []
        try:
            self._sec_ftp_session.retrlines('RETR ' + ftppath,
                                            received.append)
        except EOFError:
            raise RemoteDataError('FTP server has closed the connection.')

        # retrlines delivers lines one at a time to the callback; each one
        # gets a newline appended when the text is reassembled.
        raw_index = StringIO()
        raw_index.writelines(line + '\n' for line in received)
        raw_index.seek(0)

    body = self._remove_header(raw_index)
    index = read_csv(body, delimiter='|', header=None, index_col=False,
                     names=_COLUMNS, low_memory=False, dtype=_COLUMN_TYPES)
    index['filename'] = index['filename'].map(self._fix_old_file_paths)
    return index
def _remove_header(self, data):
    """
    Copy everything that follows the first line matching ``_DIVIDER``.

    Lines up to and including the divider are dropped.  Every later line is
    written to a new ``StringIO`` with ``'\\n'`` appended, so input lines
    that already end in a newline are followed by an empty line in the
    result.  If no line matches, the returned buffer is empty.

    Returns
    -------
    StringIO
        Rewound to position 0.
    """
    body = StringIO()
    lines = iter(data)

    # Consume the header: stop right after the divider line.
    for line in lines:
        if re.search(_DIVIDER, line) is not None:
            break

    # Whatever is left on the same iterator is payload.
    for line in lines:
        body.write(line + '\n')

    body.seek(0)
    return body
def _read_url_as_StringIO(self, url, params=None):
    """
    Fetch ``url`` through ``self._get_response`` and return its content as
    a ``StringIO`` positioned at the start.

    Content of type ``compat.binary_type`` is converted with
    ``bytes_to_str`` first; anything else is written unchanged.
    Any retrying is presumably done inside ``_get_response``; it is not
    visible here.
    """
    response = self._get_response(url, params=params)

    if isinstance(response.content, compat.binary_type):
        text = bytes_to_str(response.content)
    else:
        text = response.content

    out = StringIO()
    out.write(text)
    out.seek(0)
    return out
def save(self):
    """
    Create the csv writer, write the data and flush it to the destination.

    The effective encoding is ``self.encoding`` or, when that is ``None``,
    ``'ascii'`` if ``compat.PY2`` is set and ``'utf-8'`` otherwise.

    Destination handling:

    * ``path_or_buf`` is not file-like: it is used as the path;
    * ``path_or_buf`` is file-like and has a ``name``: that name is used as
      the path;
    * otherwise rows are written directly to ``path_or_buf``.

    In the two path cases the rows are first collected in a ``StringIO``
    and then written to a handle obtained from ``_get_handle``, which is
    closed together with the extra handles it returns.

    The buffered text is only read back on the path route, so a stream
    passed in by the caller does not need a ``getvalue()`` method.
    """
    # create the writer & save
    if self.encoding is None:
        encoding = 'ascii' if compat.PY2 else 'utf-8'
    else:
        encoding = self.encoding

    # PR 21300 uses string buffer to receive csv writing and dump into
    # file-like output with compression as option. GH 21241, 21118
    f = StringIO()
    if not is_file_like(self.path_or_buf):
        # path_or_buf is path
        path_or_buf = self.path_or_buf
    elif hasattr(self.path_or_buf, 'name'):
        # path_or_buf is file handle
        path_or_buf = self.path_or_buf.name
    else:
        # path_or_buf is file-like IO objects.
        f = self.path_or_buf
        path_or_buf = None

    try:
        writer_kwargs = dict(lineterminator=self.line_terminator,
                             delimiter=self.sep, quoting=self.quoting,
                             doublequote=self.doublequote,
                             escapechar=self.escapechar,
                             quotechar=self.quotechar)
        if encoding == 'ascii':
            self.writer = csvlib.writer(f, **writer_kwargs)
        else:
            writer_kwargs['encoding'] = encoding
            self.writer = UnicodeWriter(f, **writer_kwargs)

        self._save()

    finally:
        # GH 17778 handles zip compression for byte strings separately.
        if path_or_buf:
            # ``f`` is still the StringIO created above on this route; a
            # caller-supplied stream is never asked for ``getvalue()``.
            buf = f.getvalue()
            f, handles = _get_handle(path_or_buf, self.mode,
                                     encoding=encoding,
                                     compression=self.compression)
            f.write(buf)
            f.close()
            for _fh in handles:
                _fh.close()
def _read_url_as_StringIO(self, url, params=None):
    """
    Fetch ``url`` and return the sanitized response text as a ``StringIO``
    positioned at the start.

    The response from ``self._get_response`` is passed through
    ``self._sanitize_response``.  Text of type ``compat.binary_type`` is
    converted with ``bytes_to_str``; anything else is written unchanged.
    Any retrying is presumably done inside ``_get_response``; it is not
    visible here.

    Raises
    ------
    IOError
        If the sanitized text has length 0.  The message names the class
        and ``self.url`` (not the ``url`` argument).
    """
    response = self._get_response(url, params=params)
    text = self._sanitize_response(response)

    if len(text) == 0:
        raise IOError("{} request returned no data; check URL for invalid "
                      "inputs: {}".format(self.__class__.__name__, self.url))

    if isinstance(text, compat.binary_type):
        decoded = bytes_to_str(text)
    else:
        decoded = text

    out = StringIO()
    out.write(decoded)
    out.seek(0)
    return out
def _read_url_as_StringIO(self, url, params=None, min=0, errors='ignore'):
    """
    Override of the base-class method of the same name: decode the response
    text with the encoding supplied by the derived class.

    The response from ``self._get_response`` is passed through
    ``self._sanitize_response``.  When the text is no longer than
    ``self._read_url_as_StringIO_min_len`` the outcome depends on
    ``self._read_url_as_StringIO_less_min_len``: truthy raises ``IOError``,
    falsy returns ``None``.  Otherwise the text is returned as a
    ``StringIO`` positioned at the start, with ``compat.binary_type`` input
    converted through ``bytes_to_str(..., encoding=self._encoding)``.

    ``min`` and ``errors`` are accepted but not read in this method.

    Raises
    ------
    IOError
        See above; the message names the class and ``self.url``.
    """
    response = self._get_response(url, params=params)
    text = self._sanitize_response(response)

    if len(text) <= self._read_url_as_StringIO_min_len:
        if not self._read_url_as_StringIO_less_min_len:
            return None
        raise IOError("{} request returned no data; check URL for "
                      "invalid inputs: {}".format(self.__class__.__name__,
                                                  self.url))

    if isinstance(text, compat.binary_type):
        decoded = bytes_to_str(text, encoding=self._encoding)
    else:
        decoded = text

    out = StringIO()
    out.write(decoded)
    out.seek(0)
    return out
def _read_raw(self, **kwargs):
    """
    Return the raw response body as a ``StringIO``, downloading it once.

    On the first call (``self._raw_content is None``) the response from
    ``self._requests_get()`` is streamed in chunks of ``self._chunk_size``
    while a ``network.ProgressBar`` sized from the ``content-length``
    header is advanced.  If anything in that path fails, for example a
    missing ``content-length``, the whole ``response.content`` is used
    instead, converted with ``bytes_to_str`` when it is ``binary_type``.
    The buffer is stored in ``self._raw_content`` and returned as is on
    later calls.  It is not rewound here.

    ``kwargs`` is accepted but not read in this method.
    """
    if self._raw_content is None:
        response = self._requests_get()
        content_length = response.headers.get("content-length")
        out = StringIO()
        try:
            content_length = int(content_length)
            pb = network.ProgressBar(total=content_length)
            for chunk in response.iter_content(self._chunk_size):
                if chunk:
                    out.write(chunk)
                    pb.update(self._chunk_size)
            self._raw_content = out
        except Exception:
            # no content_length or any errors
            # Deliberately broad: this is a best-effort fallback.  Start
            # from a fresh buffer so chunks written before the failure are
            # not followed by a second, full copy of the body.
            out = StringIO()
            if isinstance(response.content, binary_type):
                out.write(bytes_to_str(response.content))
            else:
                out.write(response.content)
            self._raw_content = out
    return self._raw_content
def _read_raw(self, **kwargs):
    """
    Return the raw response body as a ``StringIO``, downloading it once.

    On the first call (``self._raw_content is None``) the response from
    ``self._requests_get()`` is streamed in chunks of ``self._chunk_size``
    while a ``network.ProgressBar`` sized from the ``content-length``
    header is advanced.  If anything in that path fails, for example a
    missing ``content-length``, the whole ``response.content`` is used
    instead, converted with ``bytes_to_str`` when it is ``binary_type``.
    The buffer is stored in ``self._raw_content`` and returned as is on
    later calls.  It is not rewound here.

    ``kwargs`` is accepted but not read in this method.
    """
    if self._raw_content is None:
        response = self._requests_get()
        content_length = response.headers.get('content-length')
        out = StringIO()
        try:
            content_length = int(content_length)
            pb = network.ProgressBar(total=content_length)
            for chunk in response.iter_content(self._chunk_size):
                if chunk:
                    out.write(chunk)
                    pb.update(self._chunk_size)
            self._raw_content = out
        except Exception:
            # no content_length or any errors
            # Deliberately broad: this is a best-effort fallback.  Start
            # from a fresh buffer so chunks written before the failure are
            # not followed by a second, full copy of the body.
            out = StringIO()
            if isinstance(response.content, binary_type):
                out.write(bytes_to_str(response.content))
            else:
                out.write(response.content)
            self._raw_content = out
    return self._raw_content
def summary(self):
    """
    Return the formatted text report of the OLS computation.

    The report is ``template`` filled with three banners from
    ``scom.banner``, the regression formula, the statistics stored in
    ``self._results`` and the coefficient table read from
    ``self._coef_table`` (accessed as an attribute, not called).

    The formula lists every entry of ``results['beta'].index`` as
    ``<name>`` joined by `` + ``.  A newline plus 12 spaces is inserted
    whenever the running length reaches the next multiple of 68.
    """
    # Reproduced character for character from the source, on one line.
    template = """ %(bannerTop)s Formula: Y ~ %(formula)s Number of Observations: %(nobs)d Number of Degrees of Freedom: %(df)d R-squared: %(r2)10.4f Adj R-squared: %(r2_adj)10.4f Rmse: %(rmse)10.4f F-stat %(f_stat_shape)s: %(f_stat)10.4f, p-value: %(f_stat_p_value)10.4f Degrees of Freedom: model %(df_model)d, resid %(df_resid)d %(bannerCoef)s %(coef_table)s %(bannerEnd)s """
    coef_table = self._coef_table

    results = self._results
    f_stat = results['f_stat']

    terms = ['<%s>' % str(c) for c in results['beta'].index]

    pieces = [terms[0]]
    width = len(terms[0])
    rows = 1
    for term in terms[1:]:
        width += len(term) + 3
        # Same test as ``width // (68 * rows)`` being non-zero.
        if width >= 68 * rows:
            pieces.append('\n' + ' ' * 12)
            rows += 1
        pieces.append(' + ' + term)

    return template % {
        'bannerTop': scom.banner('Summary of Regression Analysis'),
        'bannerCoef': scom.banner('Summary of Estimated Coefficients'),
        'bannerEnd': scom.banner('End of Summary'),
        'formula': ''.join(pieces),
        'r2': results['r2'],
        'r2_adj': results['r2_adj'],
        'nobs': results['nobs'],
        'df': results['df'],
        'df_model': results['df_model'],
        'df_resid': results['df_resid'],
        'coef_table': coef_table,
        'rmse': results['rmse'],
        'f_stat': f_stat['f-stat'],
        'f_stat_shape': '(%d, %d)' % (f_stat['DF X'], f_stat['DF Resid']),
        'f_stat_p_value': f_stat['p-value'],
    }
def _coef_table(self):
    """
    Render the table of estimated coefficients as fixed-width text.

    Layout: a title line, ``scom.banner('')``, then one line per entry of
    ``results['beta'].index`` with the coefficient, its standard error,
    t-statistic, p-value and the bounds ``beta -/+ 1.96 * std_err``.  Each
    row starts with a newline rather than ending with one.  A newline plus
    another banner precedes every fifth row except the first.  When
    ``self.nw_lags`` is not ``None`` a note with the lag count is appended.

    Returns
    -------
    str
    """
    pieces = [
        '%14s %10s %10s %10s %10s %10s %10s\n'
        % ('Variable', 'Coef', 'Std Err', 't-stat',
           'p-value', 'CI 2.5%', 'CI 97.5%'),
        scom.banner(''),
    ]
    row_format = '\n%14s %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f'

    results = self._results
    beta = results['beta']

    for position, name in enumerate(beta.index):
        if position > 0 and position % 5 == 0:
            pieces.append('\n' + scom.banner(''))

        std_err = results['std_err'][name]
        coef = beta[name]
        lower = coef - 1.96 * std_err
        upper = coef + 1.96 * std_err
        t_stat = results['t_stat'][name]
        p_value = results['p_value'][name]

        pieces.append(row_format % (name, coef, std_err, t_stat,
                                    p_value, lower, upper))

    lags = self.nw_lags
    if lags is not None:
        pieces.append('\n')
        pieces.append('*** The calculations are Newey-West '
                      'adjusted with lags %5d\n' % lags)

    return ''.join(pieces)
def _coef_table(self):
    """
    Build the fixed-width coefficient table shown in the summary.

    A title line and ``scom.banner("")`` come first.  Every entry of
    ``results["beta"].index`` then contributes one row, written with a
    leading newline, holding the coefficient, standard error, t-statistic,
    p-value and the bounds ``beta -/+ 1.96 * std_err``.  Every fifth row
    except the first is preceded by a newline and a banner.  A closing
    note with the lag count is added when ``self.nw_lags`` is not ``None``.

    Returns
    -------
    str
    """
    out = StringIO()
    emit = out.write

    titles = ("Variable", "Coef", "Std Err", "t-stat",
              "p-value", "CI 2.5%", "CI 97.5%")
    emit("%14s %10s %10s %10s %10s %10s %10s\n" % titles)
    emit(scom.banner(""))

    results = self._results
    beta = results["beta"]

    for idx, name in enumerate(beta.index):
        if idx and idx % 5 == 0:
            emit("\n" + scom.banner(""))

        std_err = results["std_err"][name]
        estimate = beta[name]
        # Half-width of the interval printed in the last two columns.
        margin = 1.96 * std_err
        t_stat = results["t_stat"][name]
        p_value = results["p_value"][name]

        emit("\n%14s %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f"
             % (name, estimate, std_err, t_stat, p_value,
                estimate - margin, estimate + margin))

    if self.nw_lags is not None:
        emit("\n")
        emit("*** The calculations are Newey-West "
             "adjusted with lags %5d\n" % self.nw_lags)

    return out.getvalue()
def summary(self):
    """
    Return the formatted result of the OLS computation as text.

    ``template`` is filled with: three banners from ``scom.banner``, the
    formula built from ``results["beta"].index``, the statistics held in
    ``self._results`` and the coefficient table read from
    ``self._coef_table`` (accessed as an attribute, not called).

    Formula terms are written as ``<name>`` separated by `` + ``.  A line
    break followed by 12 spaces is inserted each time the running length
    reaches the next multiple of 68.
    """
    # Reproduced character for character from the source, on one line.
    template = """ %(bannerTop)s Formula: Y ~ %(formula)s Number of Observations: %(nobs)d Number of Degrees of Freedom: %(df)d R-squared: %(r2)10.4f Adj R-squared: %(r2_adj)10.4f Rmse: %(rmse)10.4f F-stat %(f_stat_shape)s: %(f_stat)10.4f, p-value: %(f_stat_p_value)10.4f Degrees of Freedom: model %(df_model)d, resid %(df_resid)d %(bannerCoef)s %(coef_table)s %(bannerEnd)s """
    coef_table = self._coef_table

    results = self._results
    f_stat = results["f_stat"]

    terms = ["<%s>" % str(c) for c in results["beta"].index]
    first, remaining = terms[0], terms[1:]

    formula = StringIO()
    formula.write(first)
    running = len(first)
    line_no = 1
    continuation = "\n" + " " * 12
    for term in remaining:
        running += len(term) + 3
        # Same test as ``running // (68 * line_no)`` being non-zero.
        if running >= 68 * line_no:
            formula.write(continuation)
            line_no += 1
        formula.write(" + " + term)

    params = {}
    params["bannerTop"] = scom.banner("Summary of Regression Analysis")
    params["bannerCoef"] = scom.banner("Summary of Estimated Coefficients")
    params["bannerEnd"] = scom.banner("End of Summary")
    params["formula"] = formula.getvalue()
    for key in ("r2", "r2_adj", "nobs", "df", "df_model", "df_resid"):
        params[key] = results[key]
    params["coef_table"] = coef_table
    params["rmse"] = results["rmse"]
    params["f_stat"] = f_stat["f-stat"]
    params["f_stat_shape"] = "(%d, %d)" % (f_stat["DF X"], f_stat["DF Resid"])
    params["f_stat_p_value"] = f_stat["p-value"]

    return template % params