def __init__(self):
    """Load the 450/550/700 nm channel tables from the bundled CSV data.

    Each data set is parsed into a pandas DataFrame (space-separated, second
    column as index), sorted by index, and stored as ``self.channel_<nm>``.
    """
    # The three data sets share one layout, so build them uniformly instead
    # of repeating the same read/sort/assign sequence three times.
    for chan in ('450', '550', '700'):
        buff = _StringIO(getattr(data, 'data_' + chan))
        df = _pd.read_csv(buff, sep=' ', index_col=1, names=[chan],
                          engine='python')
        df.sort_index(inplace=True)
        setattr(self, 'channel_' + chan, df)
def main():
    """Pull the page and hand it to the dumper.

    td class="viBodyBorderNorm" find the second table... find all the tr's.
    Transparently gunzips the response when the server compressed it.
    """
    options, url = doOptions()
    opener = urllib2.build_opener()
    response = opener.open(url)
    if response.info().get("Content-Encoding") == "gzip":
        import gzip
        page = gzip.GzipFile(fileobj=_StringIO(response.read())).read()
    else:
        page = response.read()
    dumpPage(page, options)
def open(path, mode="r"):
    """Open *path* read-only, transparently reaching inside zip archives.

    Plain paths defer to the builtin open; paths that resolve into a zip
    member return an in-memory stream.  Any write/append mode raises
    IOError(EINVAL); a missing archive or member raises IOError(ENOENT).
    """
    # Write access is not supported in any form.
    if "w" in mode or "a" in mode:
        raise IOError(_errno.EINVAL, path, "Write access not supported")
    if "r+" in mode:
        raise IOError(_errno.EINVAL, path, "Write access not supported")
    full_path = path
    path, rest = _locate(path)
    if not rest:
        # Ordinary file on disk.
        return _open(path, mode)
    try:
        zf = _zipfile.ZipFile(path, "r")
    except _zipfile.error:
        raise IOError(_errno.ENOENT, full_path, "No such file or directory")
    try:
        data = zf.read(rest)
    except (_zipfile.error, KeyError):
        zf.close()
        raise IOError(_errno.ENOENT, full_path, "No such file or directory")
    zf.close()
    if mode == "rb":
        return _BytesIO(data)
    # Text mode: Python 3 requires decoding the bytes first.
    if _sys.version_info[0] == 3:
        data = data.decode("ascii")
    return _StringIO(data)
def gradient(grad_str): """ read a gradient (hartree bohr^-1) from a string (hartree bohr^-1) """ grad_str_io = _StringIO(grad_str) grad = numpy.loadtxt(grad_str_io) assert grad.ndim == 2 and grad.shape[1] == 3 return tuple(map(tuple, grad))
def __str__(self): sio = _StringIO() # ouput buffer tf = self.total_frame() # get the frame count = 0 mdict = {"col0":0} for k in tf: # first, find the min column sizes not to truncate val = tf[k] if count == 0: mdict.update({(k,len(k)) for k in val._fields}) if len(k) > mdict["col0"]: mdict["col0"] = len(k) for f in val._fields: l = len(str(getattr(val,f))) if l > mdict[f]: mdict[f] = l count += 1 count = 0 for k in tf: # next, 'print' the formatted frame data val = tf[k] if count == 0: print(" " * mdict["col0"], end=' ', file=sio) for f in val._fields: print(f.ljust(mdict[f]),end=' ', file=sio) print('',file=sio) print(k.ljust(mdict["col0"]),end=' ', file=sio) for f in val._fields: print(str(getattr(val,f)).ljust(mdict[f]), end=' ', file=sio) print('',file=sio) count += 1 sio.seek(0) return sio.read()
def __str__(self): sio = _StringIO() # ouput buffer tf = self.total_frame() # get the frame count = 0 mdict = {"col0": 0} for k in tf: # first, find the min column sizes not to truncate val = tf[k] if count == 0: mdict.update({(k, len(k)) for k in val._fields}) if len(k) > mdict["col0"]: mdict["col0"] = len(k) for f in val._fields: l = len(str(getattr(val, f))) if l > mdict[f]: mdict[f] = l count += 1 count = 0 for k in tf: # next, 'print' the formatted frame data val = tf[k] if count == 0: print(" " * mdict["col0"], end=' ', file=sio) for f in val._fields: print(f.ljust(mdict[f]), end=' ', file=sio) print('', file=sio) print(k.ljust(mdict["col0"]), end=' ', file=sio) for f in val._fields: print(str(getattr(val, f)).ljust(mdict[f]), end=' ', file=sio) print('', file=sio) count += 1 sio.seek(0) return sio.read()
def f_decodeCastReader(reader, showError):
    """Read an HTTP response object and decode its body to text.

    Gunzips the payload when Content-Encoding says so, then honours the
    encoding declared in an XML prolog if present; otherwise sniffs for
    common encodings, finally falling back to the raw string form.

    :param reader: response-like object with .read() and .headers
    :param showError: when True, print a message if no encoding was found
    :return: decoded text (possibly the raw repr on total failure)
    """
    decodedStr = ""
    alreadyread = reader.read()
    if 'gzip' in reader.headers.get('content-encoding', '').lower():
        try:
            alreadyread = gzip.GzipFile(fileobj=_StringIO(alreadyread)).read()
        except Exception:  # was a bare except; keep the best-effort behaviour
            alreadyread = ""
    readString = str(alreadyread)
    encodingFound = False
    # Normalise quote style so only single quotes need to be searched for.
    searchString = readString.replace('"', "'")
    xmlIndex = str.find(searchString, "<?xml ", 0)
    if xmlIndex >= 0:
        encodingIndex = str.find(searchString, "encoding='", xmlIndex) + 10
        if encodingIndex > xmlIndex:
            encodingEndIndex = str.find(searchString, "'", encodingIndex)
            if encodingEndIndex > encodingIndex:
                decodedStr = alreadyread.decode(
                    searchString[encodingIndex:encodingEndIndex],
                    errors='ignore')
                encodingFound = True
    if encodingFound is False:
        if 'encoding="ISO-8859-1"'.lower() in readString.lower():
            decodedStr = alreadyread.decode('iso-8859-1', errors='ignore')
        elif 'encoding="utf-8"'.lower() in readString.lower():
            decodedStr = alreadyread.decode('utf_8', errors='ignore')
        else:
            if showError:
                print("UNKNOWN ENCODING!!!")
            decodedStr = readString
    return decodedStr
def __init__(self, filename, delimiter=',', data_start=7, header_start=2, header_len=4, *args, **kws):
    """Split a multi-scan CasaXPS export into individual CasaXPS scans.

    The file is cut at every line containing "Cycle"; each chunk is fed to
    CasaXPS via an in-memory stream and collected in ``self.scan``.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    starts = [i for i, ln in enumerate(lines) if "Cycle" in ln]
    # BUG FIX: the original appended -1 as the final boundary, so the slice
    # lines[a:-1] silently dropped the last line of the final scan.  Use the
    # real end of the file instead.
    starts.append(len(lines))
    self.scan = []
    for a, b in zip(starts, starts[1:]):
        stream = _StringIO("".join(lines[a:b]))
        self.scan.append(CasaXPS(stream, delimiter, data_start,
                                 header_start, header_len, *args, **kws))
def read(self, pathfilename):
    """Parse a sectionless key=value file by prefixing a default section
    header and feeding the result to RawConfigParser."""
    with _codecs.open(pathfilename, 'r', encoding='utf-8') as input_file:
        config_pairs = input_file.read()
    wrapped = "[{0}]{1}{2}".format(self._default_section, _os.linesep, config_pairs)
    with _closing(_StringIO(wrapped)) as default_section:
        _RawConfigParser.readfp(self, default_section)
def loadRemote(site, lItem, data, compression):
    """Decompress gzip-encoded remote data into a fresh CItemTypes container.

    NOTE(review): the visible body never uses the decompressed data on the
    success path -- this looks like a truncated excerpt; confirm upstream.
    """
    items = CItemTypes()
    if compression == 'gzip':
        try:
            data = gzip.GzipFile(fileobj=_StringIO(data)).read()
        except (IOError, struct.error) as e:  # 'as' syntax: valid on py2.6+ and py3
            log('Skipping due to gzip decompression failure')
            return items
def hessian(hess_str):
    """ read a hessian (hartree bohr^-2) from a string (hartree bohr^-2) """
    hess = numpy.loadtxt(_StringIO(hess_str))
    # must be a square 3N x 3N matrix
    assert hess.ndim == 2
    assert hess.shape[0] % 3 == 0 and hess.shape[0] == hess.shape[1]
    return tuple(map(tuple, hess))
def _2d_square_matrix(mat_str):
    """ whitespace separated string to a 2D square tuple-of-tuples of floats """
    mat = numpy.loadtxt(_StringIO(mat_str))
    assert mat.ndim == 2
    assert mat.shape[0] == mat.shape[1]
    return tuple(map(tuple, mat))
def read(self, pathfilename):
    """Parse a sectionless key=value file by prefixing a default section
    header and feeding the result to RawConfigParser."""
    with codecs.open(pathfilename, 'r', encoding='utf-8') as input_file:
        config_pairs = input_file.read()
    wrapped = "[{0}]{1}{2}".format(self._default_section, os.linesep, config_pairs)
    with _closing(_StringIO(wrapped)) as default_section:
        _RawConfigParser.readfp(self, default_section)
def _frequencies(freq_str):
    """ whitespace separated string to a tuple of frequencies (floats) """
    tokens = freq_str.split()
    if len(tokens) == 1:
        # numpy.loadtxt would give a 0-d array for one value; parse directly
        return tuple(float(tok) for tok in tokens)
    freqs = numpy.loadtxt(_StringIO(freq_str))
    assert freqs.ndim == 1
    return tuple(freqs)
def test_get_model_summary_recursive(self):
    """Smoke-test get_model_summary on a recursive network; passes as long
    as summarisation does not raise."""
    # suppress summary print output
    with _redirect_stdout(_StringIO()) as _:
        # test recursive network
        model = _RecursiveNetwork()
        _tu.get_model_summary(model, _torch.rand((1, 10)))
    # pass if no error in printing summary
    self.assertTrue(True)
def load_numpy_string_file(path_lst, file_name, path=PATH):
    """ Read a whitespace-delimited numeric file into a numpy array """
    file_str = read_text_file(path_lst, file_name, path=path)
    return numpy.loadtxt(_StringIO(file_str))
def test_get_model_summary_multi_input(self):
    """Smoke-test get_model_summary with two input tensors; passes as long
    as summarisation does not raise."""
    # suppress summary print output
    with _redirect_stdout(_StringIO()) as _:
        # test multiple input network
        model = _MultipleInputNetwork()
        _tu.get_model_summary(model, _torch.rand((1, 3, 28, 28)),
                              _torch.rand((1, 3, 28, 28)))
    # pass if no error in printing summary
    self.assertTrue(True)
def _open_resource(url_file_stream_or_string, handle_url):
    """URL, filename, or string --> stream

    This function lets you define parsers that take any input source
    (URL, pathname to local or network file, or actual data as a string)
    and deal with it in a uniform manner.  Returned object is guaranteed
    to have all the basic stdio read methods (read, readline, readlines).
    Just .close() the object when you're done with it.

    The handle_url argument will be passed the input source if it's
    determined to be a retrievable URL.
    """
    # Already a file-like object: pass it straight through.
    if hasattr(url_file_stream_or_string, 'read'):
        return url_file_stream_or_string
    # Conventional "-" means standard input.
    if url_file_stream_or_string == '-':
        return sys.stdin
    if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'):
        # Deal with the feed URI scheme
        if url_file_stream_or_string.startswith('feed:http'):
            url_file_stream_or_string = url_file_stream_or_string[5:]
        elif url_file_stream_or_string.startswith('feed:'):
            url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:]
        # test for inline user:password for basic auth
        auth = None
        if base64:
            urltype, rest = urllib.splittype(url_file_stream_or_string)
            realhost, rest = urllib.splithost(rest)
            if realhost:
                user_passwd, realhost = urllib.splituser(realhost)
                if user_passwd:
                    url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest)
                    auth = base64.standard_b64encode(user_passwd).strip()
        # NOTE(review): 'auth' is computed but never used after this point
        # in the visible code -- confirm whether handle_url is expected to
        # redo the credential handling.
        # iri support
        try:
            if isinstance(url_file_stream_or_string,unicode):
                url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8')
            else:
                url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8')
        except:
            pass
        # Handle it as a URL.
        return handle_url(url_file_stream_or_string)
    # try to open with native open function (if url_file_stream_or_string is a filename)
    try:
        return open(url_file_stream_or_string, 'rb')
    except:
        pass
    # treat url_file_stream_or_string as string
    return _StringIO(str(url_file_stream_or_string))
def _2d_square_matrix(mat):
    """ 2D square matrix to a whitespace separated string (numpy.savetxt) """
    arr = numpy.array(mat)
    assert arr.ndim == 2
    assert arr.shape[0] == arr.shape[1]
    buf = _StringIO()
    numpy.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def test_get_model_summary_compact(self):
    """Smoke-test the compact form of get_model_summary on AlexNet; passes
    as long as summarisation does not raise."""
    # suppress summary print output
    with _redirect_stdout(_StringIO()) as _:
        # test compact representation
        model = _torchvision.models.alexnet()
        _tu.get_model_summary(model, _torch.rand((1, 3, 224, 224)), compact=True)
    # pass if no error in printing summary
    self.assertTrue(True)
def _np_arr_string(arr):
    """ Serialise an array to text via numpy.savetxt """
    buf = _StringIO()
    np.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def vibro_rot_alpha_matrix(vibro_rot_str):
    """ read a vibro-rot alpha matrix (cm^-1) from a string (cm^-1)

    A single value parses as a 0-d array; it is wrapped into a 1x1 nested
    tuple so the return type is always a tuple of tuples (the original
    accepted ndim == 0 in the assertion but then crashed in
    ``tuple(map(tuple, mat))``; this mirrors anharmonicity_matrix).
    """
    mat = numpy.loadtxt(_StringIO(vibro_rot_str))
    assert mat.ndim == 2 or mat.ndim == 0
    if mat.ndim == 2:
        assert mat.shape[0] == mat.shape[1]
        return tuple(map(tuple, mat))
    # 0-d case: single scalar value
    return ((float(mat),),)
def _frequencies(freq_str):
    """ whitespace separated string to a tuple of frequencies (floats) """
    tokens = freq_str.split()
    if len(tokens) == 1:
        # numpy.loadtxt would give a 0-d array for one value; parse directly
        return tuple(float(tok) for tok in tokens)
    freqs = numpy.loadtxt(_StringIO(freq_str))
    assert freqs.ndim == 1
    return tuple(freqs)
def gradient(grad):
    """ write a gradient (hartree bohr^-1) to a string (hartree bohr^-1) """
    arr = numpy.array(grad)
    # must be an N x 3 array of cartesian components
    assert arr.ndim == 2 and arr.shape[1] == 3
    buf = _StringIO()
    numpy.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def read(self, pathfilename):
    """Parse a sectionless key=value file by prefixing a default section
    header and feeding the result to RawConfigParser."""
    # Expand ~ and resolve symlinks/relative segments before opening.
    pathfilename = os.path.realpath(os.path.expanduser(pathfilename))
    with codecs.open(pathfilename, 'r', encoding=ServiceDefault.CHAR_CODEC) as input_file:
        config_pairs = input_file.read()
    wrapped = "[{0}]{1}{2}".format(self._default_section, os.linesep, config_pairs)
    with _closing(_StringIO(wrapped)) as default_section:
        _RawConfigParser.readfp(self, default_section)
def gradient(grad_str):
    """ read a gradient (hartree bohr^-1) from a string (hartree bohr^-1)

    :param grad_str: gradient string
    :type grad_str: str
    :return: gradient as internally used tuple object
    :rtype: tuple
    """
    grad = numpy.loadtxt(_StringIO(grad_str))
    # must be an N x 3 array of cartesian components
    assert grad.ndim == 2 and grad.shape[1] == 3
    return tuple(map(tuple, grad))
def vibro_rot_alpha_matrix(vibro_rot_mat):
    """ write vibro-rot alpha matrix (cm^-1) to a string (cm^-1) """
    arr = numpy.array(vibro_rot_mat)
    assert arr.ndim == 2
    buf = _StringIO()
    numpy.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def hessian(hess):
    """ write a hessian (hartree bohr^-2) to a string (hartree bohr^-2) """
    arr = numpy.array(hess)
    # must be a square 3N x 3N matrix
    assert arr.ndim == 2
    assert arr.shape[0] % 3 == 0 and arr.shape[0] == arr.shape[1]
    buf = _StringIO()
    numpy.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def dipole_moment(dip_mom_str):
    """ reads the x,y,z dipole moment vector from a string

    :param dip_mom_str: x,y,z dipole moment vector
    :type dip_mom_str: str
    :return: x, y, z dipole moment tuple
    :rtype: tuple
    """
    dip_mom = numpy.loadtxt(_StringIO(dip_mom_str))
    # exactly three cartesian components expected
    assert dip_mom.ndim == 1
    assert dip_mom.shape[0] == 3
    return tuple(dip_mom)
def polarizability(polar_str):
    """ read a polarizability tensor () from a string

    :param polar_str: polarizability tensor
    :type polar_str: str
    :return: polarizability tensor
    :rtype: tuple
    """
    polar = numpy.loadtxt(_StringIO(polar_str))
    # 3x3 tensor expected
    assert polar.ndim == 2
    assert polar.shape[0] == polar.shape[1] == 3
    return tuple(map(tuple, polar))
def hessian(hess_str):
    """ read a hessian (hartree bohr^-2) from a string (hartree bohr^-2)

    :param hess_str: hessian string
    :type hess_str: str
    :return: hessian as 3nx3n tuple
    :rtype: tuple
    """
    hess = numpy.loadtxt(_StringIO(hess_str))
    # must be a square 3N x 3N matrix
    assert hess.ndim == 2
    assert hess.shape[0] % 3 == 0 and hess.shape[0] == hess.shape[1]
    return tuple(map(tuple, hess))
def anharmonicity_matrix(xmat_str):
    """ read an anharmonicity matrix (cm^-1) from a string (cm^-1) """
    mat = numpy.loadtxt(_StringIO(xmat_str))
    assert mat.ndim in (0, 2)
    if mat.ndim == 0:
        # single value: wrap into a 1x1 nested tuple
        return ((mat,),)
    assert mat.shape[0] == mat.shape[1]
    return tuple(map(tuple, mat))
def csv(self, value):
    """ Set the Visual Studio Code document text by converting a list of lists to CSV. """
    buffer = _StringIO()
    csv.writer(buffer).writerows(value)
    self.text = buffer.getvalue()
    buffer.close()
def anharmonicity_matrix(xmat):
    """ write anharmonicity matrix (cm^-1) to a string (cm^-1) """
    arr = numpy.array(xmat)
    assert arr.ndim in (0, 2)
    if arr.ndim == 2:
        assert arr.shape[0] == arr.shape[1]
    buf = _StringIO()
    numpy.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def _to_pil_image(self):
    # Convert this image object into a PIL Image.
    from PIL import Image as _PIL_image
    if self._format_enum == _format[_RAW]:
        # Raw pixel buffer: choose the PIL mode from the channel count.
        if self.channels == 1:
            img = _PIL_image.frombytes('L', (self._width, self._height), bytes(self._image_data))
        elif self.channels == 3:
            img = _PIL_image.frombytes('RGB', (self._width, self._height), bytes(self._image_data))
        elif self.channels == 4:
            img = _PIL_image.frombytes('RGBA', (self._width, self._height), bytes(self._image_data))
        else:
            raise ValueError('Unsupported channel size: ' + str(self.channels))
    else:
        # Encoded data (e.g. PNG/JPEG): let PIL decode the byte stream.
        # NOTE(review): _StringIO over binary image data only works on
        # Python 2; Python 3 would need BytesIO -- confirm supported runtime.
        img = _PIL_image.open(_StringIO(self._image_data))
    return img
def gradient(grad):
    """ write a gradient (hartree bohr^-1) to a string (hartree bohr^-1)

    :param grad: gradient tuple
    :type grad: tuple
    :return: gradient string
    :rtype: str
    """
    arr = numpy.array(grad)
    # must be an N x 3 array of cartesian components
    assert arr.ndim == 2 and arr.shape[1] == 3
    buf = _StringIO()
    numpy.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def vibro_rot_alpha_matrix(vibro_rot_mat):
    """ write vibro-rot alpha matrix (cm^-1) to a string (cm^-1)

    :param vibro_rot_mat: matrix as tuple
    :type vibro_rot_mat: tuple
    :return: vibro-rot alpha matrix string
    :rtype: str
    """
    arr = numpy.array(vibro_rot_mat)
    assert arr.ndim == 2
    buf = _StringIO()
    numpy.savetxt(buf, arr)
    text = buf.getvalue()
    buf.close()
    return text
def decoded(self):
    """Returns the full text of the NOTAM, with ICAO abbreviations decoded
    into their un-abbreviated form where appropriate."""
    with _StringIO() as sb:
        # (start, end) character ranges of items D-G within full_text; only
        # the text inside those ranges gets abbreviation expansion, the
        # text between ranges is copied verbatim.
        indices = [getattr(self, 'indices_item_{}'.format(i)) for i in ('d', 'e', 'f', 'g')]
        indices = [i for i in indices if i is not None]
        indices.sort()  # The items should already be listed in the order of their apperance in the text, but
        # we sort them here just in case
        # Sentinel ranges so the zip below covers the head and tail of the text.
        indices = [(0, 0)] + indices + [(-1, -1)]
        for (cur, nxt) in zip(indices, indices[1:]):
            (cs, ce) = cur
            (ns, ne) = nxt
            sb.write(self.decode_abbr(self.full_text[cs:ce]))  # decode the text of this range
            sb.write(self.full_text[ce:ns])  # copy the text from end of current range to start
            # of next verbatim
        return sb.getvalue()
def open(path, mode='r'):
    """Open *path* read-only, transparently reaching inside zip archives.

    Plain paths defer to the builtin open; paths that resolve into a zip
    member return an in-memory stream.  Any write/append mode raises
    IOError(EINVAL); a missing archive or member raises IOError(ENOENT).
    """
    # Write access is not supported in any form.
    if 'w' in mode or 'a' in mode:
        raise IOError(_errno.EINVAL, path, "Write access not supported")
    if 'r+' in mode:
        raise IOError(_errno.EINVAL, path, "Write access not supported")
    full_path = path
    path, rest = _locate(path)
    if not rest:
        # Ordinary file on disk.
        return _open(path, mode)
    try:
        zf = _zipfile.ZipFile(path, 'r')
    except _zipfile.error:
        raise IOError(_errno.ENOENT, full_path, "No such file or directory")
    try:
        data = zf.read(rest)
    except (_zipfile.error, KeyError):
        zf.close()
        raise IOError(_errno.ENOENT, full_path, "No such file or directory")
    zf.close()
    if mode == 'rb':
        return _BytesIO(data)
    # Text mode: Python 3 requires decoding the bytes first.
    if _sys.version_info[0] == 3:
        data = data.decode('ascii')
    return _StringIO(data)
def getOpeningLines(): opener = urllib2.build_opener() response = opener.open ("http://www.vegasinsider.com/nhl/odds/las-vegas/") if response.info().get( 'Content-Encoding' ) == 'gzip': f = gzip.GzipFile( fileobj=_StringIO( response.read() )) page = f.read() else: print "Page wasn't coded with expected Content Encoding. Ending execution." pass soup = BeautifulSoup( page ) mainTable = soup.findChild( 'td', { "class" : "viBodyBorderNorm" }) tables = mainTable.findAll( 'table' ) oddsTable = tables[ 1 ] rows = oddsTable.findAll( 'tr' ) for aRow in rows: teams = aRow.findChildren( 'a', { "class" : "tabletext" }) print teams
def format(self, object, *a,**k):
    # Pretty-printer hook: strings are re-quoted/escaped by hand, datetimes
    # get a compact "DT( ... )" form, everything else is delegated upward.
    typ = type(object)
    if isinstance(object, string_types):
        # NOTE(review): isinstance(typ, str) tests the *type object*, which
        # is never a str instance -- this decode branch looks unreachable;
        # confirm whether isinstance(object, str) was intended.
        if PY2 and isinstance(typ,str):
            object = object.decode("utf-8")
    elif typ is _dt.datetime:
        return "DT( %s )"%(format_dt(object),),True,False
    else:
        return super(UTFPrinter,self).format(object,*a,**k)
    s = repr(object)
    if '\\' not in s:
        # No escapes needed; s[1:] drops a py2 'u' prefix when present.
        if s[0] in ('"',"'"):
            return s,True,False
        else:
            return s[1:],True,False
    # more work here
    # Pick the quote style needing the least escaping (s[2:-1] skips the
    # repr prefix/quotes).
    if "'" in s[2:-1] and '"' not in s[2:-1]:
        closure = '"'
        quotes = {'"': '\\"'}
    else:
        closure = "'"
        quotes = {"'": "\\'"}
    qget = quotes.get
    sio = _StringIO()
    write = sio.write
    for char in object:
        if not char.isalpha():
            # Non-letters go through repr to get their escape sequence.
            # NOTE(review): when qget hits (char is the chosen quote), the
            # [1:-1] slice below reduces the two-char escape to '' -- the
            # quote is dropped rather than escaped; confirm intent.
            char = qget(char, text_type(repr(char)))
            if char[0] == 'u':
                char = char[2:-1]
            else:
                char = char[1:-1]
        else:
            char = text_type(char)
        write(char)
    return ("%s%s%s" % (closure, sio.getvalue(), closure)), True, False
def format(self, object, context, maxlevels, level):
    # Pretty-printer hook (py2): byte strings are decoded to unicode and
    # re-quoted by hand; datetimes get a compact "DT( ... )" form; anything
    # that is not unicode falls back to the safe repr helper.
    typ = type(object)
    if typ in string_types:
        object = object.decode("utf-8")
    elif typ is _dt.datetime:
        return "DT( %s )"%(format_dt(object),),True,False
    elif typ is not unicode:
        return _safe_repr(object, context, maxlevels, level)
    # Pick the quote style needing the least escaping.
    if "'" in object and '"' not in object:
        closure = '"'
        quotes = {'"': '\\"'}
    else:
        closure = "'"
        quotes = {"'": "\\'"}
    qget = quotes.get
    sio = _StringIO()
    write = sio.write
    for char in object:
        if char.isalpha():
            write(char)
        else:
            # repr(char)[2:-1] strips the py2 u'' wrapper from the escape.
            write(qget(char, repr(char)[2:-1]))
    return ("%s%s%s" % (closure, sio.getvalue(), closure)), True, False
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None, resolve_relative_uris=None, sanitize_html=None):
    '''Parse a feed from a URL, file, stream, or string.

    :param url_file_stream_or_string:
        File-like object, URL, file path, or string. Both byte and text strings
        are accepted. If necessary, encoding will be derived from the response
        headers or automatically detected.

        Note that strings may trigger network I/O or filesystem access
        depending on the value. Wrap an untrusted string in a
        :class:`io.StringIO` or :class:`io.BytesIO` to avoid this. Do not
        pass untrusted strings to this function.

        When a URL is not passed the feed location to use in relative URL
        resolution should be passed in the ``Content-Location`` response header
        (see ``response_headers`` below).
    :param str etag: HTTP ``ETag`` request header.
    :param modified: HTTP ``Last-Modified`` request header.
    :type modified: :class:`str`, :class:`time.struct_time` 9-tuple, or
        :class:`datetime.datetime`
    :param str agent: HTTP ``User-Agent`` request header, which defaults to
        the value of :data:`feedparser.USER_AGENT`.
    :param referrer: HTTP ``Referer`` [sic] request header.
    :param request_headers:
        A mapping of HTTP header name to HTTP header value to add to the
        request, overriding internally generated values.
    :type request_headers: :class:`dict` mapping :class:`str` to :class:`str`
    :param response_headers:
        A mapping of HTTP header name to HTTP header value. Multiple values may
        be joined with a comma. If a HTTP request was made, these headers
        override any matching headers in the response. Otherwise this specifies
        the entirety of the response headers.
    :type response_headers: :class:`dict` mapping :class:`str` to :class:`str`
    :param bool resolve_relative_uris:
        Should feedparser attempt to resolve relative URIs absolute ones within
        HTML content?  Defaults to the value of
        :data:`feedparser.RESOLVE_RELATIVE_URIS`, which is ``True``.
    :param bool sanitize_html:
        Should feedparser skip HTML sanitization? Only disable this if you know
        what you are doing!  Defaults to the value of
        :data:`feedparser.SANITIZE_HTML`, which is ``True``.

    :return: A :class:`FeedParserDict`.
    '''
    # Late import so module-level defaults are only needed when unset.
    if not agent or sanitize_html is None or resolve_relative_uris is None:
        import feedparser
        if not agent:
            agent = feedparser.USER_AGENT
        if sanitize_html is None:
            sanitize_html = feedparser.SANITIZE_HTML
        if resolve_relative_uris is None:
            resolve_relative_uris = feedparser.RESOLVE_RELATIVE_URIS
    result = FeedParserDict(
        bozo = False,
        entries = [],
        feed = FeedParserDict(),
        headers = {},
    )
    data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)
    if not data:
        return result
    # overwrite existing headers using response_headers
    result['headers'].update(response_headers or {})
    data = convert_to_utf8(result['headers'], data, result)
    # Only use the strict (SAX) parser when the encoding is known.
    use_strict_parser = result['encoding'] and True or False
    result['version'], data, entities = replace_doctype(data)
    # Ensure that baseuri is an absolute URI using an acceptable URI scheme.
    contentloc = result['headers'].get('content-location', '')
    href = result.get('href', '')
    baseuri = _makeSafeAbsoluteURI(href, contentloc) or _makeSafeAbsoluteURI(contentloc) or href
    baselang = result['headers'].get('content-language', None)
    if isinstance(baselang, bytes_) and baselang is not None:
        baselang = baselang.decode('utf-8', 'ignore')
    if not _XML_AVAILABLE:
        use_strict_parser = 0
    if use_strict_parser:
        # initialize the SAX parser
        feedparser = StrictFeedParser(baseuri, baselang, 'utf-8')
        feedparser.resolve_relative_uris = resolve_relative_uris
        feedparser.sanitize_html = sanitize_html
        saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
        saxparser.setFeature(xml.sax.handler.feature_namespaces, 1)
        try:
            # disable downloading external doctype references, if possible
            saxparser.setFeature(xml.sax.handler.feature_external_ges, 0)
        except xml.sax.SAXNotSupportedException:
            pass
        saxparser.setContentHandler(feedparser)
        saxparser.setErrorHandler(feedparser)
        source = xml.sax.xmlreader.InputSource()
        source.setByteStream(_StringIO(data))
        try:
            saxparser.parse(source)
        except xml.sax.SAXException as e:
            # Strict parse failed: record the problem and fall back to the
            # loose parser below.
            result['bozo'] = 1
            result['bozo_exception'] = feedparser.exc or e
            use_strict_parser = 0
    if not use_strict_parser and _SGML_AVAILABLE:
        feedparser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
        feedparser.resolve_relative_uris = resolve_relative_uris
        feedparser.sanitize_html = sanitize_html
        feedparser.feed(data.decode('utf-8', 'replace'))
    result['feed'] = feedparser.feeddata
    result['entries'] = feedparser.entries
    result['version'] = result['version'] or feedparser.version
    result['namespaces'] = feedparser.namespacesInUse
    return result
def execRun(*args):
    """Run *args* through _S._run, capturing and returning the stripped
    (stderr, stdout) text."""
    err_buf = _StringIO()
    out_buf = _StringIO()
    _S._run(args, err_buf, out_buf)
    return err_buf.getvalue().strip(), out_buf.getvalue().strip()
def collection_to_string(collection, depth=3):
    """Render *collection* (down to *depth* levels) to a string using the
    iterator printer."""
    with _closing(_StringIO()) as sink:
        _itr_printer(collection, depth = depth, stream = sink)
        return sink.getvalue()
for key in topic_dict: if key in TOPIC_VAL_REPLACE: # don't just .update, check all are valid topic_dict[key] = TOPIC_VAL_REPLACE[key] with open(_OUTPUT_PATH,'w') as pfile: pfile.write('# AUTO-GENERATED BY tosdb/setup.py\n') pfile.write('# DO NOT EDIT!\n\n') for c in consts: pfile.write(c.replace('TOSDB_','',1) + ' = ' + consts[c] + '\n') pfile.write('\n\n') pfile.write('from tosdb.meta_enum import MetaEnum\n') pfile.write('class TOPICS(metaclass=MetaEnum):\n') pfile.write(' fields = ' + str(topic_dict) + '\n') if __name__ == '__main__': sio = _StringIO() serr = _sys.stderr _sys.stderr = sio try: print("pulling constants from " + _HEADER_PATH) consts = _pull_consts_from_header() print("pulling topic enum from " + _HEADER_PATH) topics = _pull_topics_from_header() print('auto-generating ' + _OUTPUT_PATH) _create__tosdb(consts, topics) print(' checking ' + _OUTPUT_PATH) try: exec("from " + NAME + " import " + _AUTO_EXT) except ImportError as ie: print(' fatal: auto-generated ' + _OUTPUT_PATH + ' could not be imported !') print(' fatal: ' + ie.args[0])
def pformat(self, object):
    """Format *object* into a string using the printer's _format machinery."""
    buf = _StringIO()
    self._format(object, buf, 0, 0, {}, 0)
    return buf.getvalue()
def _safe_repr(object, context, maxlevels, level):
    """Recursive repr with depth limiting and cycle detection (pprint-style).

    Returns ``(repr_string, is_readable, is_recursive)``.

    The visible source contained decompiler artifacts -- ``while krecur or
    vrecur: recursive = True`` followed by the post-loop ``del``/``return``
    sucked inside the item loops -- which would either loop forever or return
    after the first item and fall through the dict branch.  Restored here to
    the canonical CPython ``pprint._safe_repr`` control flow.
    """
    typ = _type(object)
    if typ is str:
        if 'locale' not in _sys.modules:
            return (repr(object), True, False)
        # Choose the quote style that needs the least escaping.
        if "'" in object and '"' not in object:
            closure = '"'
            quotes = {'"': '\\"'}
        else:
            closure = "'"
            quotes = {"'": "\\'"}
        qget = quotes.get
        sio = _StringIO()
        write = sio.write
        for char in object:
            if char.isalpha():
                write(char)
            else:
                # repr(char)[1:-1] strips the surrounding quotes.
                write(qget(char, repr(char)[1:-1]))
        return ('%s%s%s' % (closure, sio.getvalue(), closure), True, False)
    r = getattr(typ, '__repr__', None)
    if issubclass(typ, dict) and r is dict.__repr__:
        if not object:
            return ('{}', True, False)
        objid = _id(object)
        if maxlevels and level >= maxlevels:
            return ('{...}', False, objid in context)
        if objid in context:
            return (_recursion(object), False, True)
        # Mark this object as in-progress for cycle detection.
        context[objid] = 1
        readable = True
        recursive = False
        components = []
        append = components.append
        level += 1
        saferepr = _safe_repr
        items = sorted(object.items(), key=_safe_tuple)
        for (k, v) in items:
            (krepr, kreadable, krecur) = saferepr(k, context, maxlevels, level)
            (vrepr, vreadable, vrecur) = saferepr(v, context, maxlevels, level)
            append('%s: %s' % (krepr, vrepr))
            readable = readable and (kreadable and vreadable)
            if krecur or vrecur:
                recursive = True
        del context[objid]
        return ('{%s}' % _commajoin(components), readable, recursive)
    if (issubclass(typ, list) and r is list.__repr__) or \
       (issubclass(typ, tuple) and r is tuple.__repr__):
        if issubclass(typ, list):
            if not object:
                return ('[]', True, False)
            format = '[%s]'
        elif _len(object) == 1:
            format = '(%s,)'
        else:
            if not object:
                return ('()', True, False)
            format = '(%s)'
        objid = _id(object)
        if maxlevels and level >= maxlevels:
            return (format % '...', False, objid in context)
        if objid in context:
            return (_recursion(object), False, True)
        context[objid] = 1
        readable = True
        recursive = False
        components = []
        append = components.append
        level += 1
        for o in object:
            (orepr, oreadable, orecur) = _safe_repr(o, context, maxlevels, level)
            append(orepr)
            if not oreadable:
                readable = False
            if orecur:
                recursive = True
        del context[objid]
        return (format % _commajoin(components), readable, recursive)
    rep = repr(object)
    return (rep, rep and not rep.startswith('<'), False)
def error(self, error_message):
    """Raise ArgumentError carrying the message, the usage text and the
    program name (instead of argparse's default exit behaviour)."""
    with _closing(_StringIO()) as usage_buf:
        self.print_usage(usage_buf)
        message = EBSCliAttr.ErrorMsg.format(error_message, usage_buf.getvalue(), self.prog)
        raise ArgumentError(message)
def fetch_url(self, url_data=None, url_headers=None, force_parse=False): """Fetches the url and returns it.""" # should we even download/parse this? if not force_parse and ONLY_PARSE_SAFE_FILES: url_parts = RE_url_parts.match(self.url).groups() if url_parts[1]: url_fpath = url_parts[1].split('.') if len(url_fpath) == 0: # i have no idea what this file is , it's likely using a directory index pass elif len(url_fpath) > 1: url_fext = url_fpath[-1] if url_fext in PARSE_SAFE_FILES: pass else: raise NotParsable("I don't know what this file is") ## borrowing some ideas from http://code.google.com/p/feedparser/source/browse/trunk/feedparser/feedparser.py#3701 req = None raw = None if not url_headers: url_headers = {} # if someone does usertracking with sharethis.com, they get a hashbang like this: http://example.com/page#.UHeGb2nuVo8 # that f***s things up. url = self.url.split('#')[0] req = urllib2.Request(url, url_data, url_headers) req.add_header('Accept-Encoding', 'gzip, deflate') raw = CustomHTTPRedirectOpener.open(req) html = raw.read() # lowercase all of the HTTP headers for comparisons per RFC 2616 http_headers = dict((k.lower(), v) for k, v in raw.headers.items()) if 'gzip' in http_headers.get('content-encoding', ''): try: html = gzip.GzipFile(fileobj=_StringIO(html)).read() except (IOError, struct.error): try: # apparently the gzip module isn't too good and doesn't follow spec # here's a wonderful workaround # http://stackoverflow.com/questions/4928560/how-can-i-work-with-gzip-files-which-contain-extra-data gzipfile = _StringIO(html) html = zlib.decompress(gzipfile.read()[10:], -zlib.MAX_WBITS) except: raise elif 'deflate' in http_headers.get('content-encoding', ''): try: html = zlib.decompress(html) except zlib.error: try: # The data may have no headers and no checksum. html = zlib.decompress(html, -15) except zlib.error: raise self.url_actual = raw.geturl() self.url_info = raw.info() return html
def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, result=None):
    """Download `url` and return its (decompressed) body bytes.

    Side effects: populates `result` (a mutable mapping) with 'headers',
    optionally 'bozo'/'bozo_exception', 'etag', 'modified',
    'modified_parsed', 'href', 'status', and on HTTP 304 'version' plus
    'debug_message'.  Conditional-GET validators (`etag`, `modified`) and
    auth/referrer/agent customization are applied to the request.
    Returns None when a corrupt gzip body was detected.
    """
    if handlers is None:
        handlers = []
    elif not isinstance(handlers, list):
        handlers = [handlers]
    if request_headers is None:
        request_headers = {}

    # Deal with the feed URI scheme
    if url.startswith('feed:http'):
        url = url[5:]
    elif url.startswith('feed:'):
        url = 'http:' + url[5:]

    if not agent:
        agent = USER_AGENT

    # Test for inline user:password credentials for HTTP basic auth
    auth = None
    if base64 and not url.startswith('ftp:'):
        urltype, rest = urllib.parse.splittype(url)
        realhost, rest = urllib.parse.splithost(rest)
        if realhost:
            user_passwd, realhost = urllib.parse.splituser(realhost)
            if user_passwd:
                # strip the credentials out of the URL; send them as a header instead
                url = '%s://%s%s' % (urltype, realhost, rest)
                auth = base64.standard_b64encode(user_passwd).strip()

    # iri support
    if not isinstance(url, bytes_):
        url = _convert_to_idn(url)

    # try to open with urllib2 (to use optional headers)
    request = _build_urllib2_request(url, agent, ACCEPT_HEADER, etag, modified, referrer, auth, request_headers)
    opener = urllib.request.build_opener(*tuple(handlers + [_FeedURLHandler()]))
    opener.addheaders = []  # RMK - must clear so we only send our custom User-Agent
    f = opener.open(request)
    data = f.read()
    f.close()

    # lowercase all of the HTTP headers for comparisons per RFC 2616
    result['headers'] = dict((k.lower(), v) for k, v in f.headers.items())

    # if feed is gzip-compressed, decompress it
    if data and 'gzip' in result['headers'].get('content-encoding', ''):
        try:
            data = gzip.GzipFile(fileobj=_StringIO(data)).read()
        except (EOFError, IOError, struct.error) as e:
            # IOError can occur if the gzip header is bad.
            # struct.error can occur if the data is damaged.
            result['bozo'] = True
            result['bozo_exception'] = e
            if isinstance(e, struct.error):
                # A gzip header was found but the data is corrupt.
                # Ideally, we should re-request the feed without the
                # 'Accept-encoding: gzip' header, but we don't.
                data = None
    elif data and 'deflate' in result['headers'].get('content-encoding', ''):
        try:
            data = zlib.decompress(data)
        except zlib.error as e:
            try:
                # The data may have no headers and no checksum.
                data = zlib.decompress(data, -15)
            except zlib.error as e:
                result['bozo'] = True
                result['bozo_exception'] = e

    # save HTTP headers
    if 'etag' in result['headers']:
        etag = result['headers'].get('etag', '')
        if isinstance(etag, bytes_):
            etag = etag.decode('utf-8', 'ignore')
        if etag:
            result['etag'] = etag
    if 'last-modified' in result['headers']:
        modified = result['headers'].get('last-modified', '')
        if modified:
            result['modified'] = modified
            result['modified_parsed'] = _parse_date(modified)
    if isinstance(f.url, bytes_):
        result['href'] = f.url.decode('utf-8', 'ignore')
    else:
        result['href'] = f.url
    result['status'] = getattr(f, 'status', 200)

    # Stop processing if the server sent HTTP 304 Not Modified.
    if getattr(f, 'code', 0) == 304:
        result['version'] = ''
        result['debug_message'] = 'The feed has not changed since you last checked, ' + \
            'so the server sent no data. This is a feature, not a bug!'

    return data
# set to true if you want to log queries in DB_FILE LOG_QUERIES = False if LOG_QUERIES: import os, datetime, create_log_db, sqlite3 def cgi_finalizer(input_code, output_trace): """Write JSON output for js/pytutor.js as a CGI result.""" ret = dict(code=input_code, trace=output_trace) json_output = json.dumps(ret, indent=None) # use indent=None for most compact repr # print("Content-type: text/plain; charset=iso-8859-1\n") print(json_output) options_json = '{"cumulative_mode":false,"heap_primitives":false,"show_only_outputs":false}' request = json.loads("".join(line for line in sys.stdin)) user_script = request['user_script'] from io import StringIO as _StringIO raw_input_json = request['raw_input_json'] sys.stdin = _StringIO(raw_input_json) pg_logger.exec_script_str(user_script, raw_input_json, options_json, cgi_finalizer)
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None):
    '''Parse a feed from a URL, file, stream, or string.

    request_headers, if given, is a dict from http header name to value to add
    to the request; this overrides internally generated values.

    :return: A :class:`FeedParserDict`.
    '''
    if not agent:
        agent = USER_AGENT
    result = FeedParserDict(
        bozo = False,
        entries = [],
        feed = FeedParserDict(),
        headers = {},
    )

    # _open_resource fills result['headers'] / status / href as a side effect
    data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)
    if not data:
        return result

    # overwrite existing headers using response_headers
    result['headers'].update(response_headers or {})

    data = convert_to_utf8(result['headers'], data, result)
    # strict parsing is only worthwhile when the encoding was determined
    use_strict_parser = result['encoding'] and True or False

    result['version'], data, entities = replace_doctype(data)

    # Ensure that baseuri is an absolute URI using an acceptable URI scheme.
    contentloc = result['headers'].get('content-location', '')
    href = result.get('href', '')
    baseuri = _makeSafeAbsoluteURI(href, contentloc) or _makeSafeAbsoluteURI(contentloc) or href

    baselang = result['headers'].get('content-language', None)
    if isinstance(baselang, bytes_) and baselang is not None:
        baselang = baselang.decode('utf-8', 'ignore')

    if not _XML_AVAILABLE:
        use_strict_parser = 0
    if use_strict_parser:
        # initialize the SAX parser
        feedparser = StrictFeedParser(baseuri, baselang, 'utf-8')
        saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS)
        saxparser.setFeature(xml.sax.handler.feature_namespaces, 1)
        try:
            # disable downloading external doctype references, if possible
            saxparser.setFeature(xml.sax.handler.feature_external_ges, 0)
        except xml.sax.SAXNotSupportedException:
            pass
        saxparser.setContentHandler(feedparser)
        saxparser.setErrorHandler(feedparser)
        source = xml.sax.xmlreader.InputSource()
        source.setByteStream(_StringIO(data))
        try:
            saxparser.parse(source)
        except xml.sax.SAXException as e:
            # strict parse failed; record the problem and fall back below
            result['bozo'] = 1
            result['bozo_exception'] = feedparser.exc or e
            use_strict_parser = 0
    if not use_strict_parser and _SGML_AVAILABLE:
        # loose (SGML-based) fallback parser
        feedparser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
        feedparser.feed(data.decode('utf-8', 'replace'))
    result['feed'] = feedparser.feeddata
    result['entries'] = feedparser.entries
    result['version'] = result['version'] or feedparser.version
    result['namespaces'] = feedparser.namespacesInUse
    return result
def _safe_repr(object, context, maxlevels, level):
    """Return (repr_string, is_readable, is_recursive) for `object`.

    Recursion-safe repr helper: `context` maps id(obj) -> 1 for containers
    currently being formatted (to detect cycles), `maxlevels` caps nesting
    depth (0 = unlimited), `level` is the current depth.
    """
    typ = _type(object)
    if typ is str:
        # Fast path: plain repr unless locale has been imported, in which
        # case build the quoted form character by character.
        if 'locale' not in _sys.modules:
            return repr(object), True, False
        # Pick the quote style that needs the least escaping.
        if "'" in object and '"' not in object:
            closure = '"'
            quotes = {'"': '\\"'}
        else:
            closure = "'"
            quotes = {"'": "\\'"}
        qget = quotes.get
        sio = _StringIO()
        write = sio.write
        for char in object:
            if char.isalpha():
                write(char)
            else:
                # escape via repr(), stripping repr's surrounding quotes
                write(qget(char, repr(char)[1:-1]))
        return ("%s%s%s" % (closure, sio.getvalue(), closure)), True, False

    r = getattr(typ, "__repr__", None)
    if issubclass(typ, dict) and r is dict.__repr__:
        # Only handle dicts that use the stock repr; subclasses with a
        # custom __repr__ fall through to the generic case at the bottom.
        if not object:
            return "{}", True, False
        objid = _id(object)
        if maxlevels and level >= maxlevels:
            return "{...}", False, objid in context
        if objid in context:
            # cycle detected
            return _recursion(object), False, True
        context[objid] = 1
        readable = True
        recursive = False
        components = []
        append = components.append
        level += 1
        saferepr = _safe_repr
        items = sorted(object.items(), key=_safe_tuple)
        for k, v in items:
            krepr, kreadable, krecur = saferepr(
                k, context, maxlevels, level
            )
            vrepr, vreadable, vrecur = saferepr(
                v, context, maxlevels, level
            )
            append("%s: %s" % (krepr, vrepr))
            readable = readable and kreadable and vreadable
            if krecur or vrecur:
                recursive = True
        del context[objid]
        return "{%s}" % _commajoin(components), readable, recursive

    if (issubclass(typ, list) and r is list.__repr__) or \
       (issubclass(typ, tuple) and r is tuple.__repr__):
        if issubclass(typ, list):
            if not object:
                return "[]", True, False
            format = "[%s]"
        elif _len(object) == 1:
            # single-element tuple needs the trailing comma
            format = "(%s,)"
        else:
            if not object:
                return "()", True, False
            format = "(%s)"
        objid = _id(object)
        if maxlevels and level >= maxlevels:
            return format % "...", False, objid in context
        if objid in context:
            return _recursion(object), False, True
        context[objid] = 1
        readable = True
        recursive = False
        components = []
        append = components.append
        level += 1
        for o in object:
            orepr, oreadable, orecur = _safe_repr(
                o, context, maxlevels, level
            )
            append(orepr)
            if not oreadable:
                readable = False
            if orecur:
                recursive = True
        del context[objid]
        return format % _commajoin(components), readable, recursive

    # Generic fallback: anything whose repr starts with '<' is deemed
    # not round-trippable ("unreadable").
    rep = repr(object)
    return rep, (rep and not rep.startswith('<')), False
def body_words(email) -> int:
    """Number of words in the body text"""
    # Tokenize the body text and count the tokens produced.
    body_reader = _StringIO(_body(email)).readline
    tokens = _generate_tokens(body_reader)
    return _ilen(tokens)