def test_interop_array(self):
    arr = System.Array[System.Byte](b"abc")
    ars = System.ArraySegment[System.Byte](arr)
    mem = System.Memory[System.Byte](arr)
    rom = System.ReadOnlyMemory[System.Byte](arr)
    self.assertEqual(codecs.latin_1_decode(arr), ("abc", 3))
    self.assertEqual(codecs.latin_1_decode(ars), ("abc", 3))
    self.assertEqual(codecs.latin_1_decode(mem), ("abc", 3))
    self.assertEqual(codecs.latin_1_decode(rom), ("abc", 3))
def write(self, text, line_ending='\n', fg=None):
    if not isinstance(text, unicode):
        try:
            text = codecs.utf_8_decode(text)[0]
        except:
            text = codecs.latin_1_decode(text)[0]

    tags, text = parse_mirc.parse_mirc(text)

    if fg:
        tags.append({
            'data': ("foreground",
                     isinstance(fg, basestring) and ('#%s' % fg)
                     or parse_mirc.get_mirc_color(fg)),
            'from': 0,
            'to': len(text),
        })

    buffer = self.get_buffer()
    cc = buffer.get_char_count()

    buffer.insert_with_tags_by_name(
        buffer.get_end_iter(), text + line_ending, 'indent')

    for tag in tags:
        tag_name = str(tag['data'])
        if not tag_table.lookup(tag_name):
            buffer.create_tag(tag_name, **prop_to_gtk(self, tag['data']))
        buffer.apply_tag_by_name(
            tag_name,
            buffer.get_iter_at_offset(tag['from'] + cc),
            buffer.get_iter_at_offset(tag['to'] + cc))
def write(self, text, line_ending='\n', fg=None):
    if not isinstance(text, unicode):
        try:
            text = codecs.utf_8_decode(text)[0]
        except:
            text = codecs.latin_1_decode(text)[0]

    tags, text = parse_mirc.parse_mirc(text)

    if fg:
        tags.append({
            'data': ("foreground",
                     isinstance(fg, basestring) and ('#%s' % fg)
                     or parse_mirc.get_mirc_color(fg)),
            'from': 0,
            'to': len(text),
        })

    buffer = self.get_buffer()
    cc = buffer.get_char_count()

    for tag in tags:
        tag_name = str(tag['data'])
        if not tag_table.lookup(tag_name):
            buffer.create_tag(tag_name, **prop_to_Gtk(self, tag['data']))
        buffer.apply_tag_by_name(
            tag_name,
            buffer.get_iter_at_offset(tag['from'] + cc),
            buffer.get_iter_at_offset(tag['to'] + cc))

    buffer.insert_with_tags(buffer.get_end_iter(), text + line_ending, indent_tag)
def _as_unicode(s):
    """Turn byte string or unicode string into a unicode string."""
    if isinstance(s, str):
        return s
    # Assume it is a bytes string
    # Note ISO-8859-1 aka Latin-1 preserves first 256 chars
    return codecs.latin_1_decode(s)[0]
def _read_json(self, url):
    """Performs an HTTP GET on the given URL and interprets the
    response as JSON."""
    try:
        try:
            conn = self._get_conn()
            conn.request('GET', url)
            res = conn.getresponse()

            if res.status != 200:
                raise CrabError('server error: ' + self._read_error(res))

            return json.loads(latin_1_decode(res.read(), 'replace')[0])

        # except HTTPException as err:
        except HTTPException:
            err = sys.exc_info()[1]
            raise CrabError('HTTP error: ' + str(err))

        # except socket.error as err:
        except socket.error:
            err = sys.exc_info()[1]
            raise CrabError('socket error: ' + str(err))

        # except ValueError as err:
        except ValueError:
            err = sys.exc_info()[1]
            raise CrabError('did not understand response: ' + str(err))

    finally:
        conn.close()
def name2cp(k):
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    else:
        k = htmlentitydefs.entitydefs[k]
        if k.startswith("&#") and k.endswith(";"):
            return int(k[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(k)[0])
def _read_data(cls):
    """
    Read imaging photometry data from the file.
    """
    instrument_names = [
        (cls.UFTI, 'UFTI'),
        (cls.UIST, 'UIST'),
        (cls.WFCAM, 'WFCAM'),
    ]

    data = json.loads(
        latin_1_decode(get_data('ukirt_itc', 'data/phot.json'))[0])

    # Process sky data.
    data_sky = data.get('sky')
    if data_sky is None:
        raise UKIRTITCError('Data file did not contain "sky" section')

    cls._sky = {}
    for (filter_, values) in data_sky.items():
        if filter_ not in cls.FILTERS:
            raise UKIRTITCError(
                'Sky filter "{0}" not recognised'.format(filter_))
        cls._sky[filter_] = SkyInfo(*values)

    # Process extinction data.
    data_ext = data.get('extinction')
    if data_ext is None:
        raise UKIRTITCError(
            'Data file did not contain "extinction" section')

    cls._extinction = {}
    for (filter_, value) in data_ext.items():
        if filter_ not in cls.FILTERS:
            raise UKIRTITCError(
                'Extinction filter "{0}" not recognised'.format(filter_))
        cls._extinction[filter_] = value

    # Process instrument data.
    data_instruments = data.get('instrument')
    if data_instruments is None:
        raise UKIRTITCError(
            'Data file did not contain "instrument" section')

    cls._info = OrderedDict()
    for (instrument, name) in instrument_names:
        data_instrument = data_instruments.get(name)
        if data_instrument is None:
            raise UKIRTITCError('Could not find instrument information '
                                'for "{0}"'.format(name))

        info_obj = InstrumentInfo(name=name, **data_instrument)

        for filter_ in info_obj.zeropoint:
            if filter_ not in cls.FILTERS:
                raise UKIRTITCError(
                    'Instrument "{0}" filter "{1}" not recognised'.format(
                        name, filter_))

        cls._info[instrument] = info_obj
def WaitOnFile(fn):
    cmd = ["lsof", fn]
    while True:
        res = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        l = codecs.latin_1_decode(res.stdout.read())[0]
        res.wait()
        if l == '':
            return
        sys.stderr.write("Waiting on temporary file.\n")
def name2cp(k):
    ''' translate common character name into unicode code point '''
    if k == "apos":
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    else:
        k = htmlentitydefs.entitydefs[k]
        if k.startswith("&#") and k.endswith(";"):
            return int(k[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(k)[0])
def strip_text(text):
    text = latin_1_decode(text)[0]
    text = normalize('NFD', text).encode('ascii', 'ignore')
    text = re.sub('&mdash+;', ' ', text)      # convert mdash to " "
    # text = re.sub('&', ' and ', text)       # convert "&" to " and "
    text = pte.replace_entities(text)
    # text = re.sub('&[A-Za-z]+;', '', text)  # convert ampersand stuff to ""
    text = re.sub('<[^>]*>', ' ', text)       # strip HTML markup
    text = re.sub('\s+', ' ', text)           # strip whitespace
    return text
def check_file(path, line):
    """Test whether the file is the expected ``.vcf.gz`` file"""
    raw = path.read(mode="rb")
    assert raw[0] == 0x1F
    assert raw[1] == 0x8B
    # compare actual result with expected
    inflated = gzip.decompress(raw)
    RESULT = codecs.latin_1_decode(inflated)[0]
    LINE = "20\t100\t.\tC\tT\t.\t.\t.\tGT\t0/1\t0/0\t1/1\n"
    EXPECTED = MEDIUM_HEADER + LINE
    assert EXPECTED == RESULT
def _load_bgzf_block(handle, text_mode=False):
    """Load the next BGZF block of compressed data (PRIVATE).

    Returns a tuple (block size and data), or at end of file
    will raise StopIteration.
    """
    magic = handle.read(4)
    if not magic:
        # End of file - should we signal this differently now?
        # See https://www.python.org/dev/peps/pep-0479/
        raise StopIteration
    if magic != _bgzf_magic:
        raise ValueError(r"A BGZF (e.g. a BAM file) block should start with "
                         r"%r, not %r; handle.tell() now says %r"
                         % (_bgzf_magic, magic, handle.tell()))
    gzip_mod_time, gzip_extra_flags, gzip_os, extra_len = struct.unpack(
        "<LBBH", handle.read(8))

    block_size = None
    x_len = 0
    while x_len < extra_len:
        subfield_id = handle.read(2)
        subfield_len = struct.unpack("<H", handle.read(2))[0]  # uint16_t
        subfield_data = handle.read(subfield_len)
        x_len += subfield_len + 4
        if subfield_id == _bytes_BC:
            assert subfield_len == 2, "Wrong BC payload length"
            assert block_size is None, "Two BC subfields?"
            block_size = struct.unpack("<H", subfield_data)[0] + 1  # uint16_t
    assert x_len == extra_len, (x_len, extra_len)
    assert block_size is not None, "Missing BC, this isn't a BGZF file!"
    # Now comes the compressed data, CRC, and length of uncompressed data.
    deflate_size = block_size - 1 - extra_len - 19
    d = zlib.decompressobj(-15)  # Negative window size means no headers
    data = d.decompress(handle.read(deflate_size)) + d.flush()
    expected_crc = handle.read(4)
    expected_size = struct.unpack("<I", handle.read(4))[0]
    if expected_size != len(data):
        raise RuntimeError("Decompressed to %i, not %i"
                           % (len(data), expected_size))
    # Should cope with a mix of Python platforms...
    crc = zlib.crc32(data)
    if crc < 0:
        crc = struct.pack("<i", crc)
    else:
        crc = struct.pack("<I", crc)
    if expected_crc != crc:
        raise RuntimeError("CRC is %s, not %s" % (crc, expected_crc))
    if text_mode:
        # Note ISO-8859-1 aka Latin-1 preserves first 256 chars
        return block_size, codecs.latin_1_decode(data)[0]
    else:
        return block_size, data
def get_entitydefs():
    from codecs import latin_1_decode
    try:
        htmlentitydefs.name2codepoint
    except AttributeError:
        entitydefs = {}
        for name, char in htmlentitydefs.entitydefs.items():
            uc = latin_1_decode(char)[0]
            if uc.startswith("&#") and uc.endswith(";"):
                uc = unescape_charref(uc[2:-1], None)
            codepoint = ord(uc)
            entitydefs[name] = codepoint
    else:
        entitydefs = htmlentitydefs.name2codepoint
    return entitydefs
def _read_json(self):
    """Attempts to interpret the HTTP PUT body as JSON and return the
    corresponding Python object.

    There could be a corresponding _write_json method, but there is little
    need as the caller can just do: return json.dumps(...) and the CherryPy
    handler needs to pass the response back with return."""
    message = latin_1_decode(cherrypy.request.body.read(), 'replace')[0]

    try:
        return json.loads(message)
    except ValueError:
        cherrypy.log.error('CrabError: Failed to read JSON: ' + message)
        raise HTTPError(400, message='Did not understand JSON')
def _write_json(self, url, obj, read=False):
    """Converts the given object to JSON and sends it with an HTTP PUT
    to the given URL.  Optionally attempts to read JSON from the
    response."""
    try:
        try:
            conn = self._get_conn()
            conn.request('PUT', url, json.dumps(obj))
            res = conn.getresponse()

            if res.status != 200:
                raise CrabError('server error: ' + self._read_error(res))

            if read:
                response = latin_1_decode(res.read(), 'replace')[0]

                # Check we got a response before attempting to decode
                # it as JSON.  (Some messages did not have responses
                # for previous server versions.)
                if response:
                    return json.loads(response)
                else:
                    return {}

        # except HTTPException as err:
        # except HTTPException, err:
        except HTTPException:
            err = sys.exc_info()[1]
            raise CrabError('HTTP error: ' + str(err))

        # except socket.error as err:
        # except socket.error, err:
        except socket.error:
            err = sys.exc_info()[1]
            raise CrabError('socket error: ' + str(err))

        # except ValueError as err:
        # except ValueError, err:
        except ValueError:
            err = sys.exc_info()[1]
            raise CrabError('did not understand response: ' + str(err))

    finally:
        conn.close()
def _read_error(self, res):
    """Determine the error message to show based on an unsuccessful
    HTTP response.

    Currently use the HTTP status phrase or the first paragraph of the
    body, if found with a regular expression."""
    message = res.reason

    try:
        body = latin_1_decode(res.read(), 'replace')[0]
        match = re.search('<p>([^<]*)', body)
        if match:
            message = match.group(1)
    except:
        pass

    return message
def file_write(file_name):
    if not os.path.exists(base_dir + get_path() + file_name):
        print("File", file_name, "Not Found")
        return -3
    file = open(base_dir + get_path() + file_name, 'rb').read()
    cont = codecs.latin_1_decode(file)
    result = requests.post(
        ns_ip + '/writeFile',
        json=json.loads(json.dumps({'path': get_path() + file_name,
                                    'cont': cont[0]})))
    if result.status_code == 500:
        return -3
    if result.json()['resp'] == 404:
        print("Not Found")
        return -3
    if result.json()['resp'] == 500:
        print("Server error")
        return -3
    return 1
def internet_decode(input, errors='strict', final=False):
    """The core decoding function"""
    try:
        # First try utf-8. This should be the usual case by far.
        return codecs.utf_8_decode(input, errors, final)
    except UnicodeDecodeError:
        try:
            # If that fails, try windows-1252 (aka cp1252), which defines more
            # characters than latin-1, but will fail for five particular
            # bytes: 0x81, 0x8D, 0x8F, 0x90, 0x9D
            return codecs.charmap_decode(input, errors,
                                         encodings.cp1252.decoding_table)
        except UnicodeDecodeError:
            # and finally, try latin-1, which never fails, but defines
            # 27 fewer characters than cp1252.
            return codecs.latin_1_decode(input, errors)
    except UnicodeEncodeError:
        # Was that thing already unicode? Then it's already decoded.
        if isinstance(input, unicode):
            return (input, len(input))
        else:
            raise
def test_codecs_builtins(self):
    s = "abc"
    encoded = codecs.utf_8_encode(s)
    self.assertEqual(s, codecs.utf_8_decode(encoded[0])[0])
    encoded = codecs.utf_7_encode(s)
    self.assertEqual(s, codecs.utf_7_decode(encoded[0])[0])
    encoded = codecs.utf_16_encode(s)
    self.assertEqual(s, codecs.utf_16_decode(encoded[0])[0])
    encoded = codecs.utf_16_le_encode(s)
    self.assertEqual(s, codecs.utf_16_le_decode(encoded[0])[0])
    encoded = codecs.utf_16_be_encode(s)
    self.assertEqual(s, codecs.utf_16_be_decode(encoded[0])[0])
    encoded = codecs.utf_32_encode(s)
    self.assertEqual(s, codecs.utf_32_decode(encoded[0])[0])
    encoded = codecs.utf_32_le_encode(s)
    self.assertEqual(s, codecs.utf_32_le_decode(encoded[0])[0])
    encoded = codecs.utf_32_be_encode(s)
    self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0])
    encoded = codecs.utf_32_be_encode(s)
    self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0])
    encoded = codecs.raw_unicode_escape_encode(s)
    self.assertEqual(s, codecs.raw_unicode_escape_decode(encoded[0])[0])
    encoded = codecs.unicode_escape_encode(s)
    self.assertEqual(s, codecs.unicode_escape_decode(encoded[0])[0])
    encoded = codecs.latin_1_encode(s)
    self.assertEqual(s, codecs.latin_1_decode(encoded[0])[0])
    encoded = codecs.ascii_encode(s)
    self.assertEqual(s, codecs.ascii_decode(encoded[0])[0])
def _read_receiver_info(cls):
    """
    Read receiver information from the "receiver_info.json" file and
    store it in the class's "_info" attribute.

    Should not be called if "_info" has already been set up.
    """
    # List specifying how to map the receiver names to the "enum" values
    # used by this class.  (And the ordering in which to display them.)
    receiver_names = [
        (cls.A3, 'RxA3'),
        (cls.HARP, 'HARP'),
        (cls.WD, 'RxWD'),
    ]

    receiver_data = json.loads(latin_1_decode(
        get_data('jcmt_itc_heterodyne', 'data/receiver_info.json'))[0])

    for (receiver, name) in receiver_names:
        receiver_info = receiver_data.get(name)

        if receiver_info is None:
            raise Exception('Could not find receiver information '
                            'for "{0}".'.format(name))

        info_obj = ReceiverInfo(name=name, **receiver_info)

        if info_obj.array is not None:
            array_obj = ArrayInfo(footprint=None, **info_obj.array)

            array_obj = array_obj._replace(
                scan_spacings=OrderedDict(array_obj.scan_spacings),
                jiggle_patterns=OrderedDict(array_obj.jiggle_patterns),
                footprint=(array_obj.size * cos(radians(array_obj.f_angle))))

            info_obj = info_obj._replace(array=array_obj)

        cls._info[receiver] = info_obj
except OSError:
    pass

#idp_ = lambda _:"%s/%s" % (os.environ["IDPATH"],_)
#uidp_ = lambda _:_.replace(os.environ["IDPATH"],"$IDPATH")
idp_ = lambda _: "%s/%s" % (KEYDIR, _)
uidp_ = lambda _: _.replace(KEYDIR, "$KEYDIR")
gcp_ = lambda _: "%s/%s" % (os.environ["GEOCACHE"], _)

import sys, codecs
if sys.version_info.major > 2:
    u_ = lambda _: _                            # py3 strings are unicode already
    b_ = lambda _: codecs.latin_1_encode(_)[0]  # from py3 unicode string to bytes
    d_ = lambda _: codecs.latin_1_decode(_)[0]  # from bytes to py3 unicode string
else:
    u_ = lambda _: unicode(_, "utf-8")          # py2 strings are bytes
    b_ = lambda _: _
    d_ = lambda _: _
pass


def findfile(base, name, relative=True):
    paths = []
    for root, dirs, files in os.walk(base):
        if name in files:
            path = os.path.join(root, name)
            paths.append(path[len(base) + 1:] if relative else path)
        pass
    pass
def latin1_to_utf8(text):
    "helper to convert when needed from latin input"
    return utf_8_encode(latin_1_decode(text)[0])[0]
def decode(self, input, final=False):
    return codecs.latin_1_decode(input, self.errors)[0]
def test_latin_1_decode(self):
    # sanity
    new_str, size = codecs.latin_1_decode("abc")
    self.assertEqual(new_str, u'abc')
    self.assertEqual(size, 3)
def un_b(x):
    return codecs.latin_1_decode(x)[0]
def zzz(x):
    return codecs.latin_1_decode(x)[0]
def __init__(self): self.entity_code_dict = { "amp": 0x0026, "pound": 0x00A3, "aacute": 0x00E1, "ampersand": 0x0026, "Aacute": 0x00C1, "acirc": 0x00E2, "Acirc": 0x00C2, "agrave": 0x00E0, "Agrave": 0x00C0, "aring": 0x00E5, "Aring": 0x00C5, "atilde": 0x00E3, "Atilde": 0x00C3, "auml": 0x00E4, "Auml": 0x00C4, "aelig": 0x00E6, "AElig": 0x00C6, "ccedil": 0x00E7, "Ccedil": 0x00C7, "eth": 0x00F0, "ETH": 0x00D0, "eacute": 0x00E9, "Eacute": 0x00C9, "ecirc": 0x00EA, "Ecirc": 0x00CA, "egrave": 0x00E8, "Egrave": 0x00C8, "euml": 0x00EB, "Euml": 0x00CB, "iacute": 0x00ED, "Iacute": 0x00CD, "icirc": 0x00EE, "Icirc": 0x00CE, "igrave": 0x00EC, "Igrave": 0x00CC, "iuml": 0x00EF, "Iuml": 0x00CF, "ntilde": 0x00F1, "Ntilde": 0x00D1, "oacute": 0x00F3, "Oacute": 0x00D3, "ocirc": 0x00F4, "Ocirc": 0x00D4, "ograve": 0x00F2, "Ograve": 0x00D2, "oslash": 0x00F8, "Oslash": 0x00D8, "otilde": 0x00F5, "Otilde": 0x00D5, "ouml": 0x00F6, "Ouml": 0x00D6, "szlig": 0x00DF, "thorn": 0x00FE, "THORN": 0x00DE, "uacute": 0x00FA, "Uacute": 0x00DA, "ucirc": 0x00FB, "Ucirc": 0x00DB, "ugrave": 0x00F9, "Ugrave": 0x00D9, "uuml": 0x00FC, "Uuml": 0x00DC, "yacute": 0x00FD, "Yacute": 0x00DD, "yuml": 0x00FF, "abreve": 0x0103, "Abreve": 0x0102, "amacr": 0x0101, "Amacr": 0x0100, "aogon": 0x0105, "Aogon": 0x0104, "cacute": 0x0107, "Cacute": 0x0106, "ccaron": 0x010D, "Ccaron": 0x010C, "ccirc": 0x0109, "Ccirc": 0x0108, "cdot": 0x010B, "Cdot": 0x010A, "dcaron": 0x010F, "Dcaron": 0x010E, "dstrok": 0x0111, "Dstrok": 0x0110, "ecaron": 0x011B, "Ecaron": 0x011A, "edot": 0x0117, "Edot": 0x0116, "emacr": 0x0113, "Emacr": 0x0112, "eogon": 0x0119, "Eogon": 0x0118, "gacute": 0x01F5, "gbreve": 0x011F, "Gbreve": 0x011E, "Gcedil": 0x0122, "gcirc": 0x011D, "Gcirc": 0x011C, "gdot": 0x0121, "Gdot": 0x0120, "hcirc": 0x0125, "Hcirc": 0x0124, "hstrok": 0x0127, "Hstrok": 0x0126, "Idot": 0x0130, "Imacr": 0x012A, "imacr": 0x012B, "ijlig": 0x0133, "IJlig": 0x0132, "inodot": 0x0131, "iogon": 0x012F, "Iogon": 0x012E, "itilde": 0x0129, "Itilde": 0x0128, "jcirc": 0x0135, "Jcirc": 0x0134, "kcedil": 0x0137, "Kcedil": 0x0136, "kgreen": 0x0138, "lacute": 0x013A, "Lacute": 0x0139, "lcaron": 0x013E, "Lcaron": 0x013D, "lcedil": 0x013C, "Lcedil": 0x013B, "lmidot": 0x0140, "Lmidot": 0x013F, "lstrok": 0x0142, "Lstrok": 0x0141, "nacute": 0x0144, "Nacute": 0x0143, "eng": 0x014B, "ENG": 0x014A, "napos": 0x0149, "ncaron": 0x0148, "Ncaron": 0x0147, "ncedil": 0x0146, "Ncedil": 0x0145, "odblac": 0x0151, "Odblac": 0x0150, "Omacr": 0x014C, "omacr": 0x014D, "oelig": 0x0153, "OElig": 0x0152, "racute": 0x0155, "Racute": 0x0154, "rcaron": 0x0159, "Rcaron": 0x0158, "rcedil": 0x0157, "Rcedil": 0x0156, "sacute": 0x015B, "Sacute": 0x015A, "scaron": 0x0161, "Scaron": 0x0160, "scedil": 0x015F, "Scedil": 0x015E, "scirc": 0x015D, "Scirc": 0x015C, "tcaron": 0x0165, "Tcaron": 0x0164, "tcedil": 0x0163, "Tcedil": 0x0162, "tstrok": 0x0167, "Tstrok": 0x0166, "ubreve": 0x016D, "Ubreve": 0x016C, "udblac": 0x0171, "Udblac": 0x0170, "umacr": 0x016B, "Umacr": 0x016A, "uogon": 0x0173, "Uogon": 0x0172, "uring": 0x016F, "Uring": 0x016E, "utilde": 0x0169, "Utilde": 0x0168, "wcirc": 0x0175, "Wcirc": 0x0174, "ycirc": 0x0177, "Ycirc": 0x0176, "Yuml": 0x0178, "zacute": 0x017A, "Zacute": 0x0179, "zcaron": 0x017E, "Zcaron": 0x017D, "zdot": 0x017C, "Zdot": 0x017B, "agr": 0x03B1, "Agr": 0x0391, "bgr": 0x03B2, "Bgr": 0x0392, "ggr": 0x03B3, "Ggr": 0x0393, "dgr": 0x03B4, "Dgr": 0x0394, "egr": 0x03B5, "Egr": 0x0395, "zgr": 0x03B6, "Zgr": 0x0396, "eegr": 0x03B7, "EEgr": 0x0397, "thgr": 0x03B8, "THgr": 0x0398, "igr": 0x03B9, "Igr": 
0x0399, "kgr": 0x03BA, "Kgr": 0x039A, "lgr": 0x03BB, "Lgr": 0x039B, "mgr": 0x03BC, "Mgr": 0x039C, "ngr": 0x03BD, "Ngr": 0x039D, "xgr": 0x03BE, "Xgr": 0x039E, "ogr": 0x03BF, "Ogr": 0x039F, "pgr": 0x03C0, "Pgr": 0x03A0, "rgr": 0x03C1, "Rgr": 0x03A1, "sgr": 0x03C3, "Sgr": 0x03A3, "sfgr": 0x03C2, "tgr": 0x03C4, "Tgr": 0x03A4, "ugr": 0x03C5, "Ugr": 0x03A5, "phgr": 0x03C6, "PHgr": 0x03A6, "khgr": 0x03C7, "KHgr": 0x03A7, "psgr": 0x03C8, "PSgr": 0x03A8, "ohgr": 0x03C9, "OHgr": 0x03A9, "half": 0x00BD, "frac12": 0x00BD, "frac14": 0x00BC, "frac34": 0x00BE, "frac18": 0x215B, "frac38": 0x215C, "frac58": 0x215D, "frac78": 0x215E, "sup1": 0x00B9, "sup2": 0x00B2, "sup3": 0x00B3, "plus": 0x002B, "plusmn": 0x00B1, "equals": 0x003D, "gt": 0x003E, "divide": 0x00F7, "times": 0x00D7, "curren": 0x00A4, "pound": 0x00A3, "dollar": 0x0024, "cent": 0x00A2, "yen": 0x00A5, "num": 0x0023, "percnt": 0x0025, "ast": 0x2217, "commat": 0x0040, "lsqb": 0x005B, "bsol": 0x005C, "rsqb": 0x005D, "lcub": 0x007B, "horbar": 0x2015, "verbar": 0x007C, "rcub": 0x007D, "micro": 0x00B5, "ohm": 0x2126, "deg": 0x00B0, "ordm": 0x00BA, "ordf": 0x00AA, "sect": 0x00A7, "para": 0x00B6, "middot": 0x00B7, "larr": 0x2190, "rarr": 0x2192, "uarr": 0x2191, "darr": 0x2193, "copy": 0x00A9, "reg": 0x00AF, "trade": 0x2122, "brvbar": 0x00A6, "not": 0x00AC, "sung": 0x2669, "excl": 0x0021, "iexcl": 0x00A1, "quot": 0x0022, "apos": 0x0027, "lpar": 0x0028, "rpar": 0x0029, "comma": 0x002C, "lowbar": 0x005F, "hyphen": 0xE4F8, "period": 0x002E, "sol": 0x002F, "colon": 0x003A, "semi": 0x003B, "quest": 0x003F, "iquest": 0x00BF, "laquo": 0x00AB, "raquo": 0x00BB, "lsquo": 0x2018, "rsquo": 0x2019, "ldquo": 0x201C, "rdquo": 0x201D, "nbsp": 0x00A0, "shy": 0x00AD, "acute": 0x00B4, "breve": 0x02D8, "caron": 0x02C7, "cedil": 0x00B8, "circ": 0x2218, "dblac": 0x02DD, "die": 0x00A8, "dot": 0x02D9, "grave": 0x0060, "macr": 0x00AF, "ogon": 0x02DB, "ring": 0x02DA, "tilde": 0x007E, "uml": 0x00A8, "emsp": 0x2003, "ensp": 0x2002, "emsp13": 0x2004, "emsp14": 0x2005, "numsp": 0x2007, "puncsp": 0x2008, "thinsp": 0x2009, "hairsp": 0x200A, "mdash": 0x2014, "ndash": 0x2013, "dash": 0x2010, "blank": 0x2423, "hellip": 0x2026, "nldr": 0x2025, "frac13": 0x2153, "frac23": 0x2154, "frac15": 0x2155, "frac25": 0x2156, "frac35": 0x2157, "frac45": 0x2158, "frac16": 0x2159, "frac56": 0x215A, "incare": 0x2105, "block": 0x2588, "uhblk": 0x2580, "lhblk": 0x2584, "blk14": 0x2591, "blk12": 0x2592, "blk34": 0x2593, "marker": 0x25AE, "cir": 0x25CB, "squ": 0x25A1, "rect": 0x25AD, "utri": 0x25B5, "dtri": 0x25BF, "star": 0x22C6, "bull": 0x2022, "squf": 0x25AA, "utrif": 0x25B4, "dtrif": 0x25BE, "ltrif": 0x25C2, "rtrif": 0x25B8, "clubs": 0x2663, "diams": 0x2666, "hearts": 0x2665, "spades": 0x2660, "malt": 0x2720, "dagger": 0x2020, "Dagger": 0x2021, "check": 0x2713, "cross": 0x2717, "sharp": 0x266F, "flat": 0x266D, "male": 0x2642, "female": 0x2640, "phone": 0x260E, "telrec": 0x2315, "copysr": 0x2117, "caret": 0x2041, "lsquor": 0x201A, "ldquor": 0x201E, "fflig": 0xFB00, "filig": 0xFB01, "ffilig": 0xFB03, "ffllig": 0xFB04, "fllig": 0xFB02, "mldr": 0x2026, "rdquor": 0x201C, "rsquor": 0x2018, "vellip": 0x22EE, "hybull": 0x2043, "loz": 0x25CA, "lozf": 0x2726, "ltri": 0x25C3, "rtri": 0x25B9, "starf": 0x2605, "natur": 0x266E, "rx": 0x211E, "sext": 0x2736, "target": 0x2316, "dlcrop": 0x230D, "drcrop": 0x230C, "ulcrop": 0x230F, "urcrop": 0x230E, "boxh": 0x2500, "boxv": 0x2502, "boxur": 0x2514, "boxul": 0x2518, "boxdl": 0x2510, "boxdr": 0x250C, "boxvr": 0x251C, "boxhu": 0x2534, "boxvl": 0x2524, "boxhd": 0x252C, "boxvh": 
            0x253C, "boxvR": 0x255E, "boxhU": 0x2567, "boxvL": 0x2561, "boxhD": 0x2564, "boxvH": 0x256A,
            "boxH": 0x2550, "boxV": 0x2551, "boxUR": 0x2558, "boxUL": 0x255B, "boxDL": 0x2555, "boxDR": 0x2552,
            "boxVR": 0x255F, "boxHU": 0x2568, "boxVL": 0x2562, "boxHD": 0x2565, "boxVH": 0x256B, "boxVr": 0x2560,
            "boxHu": 0x2569, "boxVl": 0x2563, "boxHd": 0x2566, "boxVh": 0x256C, "boxuR": 0x2559, "boxUl": 0x255C,
            "boxdL": 0x2556, "boxDr": 0x2553, "boxUr": 0x255A, "boxuL": 0x255D, "boxDl": 0x2557, "boxdR": 0x2554
        }
        self.entity_char_dict = {}
        for ent, code in self.entity_code_dict.iteritems():
            try:
                self.entity_char_dict[ent] = latin_1_decode(chr(code), "utf8")[0]
            # catch both exception types as a tuple; the original Python 2 form
            # "except ValueError, UnicodeEncodeError:" only caught ValueError
            # and bound it to the name UnicodeEncodeError
            except (ValueError, UnicodeEncodeError):
                self.entity_char_dict[ent] = unichr(code)
def un_b(x):
    return codecs.latin_1_decode(x)[0]


class KVStore(object):
    """An abstract key-value interface with support for range iteration."""

    __metaclass__ = abc.ABCMeta
def makestring(x):
    return codecs.latin_1_decode(x)[0]
def test_latin_1_decode(self):
    # sanity
    new_str, num_processed = codecs.latin_1_decode(b"abc")
    self.assertEqual(new_str, 'abc')
    self.assertEqual(num_processed, 3)
except Exception, e:
    univention.debug.debug(univention.debug.ADMIN, univention.debug.WARN,
                           'authentication error: %s' % str(e))
    try:
        lo, position = univention.admin.uldap.getMachineConnection()
    except Exception, e2:
        univention.debug.debug(univention.debug.ADMIN, univention.debug.WARN,
                               'authentication error: %s' % str(e2))
        out.append('authentication error: %s' % str(e))
        out.append('authentication error: %s' % str(e2))
        return out
    pass

for i in range(0, len(args)):
    try:
        args[i] = codecs.utf_8_decode(args[i])[0]
    except:
        args[i] = codecs.latin_1_decode(args[i])[0]

if len(args) == 1:
    if scope == 'machine':
        machine = args[0]
        if machine[-1] == '$':
            machine = machine[0:-1]
        if configRegistry.has_key('samba/defaultcontainer/computer') and configRegistry['samba/defaultcontainer/computer']:
            position.setDn(configRegistry['samba/defaultcontainer/computer'])
        else:
            position.setDn(univention.admin.config.getDefaultContainer(lo, 'computers/windows'))
    elif scope == 'group':
        group = args[0]
        if configRegistry.has_key('samba/defaultcontainer/group') and configRegistry['samba/defaultcontainer/group']:
            position.setDn(configRegistry['samba/defaultcontainer/group'])
        else:
import codecs
import re

de_utf8 = lambda s: codecs.utf_8_decode(s, "ignore")[0]
en_utf8 = lambda s: codecs.utf_8_encode(s, "ignore")[0]
de_latin1 = lambda s: codecs.latin_1_decode(s, "ignore")[0]


class UnicodeProcessor:
    """Try to do something senseful with unicode stuff."""

    re_unicodexml = re.compile("&#(\d{3,5});")

    def __init__(self, latin1=False):
        self.repl_unicodexml = lambda x: en_utf8(unichr(int(x.group(1))))
        if latin1:
            self.process = self.process_latin1

    def process_latin1(self, string):
        string = de_latin1(string)
        string = en_utf8(string)
        return re_unicodexml.sub(self.repl_unicodexml, string)

    def process(self, string):
        return re_unicodexml.sub(self.repl_unicodexml, string)


re_unicodexml = re.compile("&#(\d{3,5});")
repl_unicodexml = lambda x: en_utf8(unichr(int(x.group(1))))


def to_unicode(string):
    """Convert Unicode &#xxx; stuff and try to handle other stuff.

    Still doesn't work like it should. Decoding latin_1 and encoding
def SpliceTestLine(svs):
    svIndex = 0
    for sv in svs:
        if sv.chrom not in refFai:
            continue
        if svIndex == 0:
            prefixStart = max(0, sv.start - args.flank)
            prefixEnd = sv.start
            sem.acquire()
            gapPrefix = SafeFetch(ref, sv.chrom, prefixStart, prefixEnd)
            sem.release()
            spliceSeqs = [gapPrefix]
            refPos = sv.start

        if args.mssm:
            spliceSeqs.append(sv[5])
            refPos = sv[2]
            if svIndex < len(svs)-1:
                if refPos > svs[svIndex+1][1]:
                    continue
                sem.acquire()
                between = SafeFetch(ref, sv[0], refPos, svs[svIndex+1][1])
                sem.release()
                spliceSeqs.append(between)
        elif args.falcon:
            # only checking deletions
            refPos = sv[2]
            if svIndex < len(svs)-1:
                if refPos > svs[svIndex+1][1]:
                    continue
        else:
            if sv.svType == "insertion":
                svSeq = sv.seq
                if len(svSeq) > args.maxInsertion:
                    mid = args.maxInsertion/2
                    svSeq = sv.seq[0:mid] + sv.seq[-mid:]
                    sys.stderr.write("keeping center of insertion {}\n".format(len(sv.seq)))
                spliceSeqs.append(svSeq)
                refPos = sv.start
            else:
                refPos = sv.end
            if svIndex < len(svs)-1:
                if refPos > svs[svIndex+1].start:
                    sys.stderr.write("Ignoring an sv {} {} {}\n".format(svs[svIndex+1].chrom, svs[svIndex+1].start, svs[svIndex+1].end))
                    svIndex += 1
                    continue
                else:
                    sem.acquire()
                    between = SafeFetch(ref, sv.chrom, refPos, svs[svIndex+1].start)
                    sem.release()
                    refPos += len(between)
                    spliceSeqs.append(between)
        svIndex += 1

    svIndex = len(svs)-1
    sem.acquire()
    suffix = SafeFetch(ref, svs[svIndex].chrom, refPos,
                       min(refFai[svs[svIndex].chrom], refPos + args.flank))
    sem.release()
    spliceSeqs.append(suffix)

    dbSeq = ''.join(spliceSeqs)
    refChrom = svs[0].chrom
    refStart = max(0, svs[0].start - args.flank)
    refEnd = min(refFai[svs[svIndex].chrom], GetEnd(svs[svIndex]) + args.flank)

    nBases = 0
    if args.maxSize is not None and refEnd - refStart > args.maxSize:
        if args.genotypeVcf is None:
            results = "\n".join(["{}:{}-{}\t{}\t{}".format(sv.chrom, sv.start, sv.end, 0, 0) for sv in svs])
            return results

    sem.acquire()
    refSeq = SafeFetch(ref, refChrom, refStart, refEnd)
    sem.release()

    tempFileNames = []
    fSuffix = "." + str(refPos) + ".fasta"
    sSuffix = "." + str(refPos) + ".sam"
    rFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=fSuffix, delete=False, mode='w')
    tempFileNames.append(rFile.name)
    dbFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=fSuffix, delete=False, mode='w')
    tempFileNames.append(dbFile.name)
    readsFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=fSuffix, delete=False, mode='w')
    tempFileNames.append(readsFile.name)

    WriteSeq(dbFile, dbSeq, "db")
    WriteSeq(dbFile, refSeq, "re")
    WriteSeq(rFile, refSeq, "re")

    #
    # Now collect all of the sequences.
    #
    fetchStart = svs[0].start - args.flank
    fetchEnd = GetEnd(svs[-1]) + args.flank

    # just count one breakpoint if large event.
    if fetchEnd - fetchStart > 30000:
        sys.stderr.write("******Truncating fetch region {}\n".format(fetchEnd - fetchStart))
        fetchEnd = svs[0].start + args.flank
    sys.stdout.write("Fetching from region " + str(fetchEnd - fetchStart) + " " + str(svs[-1].svType) + "\n")

    dbFile.close()
    rFile.close()

    nBases = 0
    if args.genotypeVcf is not None:
        #
        # This uses the SNV vcf in the argument to partition reads, and genotype by phase tag.
        #
        print("about to start genotyping\n")
        dipSamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".dip"+sSuffix, delete=False, mode='w')
        tempFileNames.append(dipSamFile.name)
        dipSamFile.close()
        dipHandle = pysam.AlignmentFile(dipSamFile.name, 'wh', header=bamFiles[0].header)
        sem.acquire()
        for b in range(0, len(bamFiles)):
            for read in bamFiles[b].fetch(sv.chrom, fetchStart, fetchEnd+1):
                dipHandle.write(read)
        sem.release()

        #
        # Now partition the file by haplotype
        #
        hap0SamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".hap0"+sSuffix, delete=False, mode='w')
        hap1SamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".hap1"+sSuffix, delete=False, mode='w')
        unassignedSamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".unassigned"+sSuffix, delete=False, mode='w')
        regionVCF = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".vars.vcf", delete=False, mode='w')
        tempFileNames += [hap0SamFile.name, hap1SamFile.name, regionVCF.name, unassignedSamFile.name]

        vcfStart = max(0, svs[0].start - args.flank*10)
        vcfEnd = GetEnd(svs[-1]) + args.flank*10
        tabixCommand = "tabix -h {} {}:{}-{}".format(args.genotypeVcf, sv.chrom, vcfStart, vcfEnd)
        subprocess.call(tabixCommand.split(), stdout=regionVCF)
        regionVCF.close()

        partitionCommand = "{}/partitionByPhasedSNVs --vcf {} --sam {} --rgn {}:{}-{} --pad 10000 --h1 {} --h2 {} --ref {} --minGenotyped 1 --sample {} --unassigned {}".format(
            "/net/eichler/vol5/home/mchaisso/projects/pbgreedyphase",
            regionVCF.name, dipSamFile.name, sv.chrom, fetchStart, fetchEnd,
            hap0SamFile.name, hap1SamFile.name, args.ref, args.sample,
            unassignedSamFile.name)
        subprocess.call(partitionCommand.split())

        sams = [hap0SamFile.name, hap1SamFile.name, unassignedSamFile.name]
        haps = ["0", "1", "u"]
        sem.acquire()
        for i in range(0, 3):
            samHandle = pysam.AlignmentFile(sams[i], 'r')
            for read in samHandle.fetch():
                nBases += min(read.reference_end, fetchEnd) - max(fetchStart, read.reference_start)
                WriteSeq(readsFile, read.seq, read.query_name + "/" + haps[i])
        sem.release()
    else:
        sem.acquire()
        for b in range(0, len(bamFiles)):
            for read in bamFiles[b].fetch(sv.chrom, fetchStart, fetchEnd+1):
                nBases += min(read.reference_end, fetchEnd) - max(fetchStart, read.reference_start)
                WriteSeq(readsFile, read.seq, read.query_name)
        sem.release()

    readsFile.close()

    # rsFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".sam", delete=False, mode='w')
    dbsFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=sSuffix, delete=False, mode='w')

    commandOptions = " -maxMatch 25 -sdpMaxAnchorsPerPosition 5 -sdpTupleSize 10 -sam -bestn 1 -affineOpen 5 -affineExtend 5 -nproc 8 -out /dev/stdout -minAlignLength {} ".format(int(1.5*args.flank))
    dbCommand = "{} {} {} -preserveReadTitle -clipping soft ".format(args.blasr, readsFile.name, dbFile.name, dbsFile.name) + commandOptions
    tempFileNames.append(dbsFile.name)

    dn = open(os.devnull)
    regionLength = fetchEnd - fetchStart
    coverage = nBases/regionLength
    sys.stderr.write("coverage: " + str(coverage) + "\n")

    if args.genotypeVcf is not None:
        genotype = True
    else:
        genotype = False

    fs = 0
    rs = 0
    if coverage < args.maxCoverage:
        proc = subprocess.Popen(dbCommand.split(), stderr=dn, stdout=subprocess.PIPE)
        alnLines = codecs.latin_1_decode(proc.stdout.read())[0]
        proc.wait()
        dbsFile.close()
        # WaitOnFile(dbsFile.name)
        fs = os.path.getsize(dbsFile.name)
        rs = os.path.getsize(readsFile.name)
        cov = CountRefCoverage(alnLines, genotype)
    else:
        sys.stderr.write("Skipping event from coverage " + str(coverage) + "\n")
        cov = {"db": 0, "re": 0}

    sys.stderr.write(svs[0].svType + " " + str(cov) + "\n")

    if args.genotypeVcf is False and "db" in cov and cov["db"] < 500:
        print("spliced " + str(len(svs)))
        print("cov:")
        print(cov)
        print("{}:{}-{}".format(svs[0].chrom, svs[0].start, svs[0].end))

    cleanup = "/bin/rm " + " ".join(tempFileNames)
    if args.keep is False:
        subprocess.call(cleanup.split())
    else:
        print(cleanup)

    if args.genotypeVcf is None:
        dbCov = cov["db"]
        rCov = cov["re"]
        results = "\n".join(["{}:{}-{}\t{}\t{}".format(sv.chrom, sv.start, sv.end, dbCov, rCov) for sv in svs])
    else:
        results = "\n".join(["{}:{}-{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
            sv.chrom, sv.start, sv.end,
            cov["db"][0], cov["db"][1], cov["db"]['u'],
            cov["re"][0], cov["re"][1], cov["re"]['u']) for sv in svs])

    sys.stdout.write(results + "\n")
    return results
    time.sleep(sleeptime)

of = open(lockfile, "w")
of.close()

sys.stderr.write("making query\n")
query = '\n'.join(queries).rstrip() + "\n"
command = "jellyfish query --load --sequence=/dev/stdin {}".format(jf)
sys.stderr.write("Submitting query " + str(len(queries)) + "\n")
proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
proc_stdout = proc.communicate(input=bytes(query, 'utf-8'))
allLines = codecs.latin_1_decode(proc_stdout[0])[0]
jfRes = allLines.split("\n")
print("il has " + str(len(jfRes)))
#os.killpg(os.getpgid(proc.pid), signal.SIGTERM)  # Send the signal to all the process groups

if jfRes[-1] == '':
    del jfRes[-1]  # delete empty value if any

if any([
    len(jfRes) != exp_result_len,  # make sure that there are the correct number of HG JF results
    len(queries) % 2 != 0,         # make sure there are 2 queries for each variant
]):
    print("one of these is not like the other " + str(len(jfRes)) + " " + str(exp_result_len) + " " + str(len(queries)))
def from_rxstring(s):
    return codecs.latin_1_decode(s)[0]