예제 #1
0
    def test_interop_array(self):
        """Verify latin_1_decode accepts .NET buffer views (Array,
        ArraySegment, Memory, ReadOnlyMemory) and decodes them alike."""
        expected = ("abc", 3)
        raw = System.Array[System.Byte](b"abc")
        views = [
            raw,
            System.ArraySegment[System.Byte](raw),
            System.Memory[System.Byte](raw),
            System.ReadOnlyMemory[System.Byte](raw),
        ]
        for view in views:
            self.assertEqual(codecs.latin_1_decode(view), expected)
예제 #2
0
파일: widgets.py 프로젝트: madewokherd/urk
    def write(self, text, line_ending='\n', fg=None):
        """Append *text* (plus *line_ending*) to the buffer, applying any
        mIRC colour/format codes embedded in it.

        text -- unicode or byte string; bytes are decoded as UTF-8,
                falling back to Latin-1 (which maps every byte).
        fg -- optional foreground colour: a string (hex fragment) or a
              mIRC colour index.
        """
        if not isinstance(text, unicode):
            try:
                text = codecs.utf_8_decode(text)[0]
            except UnicodeDecodeError:
                # Was a bare "except:"; only decode failures should fall
                # through to Latin-1, which itself cannot fail.
                text = codecs.latin_1_decode(text)[0]
        tags, text = parse_mirc.parse_mirc(text)

        if fg:
            # Replaces the fragile "cond and a or b" idiom with a ternary.
            color = '#%s' % fg if isinstance(fg, basestring) else parse_mirc.get_mirc_color(fg)
            tags.append({'data': ("foreground", color), 'from': 0, 'to': len(text)})

        buffer = self.get_buffer()

        # Character count before insertion; tag offsets are relative to it.
        cc = buffer.get_char_count()

        buffer.insert_with_tags_by_name(
            buffer.get_end_iter(),
            text + line_ending,
            'indent'
            )

        for tag in tags:
            tag_name = str(tag['data'])

            # Create the Gtk tag on first use.
            if not tag_table.lookup(tag_name):
                buffer.create_tag(tag_name, **prop_to_gtk(self, tag['data']))

            buffer.apply_tag_by_name(
                tag_name,
                buffer.get_iter_at_offset(tag['from'] + cc),
                buffer.get_iter_at_offset(tag['to'] + cc)
                )
예제 #3
0
    def write(self, text, line_ending='\n', fg=None):
        """Append *text* (plus *line_ending*) to the buffer, applying any
        mIRC colour/format codes embedded in it.

        text -- unicode or byte string; bytes are decoded as UTF-8,
                falling back to Latin-1 (which maps every byte).
        fg -- optional foreground colour: a string (hex fragment) or a
              mIRC colour index.
        """
        if not isinstance(text, unicode):
            try:
                text = codecs.utf_8_decode(text)[0]
            except UnicodeDecodeError:
                # Was a bare "except:"; only decode failures should fall
                # through to Latin-1, which itself cannot fail.
                text = codecs.latin_1_decode(text)[0]
        tags, text = parse_mirc.parse_mirc(text)

        if fg:
            # Replaces the fragile "cond and a or b" idiom with a ternary.
            color = '#%s' % fg if isinstance(fg, basestring) else parse_mirc.get_mirc_color(fg)
            tags.append({'data': ("foreground", color), 'from': 0, 'to': len(text)})

        buffer = self.get_buffer()

        cc = buffer.get_char_count()

        # NOTE(review): tags are applied before the text is inserted, so the
        # offsets fall at (or past) the current end of the buffer -- confirm
        # this ordering is intentional.
        for tag in tags:
            tag_name = str(tag['data'])

            if not tag_table.lookup(tag_name):
                buffer.create_tag(tag_name, **prop_to_Gtk(self, tag['data']))

            buffer.apply_tag_by_name(
                tag_name, buffer.get_iter_at_offset(tag['from'] + cc),
                buffer.get_iter_at_offset(tag['to'] + cc))

        buffer.insert_with_tags(buffer.get_end_iter(), text + line_ending,
                                indent_tag)
예제 #4
0
 def _as_unicode(s):
     """Turn byte string or unicode string into a unicode string."""
     if isinstance(s, str):
         return s
     #Assume it is a bytes string
     #Note ISO-8859-1 aka Latin-1 preserves first 256 chars
     return codecs.latin_1_decode(s)[0]
예제 #5
0
파일: _py3k.py 프로젝트: Pfiver/RNA-Seqlyze
 def _as_unicode(s):
     """Turn byte string or unicode string into a unicode string."""
     if isinstance(s, str):
         return s
     #Assume it is a bytes string
     #Note ISO-8859-1 aka Latin-1 preserves first 256 chars
     return codecs.latin_1_decode(s)[0]
예제 #6
0
    def _read_json(self, url):
        """Perform an HTTP GET on *url* and decode the response as JSON.

        Raises CrabError on a non-200 status, an HTTP or socket failure,
        or a response that is not valid JSON.
        """

        # Initialised up front: if _get_conn() raises, the finally block
        # below previously hit a NameError on the unassigned name.
        conn = None

        try:
            try:
                conn = self._get_conn()
                conn.request('GET', url)

                res = conn.getresponse()

                if res.status != 200:
                    raise CrabError('server error: ' + self._read_error(res))

                # Latin-1 with 'replace' cannot fail, so a bad body
                # surfaces as a ValueError from json.loads below.
                return json.loads(latin_1_decode(res.read(), 'replace')[0])

            # sys.exc_info() keeps these handlers valid under both the
            # Python 2 and Python 3 "except" syntaxes.
            except HTTPException:
                err = sys.exc_info()[1]
                raise CrabError('HTTP error: ' + str(err))

            except socket.error:
                err = sys.exc_info()[1]
                raise CrabError('socket error: ' + str(err))

            except ValueError:
                err = sys.exc_info()[1]
                raise CrabError('did not understand response: ' + str(err))

        finally:
            if conn is not None:
                conn.close()
예제 #7
0
파일: __init__.py 프로젝트: grahambell/crab
    def _read_json(self, url):
        """Perform an HTTP GET on *url* and decode the response as JSON.

        Raises CrabError on a non-200 status, an HTTP or socket failure,
        or a response that is not valid JSON.
        """

        # Initialised up front: if _get_conn() raises, the finally block
        # below previously hit a NameError on the unassigned name.
        conn = None

        try:
            try:
                conn = self._get_conn()
                conn.request('GET', url)

                res = conn.getresponse()

                if res.status != 200:
                    raise CrabError('server error: ' + self._read_error(res))

                # Latin-1 with 'replace' cannot fail, so a bad body
                # surfaces as a ValueError from json.loads below.
                return json.loads(latin_1_decode(res.read(), 'replace')[0])

            # sys.exc_info() keeps these handlers valid under both the
            # Python 2 and Python 3 "except" syntaxes.
            except HTTPException:
                err = sys.exc_info()[1]
                raise CrabError('HTTP error: ' + str(err))

            except socket.error:
                err = sys.exc_info()[1]
                raise CrabError('socket error: ' + str(err))

            except ValueError:
                err = sys.exc_info()[1]
                raise CrabError('did not understand response: ' + str(err))

        finally:
            if conn is not None:
                conn.close()
예제 #8
0
def name2cp(k):
    """Map an HTML entity name to its Unicode code point."""
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    # Older Pythons: fall back to the raw entity-definitions table.
    value = htmlentitydefs.entitydefs[k]
    if value.startswith("&#") and value.endswith(";"):
        return int(value[2:-1])  # numeric reference: not in latin-1
    return ord(codecs.latin_1_decode(value)[0])
def name2cp(k):
    """Translate the HTML entity name *k* into a Unicode code point."""
    if k == 'apos':
        return ord("'")
    try:
        table = htmlentitydefs.name2codepoint  # Python >= 2.3
    except AttributeError:
        pass
    else:
        return table[k]
    entity = htmlentitydefs.entitydefs[k]
    if entity.startswith("&#") and entity.endswith(";"):
        return int(entity[2:-1])  # numeric reference: not in latin-1
    return ord(codecs.latin_1_decode(entity)[0])
예제 #10
0
    def _read_data(cls):
        """
        Read imaging photometry data from the file.

        Loads "data/phot.json" from the package data and fills in three
        class-level tables: _sky (filter -> SkyInfo), _extinction
        (filter -> value) and _info (instrument -> InstrumentInfo, kept
        in display order).

        Raises UKIRTITCError if a required section is missing or a
        filter / instrument name is not recognised.
        """

        # Maps this class's instrument "enum" values to the names used in
        # the data file, and fixes the display ordering of _info.
        instrument_names = [
            (cls.UFTI, 'UFTI'),
            (cls.UIST, 'UIST'),
            (cls.WFCAM, 'WFCAM'),
        ]

        # Latin-1 decoding accepts any byte sequence, so a failure here
        # comes from the JSON parser rather than the decode step.
        data = json.loads(
            latin_1_decode(get_data('ukirt_itc', 'data/phot.json'))[0])

        # Process sky data.
        data_sky = data.get('sky')
        if data_sky is None:
            raise UKIRTITCError('Data file did not contain "sky" section')
        cls._sky = {}
        for (filter_, values) in data_sky.items():
            if filter_ not in cls.FILTERS:
                raise UKIRTITCError(
                    'Sky filter "{0}" not recognised'.format(filter_))
            # Each sky entry is a positional argument list for SkyInfo.
            cls._sky[filter_] = SkyInfo(*values)

        # Process extinction data.
        data_ext = data.get('extinction')
        if data_ext is None:
            raise UKIRTITCError(
                'Data file did not contain "extinction" section')
        cls._extinction = {}
        for (filter_, value) in data_ext.items():
            if filter_ not in cls.FILTERS:
                raise UKIRTITCError(
                    'Extinction filter "{0}" not recognised'.format(filter_))
            cls._extinction[filter_] = value

        # Process instrument data.
        data_instruments = data.get('instrument')
        if data_instruments is None:
            raise UKIRTITCError(
                'Data file did not contain "instrument" section')
        cls._info = OrderedDict()
        for (instrument, name) in instrument_names:
            data_instrument = data_instruments.get(name)
            if data_instrument is None:
                raise UKIRTITCError('Could not find instrument information '
                                    'for "{0}"'.format(name))

            info_obj = InstrumentInfo(name=name, **data_instrument)

            # Validate the instrument's zeropoint filters against the
            # known filter set before storing the record.
            for filter_ in info_obj.zeropoint:
                if filter_ not in cls.FILTERS:
                    raise UKIRTITCError(
                        'Instrument "{0}" filter "{1}" not recognised'.format(
                            name, filter_))

            cls._info[instrument] = info_obj
def WaitOnFile(fn):
    """Block until no process holds *fn* open, as reported by lsof.

    Repeatedly runs "lsof fn" and returns once it produces no output
    (i.e. no process has the file open).
    """
    import time  # local import keeps the module's import surface unchanged

    cmd = ["lsof", fn]
    while True:
        res = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # communicate() drains stdout and reaps the child, fixing the
        # previous leak of the pipe (read + wait without close).
        out, _ = res.communicate()
        if codecs.latin_1_decode(out)[0] == '':
            return
        sys.stderr.write("Waiting on temporary file.\n")
        # Brief pause so the retry loop does not spin at 100% CPU.
        time.sleep(0.5)
예제 #12
0
파일: html2txt.py 프로젝트: skeptycal/bin
def name2cp(k):
    """Translate a common character entity name into a Unicode code point."""
    if k == "apos":
        return ord("'")
    if not hasattr(htmlentitydefs, "name2codepoint"):  # pre-Python 2.3
        entity = htmlentitydefs.entitydefs[k]
        if entity.startswith("&#") and entity.endswith(";"):
            # Numeric character reference: not representable in latin-1.
            return int(entity[2:-1])
        return ord(codecs.latin_1_decode(entity)[0])
    return htmlentitydefs.name2codepoint[k]
예제 #13
0
def strip_text(text):
    """Normalise *text* to single-spaced ASCII with markup and entities removed."""
    decoded = latin_1_decode(text)[0]
    # Decompose accented characters, then drop everything non-ASCII.
    text = normalize('NFD', decoded).encode('ascii', 'ignore')

    text = re.sub('&mdash+;', ' ', text)  # em-dash entities become spaces
    text = pte.replace_entities(text)
    text = re.sub('<[^>]*>', ' ', text)  # remove HTML tags
    return re.sub('\s+', ' ', text)  # collapse whitespace runs
예제 #14
0
def check_file(path, line):
    """Test whether the file is the expected ``.vcf.gz`` file
    """
    raw = path.read(mode="rb")
    # gzip magic number is 0x1F 0x8B.
    assert raw[0] == 0x1F
    assert raw[1] == 0x8B
    # Decompress and compare against the known header plus record line.
    result = codecs.latin_1_decode(gzip.decompress(raw))[0]
    expected = MEDIUM_HEADER + "20\t100\t.\tC\tT\t.\t.\t.\tGT\t0/1\t0/0\t1/1\n"
    assert expected == result
예제 #15
0
def strip_text(text):
    """Reduce *text* to plain single-spaced ASCII, free of tags and entities."""
    text = latin_1_decode(text)[0]
    text = normalize('NFD', text).encode('ascii', 'ignore')

    # Entity clean-up: em-dashes first, then the general entity table.
    text = re.sub('&mdash+;', ' ', text)
    text = pte.replace_entities(text)
    # Strip markup, then squeeze every whitespace run to one space.
    text = re.sub('<[^>]*>', ' ', text)
    text = re.sub('\s+', ' ', text)

    return text
예제 #16
0
파일: bgzf.py 프로젝트: xm1490/biopython
def _load_bgzf_block(handle, text_mode=False):
    """Load the next BGZF block of compressed data (PRIVATE).

    Returns a tuple (block size and data), or at end of file
    will raise StopIteration.

    When text_mode is true the data is returned as a string via Latin-1
    decoding (which preserves all 256 byte values); otherwise as bytes.
    """
    # A BGZF block is a gzip member whose "extra" header field carries a
    # "BC" subfield holding the total block size (see the SAM/BAM spec).
    magic = handle.read(4)
    if not magic:
        # End of file - should we signal this differently now?
        # See https://www.python.org/dev/peps/pep-0479/
        raise StopIteration
    if magic != _bgzf_magic:
        raise ValueError(r"A BGZF (e.g. a BAM file) block should start with "
                         r"%r, not %r; handle.tell() now says %r" %
                         (_bgzf_magic, magic, handle.tell()))
    # Remaining fixed gzip header fields: MTIME (4 bytes), XFL (1), OS (1),
    # then XLEN, the total length of the extra subfields.
    gzip_mod_time, gzip_extra_flags, gzip_os, extra_len = struct.unpack(
        "<LBBH", handle.read(8))

    block_size = None
    x_len = 0
    # Walk the extra subfields looking for the mandatory "BC" entry;
    # each subfield is: id (2 bytes) + length (2 bytes) + payload.
    while x_len < extra_len:
        subfield_id = handle.read(2)
        subfield_len = struct.unpack("<H", handle.read(2))[0]  # uint16_t
        subfield_data = handle.read(subfield_len)
        x_len += subfield_len + 4
        if subfield_id == _bytes_BC:
            assert subfield_len == 2, "Wrong BC payload length"
            assert block_size is None, "Two BC subfields?"
            # BC stores (total block size - 1) as a uint16.
            block_size = struct.unpack("<H", subfield_data)[0] + 1  # uint16_t
    assert x_len == extra_len, (x_len, extra_len)
    assert block_size is not None, "Missing BC, this isn't a BGZF file!"
    # Now comes the compressed data, CRC, and length of uncompressed data.
    # 19 = the 12 header bytes read above + 4 (CRC) + 4 (ISIZE) - 1.
    deflate_size = block_size - 1 - extra_len - 19
    d = zlib.decompressobj(-15)  # Negative window size means no headers
    data = d.decompress(handle.read(deflate_size)) + d.flush()
    expected_crc = handle.read(4)
    expected_size = struct.unpack("<I", handle.read(4))[0]
    if expected_size != len(data):
        raise RuntimeError("Decompressed to %i, not %i" %
                           (len(data), expected_size))
    # Should cope with a mix of Python platforms...
    # (older interpreters returned a signed crc32; keep the bit pattern)
    crc = zlib.crc32(data)
    if crc < 0:
        crc = struct.pack("<i", crc)
    else:
        crc = struct.pack("<I", crc)
    if expected_crc != crc:
        raise RuntimeError("CRC is %s, not %s" % (crc, expected_crc))
    if text_mode:
        # Note ISO-8859-1 aka Latin-1 preserves first 256 chars
        return block_size, codecs.latin_1_decode(data)[0]
    else:
        return block_size, data
예제 #17
0
 def get_entitydefs():
     """Return a mapping of HTML entity name -> Unicode code point."""
     from codecs import latin_1_decode
     try:
         htmlentitydefs.name2codepoint
     except AttributeError:
         # Old Python: build the table by hand from entitydefs, whose
         # values are either single latin-1 chars or "&#NNN;" references.
         entitydefs = {}
         for name, char in htmlentitydefs.entitydefs.items():
             uc = latin_1_decode(char)[0]
             if uc.startswith("&#") and uc.endswith(";"):
                 uc = unescape_charref(uc[2:-1], None)
             entitydefs[name] = ord(uc)
     else:
         entitydefs = htmlentitydefs.name2codepoint
     return entitydefs
예제 #18
0
 def get_entitydefs():
     """Build the entity-name -> code-point table, portably."""
     from codecs import latin_1_decode
     if hasattr(htmlentitydefs, "name2codepoint"):
         return htmlentitydefs.name2codepoint
     # Pre-2.3 fallback: derive code points from the raw entity strings.
     table = {}
     for name, char in htmlentitydefs.entitydefs.items():
         uc = latin_1_decode(char)[0]
         if uc.startswith("&#") and uc.endswith(";"):
             uc = unescape_charref(uc[2:-1], None)
         table[name] = ord(uc)
     return table
예제 #19
0
파일: __init__.py 프로젝트: obforfair/crab
    def _read_json(self):
        """Decode the HTTP PUT body as JSON and return the resulting object.

        (No matching _write_json is needed: handlers simply hand
        json.dumps(...) back to CherryPy as the response.)

        Raises HTTPError(400) when the body is not valid JSON.
        """

        # Latin-1 with 'replace' never raises, so only json.loads can fail.
        message = latin_1_decode(cherrypy.request.body.read(), 'replace')[0]

        try:
            parsed = json.loads(message)
        except ValueError:
            cherrypy.log.error('CrabError: Failed to read JSON: ' + message)
            raise HTTPError(400, message='Did not understand JSON')
        return parsed
예제 #20
0
    def _read_json(self):
        """Parse the HTTP PUT body as JSON and return the Python object.

        A write-side counterpart is unnecessary: handlers return
        json.dumps(...) directly to CherryPy.

        Raises HTTPError(400) when the body cannot be parsed.
        """

        body = cherrypy.request.body.read()
        # 'replace' makes the Latin-1 decode total; parse errors are JSON's.
        message = latin_1_decode(body, 'replace')[0]

        try:
            return json.loads(message)
        except ValueError:
            cherrypy.log.error('CrabError: Failed to read JSON: ' + message)
            raise HTTPError(400, message='Did not understand JSON')
예제 #21
0
    def _write_json(self, url, obj, read=False):
        """Serialise *obj* as JSON and send it with an HTTP PUT to *url*.

        If *read* is true, also decode the response as JSON and return it
        (an empty body yields {}); otherwise return None.

        Raises CrabError on a non-200 status, an HTTP or socket failure,
        or an unparseable response.
        """

        # Initialised up front: if _get_conn() raises, the finally block
        # below previously hit a NameError on the unassigned name.
        conn = None

        try:
            try:
                conn = self._get_conn()
                conn.request('PUT', url, json.dumps(obj))

                res = conn.getresponse()

                if res.status != 200:
                    raise CrabError('server error: ' + self._read_error(res))

                if read:
                    response = latin_1_decode(res.read(), 'replace')[0]

                    # Check we got a response before attempting to decode
                    # it as JSON.  (Some messages did not have responses
                    # for previous server versions.)
                    if response:
                        return json.loads(response)
                    else:
                        return {}

            # sys.exc_info() keeps these handlers valid under both the
            # Python 2 and Python 3 "except" syntaxes.
            except HTTPException:
                err = sys.exc_info()[1]
                raise CrabError('HTTP error: ' + str(err))

            except socket.error:
                err = sys.exc_info()[1]
                raise CrabError('socket error: ' + str(err))

            except ValueError:
                err = sys.exc_info()[1]
                raise CrabError('did not understand response: ' + str(err))

        finally:
            if conn is not None:
                conn.close()
예제 #22
0
파일: __init__.py 프로젝트: namely/crab
    def _write_json(self, url, obj, read=False):
        """Serialise *obj* as JSON and send it with an HTTP PUT to *url*.

        If *read* is true, also decode the response as JSON and return it
        (an empty body yields {}); otherwise return None.

        Raises CrabError on a non-200 status, an HTTP or socket failure,
        or an unparseable response.
        """

        # Initialised up front: if _get_conn() raises, the finally block
        # below previously hit a NameError on the unassigned name.
        conn = None

        try:
            try:
                conn = self._get_conn()
                conn.request('PUT', url, json.dumps(obj))

                res = conn.getresponse()

                if res.status != 200:
                    raise CrabError('server error: ' + self._read_error(res))

                if read:
                    response = latin_1_decode(res.read(), 'replace')[0]

                    # Check we got a response before attempting to decode
                    # it as JSON.  (Some messages did not have responses
                    # for previous server versions.)
                    if response:
                        return json.loads(response)
                    else:
                        return {}

            # sys.exc_info() keeps these handlers valid under both the
            # Python 2 and Python 3 "except" syntaxes.
            except HTTPException:
                err = sys.exc_info()[1]
                raise CrabError('HTTP error: ' + str(err))

            except socket.error:
                err = sys.exc_info()[1]
                raise CrabError('socket error: ' + str(err))

            except ValueError:
                err = sys.exc_info()[1]
                raise CrabError('did not understand response: ' + str(err))

        finally:
            if conn is not None:
                conn.close()
예제 #23
0
파일: __init__.py 프로젝트: namely/crab
    def _read_error(self, res):
        """Determine the error message to show based on an
        unsuccessful HTTP response.

        Currently use the HTTP status phrase or the first
        paragraph of the body, if found with a regular expression."""

        message = res.reason

        try:
            body = latin_1_decode(res.read(), 'replace')[0]
            match = re.search('<p>([^<]*)', body)
            if match:
                message = match.group(1)
        except:
            pass

        return message
예제 #24
0
def file_write(file_name):
    """Upload *file_name* (relative to the current path) to the name server.

    Returns 1 on success, -3 on any failure (missing local file, HTTP 500,
    or a 404/500 reply code from the server).
    """
    full_path = base_dir + get_path() + file_name
    if not os.path.exists(full_path):
        print("File", file_name, "Not Found")
        return -3
    # Close the handle promptly instead of leaking it until GC.
    with open(full_path, 'rb') as fh:
        cont = codecs.latin_1_decode(fh.read())
    # Pass the payload dict directly; the previous
    # json.loads(json.dumps(...)) round-trip was redundant.
    result = requests.post(ns_ip + '/writeFile',
                           json={'path': get_path() + file_name,
                                 'cont': cont[0]})

    if result.status_code == 500:
        return -3
    # Parse the body once instead of once per comparison.
    resp = result.json()['resp']
    if resp == 404:
        print("Not Found")
        return -3
    if resp == 500:
        print("Server error")
        return -3
    return 1
예제 #25
0
    def _read_error(self, res):
        """Determine the error message to show based on an
        unsuccessful HTTP response.

        Currently use the HTTP status phrase or the first
        paragraph of the body, if found with a regular expression."""

        message = res.reason

        try:
            body = latin_1_decode(res.read(), 'replace')[0]
            match = re.search('<p>([^<]*)', body)
            if match:
                message = match.group(1)
        except:
            pass

        return message
예제 #26
0
def internet_decode(input, errors='strict', final=False):
    """The core decoding function"""
    # Returns a (text, bytes_consumed) pair like the codecs.*_decode
    # helpers it wraps.  Tries utf-8, then cp1252, then latin-1.
    try:
        # First try utf-8. This should be the usual case by far.
        return codecs.utf_8_decode(input, errors, final)
    except UnicodeDecodeError:
        try:
            # If that fails, try windows-1252 (aka cp1252), which defines more characters than latin1,
            # but will fail for five particular bytes: 0x81, 0x8D, 0x8F, 0x90, 0x9D
            return codecs.charmap_decode(input, errors, encodings.cp1252.decoding_table)
        except UnicodeDecodeError:
            # and finally, try latin-1, which never fails, but defines 27 less characters than cp1252.
            return codecs.latin_1_decode(input, errors)
    except UnicodeEncodeError:
        # Only the outer utf_8_decode call can raise this; Python 2
        # implicitly *encodes* a unicode argument before decoding it.
        # Was that thing already unicode? Then it's already decoded.
        if isinstance(input, unicode):
            return (input, len(input))
        else:
            raise
예제 #27
0
def internet_decode(input, errors='strict', final=False):
    """The core decoding function"""
    # Returns a (text, bytes_consumed) pair like the codecs.*_decode
    # helpers it wraps.  Tries utf-8, then cp1252, then latin-1.
    try:
        # First try utf-8. This should be the usual case by far.
        return codecs.utf_8_decode(input, errors, final)
    except UnicodeDecodeError:
        try:
            # If that fails, try windows-1252 (aka cp1252), which defines more characters than latin1,
            # but will fail for five particular bytes: 0x81, 0x8D, 0x8F, 0x90, 0x9D
            return codecs.charmap_decode(input, errors,
                                         encodings.cp1252.decoding_table)
        except UnicodeDecodeError:
            # and finally, try latin-1, which never fails, but defines 27 less characters than cp1252.
            return codecs.latin_1_decode(input, errors)
    except UnicodeEncodeError:
        # Only the outer utf_8_decode call can raise this; Python 2
        # implicitly *encodes* a unicode argument before decoding it.
        # Was that thing already unicode? Then it's already decoded.
        if isinstance(input, unicode):
            return (input, len(input))
        else:
            raise
예제 #28
0
    def test_codecs_builtins(self):
        s = "abc"

        encoded = codecs.utf_8_encode(s)
        self.assertEqual(s, codecs.utf_8_decode(encoded[0])[0])

        encoded = codecs.utf_7_encode(s)
        self.assertEqual(s, codecs.utf_7_decode(encoded[0])[0])

        encoded = codecs.utf_16_encode(s)
        self.assertEqual(s, codecs.utf_16_decode(encoded[0])[0])

        encoded = codecs.utf_16_le_encode(s)
        self.assertEqual(s, codecs.utf_16_le_decode(encoded[0])[0])

        encoded = codecs.utf_16_be_encode(s)
        self.assertEqual(s, codecs.utf_16_be_decode(encoded[0])[0])

        encoded = codecs.utf_32_encode(s)
        self.assertEqual(s, codecs.utf_32_decode(encoded[0])[0])

        encoded = codecs.utf_32_le_encode(s)
        self.assertEqual(s, codecs.utf_32_le_decode(encoded[0])[0])

        encoded = codecs.utf_32_be_encode(s)
        self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0])

        encoded = codecs.utf_32_be_encode(s)
        self.assertEqual(s, codecs.utf_32_be_decode(encoded[0])[0])

        encoded = codecs.raw_unicode_escape_encode(s)
        self.assertEqual(s, codecs.raw_unicode_escape_decode(encoded[0])[0])

        encoded = codecs.unicode_escape_encode(s)
        self.assertEqual(s, codecs.unicode_escape_decode(encoded[0])[0])

        encoded = codecs.latin_1_encode(s)
        self.assertEqual(s, codecs.latin_1_decode(encoded[0])[0])

        encoded = codecs.ascii_encode(s)
        self.assertEqual(s, codecs.ascii_decode(encoded[0])[0])
    def _read_receiver_info(cls):
        """
        Read receiver information from the "receiver_info.json" file
        and store it in the class's "_info" attribute.

        Should not be called if "_info" has already been set up.
        """

        # NOTE(review): this method assigns into cls._info without creating
        # it here, so _info is presumably initialised elsewhere (e.g. in the
        # class body) -- confirm before reuse.

        # List specifying how to map the receiver names to the "enum" values
        # used by this class.  (And the ordering in which to display them.)
        receiver_names = [
            (cls.A3, 'RxA3'),
            (cls.HARP, 'HARP'),
            (cls.WD, 'RxWD'),
        ]

        # Latin-1 decoding is total, so failures here come from the JSON
        # parser rather than the decode step.
        receiver_data = json.loads(latin_1_decode(
            get_data('jcmt_itc_heterodyne', 'data/receiver_info.json'))[0])

        for (receiver, name) in receiver_names:
            receiver_info = receiver_data.get(name)
            if receiver_info is None:
                raise Exception('Could not find receiver information '
                                'for "{0}".'.format(name))

            info_obj = ReceiverInfo(name=name, **receiver_info)

            # Array receivers carry extra geometry that is normalised here.
            if info_obj.array is not None:
                array_obj = ArrayInfo(footprint=None, **info_obj.array)

                # Keep the JSON ordering of the spacing/jiggle tables and
                # derive footprint = size * cos(f_angle in degrees).
                # NOTE(review): footprint units depend on the data file --
                # verify against callers.
                array_obj = array_obj._replace(
                    scan_spacings=OrderedDict(array_obj.scan_spacings),
                    jiggle_patterns=OrderedDict(array_obj.jiggle_patterns),
                    footprint=(array_obj.size *
                               cos(radians(array_obj.f_angle))))

                info_obj = info_obj._replace(array=array_obj)

            cls._info[receiver] = info_obj
예제 #30
0
except OSError:
    pass

import sys, codecs

# Path helpers: join a name onto KEYDIR / GEOCACHE, or abbreviate a path
# back to its $KEYDIR form.
idp_ = lambda _: "%s/%s" % (KEYDIR, _)
uidp_ = lambda _: _.replace(KEYDIR, "$KEYDIR")

gcp_ = lambda _: "%s/%s" % (os.environ["GEOCACHE"], _)

# Text/bytes helpers that behave consistently across Python 2 and 3.
if sys.version_info.major > 2:
    # Python 3: str is already unicode; Latin-1 maps bytes 0-255 1:1,
    # so b_/d_ are total, lossless conversions.
    u_ = lambda _: _
    b_ = lambda _: codecs.latin_1_encode(_)[0]
    d_ = lambda _: codecs.latin_1_decode(_)[0]
else:
    # Python 2: str is already bytes.
    u_ = lambda _: unicode(_, "utf-8")
    b_ = lambda _: _
    d_ = lambda _: _
pass


def findfile(base, name, relative=True):
    """Walk *base* and return every path at which a file called *name* lives.

    relative -- when true (default), returned paths are relative to *base*;
                otherwise they include the *base* prefix.
    """
    paths = []
    for root, dirs, files in os.walk(base):
        if name in files:
            path = os.path.join(root, name)
            # Strip "base/" for the relative form.
            paths.append(path[len(base) + 1:] if relative else path)
    # Previously the accumulated list was never returned, so callers
    # always received None.
    return paths
예제 #31
0
def latin1_to_utf8(text):
    """Re-encode Latin-1 input as UTF-8 bytes when needed."""
    decoded = latin_1_decode(text)[0]
    return utf_8_encode(decoded)[0]
예제 #32
0
 def decode(self, input, final = False):
     """Decode *input* as Latin-1, honouring this codec's error policy."""
     text, _consumed = codecs.latin_1_decode(input, self.errors)
     return text
예제 #33
0
 def test_latin_1_decode(self):
     """Sanity-check latin_1_decode on a short ASCII input."""
     decoded, consumed = codecs.latin_1_decode("abc")
     self.assertEqual(decoded, u'abc')
     self.assertEqual(consumed, 3)
예제 #34
0
파일: park.py 프로젝트: 9r33n/park
 def un_b(x):
     """Decode byte string *x* to text via Latin-1 (total, lossless)."""
     text, _size = codecs.latin_1_decode(x)
     return text
예제 #35
0
	def zzz(x):
		"""Decode *x* from Latin-1 bytes to text."""
		decoded, _n = codecs.latin_1_decode(x)
		return decoded
예제 #36
0
파일: ttfonts.py 프로젝트: calston/tums
def latin1_to_utf8(text):
    "helper to convert when needed from latin input"
    # Two explicit steps: latin-1 bytes -> unicode -> UTF-8 bytes.
    as_unicode = latin_1_decode(text)[0]
    utf8_bytes, _length = utf_8_encode(as_unicode)
    return utf8_bytes
예제 #37
0
    def __init__(self):
        """Build SGML/HTML entity lookup tables.

        ``entity_code_dict`` maps entity names (e.g. ``"amp"``) to Unicode
        code points; ``entity_char_dict`` maps the same names to the
        corresponding one-character unicode strings.
        """
        self.entity_code_dict = {
            "amp": 0x0026,
            "pound": 0x00A3,
            "aacute": 0x00E1,
            "ampersand": 0x0026,
            "Aacute": 0x00C1,
            "acirc": 0x00E2,
            "Acirc": 0x00C2,
            "agrave": 0x00E0,
            "Agrave": 0x00C0,
            "aring": 0x00E5,
            "Aring": 0x00C5,
            "atilde": 0x00E3,
            "Atilde": 0x00C3,
            "auml": 0x00E4,
            "Auml": 0x00C4,
            "aelig": 0x00E6,
            "AElig": 0x00C6,
            "ccedil": 0x00E7,
            "Ccedil": 0x00C7,
            "eth": 0x00F0,
            "ETH": 0x00D0,
            "eacute": 0x00E9,
            "Eacute": 0x00C9,
            "ecirc": 0x00EA,
            "Ecirc": 0x00CA,
            "egrave": 0x00E8,
            "Egrave": 0x00C8,
            "euml": 0x00EB,
            "Euml": 0x00CB,
            "iacute": 0x00ED,
            "Iacute": 0x00CD,
            "icirc": 0x00EE,
            "Icirc": 0x00CE,
            "igrave": 0x00EC,
            "Igrave": 0x00CC,
            "iuml": 0x00EF,
            "Iuml": 0x00CF,
            "ntilde": 0x00F1,
            "Ntilde": 0x00D1,
            "oacute": 0x00F3,
            "Oacute": 0x00D3,
            "ocirc": 0x00F4,
            "Ocirc": 0x00D4,
            "ograve": 0x00F2,
            "Ograve": 0x00D2,
            "oslash": 0x00F8,
            "Oslash": 0x00D8,
            "otilde": 0x00F5,
            "Otilde": 0x00D5,
            "ouml": 0x00F6,
            "Ouml": 0x00D6,
            "szlig": 0x00DF,
            "thorn": 0x00FE,
            "THORN": 0x00DE,
            "uacute": 0x00FA,
            "Uacute": 0x00DA,
            "ucirc": 0x00FB,
            "Ucirc": 0x00DB,
            "ugrave": 0x00F9,
            "Ugrave": 0x00D9,
            "uuml": 0x00FC,
            "Uuml": 0x00DC,
            "yacute": 0x00FD,
            "Yacute": 0x00DD,
            "yuml": 0x00FF,
            "abreve": 0x0103,
            "Abreve": 0x0102,
            "amacr": 0x0101,
            "Amacr": 0x0100,
            "aogon": 0x0105,
            "Aogon": 0x0104,
            "cacute": 0x0107,
            "Cacute": 0x0106,
            "ccaron": 0x010D,
            "Ccaron": 0x010C,
            "ccirc": 0x0109,
            "Ccirc": 0x0108,
            "cdot": 0x010B,
            "Cdot": 0x010A,
            "dcaron": 0x010F,
            "Dcaron": 0x010E,
            "dstrok": 0x0111,
            "Dstrok": 0x0110,
            "ecaron": 0x011B,
            "Ecaron": 0x011A,
            "edot": 0x0117,
            "Edot": 0x0116,
            "emacr": 0x0113,
            "Emacr": 0x0112,
            "eogon": 0x0119,
            "Eogon": 0x0118,
            "gacute": 0x01F5,
            "gbreve": 0x011F,
            "Gbreve": 0x011E,
            "Gcedil": 0x0122,
            "gcirc": 0x011D,
            "Gcirc": 0x011C,
            "gdot": 0x0121,
            "Gdot": 0x0120,
            "hcirc": 0x0125,
            "Hcirc": 0x0124,
            "hstrok": 0x0127,
            "Hstrok": 0x0126,
            "Idot": 0x0130,
            "Imacr": 0x012A,
            "imacr": 0x012B,
            "ijlig": 0x0133,
            "IJlig": 0x0132,
            "inodot": 0x0131,
            "iogon": 0x012F,
            "Iogon": 0x012E,
            "itilde": 0x0129,
            "Itilde": 0x0128,
            "jcirc": 0x0135,
            "Jcirc": 0x0134,
            "kcedil": 0x0137,
            "Kcedil": 0x0136,
            "kgreen": 0x0138,
            "lacute": 0x013A,
            "Lacute": 0x0139,
            "lcaron": 0x013E,
            "Lcaron": 0x013D,
            "lcedil": 0x013C,
            "Lcedil": 0x013B,
            "lmidot": 0x0140,
            "Lmidot": 0x013F,
            "lstrok": 0x0142,
            "Lstrok": 0x0141,
            "nacute": 0x0144,
            "Nacute": 0x0143,
            "eng": 0x014B,
            "ENG": 0x014A,
            "napos": 0x0149,
            "ncaron": 0x0148,
            "Ncaron": 0x0147,
            "ncedil": 0x0146,
            "Ncedil": 0x0145,
            "odblac": 0x0151,
            "Odblac": 0x0150,
            "Omacr": 0x014C,
            "omacr": 0x014D,
            "oelig": 0x0153,
            "OElig": 0x0152,
            "racute": 0x0155,
            "Racute": 0x0154,
            "rcaron": 0x0159,
            "Rcaron": 0x0158,
            "rcedil": 0x0157,
            "Rcedil": 0x0156,
            "sacute": 0x015B,
            "Sacute": 0x015A,
            "scaron": 0x0161,
            "Scaron": 0x0160,
            "scedil": 0x015F,
            "Scedil": 0x015E,
            "scirc": 0x015D,
            "Scirc": 0x015C,
            "tcaron": 0x0165,
            "Tcaron": 0x0164,
            "tcedil": 0x0163,
            "Tcedil": 0x0162,
            "tstrok": 0x0167,
            "Tstrok": 0x0166,
            "ubreve": 0x016D,
            "Ubreve": 0x016C,
            "udblac": 0x0171,
            "Udblac": 0x0170,
            "umacr": 0x016B,
            "Umacr": 0x016A,
            "uogon": 0x0173,
            "Uogon": 0x0172,
            "uring": 0x016F,
            "Uring": 0x016E,
            "utilde": 0x0169,
            "Utilde": 0x0168,
            "wcirc": 0x0175,
            "Wcirc": 0x0174,
            "ycirc": 0x0177,
            "Ycirc": 0x0176,
            "Yuml": 0x0178,
            "zacute": 0x017A,
            "Zacute": 0x0179,
            "zcaron": 0x017E,
            "Zcaron": 0x017D,
            "zdot": 0x017C,
            "Zdot": 0x017B,
            "agr": 0x03B1,
            "Agr": 0x0391,
            "bgr": 0x03B2,
            "Bgr": 0x0392,
            "ggr": 0x03B3,
            "Ggr": 0x0393,
            "dgr": 0x03B4,
            "Dgr": 0x0394,
            "egr": 0x03B5,
            "Egr": 0x0395,
            "zgr": 0x03B6,
            "Zgr": 0x0396,
            "eegr": 0x03B7,
            "EEgr": 0x0397,
            "thgr": 0x03B8,
            "THgr": 0x0398,
            "igr": 0x03B9,
            "Igr": 0x0399,
            "kgr": 0x03BA,
            "Kgr": 0x039A,
            "lgr": 0x03BB,
            "Lgr": 0x039B,
            "mgr": 0x03BC,
            "Mgr": 0x039C,
            "ngr": 0x03BD,
            "Ngr": 0x039D,
            "xgr": 0x03BE,
            "Xgr": 0x039E,
            "ogr": 0x03BF,
            "Ogr": 0x039F,
            "pgr": 0x03C0,
            "Pgr": 0x03A0,
            "rgr": 0x03C1,
            "Rgr": 0x03A1,
            "sgr": 0x03C3,
            "Sgr": 0x03A3,
            "sfgr": 0x03C2,
            "tgr": 0x03C4,
            "Tgr": 0x03A4,
            "ugr": 0x03C5,
            "Ugr": 0x03A5,
            "phgr": 0x03C6,
            "PHgr": 0x03A6,
            "khgr": 0x03C7,
            "KHgr": 0x03A7,
            "psgr": 0x03C8,
            "PSgr": 0x03A8,
            "ohgr": 0x03C9,
            "OHgr": 0x03A9,
            "half": 0x00BD,
            "frac12": 0x00BD,
            "frac14": 0x00BC,
            "frac34": 0x00BE,
            "frac18": 0x215B,
            "frac38": 0x215C,
            "frac58": 0x215D,
            "frac78": 0x215E,
            "sup1": 0x00B9,
            "sup2": 0x00B2,
            "sup3": 0x00B3,
            "plus": 0x002B,
            "plusmn": 0x00B1,
            "equals": 0x003D,
            "gt": 0x003E,
            "divide": 0x00F7,
            "times": 0x00D7,
            "curren": 0x00A4,
            "pound": 0x00A3,
            "dollar": 0x0024,
            "cent": 0x00A2,
            "yen": 0x00A5,
            "num": 0x0023,
            "percnt": 0x0025,
            "ast": 0x2217,
            "commat": 0x0040,
            "lsqb": 0x005B,
            "bsol": 0x005C,
            "rsqb": 0x005D,
            "lcub": 0x007B,
            "horbar": 0x2015,
            "verbar": 0x007C,
            "rcub": 0x007D,
            "micro": 0x00B5,
            "ohm": 0x2126,
            "deg": 0x00B0,
            "ordm": 0x00BA,
            "ordf": 0x00AA,
            "sect": 0x00A7,
            "para": 0x00B6,
            "middot": 0x00B7,
            "larr": 0x2190,
            "rarr": 0x2192,
            "uarr": 0x2191,
            "darr": 0x2193,
            "copy": 0x00A9,
            "reg": 0x00AF,
            "trade": 0x2122,
            "brvbar": 0x00A6,
            "not": 0x00AC,
            "sung": 0x2669,
            "excl": 0x0021,
            "iexcl": 0x00A1,
            "quot": 0x0022,
            "apos": 0x0027,
            "lpar": 0x0028,
            "rpar": 0x0029,
            "comma": 0x002C,
            "lowbar": 0x005F,
            "hyphen": 0xE4F8,
            "period": 0x002E,
            "sol": 0x002F,
            "colon": 0x003A,
            "semi": 0x003B,
            "quest": 0x003F,
            "iquest": 0x00BF,
            "laquo": 0x00AB,
            "raquo": 0x00BB,
            "lsquo": 0x2018,
            "rsquo": 0x2019,
            "ldquo": 0x201C,
            "rdquo": 0x201D,
            "nbsp": 0x00A0,
            "shy": 0x00AD,
            "acute": 0x00B4,
            "breve": 0x02D8,
            "caron": 0x02C7,
            "cedil": 0x00B8,
            "circ": 0x2218,
            "dblac": 0x02DD,
            "die": 0x00A8,
            "dot": 0x02D9,
            "grave": 0x0060,
            "macr": 0x00AF,
            "ogon": 0x02DB,
            "ring": 0x02DA,
            "tilde": 0x007E,
            "uml": 0x00A8,
            "emsp": 0x2003,
            "ensp": 0x2002,
            "emsp13": 0x2004,
            "emsp14": 0x2005,
            "numsp": 0x2007,
            "puncsp": 0x2008,
            "thinsp": 0x2009,
            "hairsp": 0x200A,
            "mdash": 0x2014,
            "ndash": 0x2013,
            "dash": 0x2010,
            "blank": 0x2423,
            "hellip": 0x2026,
            "nldr": 0x2025,
            "frac13": 0x2153,
            "frac23": 0x2154,
            "frac15": 0x2155,
            "frac25": 0x2156,
            "frac35": 0x2157,
            "frac45": 0x2158,
            "frac16": 0x2159,
            "frac56": 0x215A,
            "incare": 0x2105,
            "block": 0x2588,
            "uhblk": 0x2580,
            "lhblk": 0x2584,
            "blk14": 0x2591,
            "blk12": 0x2592,
            "blk34": 0x2593,
            "marker": 0x25AE,
            "cir": 0x25CB,
            "squ": 0x25A1,
            "rect": 0x25AD,
            "utri": 0x25B5,
            "dtri": 0x25BF,
            "star": 0x22C6,
            "bull": 0x2022,
            "squf": 0x25AA,
            "utrif": 0x25B4,
            "dtrif": 0x25BE,
            "ltrif": 0x25C2,
            "rtrif": 0x25B8,
            "clubs": 0x2663,
            "diams": 0x2666,
            "hearts": 0x2665,
            "spades": 0x2660,
            "malt": 0x2720,
            "dagger": 0x2020,
            "Dagger": 0x2021,
            "check": 0x2713,
            "cross": 0x2717,
            "sharp": 0x266F,
            "flat": 0x266D,
            "male": 0x2642,
            "female": 0x2640,
            "phone": 0x260E,
            "telrec": 0x2315,
            "copysr": 0x2117,
            "caret": 0x2041,
            "lsquor": 0x201A,
            "ldquor": 0x201E,
            "fflig": 0xFB00,
            "filig": 0xFB01,
            "ffilig": 0xFB03,
            "ffllig": 0xFB04,
            "fllig": 0xFB02,
            "mldr": 0x2026,
            "rdquor": 0x201C,
            "rsquor": 0x2018,
            "vellip": 0x22EE,
            "hybull": 0x2043,
            "loz": 0x25CA,
            "lozf": 0x2726,
            "ltri": 0x25C3,
            "rtri": 0x25B9,
            "starf": 0x2605,
            "natur": 0x266E,
            "rx": 0x211E,
            "sext": 0x2736,
            "target": 0x2316,
            "dlcrop": 0x230D,
            "drcrop": 0x230C,
            "ulcrop": 0x230F,
            "urcrop": 0x230E,
            "boxh": 0x2500,
            "boxv": 0x2502,
            "boxur": 0x2514,
            "boxul": 0x2518,
            "boxdl": 0x2510,
            "boxdr": 0x250C,
            "boxvr": 0x251C,
            "boxhu": 0x2534,
            "boxvl": 0x2524,
            "boxhd": 0x252C,
            "boxvh": 0x253C,
            "boxvR": 0x255E,
            "boxhU": 0x2567,
            "boxvL": 0x2561,
            "boxhD": 0x2564,
            "boxvH": 0x256A,
            "boxH": 0x2550,
            "boxV": 0x2551,
            "boxUR": 0x2558,
            "boxUL": 0x255B,
            "boxDL": 0x2555,
            "boxDR": 0x2552,
            "boxVR": 0x255F,
            "boxHU": 0x2568,
            "boxVL": 0x2562,
            "boxHD": 0x2565,
            "boxVH": 0x256B,
            "boxVr": 0x2560,
            "boxHu": 0x2569,
            "boxVl": 0x2563,
            "boxHd": 0x2566,
            "boxVh": 0x256C,
            "boxuR": 0x2559,
            "boxUl": 0x255C,
            "boxdL": 0x2556,
            "boxDr": 0x2553,
            "boxUr": 0x255A,
            "boxuL": 0x255D,
            "boxDl": 0x2557,
            "boxdR": 0x2554
        }
        self.entity_char_dict = {}

        # Pick the code-point -> character function for this interpreter.
        # On py2 the original latin-1 decode of chr(code) (with a unichr
        # fallback on ValueError for code points > 255) is exactly
        # unichr(code) for every entry; on py3 chr() covers all code
        # points directly.  This also removes the original py2-only
        # ``except ValueError, UnicodeEncodeError:`` clause, which was a
        # bug: it caught only ValueError and bound the exception to the
        # name UnicodeEncodeError.
        try:
            _unichr = unichr  # Python 2
        except NameError:
            _unichr = chr  # Python 3
        # dict.items() works on both py2 (list) and py3 (view), unlike
        # the original py2-only iteritems().
        for ent, code in self.entity_code_dict.items():
            self.entity_char_dict[ent] = _unichr(code)
예제 #38
0
 def un_b(x):
     """Turn latin-1 encoded bytes back into text."""
     return codecs.latin_1_decode(x)[0]

class KVStore(object):
    """An abstract key-value interface with support for range iteration."""

    # Fix: the class body had been mangled onto a single line, juxtaposing
    # the docstring and this assignment -- a syntax error.  Reformatted to
    # a proper suite.  (py2-style abstract base; on py3 __metaclass__ is an
    # ordinary, inert class attribute.)
    __metaclass__ = abc.ABCMeta
예제 #39
0
파일: crypto.py 프로젝트: voussoir/mega.py
 def makestring(x):
     """Decode latin-1 bytes *x* to a text string."""
     text, _consumed = codecs.latin_1_decode(x)
     return text
예제 #40
0
 def test_latin_1_decode(self):
     #sanity
     new_str, num_processed = codecs.latin_1_decode(b"abc")
     self.assertEqual(new_str, 'abc')
     self.assertEqual(num_processed, 3)
예제 #41
0
파일: adduser.py 프로젝트: B-Rich/smart
	except Exception, e:
		univention.debug.debug(univention.debug.ADMIN, univention.debug.WARN, 'authentication error: %s' % str(e))
		try:
			lo, position=univention.admin.uldap.getMachineConnection()
		except Exception, e2:
			univention.debug.debug(univention.debug.ADMIN, univention.debug.WARN, 'authentication error: %s' % str(e2))
			out.append('authentication error: %s' % str(e))
			out.append('authentication error: %s' % str(e2))
			return out
			pass
	
	for i in range(0, len(args)):
		try:
			args[i]=codecs.utf_8_decode(args[i])[0]
		except:
			args[i]=codecs.latin_1_decode(args[i])[0]
	
	if len(args) == 1:
		if scope == 'machine':
			machine=args[0]
			if machine[-1] == '$':
				machine=machine[0:-1]
			if configRegistry.has_key('samba/defaultcontainer/computer') and configRegistry['samba/defaultcontainer/computer']:
				position.setDn(configRegistry['samba/defaultcontainer/computer'])
			else:
				position.setDn(univention.admin.config.getDefaultContainer(lo, 'computers/windows'))
		elif scope == 'group':
			group=args[0]
			if configRegistry.has_key('samba/defaultcontainer/group') and configRegistry['samba/defaultcontainer/group']:
				position.setDn(configRegistry['samba/defaultcontainer/group'])
			else:
예제 #42
0
import codecs
import re


# Codec helpers; "ignore" silently drops undecodable/unencodable sequences.
# (Converted from lambda assignments -- PEP 8 E731.)
def de_utf8(s):
	"""Decode UTF-8 bytes *s* to unicode, ignoring errors."""
	return codecs.utf_8_decode(s, "ignore")[0]

def en_utf8(s):
	"""Encode unicode *s* to UTF-8 bytes, ignoring errors."""
	return codecs.utf_8_encode(s, "ignore")[0]

def de_latin1(s):
	"""Decode latin-1 bytes *s* to unicode, ignoring errors."""
	return codecs.latin_1_decode(s, "ignore")[0]
class UnicodeProcessor:
	"""Try to do something senseful with unicode stuff.

	Fix: the methods previously referenced the *module-level* names
	re_unicodexml / repl_unicodexml (shadowing the attributes defined
	here), which worked only by accident; they now use self.
	"""
	re_unicodexml = re.compile(r"&#(\d{3,5});")

	def __init__(self, latin1 = False):
		# Replacement callback: numeric XML entity -> UTF-8 encoded char.
		# NOTE(review): unichr is py2-only; this snippet is py2 code.
		self.repl_unicodexml = lambda x: en_utf8(unichr(int(x.group(1))))
		if latin1:
			# Swap in the latin-1 aware processor for this instance.
			self.process = self.process_latin1

	def process_latin1(self, string):
		"""Re-encode latin-1 *string* as UTF-8, then expand &#NNN; entities."""
		string = de_latin1(string)
		string = en_utf8(string)
		return self.re_unicodexml.sub(self.repl_unicodexml, string)

	def process(self, string):
		"""Expand &#NNN; numeric XML entities in *string*."""
		return self.re_unicodexml.sub(self.repl_unicodexml, string)
	


# Numeric XML entity (&#NNN; with 3-5 digits) and its replacement hook.
re_unicodexml = re.compile(r"&#(\d{3,5});")

def repl_unicodexml(x):
	"""Match object -> UTF-8 bytes for the referenced code point."""
	return en_utf8(unichr(int(x.group(1))))
def to_unicode(string):
	"""Convert Unicode &#xxx; stuff and try to handle other stuff.

	Still doesn't work like it should.  Decoding latin_1 and encoding
def SpliceTestLine(svs):
    """Splice flanks + SV alt sequences for a group of SVs, align local reads
    against both the spliced ("db") and reference ("re") sequences with blasr,
    and return per-SV tab-separated coverage result lines.

    NOTE(review): relies on module-level globals (args, ref, refFai, sem,
    bamFiles, SafeFetch, WriteSeq, GetEnd, CountRefCoverage, pysam, tempfile,
    subprocess, os, sys, codecs) -- confirm against the full file.
    """
    svIndex = 0
    # Build the spliced alt sequence: left flank, then each SV's sequence
    # with the reference segments between consecutive SVs.
    for sv in svs:
        # Skip SVs on contigs absent from the reference index.
        if sv.chrom not in refFai:
            continue
        if svIndex == 0:
            # Left flank preceding the first SV.
            # NOTE(review): if the first sv is skipped by the chrom check
            # above, refPos/spliceSeqs are never initialized and later
            # uses would raise -- verify callers pre-filter.
            prefixStart = max(0, sv.start-args.flank)
            prefixEnd   = sv.start
            sem.acquire()
            gapPrefix   = SafeFetch(ref, sv.chrom, prefixStart, prefixEnd)
            sem.release()
            spliceSeqs  = [gapPrefix]
            refPos      = sv.start

        if args.mssm:
            # MSSM-format records are tuples: sv[5]=alt seq, sv[2]=end,
            # sv[1]=start, sv[0]=chrom (presumably -- TODO confirm).
            spliceSeqs.append(sv[5])

            refPos = sv[2]

            if svIndex < len(svs)-1:
                if refPos > svs[svIndex+1][1]:
                    continue
                sem.acquire()
                between = SafeFetch(ref, sv[0], refPos, svs[svIndex+1][1])
                sem.release()
                spliceSeqs.append(between)
        elif args.falcon:
            # only checking deletions
            refPos = sv[2]
            if svIndex < len(svs)-1:
                if refPos > svs[svIndex+1][1]:
                    continue
            
        else:
            if sv.svType == "insertion":
                svSeq = sv.seq
                # Very long insertions are trimmed to their two ends.
                if len(svSeq) > args.maxInsertion:
                    mid=args.maxInsertion/2
                    svSeq = sv.seq[0:mid] +sv.seq[-mid:]
                    sys.stderr.write("keeping center of insertion {}\n".format(len(sv.seq)))
                spliceSeqs.append(svSeq)
                refPos = sv.start
            else:
                # Deletions: skip the deleted reference interval.
                refPos = sv.end
        

            # Append the reference segment up to the next SV; overlapping
            # SVs are dropped.
            if svIndex < len(svs)-1:
                if refPos > svs[svIndex+1].start:
                    sys.stderr.write("Ignoring an sv {} {} {}\n".format(svs[svIndex+1].chrom, svs[svIndex+1].start, svs[svIndex+1].end))
                    svIndex+=1
                    continue
                else:
                    sem.acquire()
                    between = SafeFetch(ref, sv.chrom, refPos, svs[svIndex+1].start)
                    sem.release()
                    refPos += len(between)
                    spliceSeqs.append(between)
        svIndex +=1

    # Right flank after the last SV.
    svIndex = len(svs)-1
    sem.acquire()
    suffix = SafeFetch(ref, svs[svIndex].chrom, refPos, min(refFai[svs[svIndex].chrom], refPos + args.flank))
    sem.release()
    
    spliceSeqs.append(suffix)
    dbSeq= ''.join(spliceSeqs)
    refChrom = svs[0].chrom


    # Reference window covering the whole SV group plus flanks.
    refStart = max(0, svs[0].start - args.flank)
    refEnd   = min(refFai[svs[svIndex].chrom], GetEnd(svs[svIndex]) + args.flank)
    nBases = 0
    # Oversized events are reported with zero coverage instead of aligned.
    if args.maxSize is not None and refEnd - refStart > args.maxSize:
        if args.genotypeVcf is None:
            results="\n".join(["{}:{}-{}\t{}\t{}".format(sv.chrom,sv.start,sv.end,0,0) for sv in svs])
            return results
        
        
    sem.acquire()
    refSeq = SafeFetch(ref,refChrom, refStart, refEnd)
    sem.release()
    # Temp fasta/sam scratch files; tracked for cleanup at the end.
    tempFileNames = []
    fSuffix="."+str(refPos) + ".fasta"
    sSuffix="."+str(refPos) + ".sam"    
    rFile     = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=fSuffix, delete=False, mode='w')
    tempFileNames.append(rFile.name)
    dbFile    = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=fSuffix, delete=False, mode='w')
    tempFileNames.append(dbFile.name)
    readsFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=fSuffix, delete=False, mode='w')
    tempFileNames.append(readsFile.name)

    # "db" = spliced alt sequence, "re" = plain reference window.
    WriteSeq(dbFile, dbSeq, "db")
    WriteSeq(dbFile, refSeq, "re")
    WriteSeq(rFile, refSeq, "re")    
    #
    # Now collect all of the sequences.
    #
    fetchStart = svs[0].start - args.flank
    fetchEnd   = GetEnd(svs[-1]) + args.flank
    # just count one breakpoint if large event.
    if fetchEnd - fetchStart > 30000:
        sys.stderr.write("******Truncating fetch region {}\n".format(fetchEnd-fetchStart))
        fetchEnd = svs[0].start  + args.flank
    sys.stdout.write("Fetching from region " + str(fetchEnd-fetchStart) + " " + str(svs[-1].svType) + "\n")
    
    
    dbFile.close()
    rFile.close()
    nBases = 0
    if args.genotypeVcf is not None:
        #
        # This uses the SNV vcf in the argument to partition reads, and genotype by phase tag.
        #
        print("about to start genotyping\n")
        dipSamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".dip"+sSuffix, delete=False, mode='w')
        tempFileNames.append(dipSamFile.name)
        dipSamFile.close()
        dipHandle = pysam.AlignmentFile(dipSamFile.name, 'wh', header=bamFiles[0].header)
        
        # NOTE(review): `sv` here is the loop variable left over from the
        # splicing loop above (the last SV) -- confirm that is intended.
        sem.acquire()        
        for b in range(0,len(bamFiles)):
            for read in bamFiles[b].fetch(sv.chrom, fetchStart, fetchEnd+1):
                dipHandle.write(read)
        sem.release()
        #
        # Now partition the file by haplotype
        #
        hap0SamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".hap0"+sSuffix, delete=False, mode='w')
        hap1SamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".hap1"+sSuffix, delete=False, mode='w')
        unassignedSamFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".unassigned"+sSuffix, delete=False, mode='w')        
        regionVCF = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".vars.vcf", delete=False, mode='w')
        tempFileNames += [hap0SamFile.name, hap1SamFile.name, regionVCF.name, unassignedSamFile.name]
        
        # Wider window (10x flank) of phased SNVs for partitioning.
        vcfStart = max(0,svs[0].start - args.flank*10)
        vcfEnd   = GetEnd(svs[-1]) + args.flank*10

        tabixCommand = "tabix -h {} {}:{}-{}".format(args.genotypeVcf, sv.chrom, vcfStart, vcfEnd)
        subprocess.call(tabixCommand.split(), stdout=regionVCF)
        regionVCF.close()        
        partitionCommand = "{}/partitionByPhasedSNVs --vcf {} --sam {} --rgn {}:{}-{} --pad 10000 --h1 {} --h2 {} --ref {} --minGenotyped 1 --sample {} --unassigned {}".format("/net/eichler/vol5/home/mchaisso/projects/pbgreedyphase", regionVCF.name, dipSamFile.name, sv.chrom, fetchStart, fetchEnd, hap0SamFile.name, hap1SamFile.name, args.ref, args.sample, unassignedSamFile.name )
        subprocess.call(partitionCommand.split())

        # Re-read the partitioned reads, tagging names with /0, /1, /u.
        sams = [hap0SamFile.name, hap1SamFile.name, unassignedSamFile.name]
        haps = ["0", "1", "u"]
        sem.acquire()
        for i in range(0,3):

            samHandle = pysam.AlignmentFile(sams[i], 'r')
            for read in samHandle.fetch():
                nBases+=min(read.reference_end, fetchEnd) - max(fetchStart,read.reference_start)                    
                WriteSeq(readsFile, read.seq, read.query_name + "/" + haps[i])
        sem.release()
    else:
        # No genotyping: dump every overlapping read directly.
        sem.acquire()
        for b in range(0,len(bamFiles)):
            for read in bamFiles[b].fetch(sv.chrom, fetchStart, fetchEnd+1):
                nBases+=min(read.reference_end, fetchEnd) - max(fetchStart,read.reference_start)
                WriteSeq(readsFile, read.seq, read.query_name)
        sem.release()
    readsFile.close()
    
        
#    rsFile  = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=".sam", delete=False, mode='w')
    dbsFile = tempfile.NamedTemporaryFile(dir=args.tmpdir, suffix=sSuffix, delete=False, mode='w')
    
    commandOptions = " -maxMatch 25 -sdpMaxAnchorsPerPosition 5 -sdpTupleSize 10 -sam -bestn 1 -affineOpen 5 -affineExtend 5 -nproc 8 -out /dev/stdout -minAlignLength {} ".format(int(1.5*args.flank))

    # NOTE(review): four arguments but only three {} placeholders --
    # dbsFile.name is silently dropped and blasr writes to /dev/stdout
    # (consumed below); fs=getsize(dbsFile.name) is therefore always 0.
    dbCommand = "{} {} {} -preserveReadTitle -clipping soft ".format(args.blasr, readsFile.name, dbFile.name, dbsFile.name) + commandOptions
    tempFileNames.append(dbsFile.name)
    

    dn = open(os.devnull)
    regionLength = fetchEnd - fetchStart
    coverage = nBases/regionLength
    sys.stderr.write("coverage: " + str(coverage)+ "\n")

    if args.genotypeVcf is not None:
        genotype=True
    else:
        genotype=False
    fs=0
    rs=0
    # Only align when coverage is manageable; otherwise report zeros.
    if coverage < args.maxCoverage:
        proc=subprocess.Popen(dbCommand.split(),stderr=dn,stdout=subprocess.PIPE)
        alnLines=codecs.latin_1_decode(proc.stdout.read())[0]
        proc.wait()
        dbsFile.close()
#        WaitOnFile(dbsFile.name)

        fs=os.path.getsize(dbsFile.name)
        rs=os.path.getsize(readsFile.name)
        cov = CountRefCoverage(alnLines, genotype)
    else:
        sys.stderr.write("Skipping event from coverage " + str(coverage) + "\n")
        cov = { "db": 0, "re": 0}
    sys.stderr.write(svs[0].svType+ " " + str(cov) + "\n")
    
    # NOTE(review): args.genotypeVcf is None or a path, never False, so
    # this debug branch appears to be dead -- `is None` was likely meant.
    if args.genotypeVcf is False and "db" in cov and cov["db"] < 500:
        print( "spliced " + str(len(svs))        )
        print( "cov:")
        print( cov)
        print( "{}:{}-{}".format(svs[0].chrom,svs[0].start,svs[0].end))


    # Remove the scratch files unless --keep was requested.
    cleanup = "/bin/rm " + " ".join(tempFileNames)

    if args.keep is False:
        subprocess.call(cleanup.split())
    else:
        print(cleanup)

    if args.genotypeVcf is None:
        dbCov = cov["db"]
        rCov  = cov["re"]
    
        results="\n".join(["{}:{}-{}\t{}\t{}".format(sv.chrom,sv.start,sv.end,dbCov,rCov ) for sv in svs])
    else:
        # Genotyped mode: per-haplotype (0, 1, unassigned) counts.
        results="\n".join(["{}:{}-{}\t{}\t{}\t{}\t{}\t{}\t{}".format(sv.chrom,sv.start,sv.end,\
                                                                                     cov["db"][0],cov["db"][1],cov["db"]['u'],\
                                                                                     cov["re"][0],cov["re"][1],cov["re"]['u']\
                                                                     ) for sv in svs])
        sys.stdout.write(results + "\n")
    return results
예제 #44
0
    def __init__(self):
        self.entity_code_dict = {
            "amp": 0x0026,
            "pound": 0x00A3,
            "aacute": 0x00E1,
            "ampersand": 0x0026,
            "Aacute": 0x00C1,
            "acirc": 0x00E2,
            "Acirc": 0x00C2,
            "agrave": 0x00E0,
            "Agrave": 0x00C0,
            "aring": 0x00E5,
            "Aring": 0x00C5,
            "atilde": 0x00E3,
            "Atilde": 0x00C3,
            "auml": 0x00E4,
            "Auml": 0x00C4,
            "aelig": 0x00E6,
            "AElig": 0x00C6,
            "ccedil": 0x00E7,
            "Ccedil": 0x00C7,
            "eth": 0x00F0,
            "ETH": 0x00D0,
            "eacute": 0x00E9,
            "Eacute": 0x00C9,
            "ecirc": 0x00EA,
            "Ecirc": 0x00CA,
            "egrave": 0x00E8,
            "Egrave": 0x00C8,
            "euml": 0x00EB,
            "Euml": 0x00CB,
            "iacute": 0x00ED,
            "Iacute": 0x00CD,
            "icirc": 0x00EE,
            "Icirc": 0x00CE,
            "igrave": 0x00EC,
            "Igrave": 0x00CC,
            "iuml": 0x00EF,
            "Iuml": 0x00CF,
            "ntilde": 0x00F1,
            "Ntilde": 0x00D1,
            "oacute": 0x00F3,
            "Oacute": 0x00D3,
            "ocirc": 0x00F4,
            "Ocirc": 0x00D4,
            "ograve": 0x00F2,
            "Ograve": 0x00D2,
            "oslash": 0x00F8,
            "Oslash": 0x00D8,
            "otilde": 0x00F5,
            "Otilde": 0x00D5,
            "ouml": 0x00F6,
            "Ouml": 0x00D6,
            "szlig": 0x00DF,
            "thorn": 0x00FE,
            "THORN": 0x00DE,
            "uacute": 0x00FA,
            "Uacute": 0x00DA,
            "ucirc": 0x00FB,
            "Ucirc": 0x00DB,
            "ugrave": 0x00F9,
            "Ugrave": 0x00D9,
            "uuml": 0x00FC,
            "Uuml": 0x00DC,
            "yacute": 0x00FD,
            "Yacute": 0x00DD,
            "yuml": 0x00FF,
            "abreve": 0x0103,
            "Abreve": 0x0102,
            "amacr": 0x0101,
            "Amacr": 0x0100,
            "aogon": 0x0105,
            "Aogon": 0x0104,
            "cacute": 0x0107,
            "Cacute": 0x0106,
            "ccaron": 0x010D,
            "Ccaron": 0x010C,
            "ccirc": 0x0109,
            "Ccirc": 0x0108,
            "cdot": 0x010B,
            "Cdot": 0x010A,
            "dcaron": 0x010F,
            "Dcaron": 0x010E,
            "dstrok": 0x0111,
            "Dstrok": 0x0110,
            "ecaron": 0x011B,
            "Ecaron": 0x011A,
            "edot": 0x0117,
            "Edot": 0x0116,
            "emacr": 0x0113,
            "Emacr": 0x0112,
            "eogon": 0x0119,
            "Eogon": 0x0118,
            "gacute": 0x01F5,
            "gbreve": 0x011F,
            "Gbreve": 0x011E,
            "Gcedil": 0x0122,
            "gcirc": 0x011D,
            "Gcirc": 0x011C,
            "gdot": 0x0121,
            "Gdot": 0x0120,
            "hcirc": 0x0125,
            "Hcirc": 0x0124,
            "hstrok": 0x0127,
            "Hstrok": 0x0126,
            "Idot": 0x0130,
            "Imacr": 0x012A,
            "imacr": 0x012B,
            "ijlig": 0x0133,
            "IJlig": 0x0132,
            "inodot": 0x0131,
            "iogon": 0x012F,
            "Iogon": 0x012E,
            "itilde": 0x0129,
            "Itilde": 0x0128,
            "jcirc": 0x0135,
            "Jcirc": 0x0134,
            "kcedil": 0x0137,
            "Kcedil": 0x0136,
            "kgreen": 0x0138,
            "lacute": 0x013A,
            "Lacute": 0x0139,
            "lcaron": 0x013E,
            "Lcaron": 0x013D,
            "lcedil": 0x013C,
            "Lcedil": 0x013B,
            "lmidot": 0x0140,
            "Lmidot": 0x013F,
            "lstrok": 0x0142,
            "Lstrok": 0x0141,
            "nacute": 0x0144,
            "Nacute": 0x0143,
            "eng": 0x014B,
            "ENG": 0x014A,
            "napos": 0x0149,
            "ncaron": 0x0148,
            "Ncaron": 0x0147,
            "ncedil": 0x0146,
            "Ncedil": 0x0145,
            "odblac": 0x0151,
            "Odblac": 0x0150,
            "Omacr": 0x014C,
            "omacr": 0x014D,
            "oelig": 0x0153,
            "OElig": 0x0152,
            "racute": 0x0155,
            "Racute": 0x0154,
            "rcaron": 0x0159,
            "Rcaron": 0x0158,
            "rcedil": 0x0157,
            "Rcedil": 0x0156,
            "sacute": 0x015B,
            "Sacute": 0x015A,
            "scaron": 0x0161,
            "Scaron": 0x0160,
            "scedil": 0x015F,
            "Scedil": 0x015E,
            "scirc": 0x015D,
            "Scirc": 0x015C,
            "tcaron": 0x0165,
            "Tcaron": 0x0164,
            "tcedil": 0x0163,
            "Tcedil": 0x0162,
            "tstrok": 0x0167,
            "Tstrok": 0x0166,
            "ubreve": 0x016D,
            "Ubreve": 0x016C,
            "udblac": 0x0171,
            "Udblac": 0x0170,
            "umacr": 0x016B,
            "Umacr": 0x016A,
            "uogon": 0x0173,
            "Uogon": 0x0172,
            "uring": 0x016F,
            "Uring": 0x016E,
            "utilde": 0x0169,
            "Utilde": 0x0168,
            "wcirc": 0x0175,
            "Wcirc": 0x0174,
            "ycirc": 0x0177,
            "Ycirc": 0x0176,
            "Yuml": 0x0178,
            "zacute": 0x017A,
            "Zacute": 0x0179,
            "zcaron": 0x017E,
            "Zcaron": 0x017D,
            "zdot": 0x017C,
            "Zdot": 0x017B,
            "agr": 0x03B1,
            "Agr": 0x0391,
            "bgr": 0x03B2,
            "Bgr": 0x0392,
            "ggr": 0x03B3,
            "Ggr": 0x0393,
            "dgr": 0x03B4,
            "Dgr": 0x0394,
            "egr": 0x03B5,
            "Egr": 0x0395,
            "zgr": 0x03B6,
            "Zgr": 0x0396,
            "eegr": 0x03B7,
            "EEgr": 0x0397,
            "thgr": 0x03B8,
            "THgr": 0x0398,
            "igr": 0x03B9,
            "Igr": 0x0399,
            "kgr": 0x03BA,
            "Kgr": 0x039A,
            "lgr": 0x03BB,
            "Lgr": 0x039B,
            "mgr": 0x03BC,
            "Mgr": 0x039C,
            "ngr": 0x03BD,
            "Ngr": 0x039D,
            "xgr": 0x03BE,
            "Xgr": 0x039E,
            "ogr": 0x03BF,
            "Ogr": 0x039F,
            "pgr": 0x03C0,
            "Pgr": 0x03A0,
            "rgr": 0x03C1,
            "Rgr": 0x03A1,
            "sgr": 0x03C3,
            "Sgr": 0x03A3,
            "sfgr": 0x03C2,
            "tgr": 0x03C4,
            "Tgr": 0x03A4,
            "ugr": 0x03C5,
            "Ugr": 0x03A5,
            "phgr": 0x03C6,
            "PHgr": 0x03A6,
            "khgr": 0x03C7,
            "KHgr": 0x03A7,
            "psgr": 0x03C8,
            "PSgr": 0x03A8,
            "ohgr": 0x03C9,
            "OHgr": 0x03A9,
            "half": 0x00BD,
            "frac12": 0x00BD,
            "frac14": 0x00BC,
            "frac34": 0x00BE,
            "frac18": 0x215B,
            "frac38": 0x215C,
            "frac58": 0x215D,
            "frac78": 0x215E,
            "sup1": 0x00B9,
            "sup2": 0x00B2,
            "sup3": 0x00B3,
            "plus": 0x002B,
            "plusmn": 0x00B1,
            "equals": 0x003D,
            "gt": 0x003E,
            "divide": 0x00F7,
            "times": 0x00D7,
            "curren": 0x00A4,
            "pound": 0x00A3,
            "dollar": 0x0024,
            "cent": 0x00A2,
            "yen": 0x00A5,
            "num": 0x0023,
            "percnt": 0x0025,
            "ast": 0x2217,
            "commat": 0x0040,
            "lsqb": 0x005B,
            "bsol": 0x005C,
            "rsqb": 0x005D,
            "lcub": 0x007B,
            "horbar": 0x2015,
            "verbar": 0x007C,
            "rcub": 0x007D,
            "micro": 0x00B5,
            "ohm": 0x2126,
            "deg": 0x00B0,
            "ordm": 0x00BA,
            "ordf": 0x00AA,
            "sect": 0x00A7,
            "para": 0x00B6,
            "middot": 0x00B7,
            "larr": 0x2190,
            "rarr": 0x2192,
            "uarr": 0x2191,
            "darr": 0x2193,
            "copy": 0x00A9,
            "reg": 0x00AF,
            "trade": 0x2122,
            "brvbar": 0x00A6,
            "not": 0x00AC,
            "sung": 0x2669,
            "excl": 0x0021,
            "iexcl": 0x00A1,
            "quot": 0x0022,
            "apos": 0x0027,
            "lpar": 0x0028,
            "rpar": 0x0029,
            "comma": 0x002C,
            "lowbar": 0x005F,
            "hyphen": 0xE4F8,
            "period": 0x002E,
            "sol": 0x002F,
            "colon": 0x003A,
            "semi": 0x003B,
            "quest": 0x003F,
            "iquest": 0x00BF,
            "laquo": 0x00AB,
            "raquo": 0x00BB,
            "lsquo": 0x2018,
            "rsquo": 0x2019,
            "ldquo": 0x201C,
            "rdquo": 0x201D,
            "nbsp": 0x00A0,
            "shy": 0x00AD,
            "acute": 0x00B4,
            "breve": 0x02D8,
            "caron": 0x02C7,
            "cedil": 0x00B8,
            "circ": 0x2218,
            "dblac": 0x02DD,
            "die": 0x00A8,
            "dot": 0x02D9,
            "grave": 0x0060,
            "macr": 0x00AF,
            "ogon": 0x02DB,
            "ring": 0x02DA,
            "tilde": 0x007E,
            "uml": 0x00A8,
            "emsp": 0x2003,
            "ensp": 0x2002,
            "emsp13": 0x2004,
            "emsp14": 0x2005,
            "numsp": 0x2007,
            "puncsp": 0x2008,
            "thinsp": 0x2009,
            "hairsp": 0x200A,
            "mdash": 0x2014,
            "ndash": 0x2013,
            "dash": 0x2010,
            "blank": 0x2423,
            "hellip": 0x2026,
            "nldr": 0x2025,
            "frac13": 0x2153,
            "frac23": 0x2154,
            "frac15": 0x2155,
            "frac25": 0x2156,
            "frac35": 0x2157,
            "frac45": 0x2158,
            "frac16": 0x2159,
            "frac56": 0x215A,
            "incare": 0x2105,
            "block": 0x2588,
            "uhblk": 0x2580,
            "lhblk": 0x2584,
            "blk14": 0x2591,
            "blk12": 0x2592,
            "blk34": 0x2593,
            "marker": 0x25AE,
            "cir": 0x25CB,
            "squ": 0x25A1,
            "rect": 0x25AD,
            "utri": 0x25B5,
            "dtri": 0x25BF,
            "star": 0x22C6,
            "bull": 0x2022,
            "squf": 0x25AA,
            "utrif": 0x25B4,
            "dtrif": 0x25BE,
            "ltrif": 0x25C2,
            "rtrif": 0x25B8,
            "clubs": 0x2663,
            "diams": 0x2666,
            "hearts": 0x2665,
            "spades": 0x2660,
            "malt": 0x2720,
            "dagger": 0x2020,
            "Dagger": 0x2021,
            "check": 0x2713,
            "cross": 0x2717,
            "sharp": 0x266F,
            "flat": 0x266D,
            "male": 0x2642,
            "female": 0x2640,
            "phone": 0x260E,
            "telrec": 0x2315,
            "copysr": 0x2117,
            "caret": 0x2041,
            "lsquor": 0x201A,
            "ldquor": 0x201E,
            "fflig": 0xFB00,
            "filig": 0xFB01,
            "ffilig": 0xFB03,
            "ffllig": 0xFB04,
            "fllig": 0xFB02,
            "mldr": 0x2026,
            "rdquor": 0x201C,
            "rsquor": 0x2018,
            "vellip": 0x22EE,
            "hybull": 0x2043,
            "loz": 0x25CA,
            "lozf": 0x2726,
            "ltri": 0x25C3,
            "rtri": 0x25B9,
            "starf": 0x2605,
            "natur": 0x266E,
            "rx": 0x211E,
            "sext": 0x2736,
            "target": 0x2316,
            "dlcrop": 0x230D,
            "drcrop": 0x230C,
            "ulcrop": 0x230F,
            "urcrop": 0x230E,
            "boxh": 0x2500,
            "boxv": 0x2502,
            "boxur": 0x2514,
            "boxul": 0x2518,
            "boxdl": 0x2510,
            "boxdr": 0x250C,
            "boxvr": 0x251C,
            "boxhu": 0x2534,
            "boxvl": 0x2524,
            "boxhd": 0x252C,
            "boxvh": 0x253C,
            "boxvR": 0x255E,
            "boxhU": 0x2567,
            "boxvL": 0x2561,
            "boxhD": 0x2564,
            "boxvH": 0x256A,
            "boxH": 0x2550,
            "boxV": 0x2551,
            "boxUR": 0x2558,
            "boxUL": 0x255B,
            "boxDL": 0x2555,
            "boxDR": 0x2552,
            "boxVR": 0x255F,
            "boxHU": 0x2568,
            "boxVL": 0x2562,
            "boxHD": 0x2565,
            "boxVH": 0x256B,
            "boxVr": 0x2560,
            "boxHu": 0x2569,
            "boxVl": 0x2563,
            "boxHd": 0x2566,
            "boxVh": 0x256C,
            "boxuR": 0x2559,
            "boxUl": 0x255C,
            "boxdL": 0x2556,
            "boxDr": 0x2553,
            "boxUr": 0x255A,
            "boxuL": 0x255D,
            "boxDl": 0x2557,
            "boxdR": 0x2554
            }
        # Build the reverse map: entity name -> single unicode character.
        self.entity_char_dict = {}

        # Python 2 only: iteritems(), unichr(), and the comma-form except
        # clause below are all invalid or renamed in Python 3.
        for ent, code in self.entity_code_dict.iteritems():
            try:
                # NOTE(review): the second argument of latin_1_decode is the
                # *errors* policy, not an encoding — "utf8" names a nonexistent
                # error handler. It is harmless only because Latin-1 decoding
                # of a chr() in range 0-255 can never invoke the handler.
                self.entity_char_dict[ent] = latin_1_decode(chr(code),"utf8")[0]
            # In Python 2, chr(code) raises ValueError for code > 255; the
            # fallback builds the character with unichr() instead.
            # NOTE(review): "except ValueError,UnicodeEncodeError" catches ONLY
            # ValueError and binds it to the name UnicodeEncodeError (classic
            # Python 2 pitfall). To catch both types it should read
            # "except (ValueError, UnicodeEncodeError):".
            except ValueError,UnicodeEncodeError:
                self.entity_char_dict[ent] = unichr(code)
예제 #45
0
 def test_latin_1_decode(self):
     #sanity
     new_str, size = codecs.latin_1_decode("abc")
     self.assertEqual(new_str, u'abc')
     self.assertEqual(size, 3)
예제 #46
0
        time.sleep(sleeptime)
    of = open(lockfile, "w")
    of.close()

sys.stderr.write("making query\n")

query = '\n'.join(queries).rstrip() + "\n"

command = "jellyfish query --load --sequence=/dev/stdin {}".format(jf)

sys.stderr.write("Submitting query " + str(len(queries)) + "\n")
proc = subprocess.Popen(command.split(),
                        stdout=subprocess.PIPE,
                        stdin=subprocess.PIPE)
proc_stdout = proc.communicate(input=bytes(query, 'utf-8'))
allLines = codecs.latin_1_decode(proc_stdout[0])[0]
jfRes = allLines.split("\n")

print("il has " + str(len(jfRes)))
#os.killpg(os.getpgid(proc.pid), signal.SIGTERM)  # Send the signal to all the process groups

if jfRes[-1] == '':
    del jfRes[-1]  #delete empty value
if any([
        len(jfRes) !=
        exp_result_len,  #make sure that there are the correct number of HG JF results
        len(queries) % 2 !=
        0,  #make sure there are 2 queries for each variant                
]):
    print("one of these is not like the other " + str(len(jfRes)) + " " +
          str(exp_result_len) + "  " + str(len(queries)))
예제 #47
0
 def decode(self, input, final=False):
     """Decode *input* as Latin-1 using this codec's error policy.

     *final* is accepted for incremental-decoder interface compatibility
     but is not used: Latin-1 is a single-byte encoding, so every chunk
     is decodable on its own.
     """
     decoded, _consumed = codecs.latin_1_decode(input, self.errors)
     return decoded
예제 #48
0
def from_rxstring(s):
    """Decode the byte string *s* as Latin-1 and return the resulting text.

    The consumed-byte count reported by the codec is discarded.
    """
    text, _ = codecs.latin_1_decode(s)
    return text