Example #1
def open_source(fname):
    buffer = open(fname, 'rb')
    encoding, _ = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, line_buffering=True)
    text.mode = 'r'
    return text
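These listings generally omit their imports; the pattern above relies on tokenize.detect_encoding and io.TextIOWrapper from the standard library. A minimal usage sketch, assuming the function above is in scope ('example.py' is a hypothetical file name):

# Imports the snippet above assumes:
from io import TextIOWrapper
from tokenize import detect_encoding

src = open_source('example.py')
print(src.read())   # decoded with the file's declared (or detected) encoding
src.close()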
Example #2
def load_geojson():
    result = urlopen('https://github.com/openregister/boundaries/archive/master.zip').read()
    stream = BytesIO(result)
    zipfile = ZipFile(stream, 'r')
    file_names = [name for name in zipfile.namelist()
                  if name.endswith('.geojson')]
    for name in file_names:
        with zipfile.open(name, 'r') as f:
            if name.endswith('.geojson'):
                file_contents = TextIOWrapper(f, encoding='utf-8',
                                              newline='')
                data = geojson.loads(file_contents.read())
                try:
                    name = data['properties']['REGD14NM']
                    code = data['properties']['REGD14CD']
                    geometry = data['geometry']
                    # hack: store everything as MultiPolygon
                    if geometry['type'] == 'Polygon':
                        coordinates = []
                        coordinates.append(geometry['coordinates'])
                        geometry['coordinates'] = coordinates
                        geometry['type'] = 'MultiPolygon'
                    polygon = from_shape(asShape(geometry), srid=4326)
                    boundary = Boundary(name=name, code=code, polygon=polygon)
                    db.session.add(boundary)
                    db.session.commit()
                except KeyError:
                    print("skipping feature with missing expected properties")
Example #3
    def __init__(self, args, environment, logic, LOGICS=None, **options):
        Solver.__init__(self,
                        environment,
                        logic=logic,
                        **options)
        self.to = self.environment.typeso
        if LOGICS is not None: self.LOGICS = LOGICS
        self.args = args
        self.declared_vars = set()
        self.declared_sorts = set()
        self.solver = Popen(args, stdout=PIPE, stderr=PIPE, stdin=PIPE,
                            bufsize=-1)
        # Give the process time to start up
        time.sleep(0.01)
        self.parser = SmtLibParser(interactive=True)
        if PY2:
            self.solver_stdin = self.solver.stdin
            self.solver_stdout = self.solver.stdout
        else:
            self.solver_stdin = TextIOWrapper(self.solver.stdin)
            self.solver_stdout = TextIOWrapper(self.solver.stdout)

        # Initialize solver
        self.options(self)
        self.set_logic(logic)
Example #4
def pandoc(source, fmt, to, extra_args=None, encoding='utf-8'):
    """Convert an input string in format `from` to format `to` via pandoc.

    Parameters
    ----------
    source : string
      Input string, assumed to be valid in format `fmt`.
    fmt : string
      The name of the input format (markdown, etc.)
    to : string
      The name of the output format (html, etc.)
    extra_args : list, optional
      Extra command-line arguments to pass to pandoc.
    encoding : string, default 'utf-8'
      Encoding used when converting `source` to bytes for pandoc's stdin.

    Returns
    -------
    out : unicode
      Output as returned by pandoc.

    Exceptions
    ----------
    This function will raise PandocMissing if pandoc is not installed.
    Any error messages generated by pandoc are printed to stderr.

    """
    cmd = ['pandoc', '-f', fmt, '-t', to]
    if extra_args:
        cmd.extend(extra_args)

    # this will raise an exception that will pop us out of here
    check_pandoc_version()
    
    # we can safely continue
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _ = p.communicate(cast_bytes(source, encoding))
    out = TextIOWrapper(BytesIO(out), encoding, 'replace').read()
    return out.rstrip('\n')
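A usage sketch, assuming pandoc is installed and the helper's own imports (subprocess, BytesIO, TextIOWrapper, cast_bytes) are in scope:

html = pandoc('# Title\n\nSome *markdown*.', 'markdown', 'html')
# html now holds pandoc's HTML rendering, e.g. an <h1> followed by a <p>.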
Example #5
    def _generate(self):
        """ Generate the Python code. """

        needs_close = False

        if sys.hexversion >= 0x03000000:
            if self._opts.output == '-':
                from io import TextIOWrapper

                pyfile = TextIOWrapper(sys.stdout.buffer, encoding='utf8')
            else:
                pyfile = open(self._opts.output, 'wt', encoding='utf8')
                needs_close = True
        else:
            if self._opts.output == '-':
                pyfile = sys.stdout
            else:
                pyfile = open(self._opts.output, 'wt')
                needs_close = True

        compileUi(self._ui_file, pyfile, self._opts.execute, self._opts.indent,
                self._opts.pyqt3_wrapper, self._opts.from_imports,
                self._opts.resource_suffix)

        if needs_close:
            pyfile.close()
Example #6
File: tair.py Project: fnl/gnamed
    def _setup(self, stream: io.TextIOWrapper):
        assert len(self.files) == 3, \
            'received {} files, expected 3'.format(len(self.files))
        lines = super(Parser, self)._setup(stream)

        if not hasattr(self, '_records'):
            self._records = dict()

        if not hasattr(self, '_fileno'):
            self._fileno = 0

        if stream.name.endswith('names.txt'):
            content = stream.readline().strip()
            lines += 1
            logging.debug("file header:\n%s", content)
            self._parse = self._parseName
            self._fileno += 1
        elif stream.name.endswith('aliases.txt'):
            logging.debug('parsing aliases')
            content = stream.readline().strip()
            lines += 1
            logging.debug("file header:\n%s", content)
            self._parse = self._parseAlias
            self._fileno += 1
        elif stream.name.endswith('tair.txt'):
            logging.debug('parsing EntrezGene links')
            self._parse = self._parseEntrez
            self._fileno += 1
        else:
            raise RuntimeError('unknown TAIR file "{}"'.format(stream.name))

        return lines
Example #7
class Snakefile:
    def __init__(self, path):
        self.path = path
        try:
            self.file = open(self.path, encoding="utf-8")
        except FileNotFoundError as e:
            try:
                self.file = TextIOWrapper(urllib.request.urlopen(self.path),
                                          encoding="utf-8")
            except (HTTPError, URLError, ContentTooShortError, ValueError):
                raise WorkflowError("Failed to open {}.".format(path))

        self.tokens = tokenize.generate_tokens(self.file.readline)
        self.rulecount = 0
        self.lines = 0

    def __next__(self):
        return next(self.tokens)

    def __iter__(self):
        return self

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.file.close()
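The reusable idea here is the fallback chain: try the path as a local file, then as a URL. A trimmed sketch without the snakemake-specific WorkflowError:

import urllib.request
from io import TextIOWrapper

def open_local_or_url(path):
    try:
        return open(path, encoding='utf-8')
    except OSError:
        # urlopen() returns a binary stream; wrap it for text access.
        return TextIOWrapper(urllib.request.urlopen(path), encoding='utf-8')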
Example #8
def pandoc(source, fmt, to, extra_args=None, encoding="utf-8"):
    """Convert an input string in format `from` to format `to` via pandoc.

    This function will raise an error if pandoc is not installed.
    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    source : string
      Input string, assumed to be valid in format `fmt`.
    fmt : string
      The name of the input format (markdown, etc.)
    to : string
      The name of the output format (html, etc.)
    extra_args : list, optional
      Extra command-line arguments to pass to pandoc.
    encoding : string, default 'utf-8'
      Encoding used when converting `source` to bytes for pandoc's stdin.

    Returns
    -------
    out : unicode
      Output as returned by pandoc.
    """
    command = ["pandoc", "-f", fmt, "-t", to]
    if extra_args:
        command.extend(extra_args)
    try:
        p = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    except OSError as e:
        raise PandocMissing(
            "The command '%s' returned an error: %s.\n" % (" ".join(command), e)
            + "Please check that pandoc is installed:\n"
            + "http://johnmacfarlane.net/pandoc/installing.html"
        )
    out, _ = p.communicate(cast_bytes(source, encoding))
    out = TextIOWrapper(BytesIO(out), encoding, "replace").read()
    return out.rstrip("\n")
Example #9
def load_gff(gff):
    """Parses a single GFF file and returns a chromosome-indexed dict for
       that file.

    Arguments
    ---------
    gff: str
        Filepath to GFF

    Returns
    -------
    dict: A dictionary representation of the GFF entries, indexed by
            chromosome ID
    """
    annotations = {}

    if gff.endswith('.gz'):
        import gzip
        from io import TextIOWrapper
        fp = TextIOWrapper(gzip.open(gff))
    else:
        fp = open(gff)

    for entry in GFF.parse(fp):
        if len(entry.features) > 0 and entry.features[0].type == 'chromosome':
            annotations[entry.id] = entry
    fp.close()

    return annotations
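One caveat with the gzip branch above: TextIOWrapper with no encoding argument falls back to locale.getpreferredencoding(), so results can vary across machines. A sketch of the same branch with an explicit encoding (the file name is hypothetical):

import gzip
from io import TextIOWrapper

# gzip.open() defaults to binary mode; decode the stream explicitly as UTF-8.
fp = TextIOWrapper(gzip.open('annotations.gff3.gz'), encoding='utf-8')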
Example #10
def _grep_a_file(bucketstr: str, key: str, regex: str,
                 output: io.TextIOWrapper):
    '''
    Scan the S3 object(s) under `key` line by line and write every line
    that matches `regex` to the output buffer.

    :param bucketstr: name of the S3 bucket
    :param key: key (prefix) of the object(s) to scan
    :param regex: regular expression to search for in each line
    :param output: the output buffer
    :return:
    '''
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucketstr)
    for obj in bucket.objects.filter(Prefix=key):

        datadict = obj.get()

        instream = boto_stream.BotoStreamBody(datadict['Body'])
        # Note: '^' is XOR in Python (2 ^ 20 == 22); a 1 MiB buffer needs 2 ** 20.
        instream = io.BufferedReader(instream, buffer_size=2 ** 20)

        filename, file_extension = os.path.splitext(key)
        if file_extension == '.gz':
            instream = gzip.GzipFile(fileobj=instream, mode='rb')

        for line in io.TextIOWrapper(instream):
            if re.search(regex, line) is not None:
                output.write(obj.key + ":" + line)
Example #11
def _write_table(profile_dir, table_name, rows, fields,
                 append=False, gzip=False):
    # don't gzip if empty
    rows = iter(rows)
    try:
        first_row = next(rows)
    except StopIteration:
        gzip = False
    else:
        rows = chain([first_row], rows)
    if gzip and append:
        logging.warning('Appending to a gzip file may result in '
                        'inefficient compression.')

    if not os.path.exists(profile_dir):
        raise ItsdbError('Profile directory does not exist: {}'
                         .format(profile_dir))

    tbl_filename = os.path.join(profile_dir, table_name)
    mode = 'a' if append else 'w'
    if gzip:
        # gzip text mode ('t') is only available from Python 3.3;
        # until then, wrap the binary stream in a TextIOWrapper
        #mode += 't'  # text mode for gzip
        f = TextIOWrapper(gzopen(tbl_filename + '.gz', mode=mode))
    else:
        f = open(tbl_filename, mode=mode)

    for row in rows:
        f.write(make_row(row, fields) + '\n')

    f.close()
Example #12
    def __init__(self, args, environment, logic, user_options=None, LOGICS=None):
        Solver.__init__(self, environment, logic=logic, user_options=user_options)

        if LOGICS is not None:
            self.LOGICS = LOGICS
        self.args = args
        self.declared_vars = set()
        self.solver = Popen(args, stdout=PIPE, stderr=PIPE, stdin=PIPE)
        self.parser = SmtLibParser(interactive=True)
        if PY2:
            self.solver_stdin = self.solver.stdin
            self.solver_stdout = self.solver.stdout
        else:
            self.solver_stdin = TextIOWrapper(self.solver.stdin)
            self.solver_stdout = TextIOWrapper(self.solver.stdout)

        self.dbg = False

        # Initialize solver
        self._send_command(SmtLibCommand(smtcmd.SET_OPTION, [":print-success", "false"]))
        self._send_command(SmtLibCommand(smtcmd.SET_OPTION, [":produce-models", "true"]))

        if self.options is not None:
            for o, v in iteritems(self.options):
                self._send_command(SmtLibCommand(smtcmd.SET_OPTION, [o, v]))
        self._send_command(SmtLibCommand(smtcmd.SET_LOGIC, [logic]))
Example #13
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding.
    txt can be either a bytes buffer or a string containing the source
    code.
    """
    if isinstance(txt, unicode):
        return txt
    if isinstance(txt, bytes):
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        encoding = "ascii"
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(text))
    else:
        return text.read()
Example #14
    def __init__(self, args, environment, logic, user_options=None,
                 LOGICS=None):
        Solver.__init__(self,
                        environment,
                        logic=logic,
                        user_options=user_options)
        # Flag used to debug interaction with the solver
        self.dbg = False

        if LOGICS is not None: self.LOGICS = LOGICS
        self.args = args
        self.declared_vars = set()
        self.solver = Popen(args, stdout=PIPE, stderr=PIPE, stdin=PIPE)
        self.parser = SmtLibParser(interactive=True)
        if PY2:
            self.solver_stdin = self.solver.stdin
            self.solver_stdout = self.solver.stdout
        else:
            self.solver_stdin = TextIOWrapper(self.solver.stdin)
            self.solver_stdout = TextIOWrapper(self.solver.stdout)

        # Initialize solver
        self.set_option(":print-success", "true")
        if self.options.generate_models:
            self.set_option(":produce-models", "true")
        # Redirect diagnostic output to stdout
        self.set_option(":diagnostic-output-channel", '"stdout"')
        if self.options is not None:
            for o, v in iteritems(self.options):
                self.set_option(o, v)
        self.set_logic(logic)
Example #15
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.
    
    Parameters
    ----------
    url : str
      The URL from which to fetch the file.
    errors : str
      How to handle decoding errors in the file. Options are the same as for
      bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
      If True (the default), and the encoding declaration is found in the first
      two lines, that line will be excluded from the output - compiling a
      unicode string with an encoding declaration is a SyntaxError in Python 2.
    
    Returns
    -------
    A unicode string containing the contents of the file.
    """
    response = urllib.request.urlopen(url)
    buffer = io.BytesIO(response.read())
    encoding, lines = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if skip_encoding_cookie:
        return "".join(strip_encoding_cookie(text))
    else:
        return text.read()
Example #16
def open(filename):
    buffer = builtins.open(filename, 'rb')
    (encoding, lines) = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, line_buffering=True)
    text.mode = 'r'
    return text
Example #17
    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. Other values are interpreted as for the :mod:`io` package.
            Buffering is ignored in text mode.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False

        if len(mode) != 1 and mode not in 'rw': # pragma: no cover
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            # NOTE: This is preventing a mode like 'rwb' for binary random access;
            # that code was never tested and was explicitly marked as "not used"
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        if mode == 'r':
            self.io = BufferedReader(self.fileio, bufsize)
        else:
            assert mode == 'w'
            self.io = BufferedWriter(self.fileio, bufsize)
        #else: # QQQ: not used, not reachable
        #
        #    self.io = BufferedRandom(self.fileio, bufsize)

        if self._translate:
            self.io = TextIOWrapper(self.io)
Example #18
    def _get_timeseries_without_moving_file_position(self, datastream):
        original_position = datastream.tell()
        wrapped_datastream = TextIOWrapper(datastream, encoding="utf-8", newline="\n")
        result = HTimeseries.read(wrapped_datastream)
        wrapped_datastream.detach()  # If we don't do this the datastream will be closed
        datastream.seek(original_position)
        return result
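The detach() call is the crux: closing or discarding a TextIOWrapper normally closes the wrapped binary stream too. A minimal sketch of the difference, using an in-memory stream:

from io import BytesIO, TextIOWrapper

raw = BytesIO(b'hello')
wrapper = TextIOWrapper(raw, encoding='utf-8')
wrapper.read()
wrapper.detach()     # unhook the wrapper without closing the binary stream
raw.seek(0)          # raw is still open and usable
assert raw.closed is False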
Example #19
def main():
    for task, digest in CVEDA_PSYTOOLS_DATASETS:
        digest = digest.upper().replace(' ', '_')
        dataset = 'IMAGEN-{task}-{digest}.csv'.format(task=task, digest=digest)
        logging.info('downloading: {0}'.format(dataset))
        url = BASE_URL + dataset + '.gz'
        # let Requests use ~/.netrc instead of passing an auth parameter
        #     auth = requests.auth.HTTPBasicAuth('...', '...')
        # no need to expose identifiers in the code!
        r = requests.get(url, verify=CA_BUNDLE)
        compressed_data = BytesIO(r.content)
        with gzip.GzipFile(fileobj=compressed_data) as uncompressed_data:
            # unfold quoted text spanning multiple lines
            uncompressed_data = TextIOWrapper(uncompressed_data)
            data = QUOTED_PATTERN.sub(lambda x: x.group().replace('\n', '/'),
                                      uncompressed_data.read())
            # skip files that have not changed since last update
            psytools_path = os.path.join(PSYTOOLS_PSC1_DIR, dataset)
            if os.path.isfile(psytools_path):
                with open(psytools_path, 'r') as uncompressed_file:
                    if uncompressed_file.read() == data:
                        logging.info('skip unchanged file: {0}'
                                     .format(psytools_path))
                        continue
            # write downloaded data into file
            with open(psytools_path, 'w') as uncompressed_file:
                logging.info('write file: {0}'.format(psytools_path))
                uncompressed_file.write(data)
Example #20
def test_read_text2(conn):
    conn.send_request('GET', '/send_%d_bytes' % len(DUMMY_DATA))
    conn.read_response()
    fh = TextIOWrapper(conn)

    # This used to fail because TextIOWrapper can't deal with bytearrays
    fh.read(42)
Example #21
def test_readline(rf, fn):
    f = rf.open(fn)
    tr = TextIOWrapper(BufferedReader(f))
    while True:
        ln = tr.readline()
        if not ln:
            break
    tr.close()
Example #22
    def test_partial_decode_wait(self):
        reader = StreamReader()
        wrapped = TextIOWrapper(reader, 'utf-8')
        buf = u'20 \u20ac'.encode('utf-8')
        reader.feed(buf[:-1])
        def write_last():
            gruvi.sleep(0.01)
            reader.feed(buf[-1:])
        gruvi.spawn(write_last)
        self.assertEqual(wrapped.read(4), u'20 \u20ac')
Example #23
def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    buffer = io.open(filename, 'rb')   # Tweaked to use io.open for Python 2
    encoding, lines = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, line_buffering=True)
    text.mode = 'r'
    return text
Example #24
def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    buffer = builtins.open(filename, 'rb')
    encoding, lines = detect_encoding(buffer.readline)
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, line_buffering=True)
    text.mode = 'r'
    return text
Example #25
    def header_bytes(self, any_chunks):

        if self.status_code not in (100, 204):
            enc_hdr, enc_msg = self.encapsulated(any_chunks)
            self.headers['Encapsulated'] = enc_hdr
        else:
            enc_msg = None
            if any_chunks:
                # http://www.measurement-factory.com/std/icap/#e1
                raise ValueError("no encapsulation allowed")

        bio = BytesIO()
        sio = TextIOWrapper(bio, encoding='iso-8859-1')

        status_line = u'{} {} {}\r\n'.format(self.protocol,
                                             self.status_code,
                                             self.reason)
        sio.write(status_line)
        for key, value in iteritems(self.headers):
            if isinstance(value, list):
                values = [text_type(v) for v in value]
                line = u'{}: {}\r\n'.format(key, ', '.join(values))
            else:
                line = u'{}: {}\r\n'.format(key, value)

            sio.write(line)
        sio.write(u'\r\n')
        sio.flush()

        if enc_msg:
            bio.write(enc_msg)

        return bio.getvalue()
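The flush() before getvalue() matters: TextIOWrapper buffers encoded text internally, so without it the headers may not have reached the BytesIO yet. A stripped-down sketch:

from io import BytesIO, TextIOWrapper

bio = BytesIO()
sio = TextIOWrapper(bio, encoding='iso-8859-1')
sio.write(u'ICAP/1.0 200 OK\r\n\r\n')
sio.flush()  # push the encoded bytes into bio before reading them back
assert bio.getvalue() == b'ICAP/1.0 200 OK\r\n\r\n'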
Example #26
def readlines(filename):
    """Read the source code."""
    try:
        with open(filename, 'rb') as f:
            (coding, lines) = tokenize.detect_encoding(f.readline)
            f = TextIOWrapper(f, coding, line_buffering=True)
            return [l.decode(coding) for l in lines] + f.readlines()
    except (LookupError, SyntaxError, UnicodeError):
        # Fall back if file encoding is improperly declared
        with open(filename, encoding='latin-1') as f:
            return f.readlines()
Example #27
File: rgd.py Project: fnl/gnamed
    def _setup(self, stream: io.TextIOWrapper):
        lines = super(Parser, self)._setup(stream)
        content = stream.readline().strip()
        lines += 1

        while content.startswith('#'):
            content = stream.readline().strip()
            lines += 1

        logging.debug("file header:\n%s", content)
        return lines
Example #28
    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file will be
            put in non-blocking mode.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False
        if len(mode) != 1:
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode == 'r':
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            self.io = BufferedReader(self.fileio, bufsize)
        elif mode == 'w':
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            self.io = BufferedWriter(self.fileio, bufsize)
        else:
            # QQQ: not used
            self.io = BufferedRandom(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)
Example #29
def test_read_text3(conn):
    conn.send_request('GET', '/send_%d_bytes' % len(DUMMY_DATA))
    conn.read_response()
    fh = TextIOWrapper(conn)

    # This used to fail because TextIOWrapper tries to read from
    # the underlying fh even after getting ''
    while True:
        if not fh.read(77):
            break

    assert not conn.response_pending()
Example #30
    def getTextIO(self) -> TextIO:
        """Gets the content of this File object as a text buffer (TextIOWrapper)."""
        self._read()
        return TextIOWrapper(self.content)
Example #31
                                'Exo', '1 Samuel'):
        print("  Searching for {!r} got {}".format(
            divisionAbbrevInput,
            bbns2.getDivisionBooklist(divisionAbbrevInput)))


# end of demo

if __name__ == '__main__':
    #multiprocessing.freeze_support() # Multiprocessing support for frozen Windows executables

    import sys
    if sys.platform.startswith('win'):  # Convert stdout so we don't get zillions of UnicodeEncodeErrors
        from io import TextIOWrapper
        sys.stdout = TextIOWrapper(
            sys.stdout.detach(), sys.stdout.encoding,
            'namereplace' if sys.version_info >= (3, 5) else 'backslashreplace')

    # Configure basic Bible Organisational System (BOS) set-up
    parser = BibleOrgSysGlobals.setup(ProgName, ProgVersion)
    parser.add_argument(
        "-p",
        "--expandDemo",
        action="store_true",
        dest="expandDemo",
        default=False,
        help=
        "expand the input abbreviations to include all unambiguous shorter forms"
    )
    BibleOrgSysGlobals.addStandardOptionsAndProcess(parser)
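The stdout re-wrapping above is a general recipe for console encoding errors. A minimal version of the same pattern, shown standalone (capture the encoding before detaching, since the old wrapper is unusable afterwards):

import sys
from io import TextIOWrapper

enc = sys.stdout.encoding
# Replace stdout with a wrapper that escapes unencodable characters
# instead of raising UnicodeEncodeError.
sys.stdout = TextIOWrapper(sys.stdout.detach(), enc, errors='backslashreplace')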
Example #32
def import_m3u(file: TextIOWrapper, args: dict) -> List[dict]:
    """
    Return a list of files in the playlist and their information
    :param file: the m3u file
    :param args: additional options
    :return: list of dicts of track information
    """
    first = file.readline().rstrip()
    if first == '#EXTM3U':
        ext = True
        on_inf = True  # Whether we are on a #EXTINF line
    else:
        file.seek(0)
        on_inf = False  # Whether we are on a #EXTINF line
        ext = False
    inf_duration = None
    inf_artist = None
    inf_track = None
    tracks = []
    for line in file:
        line = line.rstrip()
        # If we are on a #EXTINF line then parse it and continue
        if on_inf:
            line = line[7:]
            split_colon = line.split(':')
            inf_duration = split_colon[0] if len(split_colon) > 0 else None
            split_dash = line.split('-')
            inf_artist = split_dash[0].strip().split(
                ',', 1)[1] if len(split_dash) > 0 else None
            inf_track = split_dash[1] if len(split_dash) > 1 else None
            on_inf = False
            continue
        else:
            # Get file tags
            try:
                f = mu.File(line)
                info = {k.lower(): v for k, v in f.tags}
                info['duration'] = f.info.length
            except mu.MutagenError:
                info = {'filename': line}
            except KeyError:
                pass
            # Added info from m3u extended
            if ext:
                on_inf = True
                if args['m3u_ext']:
                    info['title'] = inf_track
                    info['artist'] = inf_artist
                    info['duration'] = inf_duration
                else:
                    if 'title' not in info or len(info['title']) == 0:
                        info['title'] = inf_track
                    if 'artist' not in info or len(info['artist']) == 0:
                        info['artist'] = inf_artist
                    if 'duration' not in info or info['duration'] == 0:
                        info['duration'] = inf_duration
            if args['hash']:
                try:
                    with open(line, 'rb') as hash_file:
                        hasher = md5()
                        buffer = hash_file.read()
                        hasher.update(buffer)
                        info['hash'] = hasher.hexdigest()
                except FileNotFoundError:
                    pass
            info['duration'] = round(info['duration'])
            tracks.append(info)
    return tracks
Example #33
    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. Other values are interpreted as for the :mod:`io` package.
            Buffering is ignored in text mode.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False

        if len(mode) != 1 and mode not in 'rw': # pragma: no cover
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            # NOTE: This is preventing a mode like 'rwb' for binary random access;
            # that code was never tested and was explicitly marked as "not used"
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        if mode == 'r':
            IOFamily = BufferedReader
        else:
            assert mode == 'w'
            IOFamily = BufferedWriter

        self.io = IOFamily(self.fileio, bufsize)
        #else: # QQQ: not used, not reachable
        #
        #    self.io = BufferedRandom(self.fileio, bufsize)

        if self._translate:
            self.io = TextIOWrapper(self.io)
Example #34
import csv
from io import TextIOWrapper
from zipfile import ZipFile

serList = []  #for tracking serial numbers

with open("harddrive_end_status", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["date", "serial_number", "model", "fail_status", \
        "hours_running", "smart_5", "smart_187", "smart_188", "smart_197", \
        "smart_198"])
    with ZipFile("data_Q3_2020.zip") as zf:
        fileList = zf.namelist()
        for name in reversed(fileList):
            with zf.open(name, "r") as infile:
                reader = csv.reader(TextIOWrapper(infile))
                for j in reader:
                    if j[2] == "ST12000NM0008" or \
                            j[2] == "TOSHIBA MG07ACA14TA":
                        if j[1] not in serList:
                            serList.append(j[1])
                            writer.writerow([j[0], j[1], j[2], j[4], j[20], \
                            j[14], j[64], j[66], j[84], j[86]])

rest_files_list = ["data_Q2_2020.zip", "data_Q1_2020.zip", \
                    "data_Q4_2019.zip"]

with open("harddrive_end_status", "a", newline="") as file:
    for k in rest_files_list:
        writer = csv.writer(file)
        with ZipFile(k) as zf:
Example #35
    def deserialize_workflow_spec(self, s_state, filename=None):
        """
        :param s_state: a byte-string with the contents of the packaged workflow archive, or a file-like object.
        :param filename: the name of the package file.
        """
        if isinstance(s_state, (str, bytes)):
            s_state = BytesIO(s_state)

        package_zip = zipfile.ZipFile(s_state,
                                      "r",
                                      compression=zipfile.ZIP_DEFLATED)
        config = configparser.SafeConfigParser()
        ini_fp = TextIOWrapper(package_zip.open(Packager.METADATA_FILE),
                               encoding="UTF-8")
        try:
            config.readfp(ini_fp)
        finally:
            ini_fp.close()

        parser_class = BpmnParser

        try:
            parser_class_module = config.get('MetaData',
                                             'parser_class_module',
                                             fallback=None)
        except TypeError:
            # unfortunately the fallback= does not exist on python 2
            try:
                parser_class_module = config.get('MetaData',
                                                 'parser_class_module')
            except configparser.NoOptionError:
                parser_class_module = None

        if parser_class_module:
            mod = __import__(parser_class_module,
                             fromlist=[config.get('MetaData', 'parser_class')])
            parser_class = getattr(mod, config.get('MetaData', 'parser_class'))

        parser = parser_class()

        for info in package_zip.infolist():
            parts = os.path.split(info.filename)
            if len(parts) == 2 and not parts[0] and parts[1].lower().endswith(
                    '.bpmn'):
                #It is in the root of the ZIP and is a BPMN file
                try:
                    svg = package_zip.read(info.filename[:-5] + '.svg')
                except KeyError as e:
                    svg = None

                bpmn_fp = package_zip.open(info)
                try:
                    bpmn = ET.parse(bpmn_fp)
                finally:
                    bpmn_fp.close()

                parser.add_bpmn_xml(bpmn,
                                    svg=svg,
                                    filename='%s:%s' %
                                    (filename, info.filename))

        return parser.get_spec(config.get('MetaData', 'entry_point_process'))
Example #36
def validate_fasta_file(text, *args, **kwargs):
    return validate_fasta(TextIOWrapper(text), *args, **kwargs)
Example #37
    def load_data(self):
        format = self.file.name.split(".")[-1]
        with self.file.open() as f:
            if format == 'csv':
                f = TextIOWrapper(f, "utf-8")
            return Dataset().load(f, format=format)
Example #38
#             print("Error happened when parsing the file: " + self.sourceFileName)
#             raise
#         except HTTPException as e:
#             print("Error happened when get the response from remote URL")
#             raise
#         except Exception as e:
#             print("Unknown error")
#             raise
#         finally:
#             for connection in c:
#                 connection.close()
#             f.close()

if __name__ == '__main__':
    sys.stdout = TextIOWrapper(sys.stdout.buffer,
                               encoding='utf-8',
                               errors='replace')
    today = datetime.now()

    # the path of your source file and destination file
    pathName = "./"

    # be sure to modify the following to reflect your file name (absolute path)
    fileName = pathName + "./SME_Closed.csv"
    #fileName="./1.csv"
    #fileName="./2.csv"

    # derive the company or business-entity name from its unified business number
    # if you'd like to have your own output file name, please modify the following
    # outputFileName=pathName+"./parser_category_"+today.strftime("%Y%m%d%H%M%S_%s")+".csv"
    # # be sure to urlencode for each param
Example #39
File: views.py Project: nbashev/noc
    def api_report(
        self,
        request,
        reporttype=None,
        from_date=None,
        to_date=None,
        object_profile=None,
        filter_default=None,
        exclude_zero=True,
        interface_profile=None,
        selector=None,
        administrative_domain=None,
        columns=None,
        description=None,
        o_format=None,
        enable_autowidth=False,
        **kwargs,
    ):
        def load(mo_ids):
            # match = {"links.mo": {"$in": mo_ids}}
            match = {"int.managed_object": {"$in": mo_ids}}
            group = {
                "_id": "$_id",
                "links": {
                    "$push": {
                        "iface_n": "$int.name",
                        # "iface_id": "$int._id",
                        # "iface_descr": "$int.description",
                        # "iface_speed": "$int.in_speed",
                        # "dis_method": "$discovery_method",
                        # "last_seen": "$last_seen",
                        "mo": "$int.managed_object",
                        "linked_obj": "$linked_objects",
                    }
                },
            }
            value = (get_db()["noc.links"].with_options(
                read_preference=ReadPreference.SECONDARY_PREFERRED).aggregate(
                    [
                        {
                            "$unwind": "$interfaces"
                        },
                        {
                            "$lookup": {
                                "from": "noc.interfaces",
                                "localField": "interfaces",
                                "foreignField": "_id",
                                "as": "int",
                            }
                        },
                        {
                            "$match": match
                        },
                        {
                            "$group": group
                        },
                    ],
                    allowDiskUse=True,
                ))

            res = defaultdict(dict)

            for v in value:
                if v["_id"]:
                    for vv in v["links"]:
                        if len(vv["linked_obj"]) == 2:
                            mo = vv["mo"][0]
                            iface = vv["iface_n"]
                            for i in vv["linked_obj"]:
                                if mo != i:
                                    res[mo][i] = iface[0]
            return res

        def translate_row(row, cmap):
            return [row[i] for i in cmap]

        def str_to_float(str):
            return float("{0:.3f}".format(float(str)))

        cols = [
            "object_id",
            "object_name",
            "object_address",
            "object_platform",
            "object_adm_domain",
            "object_segment",
            "object_container",
            # "object_hostname",
            # "object_status",
            # "profile_name",
            # "object_profile",
            # "object_vendor",
            "iface_name",
            "iface_description",
            "iface_speed",
            "max_load_in",
            "max_load_in_time",
            "max_load_out",
            "max_load_out_time",
            "avg_load_in",
            "avg_load_out",
            "total_in",
            "total_out",
            "uplink_iface_name",
            "uplink_iface_description",
            "uplink_iface_speed",
            "uplink_max_load_in",
            "uplink_max_load_in_time",
            "uplink_max_load_out",
            "uplink_max_load_out_time",
            "uplink_avg_load_in",
            "uplink_avg_load_out",
            "uplink_total_in",
            "uplink_total_out",
        ]

        header_row = [
            "ID",
            _("OBJECT_NAME"),
            _("OBJECT_ADDRESS"),
            _("OBJECT_PLATFORM"),
            _("OBJECT_ADMDOMAIN"),
            _("OBJECT_SEGMENT"),
            _("CONTAINER_ADDRESS"),
            _("IFACE_NAME"),
            _("IFACE_DESCRIPTION"),
            _("IFACE_SPEED"),
            _("MAX_LOAD_IN, Mbps"),
            _("MAX_LOAD_IN_TIME"),
            _("MAX_LOAD_OUT, Mbps"),
            _("MAX_LOAD_OUT_TIME"),
            _("AVG_LOAD_IN, Mbps"),
            _("AVG_LOAD_OUT, Mbps"),
            _("TOTAL_IN, Mbyte"),
            _("TOTAL_OUT, Mbyte"),
            _("UPLINK_IFACE_NAME"),
            _("UPLINK_IFACE_DESCRIPTION"),
            _("UPLINK_IFACE_SPEED"),
            _("UPLINK_MAX_LOAD_IN, Mbps"),
            _("UPLINK_MAX_TIME_IN"),
            _("UPLINK_MAX_LOAD_OUT, Mbps"),
            _("UPLINK_MAX_TIME_OUT"),
            _("UPLINK_AVG_LOAD_IN, Mbps"),
            _("UPLINK_AVG_LOAD_OUT, Mbps"),
            _("UPLINK_TOTAL_IN, Mbyte"),
            _("UPLINK_TOTAL_OUT, Mbyte"),
        ]

        if columns:
            cmap = []
            for c in columns.split(","):
                try:
                    cmap += [cols.index(c)]
                except ValueError:
                    continue
        else:
            cmap = list(range(len(cols)))
        columns_order = columns.split(",")
        columns_filter = set(columns_order)
        r = [translate_row(header_row, cmap)]

        # Date Time Block
        if not from_date:
            from_date = datetime.datetime.now() - datetime.timedelta(days=1)
        else:
            from_date = datetime.datetime.strptime(from_date, "%d.%m.%Y")
        if not to_date or from_date == to_date:
            to_date = from_date + datetime.timedelta(days=1)
        else:
            to_date = datetime.datetime.strptime(
                to_date, "%d.%m.%Y") + datetime.timedelta(days=1)
        diff = to_date - from_date

        # Load managed objects
        mos = ManagedObject.objects.filter(is_managed=True)
        if not request.user.is_superuser:
            mos = mos.filter(
                administrative_domain__in=UserAccess.get_domains(request.user))
        if selector:
            mos = mos.filter(
                ManagedObjectSelector.objects.get(id=int(selector)).Q)
        if administrative_domain:
            mos = mos.filter(
                administrative_domain__in=AdministrativeDomain.get_nested_ids(
                    int(administrative_domain)))
        if object_profile:
            mos = mos.filter(object_profile=object_profile)
        if interface_profile:
            interface_profile = InterfaceProfile.objects.filter(
                id=interface_profile).first()

        mo_attrs = namedtuple("MOATTRs",
                              [c for c in cols if c.startswith("object")])

        containers_address = {}
        if "object_container" in columns_filter:
            containers_address = ReportContainerData(
                set(mos.values_list("id", flat=True)))
            containers_address = dict(list(containers_address.extract()))

        moss = {}
        for row in mos.values_list("bi_id", "name", "address", "platform",
                                   "administrative_domain__name", "segment",
                                   "id"):
            moss[row[0]] = mo_attrs(*[
                row[6],
                row[1],
                row[2],
                smart_text(Platform.get_by_id(row[3]) if row[3] else ""),
                row[4],
                smart_text(NetworkSegment.get_by_id(row[5])) if row[5] else "",
                containers_address.
                get(row[6], "") if containers_address and row[6] else "",
            ])

        report_metric = ReportInterfaceMetrics(tuple(sorted(moss)),
                                               from_date,
                                               to_date,
                                               columns=None)
        report_metric.SELECT_QUERY_MAP = {
            (0, "managed_object", "id"): "managed_object",
            (1, "path", "iface_name"): "arrayStringConcat(path)",
            (
                2,
                "",
                "iface_description",
            ):
            "dictGetString('interfaceattributes','description' , (managed_object, arrayStringConcat(path)))",
            (
                3,
                "",
                "profile",
            ):
            "dictGetString('interfaceattributes', 'profile', (managed_object, arrayStringConcat(path)))",
            (
                4,
                "speed",
                "iface_speed",
            ):
            "dictGetUInt64('interfaceattributes', 'in_speed', (managed_object, arrayStringConcat(path)))",
            (5, "load_in_max", "load_in_max"): "divide(max(load_in),1048576)",
            (6, "load_out_max", "load_out_max"):
            "divide(max(load_out),1048576)",
            (7, "max_load_in_time", "max_load_in_time"): "argMax(ts,load_in)",
            (8, "max_load_out_time", "max_load_out_time"):
            "argMax(ts,load_out)",
            (9, "avg_load_in", "avg_load_in"): "divide(avg(load_in),1048576)",
            (10, "avg_load_out", "avg_load_out"):
            "divide(avg(load_out),1048576)",
        }
        ifaces_metrics = defaultdict(dict)

        for row in report_metric.do_query():
            avg_in = str_to_float(row[9])
            avg_out = str_to_float(row[10])
            total_in = avg_in * diff.total_seconds() / 8
            total_out = avg_out * diff.total_seconds() / 8
            ifaces_metrics[row[0]][row[1]] = {
                "description": row[2],
                "profile": row[3],
                "bandwidth": row[4],
                "max_load_in": str_to_float(row[5]),
                "max_load_out": str_to_float(row[6]),
                "max_load_in_time": row[7],
                "max_load_out_time": row[8],
                "avg_load_in": avg_in,
                "avg_load_out": avg_out,
                "total_in": float("{0:.1f}".format(total_in)),
                "total_out": float("{0:.1f}".format(total_out)),
            }

        # find uplinks
        links = {}
        if cmap[-1] > 17:
            mos_id = list(mos.values_list("id", flat=True))
            uplinks = {obj: [] for obj in mos_id}
            for d in ObjectData._get_collection().find(
                {"_id": {
                    "$in": mos_id
                }}, {
                    "_id": 1,
                    "uplinks": 1
                }):
                uplinks[d["_id"]] = d.get("uplinks", [])
            rld = load(mos_id)

            for mo in uplinks:
                for uplink in uplinks[mo]:
                    if rld[mo]:
                        if mo in links:
                            links[mo] += [rld[mo][uplink]]
                        else:
                            links[mo] = [rld[mo][uplink]]

        for mo_bi in ifaces_metrics:
            mo_id = moss[int(mo_bi)]
            mo_ids = getattr(mo_id, "object_id")

            for i in ifaces_metrics[mo_bi]:
                if not exclude_zero:
                    if (ifaces_metrics[mo_bi][i]["max_load_in"] == 0
                            and ifaces_metrics[mo_bi][i]["max_load_out"] == 0):
                        continue
                if description:
                    if description not in ifaces_metrics[mo_bi][i][
                            "description"]:
                        continue
                if interface_profile:
                    if interface_profile.name not in ifaces_metrics[mo_bi][i][
                            "profile"]:
                        continue

                row2 = [
                    mo_ids,
                    getattr(mo_id, "object_name"),
                    getattr(mo_id, "object_address"),
                    getattr(mo_id, "object_platform"),
                    getattr(mo_id, "object_adm_domain"),
                    getattr(mo_id, "object_segment"),
                    getattr(mo_id, "object_container"),
                    i,
                    ifaces_metrics[mo_bi][i]["description"],
                    ifaces_metrics[mo_bi][i]["bandwidth"],
                    ifaces_metrics[mo_bi][i]["max_load_in"],
                    ifaces_metrics[mo_bi][i]["max_load_in_time"],
                    ifaces_metrics[mo_bi][i]["max_load_out"],
                    ifaces_metrics[mo_bi][i]["max_load_out_time"],
                    ifaces_metrics[mo_bi][i]["avg_load_in"],
                    ifaces_metrics[mo_bi][i]["avg_load_out"],
                    ifaces_metrics[mo_bi][i]["total_in"],
                    ifaces_metrics[mo_bi][i]["total_out"],
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                ]

                ss = True
                if mo_ids in links:
                    for ifname_uplink in links[mo_ids]:
                        if ifname_uplink in ifaces_metrics[mo_bi]:
                            row2[18] = ifname_uplink
                            row2[19] = ifaces_metrics[mo_bi][ifname_uplink][
                                "description"]
                            row2[20] = ifaces_metrics[mo_bi][ifname_uplink][
                                "bandwidth"]
                            row2[21] = ifaces_metrics[mo_bi][ifname_uplink][
                                "max_load_in"]
                            row2[22] = ifaces_metrics[mo_bi][ifname_uplink][
                                "max_load_in_time"]
                            row2[23] = ifaces_metrics[mo_bi][ifname_uplink][
                                "max_load_out"]
                            row2[24] = ifaces_metrics[mo_bi][ifname_uplink][
                                "max_load_out_time"]
                            row2[25] = ifaces_metrics[mo_bi][ifname_uplink][
                                "avg_load_in"]
                            row2[26] = ifaces_metrics[mo_bi][ifname_uplink][
                                "avg_load_out"]
                            row2[27] = ifaces_metrics[mo_bi][ifname_uplink][
                                "total_in"]
                            row2[28] = ifaces_metrics[mo_bi][ifname_uplink][
                                "total_out"]
                            r += [translate_row(row2, cmap)]
                            ss = False
                if ss:
                    r += [translate_row(row2, cmap)]

        filename = "metrics_detail_report_%s" % datetime.datetime.now(
        ).strftime("%Y%m%d")
        if o_format == "csv":
            response = HttpResponse(content_type="text/csv")
            response[
                "Content-Disposition"] = 'attachment; filename="%s.csv"' % filename
            writer = csv.writer(response,
                                dialect="excel",
                                delimiter=",",
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerows(r)
            return response
        elif o_format == "csv_zip":
            response = BytesIO()
            f = TextIOWrapper(TemporaryFile(mode="w+b"), encoding="utf-8")
            writer = csv.writer(f,
                                dialect="excel",
                                delimiter=";",
                                quotechar='"')
            writer.writerows(r)
            f.seek(0)
            with ZipFile(response, "w", compression=ZIP_DEFLATED) as zf:
                zf.writestr("%s.csv" % filename, f.read())
                zf.filename = "%s.csv.zip" % filename
            # response = HttpResponse(content_type="text/csv")
            response.seek(0)
            response = HttpResponse(response.getvalue(),
                                    content_type="application/zip")
            response[
                "Content-Disposition"] = 'attachment; filename="%s.csv.zip"' % filename
            return response
        elif o_format == "xlsx":
            response = BytesIO()
            wb = xlsxwriter.Workbook(response)
            cf1 = wb.add_format({"bottom": 1, "left": 1, "right": 1, "top": 1})
            ws = wb.add_worksheet("Metrics")
            max_column_data_length = {}
            for rn, x in enumerate(r):
                for cn, c in enumerate(x):
                    if rn and (r[0][cn] not in max_column_data_length or
                               len(str(c)) > max_column_data_length[r[0][cn]]):
                        max_column_data_length[r[0][cn]] = len(str(c))
                    ws.write(rn, cn, c, cf1)
            ws.autofilter(0, 0, rn, cn)
            ws.freeze_panes(1, 0)
            for cn, c in enumerate(r[0]):
                # Set column width
                width = get_column_width(c)
                if enable_autowidth and width < max_column_data_length[c]:
                    width = max_column_data_length[c]
                ws.set_column(cn, cn, width=width)
            wb.close()
            response.seek(0)
            response = HttpResponse(response.getvalue(),
                                    content_type="application/vnd.ms-excel")
            response[
                "Content-Disposition"] = 'attachment; filename="%s.xlsx"' % filename
            response.close()
            return response
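In the csv_zip branch above, the trick is giving csv.writer a text interface over a binary temporary file, then reading the result back into the archive. A reduced sketch of just that mechanism:

import csv
from io import BytesIO, TextIOWrapper
from tempfile import TemporaryFile
from zipfile import ZipFile, ZIP_DEFLATED

buf = BytesIO()
f = TextIOWrapper(TemporaryFile(mode='w+b'), encoding='utf-8')
csv.writer(f, delimiter=';').writerows([['a', 'b'], ['1', '2']])
f.seek(0)  # flushes the wrapper and rewinds the underlying file
with ZipFile(buf, 'w', compression=ZIP_DEFLATED) as zf:
    zf.writestr('report.csv', f.read())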
Example #40
def cli_compile(file: io.TextIOWrapper, level: str):
    """Compiles a source file into a binary."""
    compiler.compile(file.read(), level=getattr(logging, level.upper()))
Example #41
def get_handle(
    path_or_buf: FilePathOrBuffer,
    mode: str,
    encoding: str | None = None,
    compression: CompressionOptions = None,
    memory_map: bool = False,
    is_text: bool = True,
    errors: str | None = None,
    storage_options: StorageOptions = None,
) -> IOHandles:
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf : str or file handle
        File path or object.
    mode : str
        Mode to open path_or_buf with.
    encoding : str or None
        Encoding to use.
    compression : str or dict, default None
        If string, specifies compression mode. If dict, value at key 'method'
        specifies compression mode. Compression mode must be one of {'infer',
        'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
        and `filepath_or_buffer` is path-like, then detect compression from
        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
        no compression). If dict and compression mode is one of
        {'zip', 'gzip', 'bz2'}, or inferred as one of the above,
        other entries passed as additional compression options.

        .. versionchanged:: 1.0.0

           May now be a dict with key 'method' as compression mode
           and other keys as compression options if compression
           mode is 'zip'.

        .. versionchanged:: 1.1.0

           Passing compression options as keys in dict is now
           supported for compression modes 'gzip' and 'bz2' as well as 'zip'.

    memory_map : bool, default False
        See parsers._parser_params for more information.
    is_text : bool, default True
        Whether the type of the content passed to the file/buffer is string or
        bytes. This is not the same as `"b" not in mode`. If a string content is
        passed to a binary file/buffer, a wrapper is inserted.
    errors : str, default 'strict'
        Specifies how encoding and decoding errors are to be handled.
        See the errors argument for :func:`open` for a full list
        of options.
    storage_options : StorageOptions, default None
        Passed to _get_filepath_or_buffer.

    .. versionchanged:: 1.2.0

    Returns the dataclass IOHandles
    """
    # Windows does not default to utf-8. Set to utf-8 for a consistent behavior
    encoding = encoding or "utf-8"

    # read_csv does not know whether the buffer is opened in binary/text mode
    if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
        mode += "b"

    # validate errors
    if isinstance(errors, str):
        errors = errors.lower()
    if errors not in (
            None,
            "strict",
            "ignore",
            "replace",
            "xmlcharrefreplace",
            "backslashreplace",
            "namereplace",
            "surrogateescape",
            "surrogatepass",
    ):
        raise ValueError(
            f"Invalid value for `encoding_errors` ({errors}). Please see " +
            "https://docs.python.org/3/library/codecs.html#error-handlers " +
            "for valid values.")

    # open URLs
    ioargs = _get_filepath_or_buffer(
        path_or_buf,
        encoding=encoding,
        compression=compression,
        mode=mode,
        storage_options=storage_options,
    )

    handle = ioargs.filepath_or_buffer
    handles: list[Buffer]

    # memory mapping needs to be the first step
    handle, memory_map, handles = _maybe_memory_map(
        handle,
        memory_map,
        ioargs.encoding,
        ioargs.mode,
        errors,
        ioargs.compression["method"] not in _compression_to_extension,
    )

    is_path = isinstance(handle, str)
    compression_args = dict(ioargs.compression)
    compression = compression_args.pop("method")

    if compression:
        # compression libraries do not like an explicit text-mode
        ioargs.mode = ioargs.mode.replace("t", "")

        # GZ Compression
        if compression == "gzip":
            if is_path:
                assert isinstance(handle, str)
                handle = gzip.GzipFile(
                    filename=handle,
                    mode=ioargs.mode,
                    **compression_args,
                )
            else:
                handle = gzip.GzipFile(
                    # error: Argument "fileobj" to "GzipFile" has incompatible type
                    # "Union[str, Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase,
                    # TextIOWrapper, mmap]]"; expected "Optional[IO[bytes]]"
                    fileobj=handle,  # type: ignore[arg-type]
                    mode=ioargs.mode,
                    **compression_args,
                )

        # BZ Compression
        elif compression == "bz2":
            handle = bz2.BZ2File(
                # Argument 1 to "BZ2File" has incompatible type "Union[str,
                # Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper,
                # mmap]]"; expected "Union[Union[str, bytes, _PathLike[str],
                # _PathLike[bytes]], IO[bytes]]"
                handle,  # type: ignore[arg-type]
                mode=ioargs.mode,
                **compression_args,
            )

        # ZIP Compression
        elif compression == "zip":
            handle = _BytesZipFile(handle, ioargs.mode, **compression_args)
            if handle.mode == "r":
                handles.append(handle)
                zip_names = handle.namelist()
                if len(zip_names) == 1:
                    handle = handle.open(zip_names.pop())
                elif len(zip_names) == 0:
                    raise ValueError(
                        f"Zero files found in ZIP file {path_or_buf}")
                else:
                    raise ValueError("Multiple files found in ZIP file. "
                                     f"Only one file per ZIP: {zip_names}")

        # XZ Compression
        elif compression == "xz":
            handle = get_lzma_file(lzma)(handle, ioargs.mode)

        # Unrecognized Compression
        else:
            msg = f"Unrecognized compression type: {compression}"
            raise ValueError(msg)

        assert not isinstance(handle, str)
        handles.append(handle)

    elif isinstance(handle, str):
        # Check whether the filename is to be opened in binary mode.
        # Binary mode does not support 'encoding' and 'newline'.
        if ioargs.encoding and "b" not in ioargs.mode:
            # Encoding
            handle = open(
                handle,
                ioargs.mode,
                encoding=ioargs.encoding,
                errors=errors,
                newline="",
            )
        else:
            # Binary mode
            handle = open(handle, ioargs.mode)
        handles.append(handle)

    # Convert BytesIO or file objects passed with an encoding
    is_wrapped = False
    if is_text and (compression or _is_binary_mode(handle, ioargs.mode)):
        handle = TextIOWrapper(
            # error: Argument 1 to "TextIOWrapper" has incompatible type
            # "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]";
            # expected "IO[bytes]"
            handle,  # type: ignore[arg-type]
            encoding=ioargs.encoding,
            errors=errors,
            newline="",
        )
        handles.append(handle)
        # only marked as wrapped when the caller provided a handle
        is_wrapped = not (isinstance(ioargs.filepath_or_buffer, str)
                          or ioargs.should_close)

    handles.reverse()  # close the most recently added buffer first
    if ioargs.should_close:
        assert not isinstance(ioargs.filepath_or_buffer, str)
        handles.append(ioargs.filepath_or_buffer)

    assert not isinstance(handle, str)
    return IOHandles(
        handle=handle,
        created_handles=handles,
        is_wrapped=is_wrapped,
        is_mmap=memory_map,
        compression=ioargs.compression,
    )
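A minimal usage sketch, assuming a recent pandas where this helper lives at pandas.io.common.get_handle and returns the IOHandles dataclass (the file name is illustrative):

from pandas.io.common import get_handle

handles = get_handle("data.csv.gz", "r", encoding="utf-8", compression="gzip")
try:
    header = handles.handle.readline()  # decoded text, decompressed on the fly
finally:
    handles.close()  # closes every handle this call created, wrappers first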
Example #42
0
INSERT_QUERY = "INSERT INTO advertiser_weekly_spend ({}) VALUES ({}) ON CONFLICT (advertiser_id, week_start_date) DO NOTHING".format(
    ', '.join([k for k in KEYS]), ', '.join([":" + k for k in KEYS]))


def load_advertiser_weekly_spend_to_db(csv_filelike):
    total_rows = 0
    start_time = datetime.now()
    for row in agate.Table.from_csv(csv_filelike):
        ad_data = {k.lower(): v for k, v in row.items() if k.lower() in KEYS}
        ad_data["spend_usd"] = ad_data["spend_usd"] or 0
        total_rows += 1
        print(ad_data)
        DB.query(INSERT_QUERY, **ad_data)
    duration = (datetime.now() - start_time)
    log1 = "loaded {} advertiser weekly spend records for this week in {}".format(
        total_rows, formattimedelta(duration))
    log.info(log1)
    info_to_slack("Google ads: " + log1)


if __name__ == "__main__":
    # csvfn = os.path.join(os.path.dirname(__file__), '..', 'data/google-political-ads-transparency-bundle/google-political-ads-advertiser-weekly-spend.csv')
    # with open(csvfn, 'r') as f:
    #     load_advertiser_weekly_spend_to_db(f)
    local_dest_for_bundle = os.path.join(os.path.dirname(__file__), '..',
                                         'data')
    with get_current_bundle() as zip_file:
        bundle_date = get_bundle_date(zip_file)
        load_advertiser_weekly_spend_to_db(
            TextIOWrapper(BytesIO(get_advertiser_weekly_spend_csv(zip_file))))
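The last line shows the recurring pattern in this collection: raw bytes out of a zip bundle, wrapped exactly once in TextIOWrapper so CSV code sees text. A stdlib-only sketch of the same idea (file and member names are illustrative):

import csv
from io import TextIOWrapper
from zipfile import ZipFile

with ZipFile("bundle.zip") as zf:
    with zf.open("spend.csv") as raw:  # binary handle onto the zip member
        rows = csv.reader(TextIOWrapper(raw, encoding="utf-8", newline=""))
        for row in rows:
            print(row)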
Example #43
0
    def _print_figure(
            self, outfile, format, dpi, facecolor, edgecolor,
            orientation, papertype, *,
            metadata=None, dryrun=False, bbox_inches_restore=None, **kwargs):
        """
        Render the figure to hardcopy.  Set the figure patch face and
        edge colors.  This is useful because some of the GUIs have a
        gray figure face color background and you'll probably want to
        override this on hardcopy.

        If outfile is a string, it is interpreted as a file name.
        If the extension matches .ep* write encapsulated postscript,
        otherwise write a stand-alone PostScript file.

        If outfile is a file object, a stand-alone PostScript file is
        written into this file object.

        metadata must be a dictionary. Currently, only the value for
        the key 'Creator' is used.
        """
        is_eps = format == 'eps'
        if isinstance(outfile, (str, os.PathLike)):
            outfile = title = os.fspath(outfile)
            title = title.encode("ascii", "replace").decode("ascii")
            passed_in_file_object = False
        elif is_writable_file_like(outfile):
            title = None
            passed_in_file_object = True
        else:
            raise ValueError("outfile must be a path or a file-like object")

        # find the appropriate papertype
        width, height = self.figure.get_size_inches()
        if papertype == 'auto':
            papertype = _get_papertype(
                *orientation.swap_if_landscape((width, height)))
        paper_width, paper_height = orientation.swap_if_landscape(
            papersize[papertype])

        if mpl.rcParams['ps.usedistiller']:
            # distillers improperly clip eps files if pagesize is too small
            if width > paper_width or height > paper_height:
                papertype = _get_papertype(
                    *orientation.swap_if_landscape(width, height))
                paper_width, paper_height = orientation.swap_if_landscape(
                    papersize[papertype])

        # center the figure on the paper
        xo = 72 * 0.5 * (paper_width - width)
        yo = 72 * 0.5 * (paper_height - height)

        l, b, w, h = self.figure.bbox.bounds
        llx = xo
        lly = yo
        urx = llx + w
        ury = lly + h
        rotation = 0
        if orientation is _Orientation.landscape:
            llx, lly, urx, ury = lly, llx, ury, urx
            xo, yo = 72 * paper_height - yo, xo
            rotation = 90
        bbox = (llx, lly, urx, ury)

        # generate PostScript code for the figure and store it in a string
        origfacecolor = self.figure.get_facecolor()
        origedgecolor = self.figure.get_edgecolor()
        self.figure.set_facecolor(facecolor)
        self.figure.set_edgecolor(edgecolor)

        if dryrun:
            class NullWriter:
                def write(self, *args, **kwargs):
                    pass

            self._pswriter = NullWriter()
        else:
            self._pswriter = StringIO()

        # mixed mode rendering
        ps_renderer = RendererPS(width, height, self._pswriter, imagedpi=dpi)
        renderer = MixedModeRenderer(
            self.figure, width, height, dpi, ps_renderer,
            bbox_inches_restore=bbox_inches_restore)

        self.figure.draw(renderer)

        if dryrun:  # return immediately if dryrun (tightbbox=True)
            return

        self.figure.set_facecolor(origfacecolor)
        self.figure.set_edgecolor(origedgecolor)

        # check for custom metadata
        if metadata is not None and 'Creator' in metadata:
            creator_str = metadata['Creator']
        else:
            creator_str = \
                f"matplotlib version {mpl.__version__}, http://matplotlib.org/"

        def print_figure_impl(fh):
            # write the PostScript headers
            if is_eps:
                print("%!PS-Adobe-3.0 EPSF-3.0", file=fh)
            else:
                print(f"%!PS-Adobe-3.0\n"
                      f"%%DocumentPaperSizes: {papertype}\n"
                      f"%%Pages: 1\n",
                      end="", file=fh)
            if title:
                print("%%Title: " + title, file=fh)
            # get source date from SOURCE_DATE_EPOCH, if set
            # See https://reproducible-builds.org/specs/source-date-epoch/
            source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
            if source_date_epoch:
                source_date = datetime.datetime.utcfromtimestamp(
                    int(source_date_epoch)).strftime("%a %b %d %H:%M:%S %Y")
            else:
                source_date = time.ctime()
            print(f"%%Creator: {creator_str}\n"
                  f"%%CreationDate: {source_date}\n"
                  f"%%Orientation: {orientation.name}\n"
                  f"%%BoundingBox: {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n"
                  f"%%EndComments\n",
                  end="", file=fh)

            Ndict = len(psDefs)
            print("%%BeginProlog", file=fh)
            if not mpl.rcParams['ps.useafm']:
                Ndict += len(ps_renderer._character_tracker.used)
            print("/mpldict %d dict def" % Ndict, file=fh)
            print("mpldict begin", file=fh)
            for d in psDefs:
                d = d.strip()
                for l in d.split('\n'):
                    print(l.strip(), file=fh)
            if not mpl.rcParams['ps.useafm']:
                for font_path, chars \
                        in ps_renderer._character_tracker.used.items():
                    if not chars:
                        continue
                    font = get_font(font_path)
                    glyph_ids = [font.get_char_index(c) for c in chars]
                    fonttype = mpl.rcParams['ps.fonttype']
                    # Can't use more than 255 chars from a single Type 3 font.
                    if len(glyph_ids) > 255:
                        fonttype = 42
                    # The ttf to ps (subsetting) support doesn't work for
                    # OpenType fonts that are Postscript inside (like the STIX
                    # fonts).  This will simply turn that off to avoid errors.
                    if is_opentype_cff_font(font_path):
                        raise RuntimeError(
                            "OpenType CFF fonts can not be saved using "
                            "the internal Postscript backend at this "
                            "time; consider using the Cairo backend")
                    fh.flush()
                    try:
                        convert_ttf_to_ps(os.fsencode(font_path),
                                          fh, fonttype, glyph_ids)
                    except RuntimeError:
                        _log.warning("The PostScript backend does not "
                                     "currently support the selected font.")
                        raise
            print("end", file=fh)
            print("%%EndProlog", file=fh)

            if not is_eps:
                print("%%Page: 1 1", file=fh)
            print("mpldict begin", file=fh)

            print("%s translate" % _nums_to_str(xo, yo), file=fh)
            if rotation:
                print("%d rotate" % rotation, file=fh)
            print("%s clipbox" % _nums_to_str(width*72, height*72, 0, 0),
                  file=fh)

            # write the figure
            content = self._pswriter.getvalue()
            if not isinstance(content, str):
                content = content.decode('ascii')
            print(content, file=fh)

            # write the trailer
            print("end", file=fh)
            print("showpage", file=fh)
            if not is_eps:
                print("%%EOF", file=fh)
            fh.flush()

        if mpl.rcParams['ps.usedistiller']:
            # We are going to use an external program to process the output.
            # Write to a temporary file.
            with TemporaryDirectory() as tmpdir:
                tmpfile = os.path.join(tmpdir, "tmp.ps")
                with open(tmpfile, 'w', encoding='latin-1') as fh:
                    print_figure_impl(fh)
                if mpl.rcParams['ps.usedistiller'] == 'ghostscript':
                    gs_distill(tmpfile, is_eps, ptype=papertype, bbox=bbox)
                elif mpl.rcParams['ps.usedistiller'] == 'xpdf':
                    xpdf_distill(tmpfile, is_eps, ptype=papertype, bbox=bbox)
                _move_path_to_path_or_stream(tmpfile, outfile)

        else:
            # Write directly to outfile.
            if passed_in_file_object:
                requires_unicode = file_requires_unicode(outfile)

                if not requires_unicode:
                    fh = TextIOWrapper(outfile, encoding="latin-1")
                    # Prevent the TextIOWrapper from closing the underlying
                    # file.
                    fh.close = lambda: None
                else:
                    fh = outfile

                print_figure_impl(fh)
            else:
                with open(outfile, 'w', encoding='latin-1') as fh:
                    print_figure_impl(fh)
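The `fh.close = lambda: None` trick above stops the TextIOWrapper from closing the caller's file object when the wrapper is garbage-collected. A stdlib sketch of the same guarantee using detach(), which hands the underlying buffer back explicitly:

from io import BytesIO, TextIOWrapper

buf = BytesIO()
fh = TextIOWrapper(buf, encoding="latin-1")
fh.write("%!PS-Adobe-3.0\n")
fh.flush()
fh.detach()            # unhook the wrapper; buf stays open and usable
print(buf.getvalue())  # b'%!PS-Adobe-3.0\n'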
Example #44
0
def get_handle(
    path_or_buf,
    mode: str,
    encoding=None,
    compression: Optional[Union[str, Mapping[str, Any]]] = None,
    memory_map: bool = False,
    is_text: bool = True,
):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf : str or file handle
        File path or object.
    mode : str
        Mode to open path_or_buf with.
    encoding : str or None
        Encoding to use.
    compression : str or dict, default None
        If string, specifies compression mode. If dict, value at key 'method'
        specifies compression mode. Compression mode must be one of {'infer',
        'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
        and `filepath_or_buffer` is path-like, then detect compression from
        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
        no compression). If dict and compression mode is 'zip' or inferred as
        'zip', other entries passed as additional compression options.

        .. versionchanged:: 1.0.0

           May now be a dict with key 'method' as compression mode
           and other keys as compression options if compression
           mode is 'zip'.

    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        Whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.).

    Returns
    -------
    f : file-like
        A file-like object.
    handles : list of file-like objects
        A list of file-like object that were opened in this function.
    """
    try:
        from s3fs import S3File

        need_text_wrapping = (BufferedIOBase, S3File)
    except ImportError:
        need_text_wrapping = BufferedIOBase  # type: ignore

    handles: List[IO] = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, str)

    compression, compression_args = get_compression_method(compression)
    if is_path:
        compression = infer_compression(path_or_buf, compression)

    if compression:

        # GZ Compression
        if compression == "gzip":
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == "bz2":
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == "zip":
            zf = _BytesZipFile(path_or_buf, mode, **compression_args)
            # Ensure the container is closed as well.
            handles.append(zf)
            if zf.mode == "w":
                f = zf
            elif zf.mode == "r":
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                elif len(zip_names) == 0:
                    raise ValueError(
                        f"Zero files found in ZIP file {path_or_buf}")
                else:
                    raise ValueError("Multiple files found in ZIP file."
                                     f" Only one file per ZIP: {zip_names}")

        # XZ Compression
        elif compression == "xz":
            f = _get_lzma_file(lzma)(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = f"Unrecognized compression type: {compression}"
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if encoding:
            # Encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
            # No explicit encoding
            f = open(path_or_buf, mode, errors="replace", newline="")
        else:
            # Binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # Convert BytesIO or file objects passed with an encoding
    if is_text and (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper

        g = TextIOWrapper(f, encoding=encoding, newline="")
        if not isinstance(f, BufferedIOBase):
            handles.append(g)
        f = g

    if memory_map and hasattr(f, "fileno"):
        try:
            wrapped = _MMapWrapper(f)
            f.close()
            f = wrapped
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Example #45
0
def send_snap(snapshot,
              dest_name,
              base=None,
              ssh_dest=None,
              raw=False,
              resume=False,
              resume_token=None,
              dry_run=False):
    """Sends snapshot to destination, incrementally and over ssh if specified.

    Parameters
    ----------
    snapshot : {ZFSSnapshot}
        Snapshot to send
    dest_name : {str}
        Name of the location to send snapshot
    base : {ZFSSnapshot}, optional
        Base snapshot for incremental stream (the default is None, meaning a full stream)
    ssh_dest : {ssh.SSH}, optional
        Open ssh connection for remote backup (the default is None, meaning local backup)
    raw : {bool}, optional
        Send the stream in raw format (the default is False)
    resume : {bool}, optional
        Create a resumable stream (the default is False)
    resume_token : {str}, optional
        Token for resuming an interrupted transfer (the default is None)
    dry_run : {boolean}, optional
        Don't change filesystem

    Returns
    -------
    int
        0 if success, 1 if not, 2 if CalledProcessError
    """

    logger = logging.getLogger(__name__)
    dest_name_log = '{:s}@{:s}:{:s}'.format(
        ssh_dest.user, ssh_dest.host, dest_name) if ssh_dest else dest_name

    try:
        ssh_source = snapshot.ssh
        stream_size = snapshot.stream_size(base=base,
                                           raw=raw,
                                           resume_token=resume_token)

        send = snapshot.send(ssh_dest=ssh_dest,
                             base=base,
                             intermediates=True,
                             raw=raw,
                             resume_token=resume_token)
        recv = zfs.receive(name=dest_name,
                           stdin=send.stdout,
                           ssh=ssh_dest,
                           ssh_source=ssh_source,
                           force=True,
                           nomount=True,
                           stream_size=stream_size,
                           raw=raw,
                           resume=resume,
                           dry_run=dry_run)
        send.stdout.close()

        # write pv output to stderr / stdout
        for line in TextIOWrapper(send.stderr, newline='\r'):
            if sys.stdout.isatty():
                sys.stderr.write('  ' + line)
                sys.stderr.flush()
            elif line.rstrip():  # if stdout is redirected, write pv to stdout
                sys.stdout.write('  ' + line.rstrip() + '\n')
                sys.stdout.flush()
        send.stderr.close()

        stdout, stderr = recv.communicate()
        # raise any error that occurred
        if recv.returncode:
            raise CalledProcessError(returncode=recv.returncode,
                                     cmd=recv.args,
                                     output=stdout,
                                     stderr=stderr)

    except (DatasetNotFoundError, DatasetExistsError, DatasetBusyError,
            OSError, EOFError) as err:
        logger.error('Error while sending to {:s}: {}...'.format(
            dest_name_log, err))
        return 1
    except CalledProcessError as err:
        logger.error('Error while sending to {:s}: {}...'.format(
            dest_name_log,
            err.stderr.rstrip().decode().replace('\n', ' - ')))
        # returncode 2 means we will retry send if requested
        return 2
    except KeyboardInterrupt:
        logger.error(
            'KeyboardInterrupt while sending to {:s}...'.format(dest_name_log))
        raise
    else:
        return 0
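The TextIOWrapper detail worth noting above is newline='\r': pv redraws its progress bar with bare carriage returns, so splitting stderr on '\r' yields one line per redraw. A minimal sketch of reading such output from a child process (assumes pv is installed; the file name is illustrative):

from io import TextIOWrapper
from subprocess import DEVNULL, PIPE, Popen

# pv copies its input to stdout and draws progress on stderr using '\r';
# -f forces the progress display even when stderr is not a terminal
proc = Popen(["pv", "-f", "backup.img"], stdout=DEVNULL, stderr=PIPE)
for line in TextIOWrapper(proc.stderr, newline="\r"):
    print("  " + line.rstrip())  # one progress snapshot per iteration
proc.wait()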
Example #46
0
 def test_read_eof(self):
     reader = StreamReader()
     wrapped = TextIOWrapper(reader, 'utf-8')
     reader.feed(b'foo')
     reader.feed_eof()
     self.assertEqual(wrapped.read(), 'foo')
Example #47
0
def _wrap_output(outp):
    if isinstance(outp, BufferedReader):
        outp = TextIOWrapper(outp)
    return outp
Example #48
0
 def test_simple(self):
     reader = StreamReader()
     wrapped = TextIOWrapper(reader, 'utf-8')
     reader.feed(b'foo')
     self.assertEqual(wrapped.read(3), 'foo')
Example #49
0
def load_package_sources():
    """Discover all sources listed in ``sources.json``."""
    with resource_stream('pandasdmx', 'sources.json') as f:
        # TextIOWrapper is for Python 3.5 compatibility
        for info in json.load(TextIOWrapper(f)):
            add_source(info)
Example #50
0
def read(data, path, prog_cb):

    file_size = os.stat(path).st_size

    with open(path, mode='rb') as file:

        byts = file.read(4096)
        det  = chardet.detect(byts)
        encoding = det['encoding']
        file.seek(0)

        if encoding == 'ascii':
            encoding = 'utf-8-sig'

        csvfile = TextIOWrapper(file, encoding=encoding, errors='replace')

        try:
            some_data = csvfile.read(131072)
            if len(some_data) == 131072:  # csv sniffer doesn't like partial lines
                some_data = trim_after_last_newline(some_data)
            dialect = csv.Sniffer().sniff(some_data, ', \t;')
        except csv.Error as e:
            log.exception(e)
            dialect = csv.excel

        dialect.doublequote = True

        csvfile.seek(0)
        reader = csv.reader(csvfile, dialect)

        itr = reader.__iter__()
        column_names = itr.__next__()

        column_count = 0
        column_writers = [ ]

        if len(column_names) == 0:
            column_names = ['A']

        for i in range(len(column_names)):
            column_name = column_names[i]
            data.append_column(column_name, column_name)
            column = data[i]
            column.column_type = ColumnType.DATA
            column_writers.append(ColumnWriter(column, i))
            column_count += 1

        row_count = 0

        csvfile.seek(0)
        reader = csv.reader(csvfile, dialect)
        first = True

        for row in reader:
            if first:
                first = False
            else:
                for i in range(column_count):
                    column_writers[i].examine_row(row)

                row_count += 1

            if row_count % 1000 == 0:
                prog_cb(0.33333 * file.tell() / file_size)

        for column_writer in column_writers:
            column_writer.ruminate()

        data.set_row_count(row_count)

        csvfile.seek(0)
        reader = csv.reader(csvfile, dialect)
        first = True

        row_no = 0

        for row in reader:
            if first:
                first = False
            else:
                for i in range(column_count):
                    column_writers[i].parse_row(row, row_no)
                row_no += 1

            if row_no % 1000 == 0:
                prog_cb(.33333 + .66666 * file.tell() / file_size)
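A compressed sketch of the detection dance above, chardet for the byte encoding and csv.Sniffer for the dialect, assuming the chardet package is installed (the path is illustrative):

import csv
from io import TextIOWrapper

import chardet

with open("survey.csv", "rb") as f:
    enc = chardet.detect(f.read(4096))["encoding"] or "utf-8"
    f.seek(0)
    text = TextIOWrapper(f, encoding=enc, errors="replace")
    dialect = csv.Sniffer().sniff(text.read(65536), ", \t;")
    text.seek(0)
    for row in csv.reader(text, dialect):
        print(row)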
Example #51
0
    def _print_figure(self,
                      outfile,
                      format,
                      *,
                      dpi,
                      dsc_comments,
                      orientation,
                      papertype,
                      dryrun=False,
                      bbox_inches_restore=None):
        """
        Render the figure to a filesystem path or a file-like object.

        Parameters are as for `.print_figure`, except that *dsc_comments* is a
        string containing Document Structuring Convention comments,
        generated from the *metadata* parameter to `.print_figure`.
        """
        is_eps = format == 'eps'
        if isinstance(outfile, (str, os.PathLike)):
            outfile = os.fspath(outfile)
            passed_in_file_object = False
        elif is_writable_file_like(outfile):
            passed_in_file_object = True
        else:
            raise ValueError("outfile must be a path or a file-like object")

        # find the appropriate papertype
        width, height = self.figure.get_size_inches()
        if papertype == 'auto':
            papertype = _get_papertype(
                *orientation.swap_if_landscape((width, height)))
        paper_width, paper_height = orientation.swap_if_landscape(
            papersize[papertype])

        if mpl.rcParams['ps.usedistiller']:
            # distillers improperly clip eps files if pagesize is too small
            if width > paper_width or height > paper_height:
                papertype = _get_papertype(
                    *orientation.swap_if_landscape(width, height))
                paper_width, paper_height = orientation.swap_if_landscape(
                    papersize[papertype])

        # center the figure on the paper
        xo = 72 * 0.5 * (paper_width - width)
        yo = 72 * 0.5 * (paper_height - height)

        llx = xo
        lly = yo
        urx = llx + self.figure.bbox.width
        ury = lly + self.figure.bbox.height
        rotation = 0
        if orientation is _Orientation.landscape:
            llx, lly, urx, ury = lly, llx, ury, urx
            xo, yo = 72 * paper_height - yo, xo
            rotation = 90
        bbox = (llx, lly, urx, ury)

        if dryrun:

            class NullWriter:
                def write(self, *args, **kwargs):
                    pass

            self._pswriter = NullWriter()
        else:
            self._pswriter = StringIO()

        # mixed mode rendering
        ps_renderer = RendererPS(width, height, self._pswriter, imagedpi=dpi)
        renderer = MixedModeRenderer(self.figure,
                                     width,
                                     height,
                                     dpi,
                                     ps_renderer,
                                     bbox_inches_restore=bbox_inches_restore)

        self.figure.draw(renderer)

        if dryrun:  # return immediately if dryrun (tightbbox=True)
            return

        def print_figure_impl(fh):
            # write the PostScript headers
            if is_eps:
                print("%!PS-Adobe-3.0 EPSF-3.0", file=fh)
            else:
                print(
                    f"%!PS-Adobe-3.0\n"
                    f"%%DocumentPaperSizes: {papertype}\n"
                    f"%%Pages: 1\n",
                    end="",
                    file=fh)
            print(
                f"{dsc_comments}\n"
                f"%%Orientation: {orientation.name}\n"
                f"%%BoundingBox: {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n"
                f"%%EndComments\n",
                end="",
                file=fh)

            Ndict = len(psDefs)
            print("%%BeginProlog", file=fh)
            if not mpl.rcParams['ps.useafm']:
                Ndict += len(ps_renderer._character_tracker.used)
            print("/mpldict %d dict def" % Ndict, file=fh)
            print("mpldict begin", file=fh)
            print("\n".join(psDefs), file=fh)
            if not mpl.rcParams['ps.useafm']:
                for font_path, chars \
                        in ps_renderer._character_tracker.used.items():
                    if not chars:
                        continue
                    font = get_font(font_path)
                    glyph_ids = [font.get_char_index(c) for c in chars]
                    fonttype = mpl.rcParams['ps.fonttype']
                    # Can't use more than 255 chars from a single Type 3 font.
                    if len(glyph_ids) > 255:
                        fonttype = 42
                    # The ttf to ps (subsetting) support doesn't work for
                    # OpenType fonts that are Postscript inside (like the STIX
                    # fonts).  This will simply turn that off to avoid errors.
                    if is_opentype_cff_font(font_path):
                        raise RuntimeError(
                            "OpenType CFF fonts can not be saved using "
                            "the internal Postscript backend at this "
                            "time; consider using the Cairo backend")
                    fh.flush()
                    try:
                        convert_ttf_to_ps(os.fsencode(font_path), fh, fonttype,
                                          glyph_ids)
                    except RuntimeError:
                        _log.warning("The PostScript backend does not "
                                     "currently support the selected font.")
                        raise
            print("end", file=fh)
            print("%%EndProlog", file=fh)

            if not is_eps:
                print("%%Page: 1 1", file=fh)
            print("mpldict begin", file=fh)

            print("%s translate" % _nums_to_str(xo, yo), file=fh)
            if rotation:
                print("%d rotate" % rotation, file=fh)
            print("%s clipbox" % _nums_to_str(width * 72, height * 72, 0, 0),
                  file=fh)

            # write the figure
            print(self._pswriter.getvalue(), file=fh)

            # write the trailer
            print("end", file=fh)
            print("showpage", file=fh)
            if not is_eps:
                print("%%EOF", file=fh)
            fh.flush()

        if mpl.rcParams['ps.usedistiller']:
            # We are going to use an external program to process the output.
            # Write to a temporary file.
            with TemporaryDirectory() as tmpdir:
                tmpfile = os.path.join(tmpdir, "tmp.ps")
                with open(tmpfile, 'w', encoding='latin-1') as fh:
                    print_figure_impl(fh)
                if mpl.rcParams['ps.usedistiller'] == 'ghostscript':
                    _try_distill(gs_distill,
                                 tmpfile,
                                 is_eps,
                                 ptype=papertype,
                                 bbox=bbox)
                elif mpl.rcParams['ps.usedistiller'] == 'xpdf':
                    _try_distill(xpdf_distill,
                                 tmpfile,
                                 is_eps,
                                 ptype=papertype,
                                 bbox=bbox)
                _move_path_to_path_or_stream(tmpfile, outfile)

        else:
            # Write directly to outfile.
            if passed_in_file_object:
                requires_unicode = file_requires_unicode(outfile)

                if not requires_unicode:
                    fh = TextIOWrapper(outfile, encoding="latin-1")
                    # Prevent the TextIOWrapper from closing the underlying
                    # file.
                    fh.close = lambda: None
                else:
                    fh = outfile

                print_figure_impl(fh)
            else:
                with open(outfile, 'w', encoding='latin-1') as fh:
                    print_figure_impl(fh)
Example #52
0
 def translate_newlines(self, mode, *text_args, **text_kwargs):
     wrapper = TextIOWrapper(self._io, *text_args, **text_kwargs)
     if mode:
         wrapper.mode = mode
     self.io = wrapper
     self._translate = True
Example #53
0
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files but
    provides a synchronous, cooperative interface.

    .. caution::
         This object is most effective wrapping files that can be used appropriately
         with :func:`select.select` such as sockets and pipes.

         In general, on most platforms, operations on regular files
         (e.g., ``open('/etc/hosts')``) are considered non-blocking
         already, even though they can take some time to complete as
         data is copied to the kernel and flushed to disk (this time
         is relatively bounded compared to sockets or pipes, though).
         A :func:`~os.read` or :func:`~os.write` call on such a file
         will still effectively block for some small period of time.
         Therefore, wrapping this class around a regular file is
         unlikely to make IO gevent-friendly: reading or writing large
         amounts of data could still block the event loop.

         If you'll be working with regular files and doing IO in large
         chunks, you may consider using
         :class:`~gevent.fileobject.FileObjectThread` or
         :func:`~gevent.os.tp_read` and :func:`~gevent.os.tp_write` to bypass this
         concern.

    .. note::
         Random read/write (e.g., ``mode='rwb'``) is not supported.
         For that, use :class:`io.BufferedRWPair` around two instances of this
         class.

    .. tip::
         Although this object provides a :meth:`fileno` method and
         so can itself be passed to :func:`fcntl.fcntl`, setting the
         :data:`os.O_NONBLOCK` flag will have no effect; however, removing
         that flag will cause this object to no longer be cooperative.

    .. versionchanged:: 1.1
       Now uses the :mod:`io` package internally. Under Python 2, previously
       used the undocumented class :class:`socket._fileobject`. This provides
       better file-like semantics (and portability to Python 3).
    """

    #: platform specific default for the *bufsize* parameter
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. Other values are interpreted as for the :mod:`io` package.
            Buffering is ignored in text mode.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False

        if len(mode) != 1 and mode not in 'rw': # pragma: no cover
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            # NOTE: This is preventing a mode like 'rwb' for binary random access;
            # that code was never tested and was explicitly marked as "not used"
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        if mode == 'r':
            IOFamily = BufferedReader
        else:
            assert mode == 'w'
            IOFamily = BufferedWriter

        self.io = IOFamily(self.fileio, bufsize)
        #else: # QQQ: not used, not reachable
        #
        #    self.io = BufferedRandom(self.fileio, bufsize)

        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def readable(self):
        """
        .. versionadded:: 1.1b2
        """
        return self.io.readable()

    def writable(self):
        """
        .. versionadded:: 1.1b2
        """
        return self.io.writable()

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is
        return getattr(self._fobj, name)
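A minimal usage sketch, assuming gevent is installed on a POSIX platform (FileObjectPosix is importable from gevent.fileobject):

import os

from gevent.fileobject import FileObjectPosix

r, w = os.pipe()
reader = FileObjectPosix(r, 'rb')
writer = FileObjectPosix(w, 'wb')
writer.write(b'hello\n')
writer.close()            # flushes the buffer, then closes the write end
print(reader.readline())  # b'hello\n'
reader.close()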
Example #54
0
def _wrap_input(inp):
    if isinstance(inp, BufferedWriter):
        inp = TextIOWrapper(inp, locale.getpreferredencoding())
    return inp
Example #55
0
def _load_contents(bucket: str, key: str) -> TextIOWrapper:
    response = S3_CLIENT.get_object(Bucket=bucket, Key=key)
    gzipped = GzipFile(None, 'rb', fileobj=response['Body'])
    return TextIOWrapper(gzipped)  # type: ignore
Example #56
0
 def lines(self, name):
     with ZipFile(self.filename) as zf:
         with zf.open(name) as f:
             for line in TextIOWrapper(f):
                 yield line
Example #57
0
class LocalGtpBot:
    def __init__(self, agent, termination=None, handicap=0, opponent='gnugo', output_sgf="out.sgf",
                 our_color='w'):
        # initialize a bot from an agent and a termination strategy
        self._agent = TerminationAgent(agent, termination)
        self._handicap = handicap
        # play until the game is stopped by one of the players
        self._stopped = False
        self._game_state = GameState.new_game(19)
        # at the end we write the game to the provided file in SGF format
        self._sgf = SGFWriter(output_sgf)
        self._our_color = Player.black if our_color == 'b' else Player.white
        self._their_color = self._our_color.other
        # opponent will either be GNU Go or Pachi
        cmd = self.opponent_cmd(opponent)
        pipe = subprocess.PIPE
        # read and write GTP commands from the command line
        self._proc = Popen(cmd, stdin=pipe, stdout=pipe)
        self._stdin = TextIOWrapper(self._proc.stdin, encoding='utf-8', line_buffering=True)
        self._stdout = TextIOWrapper(self._proc.stdout, encoding='utf-8')

    @staticmethod
    def opponent_cmd(opponent):
        if opponent == 'gnugo':
            return ["gnugo", "--mode", "gtp"]
        elif opponent == 'pachi':
            return ["pachi"]
        else:
            raise ValueError("Unknown bot name {}".format(opponent))

    def _send_command(self, cmd):
        self._stdin.write(cmd)

    def _get_response(self):
        succeeded = False
        result = ''
        while not succeeded:
            line = self._stdout.readline()
            if line[0] == '=':
                succeeded = True
                line = line.strip()
                result = re.sub('^= ?', '', line)
        return result

    def _command_and_response(self, cmd):
        self._send_command(cmd)
        resp = self._get_response()
        return resp

    def _set_handicap(self):
        if self._handicap == 0:
            self._command_and_response("komi 7.5\n")
            self._sgf.append("KM[7.5]\n")
        else:
            stones = self._command_and_response("fixed_handicap {}\n".format(self._handicap))
            sgf_handicap = "HA[{}]AB".format(self._handicap)
            for pos in stones.split(" "):
                move = Move(gtp_position_to_coords(pos))
                self._game_state = self._game_state.apply_move(move)
                sgf_handicap = sgf_handicap + "[" + self._sgf.coordinates(move) + "]"
            self._sgf.append(sgf_handicap + "\n")

    def _play(self):
        while not self._stopped:
            if self._game_state.next_player == self._our_color:
                self._play_our_move()
            else:
                self._play_their_move()
            print(chr(27) + "[2J")
            print_board(self._game_state.board)
            print("Estimated result: ")
            print(compute_game_result(self._game_state))

    def _play_our_move(self):
        move = self._agent.select_move(self._game_state)
        self._game_state = self._game_state.apply_move(move)

        our_name = self._our_color.name
        our_letter = our_name[0].upper()
        sgf_move = ""
        if move.is_pass:
            self._command_and_response("play {} pass\n".format(our_name))
        elif move.is_resign:
            self._command_and_response("play {} resign\n".format(our_name))
        else:
            pos = coords_to_gtp_position(move)
            self._command_and_response("play {} {}\n".format(our_name, pos))
            sgf_move = self._sgf.coordinates(move)
        self._sgf.append(";{}[{}]\n".format(our_letter, sgf_move))

    def _play_their_move(self):
        their_name = self._their_color.name
        their_letter = their_name[0].upper()

        pos = self._command_and_response("genmove {}\n".format(their_name))
        if pos.lower() == 'resign':
            self._game_state = self._game_state.apply_move(Move.resign())
            self._stopped = True
        elif pos.lower() == 'pass':
            self._game_state = self._game_state.apply_move(Move.pass_turn())
            self._sgf.append(";{}[]\n".format(their_letter))
            if self._game_state.last_move.is_pass:
                self._stopped = True
        else:
            move = Move(gtp_position_to_coords(pos))
            self._game_state = self._game_state.apply_move(move)
            self._sgf.append(";{}[{}]\n".format(their_letter, self._sgf.coordinates(move)))

    def run(self):
        self._command_and_response("boardsize 19\n")
        self._set_handicap()
        self._play()
        self._sgf.write_sgf()
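A hypothetical launch, assuming an `agent` object implementing select_move(game_state) and GNU Go available on PATH (all names as used in the class above):

bot = LocalGtpBot(agent, handicap=0, opponent='gnugo',
                  output_sgf="out.sgf", our_color='b')
bot.run()  # plays to completion, then writes the SGF record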
Example #58
0
    print("Connexion à Discord...")
    bot.uptime = datetime.datetime.utcnow()

    if bot.settings.login_credentials:
        yield from bot.login(*bot.settings.login_credentials,
                             bot=not bot.settings.self_bot)
    else:
        print("Aucune clef renseignée pour la connexion n'est disponible.")
        raise RuntimeError()
    yield from bot.connect()


if __name__ == '__main__':
    sys.stdout = TextIOWrapper(sys.stdout.detach(),
                               encoding=sys.stdout.encoding,
                               errors="replace",
                               line_buffering=True)
    bot = initialize()
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main(bot))
    except discord.LoginFailure:
        bot.logger.error(traceback.format_exc())
        if not bot.settings.no_prompt:
            choice = input(
                "The login credentials are invalid... Try restarting this script. "
                "If it still doesn't work, type 'reset': ")
            if choice.lower().strip() == "reset":
                bot.settings.token = None
                bot.settings.email = None
                bot.settings.password = None
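The first statement of the __main__ block above is a self-contained pattern worth isolating: re-wrap sys.stdout so printing never raises UnicodeEncodeError on a console with a narrow encoding. A stdlib-only sketch:

import sys
from io import TextIOWrapper

enc = sys.stdout.encoding or "utf-8"
sys.stdout = TextIOWrapper(sys.stdout.detach(), encoding=enc,
                           errors="replace", line_buffering=True)
print("accents and symbols now degrade gracefully: \N{SNOWMAN}")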
Example #59
0
 def data(self):
     if self._data is None:
         buf = BytesIO(self.storage.load(self._path))
         with TextIOWrapper(buf) as data:
             self._data = signature.load_one_signature(data)
     return self._data
Example #60
-1
 def _buf_fixture(self):
     # try to simulate how sys.stdout looks - we send it u''
     # but then it's trying to encode to something.
     buf = BytesIO()
     wrapper = TextIOWrapper(buf, encoding='ascii', line_buffering=True)
     wrapper.getvalue = buf.getvalue
     return wrapper
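A quick check of what this fixture yields, relying (as the snippet itself does) on TextIOWrapper instances accepting ad-hoc attributes:

from io import BytesIO, TextIOWrapper

buf = BytesIO()
wrapper = TextIOWrapper(buf, encoding='ascii', line_buffering=True)
wrapper.getvalue = buf.getvalue  # expose the raw bytes for assertions
wrapper.write(u'hello\n')        # line buffering flushes on the newline
assert wrapper.getvalue() == b'hello\n'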