Example #1
    def _get_single_df(
        stream: IO, filetype: Optional[TypeEnum], **kwargs
    ) -> Union[pd.DataFrame, Iterable[pd.DataFrame]]:
        """
        Read a stream and retrieve the data frame or data frame generator (chunks)
        It uses `stream.name`, which is the path to a local file (often temporary)
        to avoid closing it. It will be closed at the end of the method.
        """
        if filetype is None:
            filetype = TypeEnum(detect_type(stream.name))

        # Check encoding
        encoding = kwargs.get('encoding')
        if not validate_encoding(stream.name, encoding):
            encoding = detect_encoding(stream.name)
        kwargs['encoding'] = encoding

        # Check separator for CSV files if it's not set
        if filetype is TypeEnum.CSV and 'sep' not in kwargs:
            if not validate_sep(stream.name, encoding=encoding):
                kwargs['sep'] = detect_sep(stream.name, encoding)

        pd_read = getattr(pd, f'read_{filetype}')
        try:
            df = pd_read(stream.name, **kwargs)
        finally:
            stream.close()

        # In case of sheets, the df can be a dictionary
        if kwargs.get('sheet_name', NOTSET) is None:
            for sheet_name, _df in df.items():
                _df['__sheet__'] = sheet_name
            df = pd.concat(df.values(), sort=False)

        return df
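The multi-sheet branch above relies on pandas returning a dict of DataFrames when sheet_name=None is passed; a minimal standalone sketch of that idea (the workbook name and its sheets are hypothetical):

import pandas as pd

# With sheet_name=None, read_excel returns {sheet_name: DataFrame, ...}.
frames = pd.read_excel("workbook.xlsx", sheet_name=None)  # hypothetical file
for sheet_name, frame in frames.items():
    frame["__sheet__"] = sheet_name                       # tag rows with their sheet
combined = pd.concat(frames.values(), sort=False)         # stack into one DataFrame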
Example #2
def display(results: Results, json_file: IO):
    if json_file:
        # Write to the file
        json.dump(
            {
                "successful_requests": results.successful_requests(),
                "slowest": results.slowest(),
                "fastest": results.fastest(),
                "total_time": results.total_time,
                "Requests Per Minute": results.requests_per_minute(),
                "Requests Per Second": results.requests_per_second(),
            },
            json_file,
        )
        json_file.close()
        print("... Done!")
    else:
        # Print to Screen
        print("... Done!")
        print("--- Results ---")
        print(f"Successful requests\t{results.successful_requests()}")
        print(f"Slowest            \t{results.slowest()}")
        print(f"Fastest            \t{results.fastest()}")
        print(f"Average            \t{results.average_time()}")
        print(f"Total Time         \t{results.total_time}")
        print(f"Requests Per Minute\t{results.requests_per_minute()}")
        print(f"Requests Per Second\t{results.requests_per_second()}")
Example #3
File: com.py Project: fmohr/SEDE
def out_write_bytes(wfile: IO, payload, close: bool = True) -> None:
    if not is_bytes(payload):
        raise ValueError("Unexpected type: " + str(payload.__class__) +
                         ". Expected bytes-like.")
    wfile.write(payload)
    if close:
        wfile.close()
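A minimal usage sketch (the file name is hypothetical, and a bytes payload is assumed to satisfy the project's is_bytes check); with close=True the function closes the stream after writing:

# Write a bytes payload to a local file and let out_write_bytes close it.
out_write_bytes(open("payload.bin", "wb"), b"hello", close=True)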
Example #4
def IJsonIterator(buffer: IO) -> Iterator[Any]:
    """Takes a file-like object with a json array, and yields elements from that array.
    Provided buffer will be automatically closed."""
    try:
        yield from ijson.items(buffer, "item", use_float=True)  # type: ignore
    finally:
        buffer.close()
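A minimal usage sketch, assuming the ijson package is installed and data.json is a hypothetical file containing a JSON array:

# Stream items out of a JSON array without loading the whole document;
# the generator's finally block closes the file once iteration ends.
for item in IJsonIterator(open("data.json", "rb")):
    print(item)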
Example #5
    def write(self,
              evaluation_results: EvaluationResults,
              buffer: typing.IO = None,
              include_specification: bool = None,
              **kwargs):
        if self.destination is None and buffer is None:
            raise ValueError(
                f"A buffer must be passed in if no destination is declared")

        data_to_write: typing.Dict[
            str, typing.Hashable] = self._results_to_dictionary(
                evaluation_results, include_specification)

        indent = kwargs.get("indent", 4)

        buffer_was_created_here = buffer is None

        try:
            if buffer is None:
                buffer = open(self.destination, 'w')

            json.dump(data_to_write, buffer, indent=indent)
        finally:
            if buffer_was_created_here and buffer is not None:
                buffer.close()
Example #6
def unsmarten(in_stream: IO) -> str:
    """Actual conversion function"""
    in_text = in_stream.readlines()
    in_stream.close()
    out_text = ''
    for line in in_text:
        out_text += unsmarten_line(line)
    return out_text
Example #7
    def _load_config(self, config_file: IO) -> Config:
        """Load the application configuration."""
        try:
            config = load_config(config_file, self.logger)
        except (InvalidMetricType, ConfigError) as error:
            raise ErrorExitMessage(str(error))
        finally:
            config_file.close()
        return config
Example #8
    def svcErrorReader(self, err: IO, queue, logDir: str):
        os.makedirs(logDir, exist_ok=True)
        logFile = os.path.join(logDir, 'stderr.log')
        fErr = open(logFile, 'wb')
        for line in iter(err.readline, b''):
            fErr.write(line)
            Logging.info("TDengine STDERR: {}".format(line))
        Logging.info("EOF for TDengine STDERR: {}".format(self))
        err.close()
        fErr.close()
Example #9
File: com.py Project: fmohr/SEDE
def in_read(rfile: IO, content_length=None, close: bool = True):
    if content_length is None:
        content = rfile.read()
    else:
        content = rfile.read(content_length)
        logging.trace("Finished reading %d bytes from input stream. ",
                      content_length)
    if close:
        rfile.close()
    return content
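A minimal usage sketch (the file name is hypothetical; any readable binary stream works); since close defaults to True, in_read closes the stream after reading:

# Read an entire local file through in_read, which closes it afterwards.
data = in_read(open("payload.bin", "rb"))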
Example #10
    def fromINI(projectId: int,
                fileObj: IO,
                metadata: Dict,
                original_path: str = None) -> TileServer:
        """

        :param projectId: int
        :param fileObj: file descriptor
        :param metadata: Dict of <key, val> pairs
        :param original_path: str path of original file location
        :return: TileServer
        """
        config = configparser.ConfigParser(allow_no_value=True)
        config.read_string(fileObj.read().decode('utf-8'))

        tile_server_data = {}
        tile_server_data['tileOptions'] = {}
        tile_server_data['uiOptions'] = {}

        general_config = config['general']

        tile_server_data['name'] = general_config.get('id', '')
        tile_server_data['type'] = general_config.get('type', '').lower()

        if (config.has_section('license')):
            attribution = ''
            for key in config['license']:
                attribution += config['license'].get(key, '')
            tile_server_data['attribution'] = attribution
        else:
            tile_server_data['attribution'] = ''

        if (tile_server_data['type'] == 'tms'):
            tms_config = config['tms']
            tile_server_data['url'] = tms_config.get('url', fallback='')
            tile_server_data['tileOptions']['maxZoom'] = tms_config.getint(
                'zmax', fallback=19)
            tile_server_data['tileOptions']['minZoom'] = tms_config.getint(
                'zmin', fallback=0)
        elif (tile_server_data['type'] == 'wms'):
            wms_config = config['wms']
            tile_server_data['url'] = wms_config.get('url', fallback='')
            tile_server_data['tileOptions']['layers'] = wms_config.get(
                'layers', fallback='')
            tile_server_data['tileOptions']['params'] = wms_config.get(
                'params', fallback='')
            tile_server_data['tileOptions']['format'] = wms_config.get(
                'format', fallback='')

        tile_server_data['uiOptions']['isActive'] = True
        tile_server_data['uiOptions']['opacity'] = 1

        fileObj.close()

        return FeaturesService.addTileServer(projectId, tile_server_data)
Example #11
def _save_chain(chain_pem: bytes, chain_file: IO) -> None:
    """Saves chain_pem at a unique path based on chain_path.

    :param bytes chain_pem: certificate chain in PEM format
    :param str chain_file: chain file object

    """
    try:
        chain_file.write(chain_pem)
    finally:
        chain_file.close()
Example #12
def stream(reader: IO, writer: IO, chunksize=1024, stoplen=0, close=False):
    """Low-level utility function to stream all of `reader`'s contents into `writer` chunk-by-chunk."""
    while True:
        data = reader.read(chunksize)  # Read a lil

        if len(data) <= stoplen:  # End of stream
            break

        writer.write(data)  # Write a lil

    if close:
        reader.close()
        writer.close()
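A minimal usage sketch (file names are hypothetical); with close=True the function closes both handles itself:

# Copy source.bin into copy.bin in 1 KiB chunks.
src = open("source.bin", "rb")
dst = open("copy.bin", "wb")
stream(src, dst, chunksize=1024, close=True)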
Example #13
def try_finally_io_close(f: IO):
    try:
        if f is not None:
            logger.info("try close {0}".format(f.name))
            f.close()
            logger.info("closed {0}".format(f.name))
    except BaseException as ex:
        logger.error("cannot close {0} : {1}".format(f, ex))
        logger.error(ex, exc_info=True)
Example #14
    def embed_file(self,
                   input_file: IO,
                   output_file_path: str,
                   output_format: str = "all",
                   batch_size: int = DEFAULT_BATCH_SIZE) -> None:
        """
        Computes ELMo embeddings from an input_file where each line contains a sentence tokenized by whitespace.
        The ELMo embeddings are written out in HDF5 format, where each sentence is saved in a dataset.

        Parameters
        ----------
        input_file : ``IO``, required
            A file with one tokenized sentence per line.
        output_file_path : ``str``, required
            A path to the output hdf5 file.
        output_format : ``str``, optional, (default = "all")
            The embeddings to output.  Must be one of "all", "top", or "average".
        batch_size : ``int``, optional, (default = 64)
            The number of sentences to process in ELMo at one time.
        """

        assert output_format in ["all", "top", "average"]

        # Tokenizes the sentences.
        sentences = [line.strip() for line in input_file if line.strip()]
        split_sentences = [sentence.split() for sentence in sentences]
        # Uses the sentence as the key.
        embedded_sentences = zip(
            sentences, self.embed_sentences(split_sentences, batch_size))

        logger.info("Processing sentences.")
        with h5py.File(output_file_path, 'w') as fout:
            for key, embeddings in Tqdm.tqdm(embedded_sentences):
                if key in fout.keys():
                    logger.warning(
                        f"Key already exists in {output_file_path}, skipping: {key}"
                    )
                else:
                    if output_format == "all":
                        output = embeddings
                    elif output_format == "top":
                        output = embeddings[2]
                    elif output_format == "average":
                        output = numpy.average(embeddings, axis=0)

                    fout.create_dataset(key,
                                        output.shape,
                                        dtype='float32',
                                        data=output)
        input_file.close()
Example #15
def close_python_file(python_file: IO):
    """
        Closes the file, closing first the object inside it.

        Args:
            python_file {IO}:
    """
    python_file.write("}")

    # close file
    python_file.close()

    # write update to console
    print("\noui.py updated")
Example #16
    def fromImage(projectId: int,
                  fileObj: IO,
                  metadata: Dict,
                  original_path: str = None) -> Feature:
        """
        Create a Point feature from a georeferenced image
        :param projectId: int
        :param fileObj: file
        :param metadata: dict
        :return: Feature
        """
        imdata = ImageService.processImage(fileObj)
        point = Point(imdata.coordinates)
        f = Feature()
        f.project_id = projectId
        f.the_geom = from_shape(point, srid=4326)
        f.properties = metadata

        asset_uuid = uuid.uuid4()
        base_filepath = make_project_asset_dir(projectId)
        asset_path = os.path.join(base_filepath, str(asset_uuid) + '.jpeg')

        fa = FeatureAsset(
            uuid=asset_uuid,
            asset_type="image",
            original_path=original_path,
            display_path=original_path,
            path=get_asset_relative_path(asset_path),
            feature=f,
        )
        f.assets.append(fa)
        thumbnail_path = os.path.join(base_filepath,
                                      str(asset_uuid) + ".thumb.jpeg")
        resized_image_path = os.path.join(base_filepath,
                                          str(asset_uuid) + '.jpeg')
        try:
            imdata.thumb.save(thumbnail_path, "JPEG")
            imdata.resized.save(resized_image_path, "JPEG")
        except:
            if os.path.exists(thumbnail_path):
                os.remove(thumbnail_path)
            if os.path.exists(resized_image_path):
                os.remove(resized_image_path)
            raise
        finally:
            fileObj.close()
        db_session.add(f)
        db_session.commit()
        return f
Example #17
    def check_stream(
            stream: typing.IO,
            check_seekable: typing.Optional[bool] = None,
            check_position: typing.Optional[int] = None,
            reset_position: typing.Optional[bool] = False,
            check_content: typing.Optional[typing.AnyStr] = None,
            check_closeable: typing.Optional[bool] = True) -> None:
        # check input is not None
        assert stream is not None, "provided `stream` is None"

        # check input is a stream
        assert hasattr(stream, "seekable"), \
            "provided `stream` does not appear file-like"

        # check_seekable
        if check_seekable is not None:
            assert stream.seekable() == check_seekable, \
                "value of stream.seekable() is not as expected"

        # check_position
        if check_position is not None:
            if hasattr(stream, "tell"):
                assert stream.tell() == check_position, \
                    "position not as expected"
            elif hasattr(stream, "seek"):
                # stream.seek(0, 1) is an alternate to stream.tell()
                assert stream.seek(0, 1) == check_position, \
                    "position not as expected"

        # reset_position
        if reset_position is not None and reset_position:
            # the stream.seek(0) will fail if these don't pass
            assert hasattr(stream, "seekable")
            assert stream.seekable()

            # reset the stream's position
            stream.seek(0)

        # check_content
        if check_content is not None and check_content:
            content = stream.read()
            assert content == check_content, "stream content is not as expected"

        # check_closeable
        if check_closeable is not None and check_closeable:
            assert hasattr(stream, "closed") and not stream.closed
            assert hasattr(stream, "close")
            stream.close()
            assert stream.closed, "cannot close stream"
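A minimal usage sketch with an in-memory stream, assuming check_stream is reachable as a plain/static helper:

import io

# Verify the buffer is seekable, positioned at the start, holds the
# expected text, and can be closed; check_stream closes it at the end.
buf = io.StringIO("hello")
check_stream(buf,
             check_seekable=True,
             check_position=0,
             check_content="hello",
             check_closeable=True)
assert buf.closed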
Example #18
def sort_by_column(file: IO,
                   index_args: Tuple[str],
                   with_header: bool = False) -> None:
    """Reads a CSV file, sorts it by a given column, prints the sorted rows"""

    reader = csv.reader(file)

    writer = csv.writer(sys.stdout)
    if with_header:
        writer.writerow(next(reader))

    for row in sorted(reader, key=lambda x: sort_multiple(x, index_args)):
        writer.writerow(row)

    file.close()
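A hedged usage sketch; the sort key comes from the project's sort_multiple helper, so the index argument below ("0" for the first column) is an assumption about how that helper interprets it:

# Sort people.csv (hypothetical file) by its first column, keeping the
# header row on top; sort_by_column closes the file itself.
sort_by_column(open("people.csv", newline=""), ("0",), with_header=True)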
Example #19
    def fromGeoJSON(projectId: int,
                    fileObj: IO,
                    metadata: Dict,
                    original_path: str = None) -> List[Feature]:
        """

        :param projectId: int
        :param fileObj: file descriptor
        :param metadata: Dict of <key, val> pairs
        :param original_path: str path of original file location
        :return: List[Feature]
        """
        data = json.loads(fileObj.read())
        fileObj.close()
        return FeaturesService.addGeoJSON(projectId, data)
Example #20
    def embed_file(self,
                   input_file: IO,
                   output_file_path: str,
                   output_format: str = "all",
                   batch_size: int = DEFAULT_BATCH_SIZE) -> None:
        """
        Computes ELMo embeddings from an input_file where each line contains a sentence tokenized by whitespace.
        The ELMo embeddings are written out in HDF5 format, where each sentence is saved in a dataset.

        Parameters
        ----------
        input_file : ``IO``, required
            A file with one tokenized sentence per line.
        output_file_path : ``str``, required
            A path to the output hdf5 file.
        output_format : ``str``, optional, (default = "all")
            The embeddings to output.  Must be one of "all", "top", or "average".
        batch_size : ``int``, optional, (default = 64)
            The number of sentences to process in ELMo at one time.
        """

        assert output_format in ["all", "top", "average"]

        # Tokenizes the sentences.
        sentences = [line.strip() for line in input_file if line.strip()]
        split_sentences = [sentence.split() for sentence in sentences]
        # Uses the sentence as the key.
        embedded_sentences = zip(sentences, self.embed_sentences(split_sentences, batch_size))

        logger.info("Processing sentences.")
        with h5py.File(output_file_path, 'w') as fout:
            for key, embeddings in Tqdm.tqdm(embedded_sentences):
                if key in fout.keys():
                    logger.warning(f"Key already exists in {output_file_path}, skipping: {key}")
                else:
                    if output_format == "all":
                        output = embeddings
                    elif output_format == "top":
                        output = embeddings[2]
                    elif output_format == "average":
                        output = numpy.average(embeddings, axis=0)

                    fout.create_dataset(
                            key,
                            output.shape, dtype='float32',
                            data=output
                    )
        input_file.close()
Example #21
def write_file(archive: tarfile.TarFile, file: IO, name: str,
               dataset_dir: Union[Path, str]):
    if isinstance(dataset_dir, str):
        dataset_dir = Path(dataset_dir)

    assert dataset_dir.is_dir()

    inter_dir, short_file_name = get_stored_file_name(name)

    file_path = dataset_dir / inter_dir / short_file_name

    with file_path.open("wb") as out_file:
        out_file.write(file.read())

    file.close()
    archive.close()
Example #22
def func(combinations: typing.Iterable[typing.Tuple[utils.models.Student,
                                                    utils.models.Student]],
         pathfunc: typing.Callable[[utils.models.Student], pathlib.Path],
         record_file: typing.IO,
         cutoff: float = -1):
    codes = []
    max_sim = {}
    for s1, s2 in combinations:
        try:
            d, r = check_diff(pathfunc(s1), pathfunc(s2), False)
            if r > cutoff:
                for code in codes:
                    if s1 in code:
                        code.add(s2)
                        break
                else:
                    codes.append({s1, s2})
                print(s1, s2, end=' ')
                print(d, round(r, 2))
                record_file.write(f'{s1} {s2} {d} {round(r, 2)}\n')
                if s1 in max_sim:
                    if max_sim[s1][1] < r:
                        max_sim[s1][0] = s2
                        max_sim[s1][1] = r
                else:
                    max_sim[s1] = [s2, r]
                if s2 in max_sim:
                    if max_sim[s2][1] < r:
                        max_sim[s2][0] = s1
                        max_sim[s2][1] = r
                else:
                    max_sim[s2] = [s1, r]
        except FileNotFoundError as e:
            print(e)
            pass
    print()
    record_file.write('\n')
    for code in codes:
        print([s for s in code])
        record_file.write(f'{[s for s in code]}\n')
    print()
    record_file.write('\n')
    for s1, v in max_sim.items():
        s2, r = v
        print(s1, s2, f'{r:.2f}')
        record_file.write(f'{s1} {s2} {round(r, 2)}\n')
    record_file.close()
Example #23
    def svcOutputReader(self, out: IO, queue, logDir: str):
        '''
        The infinite routine that processes the STDOUT stream for the sub process being managed.

        :param out: the IO stream object used to fetch the data from
        :param queue: the queue where we dump the roughly parsed line-by-line data
        :param logDir: where we should dump a verbatim output file
        '''
        os.makedirs(logDir, exist_ok=True)
        logFile = os.path.join(logDir,'stdout.log')
        fOut = open(logFile, 'wb')
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
        # for line in out :
        for line in iter(out.readline, b''):
            fOut.write(line)
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
            try:
                line = line.decode("utf-8").rstrip()
            except UnicodeError:
                print("\nNon-UTF8 server output: {}\n".format(line))

            # This might block, and then causing "out" buffer to block
            queue.put(line)
            self._printProgress("_i")

            if self._status.isStarting():  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
                    Logging.info("Waiting for the service to become FULLY READY")
                    time.sleep(1.0) # wait for the server to truly start. TODO: remove this
                    Logging.info("Service is now FULLY READY") # TODO: more ID info here?
                    self._status.set(Status.STATUS_RUNNING)

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size

            if self._status.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its output
                print("_w", end="", flush=True)

            # queue.put(line)
        # meaning sub process must have died
        Logging.info("EOF for TDengine STDOUT: {}".format(self))
        out.close() # Close the stream
        fOut.close() # Close the output file
Example #24
    def _log_queue_worker(stream: IO, line_queue: queue.Queue) -> None:
        """
        Worker function to run in a separate thread.
        Reads from 'stream', puts lines in a Queue (Queue is thread-safe).
        """

        while True:
            # readline() is a blocking operation.
            # decode to push a string in the queue instead of 8-bit bytes.
            log_line = stream.readline().decode("utf-8")
            line_queue.put(log_line)

            if len(log_line) == 0:
                # This is the end of the stream meaning the server process
                # has exited.
                stream.close()
                break
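A hedged wiring sketch, assuming _log_queue_worker is reachable as a static helper and some-server stands in for the real server command:

import queue
import subprocess
import threading

# Read the server's stdout on a background thread so the main thread can
# poll the queue without blocking on readline().
proc = subprocess.Popen(["some-server", "--verbose"], stdout=subprocess.PIPE)
line_queue: queue.Queue = queue.Queue()
threading.Thread(target=_log_queue_worker,
                 args=(proc.stdout, line_queue),
                 daemon=True).start()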
Example #25
    def _scan_dynamodb_and_upload_to_s3(self, temp_file: IO, scan_kwargs: dict, table: Any) -> IO:
        while True:
            response = table.scan(**scan_kwargs)
            items = response['Items']
            for item in items:
                temp_file.write(self.process_func(item))

            if 'LastEvaluatedKey' not in response:
                # no more items to scan
                break

            last_evaluated_key = response['LastEvaluatedKey']
            scan_kwargs['ExclusiveStartKey'] = last_evaluated_key

            # Upload the file to S3 if reach file size limit
            if getsize(temp_file.name) >= self.file_size:
                _upload_file_to_s3(temp_file, self.s3_bucket_name, self.s3_key_prefix)
                temp_file.close()
                temp_file = NamedTemporaryFile()
        return temp_file
Example #26
def _to_textio(fp: IO, mode: str, read_codec: str) -> TextIO:
    if 'b' in mode:
        fp = cast(TextIO, fp)
        # TODO: Fix me
        fp.decoder = read_codec
        fp.native_reader = fp.read
        fp.read = lambda *args: _auto_decode(fp, *args)
    if not getattr(fp, 'native_closer', None):  # wrap close() only once
        fp.native_closer = fp.close
        fp.close = lambda *a: _wrapped_close(fp)
    return fp
Example #27
def convert_to_zipfile_object(fileobj: IO):
    is_zipfile = zipfile.is_zipfile(fileobj)
    fileobj.seek(0)
    if is_zipfile:
        return fileobj
    if hasattr(fileobj, "name"):
        named_fileobj = fileobj
    else:
        named_fileobj = tempfile.NamedTemporaryFile(suffix='.zip')
        named_fileobj.write(fileobj.read())
        fileobj.close()
        named_fileobj.seek(0)
    tmp_file = tempfile.NamedTemporaryFile(suffix='.zip')
    with zipfile.ZipFile(tmp_file.name, 'w',
                         compression=zipfile.ZIP_DEFLATED) as new_zip:
        new_zip.write(named_fileobj.name,
                      arcname=Path(named_fileobj.name).name)
    tmp_file.seek(0)
    named_fileobj.close()
    return tmp_file
Example #28
    def write(self,
              evaluation_results: specification.EvaluationResults,
              buffer: typing.IO = None,
              **kwargs):
        if self.destination is None and buffer is None:
            raise ValueError(
                f"A buffer must be passed in if no destination is declared")

        converted_output = self._to_xarray(evaluation_results)
        responsible_for_buffer = buffer is None

        try:
            if responsible_for_buffer:
                buffer = open(self.destination, 'wb')

            raw_netcdf = converted_output.to_netcdf()

            buffer.write(raw_netcdf)
        finally:
            if responsible_for_buffer and buffer is not None and not buffer.closed:
                buffer.close()
Example #29
    def load(
        self, fp: IO = None, serialization: SerializationFormat = None
    ) -> None:
        if serialization is None:
            serialization = self._serialization_format

        if fp is None:
            close_fp_before_return = True
            try:
                fp = open(self._file)
            except (FileNotFoundError, TypeError) as e:
                raise ConfigFileException(e)
        else:
            close_fp_before_return = False

        s = fp.read()
        if close_fp_before_return:
            fp.close()

        self.loads(s, serialization)
        return
Example #30
    def svcOutputReader(self, out: IO, queue):
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
        # for line in out :
        for line in iter(out.readline, b''):
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
            try:
                line = line.decode("utf-8").rstrip()
            except UnicodeError:
                print("\nNon-UTF8 server output: {}\n".format(line))

            # This might block, and then causing "out" buffer to block
            queue.put(line)
            self._printProgress("_i")

            if self._status.isStarting():  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
                    Logging.info("Waiting for the service to become FULLY READY")
                    time.sleep(1.0)  # wait for the server to truly start. TODO: remove this
                    Logging.info("Service is now FULLY READY")  # TODO: more ID info here?
                    self._status.set(Status.STATUS_RUNNING)

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size

            if self._status.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its output
                print("_w", end="", flush=True)

            # queue.put(line)
        # meaning sub process must have died
        Logging.info("EOF for TDengine STDOUT: {}".format(self))
        out.close()
Example #31
    def _cleanup_stream(self, camera: Camera, server_socket: socket.socket,
                        client: IO):
        if client:
            try:
                client.close()
            except Exception as e:
                self.logger.warning('Error on client socket close: {}'.format(
                    str(e)))

        try:
            server_socket.close()
        except Exception as e:
            self.logger.warning('Error on server socket close: {}'.format(
                str(e)))

        if camera.stream:
            try:
                camera.stream.close()
            except Exception as e:
                self.logger.warning(
                    'Error while closing the encoding stream: {}'.format(
                        str(e)))
Example #32
    def dump(
        self, fp: IO = None, serialization: SerializationFormat = None
    ) -> int:
        if serialization is None:
            serialization = self._serialization_format

        s = self.dumps(serialization)

        if fp is None:
            close_fp_before_return = True
            try:
                fp = open(self._file, 'w')
            except FileNotFoundError as e:
                raise ConfigFileException(e)

        else:
            close_fp_before_return = False

        ret = fp.write(s)

        if close_fp_before_return:
            fp.close()
        return ret
Example #33
    def embed_file(self,
                   input_file: IO,
                   output_file_path: str,
                   output_format: str = "all",
                   batch_size: int = DEFAULT_BATCH_SIZE,
                   forget_sentences: bool = False,
                   use_sentence_keys: bool = False) -> None:
        """
        Computes ELMo embeddings from an input_file where each line contains a sentence tokenized by whitespace.
        The ELMo embeddings are written out in HDF5 format, where each sentence embedding
        is saved in a dataset with the line number in the original file as the key.

        Parameters
        ----------
        input_file : ``IO``, required
            A file with one tokenized sentence per line.
        output_file_path : ``str``, required
            A path to the output hdf5 file.
        output_format : ``str``, optional, (default = "all")
            The embeddings to output.  Must be one of "all", "top", or "average".
        batch_size : ``int``, optional, (default = 64)
            The number of sentences to process in ELMo at one time.
        forget_sentences : ``bool``, optional, (default = False).
            If use_sentence_keys is False, whether or not to include a string
            serialized JSON dictionary that associates sentences with their
            line number (its HDF5 key). The mapping is placed in the
            "sentence_to_index" HDF5 key. This is useful if
            you want to use the embeddings without keeping the original file
            of sentences around.
        use_sentence_keys : ``bool``, optional, (default = False).
            Whether or not to use full sentences as keys. By default,
            the line numbers of the input file are used as ids, which is more robust.
        """

        assert output_format in ["all", "top", "average"]

        # Tokenizes the sentences.
        sentences = [line.strip() for line in input_file]

        blank_lines = [i for (i, line) in enumerate(sentences) if line == ""]
        if blank_lines:
            raise ConfigurationError(f"Your input file contains empty lines at indexes "
                                     f"{blank_lines}. Please remove them.")
        split_sentences = [sentence.split() for sentence in sentences]
        # Uses the sentence index as the key.

        if use_sentence_keys:
            logger.warning("Using sentences as keys can fail if sentences "
                           "contain forward slashes or colons. Use with caution.")
            embedded_sentences = zip(sentences, self.embed_sentences(split_sentences, batch_size))
        else:
            embedded_sentences = ((str(i), x) for i, x in
                                  enumerate(self.embed_sentences(split_sentences, batch_size)))

        sentence_to_index = {}
        logger.info("Processing sentences.")
        with h5py.File(output_file_path, 'w') as fout:
            for key, embeddings in Tqdm.tqdm(embedded_sentences):
                if use_sentence_keys and key in fout.keys():
                    raise ConfigurationError(f"Key already exists in {output_file_path}. "
                                             f"To encode duplicate sentences, do not pass "
                                             f"the --use-sentence-keys flag.")

                if not forget_sentences and not use_sentence_keys:
                    sentence = sentences[int(key)]
                    sentence_to_index[sentence] = key

                if output_format == "all":
                    output = embeddings
                elif output_format == "top":
                    output = embeddings[-1]
                elif output_format == "average":
                    output = numpy.average(embeddings, axis=0)

                fout.create_dataset(
                        str(key),
                        output.shape, dtype='float32',
                        data=output
                )
            if not forget_sentences and not use_sentence_keys:
                sentence_index_dataset = fout.create_dataset(
                        "sentence_to_index",
                        (1,),
                        dtype=h5py.special_dtype(vlen=str))
                sentence_index_dataset[0] = json.dumps(sentence_to_index)

        input_file.close()
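A hedged sketch of reading the resulting HDF5 file back (the output file name is hypothetical; when --use-sentence-keys is not passed, each dataset key is a line number and the "sentence_to_index" dataset maps sentences to those keys):

import json

import h5py

with h5py.File("elmo_embeddings.h5", "r") as fin:
    sentence_to_index = json.loads(fin["sentence_to_index"][0])
    for sentence, key in sentence_to_index.items():
        vectors = fin[key][...]  # array shape depends on output_format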