    def test_configuration_manager_read(self):

        configuration_manager = ConfigurationManager()

        value = configuration_manager.get_value("core", "location")
        self.assertIsNotNone(value)

        value = configuration_manager.get_value("invalid", "")
        self.assertIsNone(value)

        value = configuration_manager.get_value("core", "invalid")
        self.assertIsNone(value)
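
Every snippet on this page depends on the same get_value(section, key) contract. A minimal sketch of a manager with that contract, assuming a YAML-backed file such as eva.yml (the file name and the class-level cache are illustrative assumptions, not EVA's actual implementation):

# Sketch only: a YAML-backed configuration manager exposing the
# get_value(section, key) contract exercised by the test above.
import yaml

class SketchConfigurationManager:
    _cfg = None

    def __init__(self, path="eva.yml"):
        if SketchConfigurationManager._cfg is None:
            with open(path) as f:
                SketchConfigurationManager._cfg = yaml.safe_load(f)

    def get_value(self, section, key):
        # Unknown sections or keys yield None, matching the assertions above.
        return (self._cfg.get(section) or {}).get(key)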
Example #2
    def __init__(self):
        """Initializes the engine and session for database operations

        Retrieves the database uri for connection from ConfigurationManager.
        """
        uri = ConfigurationManager().get_value("core",
                                               "sqlalchemy_database_uri")
        # Set echo=True in create_engine to log SQL statements.
        self.engine = create_engine(uri)
        self.session = scoped_session(sessionmaker(bind=self.engine))
Example #3
def eva():
    """
        Start the eva system
    """

    # Get the hostname and port information from the configuration file
    config = ConfigurationManager()
    hostname = config.get_value('server', 'host')
    port = config.get_value('server', 'port')
    socket_timeout = config.get_value('server', 'socket_timeout')
    loop = asyncio.new_event_loop()
    stop_server_future = loop.create_future()

    # Launch server
    try:
        # asyncio.run() would create a fresh event loop and ignore the
        # loop (and the future bound to it) created above, so drive the
        # explicit loop instead.
        loop.run_until_complete(
            start_server(host=hostname,
                         port=port,
                         loop=loop,
                         socket_timeout=socket_timeout,
                         stop_server_future=stop_server_future))

    except Exception as e:
        LoggingManager().log(e, LoggingLevel.CRITICAL)
Example #4
    def exec(self):
        """
        Read the input video using OpenCV and persist the data
        using the storage engine
        """
        # Fetch batch_size from Config
        batch_size = ConfigurationManager().get_value("executor", "batch_size")
        if batch_size is None:
            batch_size = 50

        # videos are persisted using (id, data) schema where id = frame_id
        # and data = frame_data. Current logic supports loading a video into
        # storage with the assumption that frame_id starts from 0. In case
        # we want to append to the existing store we have to figure out the
        # correct frame_id. It could also be a parameter passed in by the user.
        video_reader = OpenCVReader(self.node.file_path, batch_size=batch_size)
        for batch in video_reader.read():
            # Hook for the storage engine
            append_rows(self.node.table_metainfo, batch)
Example #5
    def read(self) -> Iterator[Batch]:
        """
        Calls the subclass _read implementation and
        yields batches to the caller
        """

        data_batch = []
        # Fetch batch_size from Config if not provided
        if self.batch_size is None or self.batch_size < 0:
            self.batch_size = ConfigurationManager().get_value(
                "executor", "batch_size")
            if self.batch_size is None:
                self.batch_size = 50

        for data in self._read():
            data_batch.append(data)
            if len(data_batch) == self.batch_size:
                yield Batch(pd.DataFrame(data_batch))
                data_batch = []
        if data_batch:
            yield Batch(pd.DataFrame(data_batch))
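
The batching pattern in read() can be illustrated standalone; this sketch uses plain DataFrames in place of Batch and synthetic rows:

# Standalone illustration of the chunking pattern used by read():
# accumulate rows, emit a frame every batch_size rows, flush the rest.
import pandas as pd

def chunked(rows, batch_size=50):
    buf = []
    for row in rows:
        buf.append(row)
        if len(buf) == batch_size:
            yield pd.DataFrame(buf)
            buf = []
    if buf:
        yield pd.DataFrame(buf)

sizes = [len(df) for df in chunked({"frame_id": i} for i in range(120))]
# sizes == [50, 50, 20]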
Example #6
    def classify(self, frames: Tensor) -> pd.DataFrame:
        """
        Given the gpu_batch_size, splits the input tensor into chunks,
        calls _get_predictions on each chunk, and merges the results.
        Arguments:
            frames (Tensor): tensor on which transformation is performed
        Returns:
            pd.DataFrame: outcome after prediction
        """
        gpu_batch_size = ConfigurationManager()\
            .get_value('executor', 'gpu_batch_size')

        if gpu_batch_size:
            chunks = torch.split(frames, gpu_batch_size)
            # DataFrame.append was removed in pandas 2.0; collect the
            # per-chunk predictions and concatenate them once instead.
            predictions = [self._get_predictions(tensor) for tensor in chunks]
            return pd.concat(predictions, ignore_index=True)
        else:
            return self._get_predictions(frames)
Example #7
def generate_file_path(name: str = '') -> Path:
    """Generates a arbitrary file_path(md5 hash) based on the a random salt
    and name

    Arguments:
        name (str): Input file_name.

    Returns:
        Path: pathlib.Path object

    """
    dataset_location = ConfigurationManager().get_value("core", "location")
    if dataset_location is None:
        LoggingManager().log('Missing location key in eva.yml',
                             LoggingLevel.ERROR)
        raise KeyError('Missing location key in eva.yml')

    dataset_location = Path(dataset_location)
    salt = uuid.uuid4().hex
    file_name = hashlib.md5(salt.encode() + name.encode()).hexdigest()
    path = dataset_location / file_name
    return path.resolve()
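
A brief usage note, assuming core/location is set in the configuration: a fresh salt is drawn per call, so the same name maps to distinct paths.

# Each call draws a new uuid4 salt, so repeated calls differ.
p1 = generate_file_path('video.mp4')
p2 = generate_file_path('video.mp4')
assert p1 != p2 and p1.parent == p2.parent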
Example #8
    def __init__(self, dataset_name: str, frame_metadata: FrameInfo):

        self.dataset_name = dataset_name
        self.H = frame_metadata.height
        self.W = frame_metadata.width
        self.C = frame_metadata.num_channels

        # The schema defines the layout of the dataset
        self.dataset_schema = Unischema(self.dataset_name, [
            UnischemaField('frame_id', np.int32,
                           (), ScalarCodec(IntegerType()), False),
            UnischemaField('frame_data', np.uint8, (self.H, self.W, self.C),
                           CompressedNdarrayCodec(), False),
        ])

        # Construct output location
        eva_dir = ConfigurationManager().get_value("core", "location")
        output_url = os.path.join(eva_dir, self.dataset_name)

        # Get session handle
        session = Session()
        spark = session.get_session()
        spark_context = session.get_context()

        # Wrap dataset materialization portion.
        rows_count = 10
        with materialize_dataset(spark, output_url, self.dataset_schema):

            rows_rdd = spark_context.parallelize(range(rows_count))\
                .map(lambda x: row_generator(x, self.H, self.W, self.C))\
                .map(lambda x: dict_to_spark_row(self.dataset_schema, x))

            spark.createDataFrame(rows_rdd,
                                  self.dataset_schema.as_spark_schema()) \
                .coalesce(10) \
                .write \
                .mode('overwrite') \
                .parquet(output_url)
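
row_generator is referenced above but not shown; a plausible reconstruction matching the Unischema (a scalar frame_id plus an H x W x C uint8 frame_data array) would be:

# Illustrative reconstruction of row_generator, inferred from the
# Unischema above; EVA's actual helper may differ.
import numpy as np

def row_generator(frame_id, height, width, channels):
    return {
        'frame_id': frame_id,
        'frame_data': np.random.randint(
            0, 256, size=(height, width, channels), dtype=np.uint8),
    }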
Example #9
    def __init__(self):
        self._config_manager = ConfigurationManager()
        self._gpus = self._populate_gpu_ids()
Example #10
class Context:
    """
    Stores the context information of the executor, e.g.,
    when using Spark, the application name and current Spark executors;
    when using Horovod, the current rank, etc.
    """

    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(Context, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        self._config_manager = ConfigurationManager()
        self._gpus = self._populate_gpu_ids()

    @property
    def gpus(self):
        return self._gpus

    def _possible_addresses(self) -> Set:
        host = socket.gethostname()
        result_address = {host}
        true_host, aliases, address = socket.gethostbyaddr(host)
        result_address.add(true_host)
        result_address.update(set(aliases).union(set(address)))
        return result_address

    def _populate_gpu_from_config(self) -> List:
        gpu_conf = self._config_manager.get_value('executor', 'gpus')
        gpu_conf = gpu_conf if gpu_conf else {}
        this_address = self._possible_addresses()
        intersection_addresses = this_address.intersection(gpu_conf.keys())
        if len(intersection_addresses) != 0:
            return [
                str(gpu) for gpu in gpu_conf.get(intersection_addresses.pop())
            ]
        return []

    def _populate_gpu_from_env(self) -> List:
        gpus = map(lambda x: x.strip(),
                   os.environ.get('GPU_DEVICES', '').strip().split(','))
        return list(filter(lambda x: x, gpus))

    def _populate_gpu_ids(self) -> List:
        if not is_gpu_available():
            return []
        gpus = self._populate_gpu_from_config()
        if len(gpus) == 0:
            gpus = self._populate_gpu_from_env()
        return gpus

    def _select_random_gpu(self) -> str:
        """
        A random GPU selection strategy
        Returns:
            (str): GPU device ID
        """
        return random.choice(self.gpus)

    def gpu_device(self) -> str:
        """
        Selects a GPU on which the task can be executed
        Returns:
             (str): GPU device ID
        """
        if self.gpus:
            # TODO: Should allow choosing GPU based on Spark and Horovod
            return self._select_random_gpu()
        return NO_GPU
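
A short usage sketch: Context is a singleton, and device selection prefers the executor/gpus configuration before falling back to the GPU_DEVICES environment variable.

# Usage sketch: repeated construction returns the same instance;
# gpu_device() yields a device ID string, or NO_GPU without GPUs.
ctx = Context()
assert ctx is Context()
device = ctx.gpu_device()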
Example #11
    def __init__(self):
        self._config = ConfigurationManager()
        name = self._config.get_value('core', 'application')
        self.init_spark_session(name)
Example #12
class Session(object):
    """
    Wrapper around Spark Session
    """

    _instance = None
    _session = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(Session, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        self._config = ConfigurationManager()
        name = self._config.get_value('core', 'application')
        self.init_spark_session(name)

    def init_spark_session(self, application_name, spark_master=None):
        """Setup a spark session.

        :param spark_master: A master parameter used by spark session builder.
          Use default value (None) to use system
          environment configured spark cluster.
          Use 'local[*]' to run on a local box.

        :return: spark_session: A spark session
        """

        eva_spark_conf = SparkConf()
        pyspark_config = self._config.get_value('pyspark', 'property') or {}
        for key, value in pyspark_config.items():
            eva_spark_conf.set(key, value)

        session_builder = SparkSession \
            .builder \
            .appName(application_name) \
            .config(conf=eva_spark_conf)

        if spark_master:
            session_builder.master(spark_master)

        # Gets an existing SparkSession or,
        # if there is no existing one, creates a new one based
        # on the options set in this builder.
        self._session = session_builder.getOrCreate()

        # Configure logging
        log4j_level = LoggingManager().getLog4JLevel()
        spark_context = self._session.sparkContext
        spark_context.setLogLevel(log4j_level)

    def get_session(self):
        return self._session

    def get_context(self):
        return self._session.sparkContext

    def stop(self):
        self._session.stop()

    def __del__(self):
        self._session.stop()
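
Usage follows the same singleton pattern; a sketch, assuming core/application and pyspark/property are present in the configuration:

# Usage sketch: Session() wraps a single shared SparkSession.
session = Session()
spark = session.get_session()
df = spark.createDataFrame([(1, 'a')], ['id', 'label'])
print(df.count())  # 1
session.stop()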
Example #13
    def __init__(self, node: LimitPlan):
        super().__init__(node)
        self._limit_count = node.limit_value
        self.BATCH_MAX_SIZE = ConfigurationManager().get_value(
            "executor", "batch_size")
Example #14
# coding=utf-8
# Copyright 2018-2020 EVA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from src.configuration.configuration_manager import ConfigurationManager
from src.utils.generic_utils import str_to_class

Loader = str_to_class(ConfigurationManager().get_value("storage", "loader"))

StorageEngine = str_to_class(ConfigurationManager().get_value(
    "storage", "engine"))()