Code Example #1
File: batch.py  Project: BlackBurdai/eva
    def sort_orderby(self, by, sort_type):
        """
        in_place sort for orderby

        Args:
            by: list of column names
            sort_type: list of True/False (ASC if True) for each column in
                'by', e.g. [True, False] means [ASC, DESC]
        """
        # if by is None and self.identifier_column in self._frames:
        #     by = [self.identifier_column]

        if sort_type is None:
            sort_type = [True]

        if by is not None:
            for column in by:
                if column not in self._frames.columns:
                    LoggingManager().log(
                        'Can not orderby non-projected column: {}'.format(
                            column), LoggingLevel.ERROR)
                    raise KeyError(
                        'Can not orderby non-projected column: {}'.format(
                            column))

            self._frames.sort_values(by,
                                     ascending=sort_type,
                                     ignore_index=True,
                                     inplace=True)
        else:
            LoggingManager().log(
                'Columns and Sort Type are required for orderby',
                LoggingLevel.WARNING)
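The method above ultimately calls pandas DataFrame.sort_values with one
ascending flag per column in 'by'. A rough standalone sketch of that call
(the column names and data below are made up for illustration):

import pandas as pd

frames = pd.DataFrame({'id': [2, 1, 3], 'score': [0.5, 0.9, 0.1]})
# sort_type=[True, False] in the method above maps to ascending=[True, False]
frames.sort_values(['id', 'score'],
                   ascending=[True, False],
                   ignore_index=True,
                   inplace=True)
print(frames)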
Code Example #2
File: command_handler.py  Project: xzdandy/eva
def handle_request(transport, request_message):
    """
        Reads a request from a client and processes it

        Executes the request as a query and writes the response,
        framed as 'length|json', back on the transport
    """
    LoggingManager().log('Receive request: --|' + str(request_message) + '|--')

    try:
        output_batch = execute_query_fetch_all(request_message)
    except Exception as e:
        LoggingManager().log(e, LoggingLevel.WARNING)
        output_batch = Batch(pd.DataFrame([{'error': str(e)}]))
        response = Response(status=ResponseStatus.FAIL, batch=output_batch)
    else:
        response = Response(status=ResponseStatus.SUCCESS, batch=output_batch)

    responseData = response.to_json()
    # Send data length, because response can be very large
    data = (str(len(responseData)) + '|' + responseData).encode('ascii')

    LoggingManager().log('Response to client: --|' + str(response) + '|--\n' +
                         'Length: ' + str(len(responseData)))

    transport.write(data)

    return response
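The response written above is framed as the payload length, a '|' separator,
and the JSON body, so a client can tell when a large response has fully
arrived. A minimal client-side sketch of parsing that frame (parse_response
is a hypothetical helper, not part of the project):

def parse_response(raw: bytes):
    # Split a 'length|json' frame as produced by handle_request above.
    text = raw.decode('ascii')
    length_str, _, payload = text.partition('|')
    expected = int(length_str)
    if len(payload) < expected:
        # Partial read: keep buffering until the declared length arrives.
        return None
    return payload[:expected]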
Code Example #3
File: client.py  Project: swati21/eva
def start_clients(client_count: int, host: string, port: int, loop,
                  stop_clients_future):
    """
        Start a set of eva clients

        client_count: number of clients (= connections)
        host: hostname of the server
        port: port where the server is running
        stop_clients_future: future for externally stopping the clients
    """

    LoggingManager().log('PID(' + str(os.getpid()) + ') attempting ' +
                         str(client_count) + ' connections')

    # Get a reference to the event loop
    # loop = asyncio.get_event_loop()

    max_retry_count = 3

    # Create client tasks
    client_coros = [
        start_client(loop, lambda: EvaClient(), host, port, max_retry_count)
        for i in range(client_count)
    ]

    # Start a set of clients
    clients = loop.create_task(
        asyncio.wait(
            [loop.create_task(client_coro) for client_coro in client_coros]))

    try:
        stop_clients_future = asyncio.wait([clients])
        loop.run_until_complete(stop_clients_future)

    except KeyboardInterrupt:
        LoggingManager().log("client process interrupted")

    finally:
        LoggingManager().log("client process shutdown")

        # tasks, exceptions, retries
        summary = [0, 0, 0]

        if clients.done():
            done, _ = clients.result()
            exceptions = sum(1 for d in done if d.exception())
            retries = sum(max_retry_count - d.result() for d in done
                          if not d.exception())
            tasks = len(client_coros)

            LoggingManager().log(
                str(tasks) + ' tasks, ' + str(exceptions) + ' exceptions, ' +
                str(retries) + ' retries')

            summary = [tasks, exceptions, retries]

        # Close loop
        loop.close()

        return summary
Code Example #4
def handle_request(transport, request_message):
    """
        Reads a request from a client and processes it

        Parses, plans and executes the request as a query and writes the
        response, framed as 'length|json', back on the transport
    """
    LoggingManager().log('Receive request: --|' + str(request_message) + '|--')

    output_batch = None
    response = None
    try:
        stmt = Parser().parse(request_message)[0]
        l_plan = StatementToPlanConvertor().visit(stmt)
        p_plan = PlanGenerator().build(l_plan)
        output_batch = PlanExecutor(p_plan).execute_plan()
    except Exception as e:
        LoggingManager().log(e, LoggingLevel.WARNING)
        response = Response(status=ResponseStatus.FAIL, batch=None)

    if response is None:
        response = Response(status=ResponseStatus.SUCCESS, batch=output_batch)

    responseData = response.to_json()
    # Send data length, because response can be very large
    data = (str(len(responseData)) + '|' + responseData).encode('ascii')

    LoggingManager().log('Response to client: --|' + str(response) + '|--\n' +
                         'Length: ' + str(len(responseData)))

    transport.write(data)

    return response
Code Example #5
File: server.py  Project: bot-jiashenC/workflow-test
def start_server(host: string, port: int, loop, socket_timeout: int,
                 stop_server_future):
    """
        Start the server.
        Server objects are asynchronous context managers.

        host: hostname of the server
        port: port where the server is running
        socket_timeout: socket timeout for client connections
        stop_server_future: future for externally stopping the server
    """

    LoggingManager().log('Start Server', LoggingLevel.CRITICAL)

    # Register signal handler
    def raiseSystemExit(_, __):
        raise SystemExit

    signals = [SIGINT, SIGTERM, SIGHUP, SIGUSR1]

    for handled_signal in signals:
        signal(handled_signal, raiseSystemExit)

    # Get a reference to the event loop
    # loop = asyncio.get_event_loop()

    # Start the eva server
    coro = loop.create_server(lambda: EvaServer(socket_timeout), host, port)
    server = loop.run_until_complete(coro)

    for socket in server.sockets:
        LoggingManager().log(
            'PID(' + str(os.getpid()) + ') serving on ' +
            str(socket.getsockname()), LoggingLevel.CRITICAL)

    server_closed = loop.create_task(server.wait_closed())

    # Start the realtime status monitor
    monitor = loop.create_task(realtime_server_status(EvaServer,
                                                      server_closed))

    try:
        loop.run_until_complete(stop_server_future)

    except KeyboardInterrupt:

        LoggingManager().log("Server process interrupted")

    finally:
        # Stop monitor
        monitor.cancel()

        # Close server
        server.close()

        # Stop event loop
        loop.run_until_complete(server.wait_closed())
        loop.close()

        LoggingManager().log("Successfully shutdown server.")
Code Example #6
def init_db():
    """Create the database if it doesn't exist and create all tables."""
    engine = SQLConfig().engine
    if not database_exists(engine.url):
        LoggingManager().log("Database does not exist, creating database.",
                             LoggingLevel.INFO)
        create_database(engine.url)
    LoggingManager().log("Creating tables", LoggingLevel.INFO)
    BaseModel.metadata.create_all()
Code Example #7
File: client.py  Project: swati21/eva
    def connection_made(self, transport):
        self.transport = transport

        if not set_socket_io_timeouts(self.transport, 60, 0):
            self.transport.abort()
            LoggingManager().log("[ " + str(self.id) + " ]" +
                                 " Could not set timeout")
            return

        LoggingManager().log("[ " + str(self.id) + " ]" +
                             " Connected to server")
Code Example #8
File: server.py  Project: bot-jiashenC/workflow-test
    def data_received(self, data):
        request_message = data.decode()
        LoggingManager().log('Request from client: --|' +
                             str(request_message) + '|--')

        if request_message in ["quit", "exit"]:
            LoggingManager().log('Close client socket')
            return self.transport.close()
        else:
            LoggingManager().log('Handle request')
            asyncio.create_task(handle_request(self.transport,
                                               request_message))
Code Example #9
File: parser_visitor.py  Project: karan-sarkar/eva
    def visitCreateUdf(self, ctx: evaql_parser.CreateUdfContext):
        udf_name = None
        if_not_exists = False
        input_definitions = []
        output_definitions = []
        impl_path = None
        udf_type = None

        for child in ctx.children:
            try:
                if isinstance(child, TerminalNode):
                    continue
                rule_idx = child.getRuleIndex()

                if rule_idx == evaql_parser.RULE_udfName:
                    udf_name = self.visit(ctx.udfName())

                elif rule_idx == evaql_parser.RULE_ifNotExists:
                    if_not_exists = True

                elif rule_idx == evaql_parser.RULE_createDefinitions:
                    # There should be 2 createDefinition
                    # idx 0 describing udf INPUT
                    # idx 1 describing udf OUTPUT
                    if len(ctx.createDefinitions()) != 2:
                        LoggingManager().log('UDF Input or Output Missing',
                                             LoggingLevel.ERROR)
                    input_definitions = self.visit(ctx.createDefinitions(0))
                    output_definitions = self.visit(ctx.createDefinitions(1))

                elif rule_idx == evaql_parser.RULE_udfType:
                    udf_type = self.visit(ctx.udfType())

                elif rule_idx == evaql_parser.RULE_udfImpl:
                    impl_path = self.visit(ctx.udfImpl()).value

            except BaseException:
                LoggingManager().log('CREATE UDF Failed', LoggingLevel.ERROR)
                # stop parsing; something bad happened
                return None
        stmt = CreateUDFStatement(
            udf_name,
            if_not_exists,
            input_definitions,
            output_definitions,
            impl_path,
            udf_type)
        return stmt
Code Example #10
    def _shutdown_catalog(self):
        """
        This method is responsible for gracefully shutting down the
        catalog manager. Currently, this includes dropping the catalog
        database.
        """
        LoggingManager().log("Shutting catalog", LoggingLevel.INFO)
        drop_db()
Code Example #11
File: session.py  Project: bot-jiashenC/workflow-test
    def init_spark_session(self, application_name, spark_master=None):
        """Setup a spark session.

        :param spark_master: A master parameter used by spark session builder.
          Use default value (None) to use system
          environment configured spark cluster.
          Use 'local[*]' to run on a local box.

        :return: spark_session: A spark session
        """

        eva_spark_conf = SparkConf()
        pyspark_config = self._config.get_value('pyspark', 'property')
        for key, value in pyspark_config.items():
            eva_spark_conf.set(key, value)

        session_builder = SparkSession \
            .builder \
            .appName(application_name) \
            .config(conf=eva_spark_conf)

        if spark_master:
            session_builder.master(spark_master)

        # Gets an existing SparkSession or,
        # if there is no existing one, creates a new one based
        # on the options set in this builder.
        self._session = session_builder.getOrCreate()

        # Configure logging
        log4j_level = LoggingManager().getLog4JLevel()
        spark_context = self._session.sparkContext
        spark_context.setLogLevel(log4j_level)
Code Example #12
async def realtime_server_status(protocol, server_closed):
    """
        Report status changes.

        `protocol` must provide `connections` and `errors` attributes.

        Completion or cancellation of the `server_closed` future
        stops monitoring.
    """

    previous_connections = 0
    previous_errors = 0

    while not server_closed.done() and not server_closed.cancelled():

        # Only report changes
        if protocol.__connections__ != previous_connections or \
                protocol.__errors__ != previous_errors:

            previous_connections = protocol.__connections__
            previous_errors = protocol.__errors__

            LoggingManager().log(
                "Status: " + "connections: " + str(previous_connections) +
                " " + "errors: " + str(previous_errors), LoggingLevel.INFO)

        # Report changes every second
        await asyncio.sleep(1)
Code Example #13
    def get_petastorm_column(df_column):

        column_type = df_column.type
        column_name = df_column.name
        column_is_nullable = df_column.is_nullable
        column_array_dimensions = df_column.array_dimensions

        # Reference:
        # https://github.com/uber/petastorm/blob/master/petastorm/
        # tests/test_common.py

        petastorm_column = None
        if column_type == ColumnType.INTEGER:
            petastorm_column = UnischemaField(column_name, np.int32, (),
                                              ScalarCodec(IntegerType()),
                                              column_is_nullable)
        elif column_type == ColumnType.FLOAT:
            petastorm_column = UnischemaField(column_name, np.float64, (),
                                              ScalarCodec(FloatType()),
                                              column_is_nullable)
        elif column_type == ColumnType.TEXT:
            petastorm_column = UnischemaField(column_name, np.str_, (),
                                              ScalarCodec(StringType()),
                                              column_is_nullable)
        elif column_type == ColumnType.NDARRAY:
            petastorm_column = UnischemaField(column_name, np.uint8,
                                              column_array_dimensions,
                                              NdarrayCodec(),
                                              column_is_nullable)
        else:
            LoggingManager().log("Invalid column type: " + str(column_type),
                                 LoggingLevel.ERROR)

        return petastorm_column
Code Example #14
File: session.py  Project: gaurav274/Eva
    def init_spark_session(self, application_name, spark_master=None):
        """Setup a spark session.

        :param spark_master: A master parameter used by spark session builder.
          Use default value (None) to use system
          environment configured spark cluster.
          Use 'local[*]' to run on a local box.

        :return: spark_session: A spark session
        """
        eva_spark_conf = SparkConf()
        eva_spark_conf.set('spark.logConf', 'true')
        # enable Arrow optimization for spark Session
        # This is added to help with to and fro conversion
        # between pandas and spark dataframe
        # https://docs.databricks.com/spark/latest/spark-sql/spark-pandas.html
        eva_spark_conf.set('spark.sql.execution.arrow.pyspark.enabled', 'true')

        session_builder = SparkSession \
            .builder \
            .appName(application_name) \
            .config(conf=eva_spark_conf)

        if spark_master:
            session_builder.master(spark_master)

        # Gets an existing SparkSession or,
        # if there is no existing one, creates a new one based
        # on the options set in this builder.
        self._session = session_builder.getOrCreate()

        # Configure logging
        log4j_level = LoggingManager().getLog4JLevel()
        spark_context = self._session.sparkContext
        spark_context.setLogLevel(log4j_level)
Code Example #15
    def visit_select(self, statement: SelectStatement):
        """converter for select statement

        Arguments:
            statement {SelectStatement} -- [input select statement]
        """

        video = statement.from_table
        if video is None:
            LoggingManager().log('From entry missing in select statement',
                                 LoggingLevel.ERROR)
            return None

        if isinstance(video, SelectStatement):
            # NestedQuery
            self.visit_select(video)
            child_plan = self._plan
            self._plan = LogicalQueryDerivedGet()
            self._plan.append_child(child_plan)
        elif isinstance(video, TableRef):
            # Table
            self.visit_table_ref(video)

        # Filter Operator
        predicate = statement.where_clause
        if predicate is not None:
            self._visit_select_predicate(predicate)

        # Projection operator
        select_columns = statement.target_list

        # TODO: add support for SELECT STAR
        if select_columns is not None:
            self._visit_projection(select_columns)
Code Example #16
File: catalog_manager.py  Project: sanjanag/Eva
    def get_table_bindings(self, database_name: str, table_name: str,
                           column_names: List[str] = None) -> Tuple[int,
                                                                    List[int]]:
        """This method fetches bindings for strings.

        Args:
            database_name: currently not in use
            table_name: the table that is being referred to
            column_names: the column names of the table for which
                bindings are required

        Returns:
            returns metadata_id of table and a list of column ids
        """

        metadata_id = self._dataset_service.dataset_by_name(table_name)
        column_ids = []
        if column_names is not None:
            if not isinstance(column_names, list):
                LoggingManager().log(
                    "CatalogManager::get_table_binding() expected list",
                    LoggingLevel.WARNING)
            column_ids = self._column_service.columns_by_dataset_id_and_names(
                metadata_id,
                column_names)
        return metadata_id, column_ids
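For illustration, a call to the method above could look like the sketch
below; the table and column names ('MyVideo', 'id', 'data') are hypothetical:

metadata_id, column_ids = CatalogManager().get_table_bindings(
    database_name=None,          # currently not in use, per the docstring
    table_name='MyVideo',
    column_names=['id', 'data'])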
Code Example #17
File: optimizer_tasks.py  Project: xzdandy/eva
    def execute(self):
        implementation_rules = RulesManager().implementation_rules
        valid_rules = []
        for rule in implementation_rules:
            if rule.top_match(self.root_expr.opr):
                valid_rules.append(rule)

        valid_rules = sorted(valid_rules,
                             key=lambda x: x.promise(),
                             reverse=True)
        for rule in valid_rules:
            binder = Binder(self.root_expr, rule.pattern,
                            self.optimizer_context.memo)
            for match in iter(binder):
                if not rule.check(match, self.optimizer_context):
                    continue
                LoggingManager().log(
                    'In Optimize physical expression, '
                    'Rule {} matched for {}'.format(rule, self.root_expr),
                    LoggingLevel.INFO)
                after = rule.apply(match, self.optimizer_context)
                new_expr = GroupExpression(after, self.root_expr.group_id,
                                           self.root_expr.children)
                # LoggingManager().log('After rewriting {}'.format(new_expr),
                #                     LoggingLevel.INFO)
                self.optimizer_context.memo.add_group_expr(new_expr)
                # Optimize inputs for this physical expr
                self.optimizer_context.task_stack.push(
                    OptimizeInputs(new_expr, self.optimizer_context))

            # Optimize the child groups
            for child_id in self.root_expr.children:
                self.optimizer_context.task_stack.push(
                    OptimizeGroup(child_id, self.optimizer_context))
Code Example #18
File: test_client.py  Project: akhileshsiddhanti/eva
    def test_interaction(self):

        host = "0.0.0.0"
        port = 5432
        client_count = 1

        LoggingManager().setEffectiveLevel(LoggingLevel.DEBUG)

        def timeout_server():
            # need a more robust mechanism for when to cancel the future
            time.sleep(2)
            self.stop_clients_future.cancel()

        thread = threading.Thread(target=timeout_server)
        thread.daemon = True

        thread.start()

        summary = start_clients(client_count=client_count,
                                host=host,
                                port=port,
                                loop=self.loop,
                                stop_clients_future=self.stop_clients_future)

        self.assertEqual(summary[0], client_count)

        exception_count = 0
        self.assertEqual(summary[1], exception_count)
Code Example #19
def create_column_metadata(col_list: List[ColumnDefinition]):
    """Create column metadata for the parsed input column list. This function
    does not commit the provided columns to the catalog table; it only
    returns an in-memory list of ColumnDataframe objects.

    Arguments:
        col_list {List[ColumnDefinition]} -- parsed col list to be created
    """
    if isinstance(col_list, ColumnDefinition):
        col_list = [col_list]

    result_list = []
    for col in col_list:
        if col is None:
            LoggingManager().log(
                "Empty column while creating column metadata",
                LoggingLevel.ERROR)
            result_list.append(col)
            continue
        result_list.append(
            CatalogManager().create_column_metadata(
                col.name, col.type, col.array_type, col.dimension
            )
        )

    return result_list
Code Example #20
File: video_loader.py  Project: JeremyHua18/eva
    def load(self):
        video = cv2.VideoCapture(self.video_metadata.file)
        video_start = self.offset if self.offset else 0
        video.set(cv2.CAP_PROP_POS_FRAMES, video_start)

        LoggingManager().log("Loading frames", LoggingLevel.CRITICAL)

        _, frame = video.read()
        frame_ind = video_start - 1

        info = None
        if frame is not None:
            (height, width, num_channels) = frame.shape
            info = FrameInfo(height, width, num_channels, ColorSpace.BGR)

        frames = []
        while frame is not None:
            frame_ind += 1
            eva_frame = Frame(frame_ind, frame, info)
            if self.skip_frames > 0 and frame_ind % self.skip_frames != 0:
                _, frame = video.read()
                continue

            frames.append(eva_frame)
            if self.limit and frame_ind >= self.limit:
                yield FrameBatch(frames, info)
                return

            if len(frames) % self.batch_size == 0:
                yield FrameBatch(frames, info)
                frames = []

            _, frame = video.read()

        if frames:
            yield FrameBatch(frames, info)
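Since load() is a generator, callers consume the frame batches by iterating
over it. A minimal consumption sketch (the loader construction and the
handle() call below are assumptions, not the project's actual API):

loader = VideoLoader(video_metadata)    # hypothetical construction
for frame_batch in loader.load():
    # each iteration yields a FrameBatch of up to batch_size frames
    handle(frame_batch)                 # hypothetical downstream processing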
Code Example #21
def column_definition_to_udf_io(
        col_list: List[ColumnDefinition], is_input: bool):
    """Create a UdfIO object for each column definition provided

    Arguments:
        col_list(List[ColumnDefinition]): parsed input/output definitions
        is_input(bool): true if input else false
    """
    if isinstance(col_list, ColumnDefinition):
        col_list = [col_list]

    result_list = []
    for col in col_list:
        if col is None:
            LoggingManager().log(
                "Empty column definition while creating udf io",
                LoggingLevel.ERROR)
            result_list.append(col)
            continue
        result_list.append(
            CatalogManager().udf_io(col.name, col.type,
                                    array_type=col.array_type,
                                    dimensions=col.dimension,
                                    is_input=is_input)
        )
    return result_list
Code Example #22
    def delete_column(cls, column_list):
        try:
            for column in column_list:
                column.delete()
        except Exception:
            LoggingManager().log("delete column failed", LoggingLevel.ERROR)
            raise
Code Example #23
File: client.py  Project: swati21/eva
    def data_received(self, data):

        response_chunk = data.decode()
        LoggingManager().log("[ " + str(self.id) + " ]" +
                             " Response from server: --|" +
                             str(response_chunk) + "|--")

        self._response_chunk = response_chunk
Code Example #24
File: client.py  Project: swati21/eva
    def __init__(self):
        self.done = asyncio.Future()
        self.transport = None
        self.id = EvaClient.__connections__

        EvaClient.__connections__ += 1

        LoggingManager().log("[ " + str(self.id) + " ]" + " Init Client")
Code Example #25
    def bootstrap_catalog(self):

        # eva_dir = ConfigurationManager().get_value("core", "location")
        # output_url = os.path.join(eva_dir, CATALOG_DIR)
        # LoggingManager().log("Bootstrapping catalog" + str(output_url),
        #                      LoggingLevel.INFO)
        LoggingManager().log("Bootstrapping catalog", LoggingLevel.INFO)
        init_db()
Code Example #26
File: optimizer_tasks.py  Project: xzdandy/eva
    def execute(self):
        """We apply rewrite rules in a top down fashion.
        Right now we are applying rules aggressively. Later
        when we have more rules it might be a better idea to
        push optimization task to a queue.
        """
        rewrite_rules = RulesManager().rewrite_rules
        valid_rules = []
        for rule in rewrite_rules:
            if not self.root_expr.is_rule_explored(rule.rule_type) and \
                    rule.top_match(self.root_expr.opr):
                valid_rules.append(rule)

        # sort the rules by promise
        valid_rules = sorted(valid_rules,
                             key=lambda x: x.promise(),
                             reverse=True)
        for rule in valid_rules:
            binder = Binder(self.root_expr, rule.pattern,
                            self.optimizer_context.memo)
            for match in iter(binder):
                if not rule.check(match, self.optimizer_context):
                    continue
                self.root_expr.mark_rule_explored(rule.rule_type)
                LoggingManager().log(
                    'In TopDown, Rule {} matched for {}'.format(
                        rule, self.root_expr), LoggingLevel.INFO)
                after = rule.apply(match, self.optimizer_context)
                new_expr = self.optimizer_context.xform_opr_to_group_expr(
                    opr=after,
                    root_group_id=self.root_expr.group_id,
                    is_root=True,
                    copy_opr=False)
                self.root_expr = new_expr
                LoggingManager().log(
                    'After rewriting {}'.format(self.root_expr),
                    LoggingLevel.INFO)
                self.optimizer_context.task_stack.push(
                    TopDownRewrite(self.root_expr, self.optimizer_context))

        for child in self.root_expr.children:
            child_expr = self.optimizer_context.memo.groups[child] \
                .logical_exprs[0]
            self.optimizer_context.task_stack.push(
                TopDownRewrite(child_expr, self.optimizer_context))
Code Example #27
    def exec(self) -> Iterator[Batch]:
        if self.node.all is False:
            LoggingManager().log('Only UNION ALL is supported now.',
                                 LoggingLevel.WARNING)

        # We should have only two children
        for child in self.children:
            for batch in child.exec():
                yield batch
Code Example #28
File: batch.py  Project: xzdandy/eva
    def frames(self, values):
        if isinstance(values, DataFrame):
            self._frames = values[sorted(values.columns)]
        else:
            LoggingManager().log('Batch constructor not properly called!',
                                 LoggingLevel.DEBUG)
            raise ValueError('Batch constructor not properly called. '
                             'Expected pandas.DataFrame')
        self._batch_size = len(values)
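Code Example #2 above constructs a Batch directly from a pandas DataFrame.
Assuming the constructor assigns through the setter shown here, the stored
frame keeps its columns in alphabetical order. A small sketch (the column
names are made up):

import pandas as pd

batch = Batch(pd.DataFrame([{'id': 1, 'error': None}]))
# the setter reorders columns alphabetically, i.e. ['error', 'id'] here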
Code Example #29
File: client.py  Project: swati21/eva
    def connection_lost(self, exc, exc2=None):

        LoggingManager().log("[ " + str(self.id) + " ]" +
                             " Disconnected from server")

        try:
            self.transport.abort()  # free sockets early, free sockets often
            self.transport = None
        except Exception as e:
            LoggingManager().exception(e)
            exc2 = e
        finally:
            if exc or exc2:
                EvaClient.__errors__ += 1
                self.done.set_exception(exc or exc2)
                self.done.exception()  # remove _tb_logger
            else:
                self.done.set_result(None)
Code Example #30
    def delete(self):
        """Delete and commit"""
        try:
            db_session.delete(self)
            self._commit()
        except Exception:
            LoggingManager().log("Object couldn't be deleted",
                                 LoggingLevel.ERROR)
            raise