Example #1
0
File: frame.py Project: acx2015/atk
    def add_columns(self, frame, expression, schema, columns_accessed=None):
        if not schema or not hasattr(schema, "__iter__"):
            raise ValueError("add_columns requires a non-empty schema of (name, type)")

        only_one_column = False
        if isinstance(schema[0], basestring):
            only_one_column = True
            schema = [schema]

        schema = self._format_schema(schema)
        names, data_types = zip(*schema)

        optimized_frame_schema = []
        if columns_accessed:
            if isinstance(columns_accessed, basestring):
                columns_accessed = [columns_accessed]
            frame_schema = frame.schema
            for i in columns_accessed:
                for j in frame_schema:
                    if i == j[0]:
                        optimized_frame_schema.append(j)

        # By default columns_accessed is an empty list and optimized frame schema is empty which implies frame.schema is considered to evaluate
        columns_accessed, optimized_frame_schema = ([], None) if columns_accessed is None else (columns_accessed, optimized_frame_schema)

        add_columns_function = get_add_one_column_function(expression, data_types[0]) if only_one_column \
            else get_add_many_columns_function(expression, data_types)
        from itertools import imap
        arguments = {'frame': frame.uri,
                     'column_names': names,
                     'column_types': [get_rest_str_from_data_type(t) for t in data_types],
                     'udf': get_udf_arg(frame, add_columns_function, imap, optimized_frame_schema),
                     'columns_accessed': columns_accessed}

        execute_update_frame_command('add_columns', arguments, frame)
Example #2
0
    def add_columns(self, frame, expression, schema, columns_accessed=None):
        if not schema or not hasattr(schema, "__iter__"):
            raise ValueError("add_columns requires a non-empty schema of (name, type)")

        only_one_column = False
        if isinstance(schema[0], basestring):
            only_one_column = True
            schema = [schema]

        schema = self._format_schema(schema)
        names, data_types = zip(*schema)

        optimized_frame_schema = []
        if columns_accessed:
            if isinstance(columns_accessed, basestring):
                columns_accessed = [columns_accessed]
            frame_schema = frame.schema
            for i in columns_accessed:
                for j in frame_schema:
                    if i == j[0]:
                        optimized_frame_schema.append(j)

        # By default columns_accessed is an empty list and optimized frame schema is empty which implies frame.schema is considered to evaluate
        columns_accessed, optimized_frame_schema = ([], None) if columns_accessed is None else (columns_accessed, optimized_frame_schema)

        add_columns_function = get_add_one_column_function(expression, data_types[0]) if only_one_column \
            else get_add_many_columns_function(expression, data_types)
        from itertools import imap
        arguments = {'frame': frame.uri,
                     'column_names': names,
                     'column_types': [get_rest_str_from_data_type(t) for t in data_types],
                     'udf': get_udf_arg(frame, add_columns_function, imap, optimized_frame_schema),
                     'columns_accessed': columns_accessed}

        execute_update_frame_command('add_columns', arguments, frame)