Example #1
 def set_book_page(self, state, page):
     width, height = utility.dimensions(state)
     book_n = state['user']['current_book']
     books = OrderedDict(state['user']['books'])
     book = books[book_n].set_page(page)
     bookmarks = tuple(bm for bm in book.bookmarks if bm != 'deleted')
     book = book._replace(bookmarks=bookmarks)
     books[book_n] = book
     return state.copy(
         user=state['user'].copy(books=FrozenOrderedDict(books)),
         location='book',
         home_menu_visible=False)
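
Nearly every snippet on this page follows the same immutable-state update pattern: copy the frozen mapping into a mutable OrderedDict, modify it, then re-freeze it and hand it back via copy(). As a point of reference, here is a minimal sketch, not the real library class (that comes from packages such as frozendict or frozenordereddict), of the FrozenOrderedDict/copy() behaviour these examples assume:

from collections import OrderedDict


class FrozenOrderedDict:
    """Minimal sketch of an immutable ordered mapping with copy(**updates)."""

    def __init__(self, *args, **kwargs):
        self._data = OrderedDict(*args, **kwargs)

    def __getitem__(self, key):
        return self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    def items(self):
        return self._data.items()

    def copy(self, **updates):
        # Return a new frozen mapping with the given keys replaced or added;
        # the original instance is never mutated.
        merged = OrderedDict(self._data)
        merged.update(updates)
        return type(self)(merged)

    def __hash__(self):
        return hash(tuple(self._data.items()))
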
Example #2
 def insert_bookmark(self, state, _):
     number = state['user']['current_book']
     books = OrderedDict(state['user']['books'])
     book = books[number]
     page = book.page_number
     if page not in book.bookmarks:
         bookmarks = sorted(book.bookmarks + tuple([page]))
         book = book._replace(bookmarks=tuple(bookmarks))
         books[number] = book
         return state.copy(user=state['user'].copy(
             books=FrozenOrderedDict(books)))
     return state
Example #3
    def _create_layer_dict(self):

        layer_ids = self._compute_layer_ids()
        layer_dict = OrderedDict()
        last_layer_id = ''
        last_layer_type = None
        # Layers that could not be wrapped in objects yet because they were
        # missing the activation function.
        current_layers = []
        for layer_id in layer_ids:
            keras_layer_obj = self._model.get_layer(layer_id)
            layer_type = keras_layer_obj.__class__.__name__
            # Check whether the layer type is considered a real layer.
            if layer_type in self._layer_types_to_classes.keys():
                # Check that, if the layer is a neural layer, it contains an
                # activation function. If so, the layer can be added safely.
                # Otherwise take the next activation function and merge the layers.
                if ((layer_type in self._neural_layer_types
                     and keras_layer_obj.activation.__name__ != 'linear')
                        or (layer_type in self._transformation_layer_types)):
                    # Check that there is no unfinished layer with a missing
                    # activation function. If not, add the layer.
                    if not current_layers:
                        layer_dict[layer_id] = self._layer_types_to_classes[
                            layer_type](self, [keras_layer_obj])
                    else:
                        raise ParsingError('Missing activation function.')
                else:
                    # Check that there is no other unfinished layer before that one.
                    if not current_layers:
                        current_layers.append(keras_layer_obj)
                        last_layer_id = layer_id
                        last_layer_type = layer_type
                    else:
                        raise ParsingError(
                            'Two consecutive layers with no activation function.'
                        )

            elif layer_type == 'Activation':
                # Check that there was a layer before without activation function and merge.
                if current_layers:
                    current_layers.append(keras_layer_obj)
                    layer_dict[last_layer_id] = self._layer_types_to_classes[
                        last_layer_type](self, current_layers)
                    current_layers = []
                else:
                    raise ParsingError(
                        'Two activation layers after each other.')
            else:
                raise ParsingError(
                    'Not sure how to deal with that layer type at that position.'
                )

        return FrozenOrderedDict(layer_dict)
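
For illustration, a hypothetical Keras model that exercises the merge path above: the Dense layer carries activation='linear', so it is parked in current_layers until the following Activation layer arrives and both are wrapped into one layer object (layer names are illustrative; assumes TensorFlow/Keras is installed):

from tensorflow.keras import layers, models

# Hypothetical input to _create_layer_dict: the Dense layer has no fused
# activation, so the parser buffers it and merges it with the Activation layer.
model = models.Sequential([
    layers.Dense(10, activation='linear', input_shape=(4,), name='dense_1'),
    layers.Activation('relu', name='dense_1_relu'),
])
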
Example #4
def make_hashable(values):
    """
    Make the elements of iterable `values` hashable.
    Or if `values` is a singleton, make it hashable.
    """
    def _hashable_one(value):
        """..."""
        if isinstance(value, Hashable):
            return value
        # Bind the errors to local names: Python 3 unbinds "except ... as"
        # targets when the except block exits, so the raise below could not
        # reference them otherwise.
        try:
            return FrozenOrderedDict(value)
        except TypeError as error:
            frozen_ordered_dict_error = error
        try:
            return tuple(value)
        except TypeError as error:
            tuple_error = error
        raise TypeError(
            """
            {} object is neither hashable, a mapping, nor iterable:
            Error when trying to create a FrozenOrderedDict: {}
            Error when trying to create a tuple: {}
            """
            .format(
                value,
                frozen_ordered_dict_error,
                tuple_error))

    def _ordered(dict_like):
        """..."""
        if isinstance(dict_like, (FrozenOrderedDict, OrderedDict)):
            return dict_like
        assert isinstance(dict_like, dict)
        return OrderedDict(
            sorted(dict_like.items(), key=lambda kv: kv[0]))

    if not isinstance(values, Iterable):
        return _hashable_one(values)
    if isinstance(values, six.string_types):
        return _hashable_one(values)
    if isinstance(values, Mapping):
        return FrozenOrderedDict(
            (make_hashable(key), make_hashable(value))
            for key, value in _ordered(values).items())

    generator = (make_hashable(value) for value in values)
    if isinstance(values, Generator):
        return generator
    if isinstance(values, pd.Series):
        return pd.Series(generator)
    return tuple(generator)
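
A quick usage sketch (assuming the imports the function needs: Hashable, Iterable, Mapping and Generator from collections.abc, plus six, pandas as pd, and a hashable FrozenOrderedDict): nested dicts become FrozenOrderedDicts with sorted keys and lists become tuples, so the result can serve as a dict key.

nested = {'b': [1, 2, {'x': 3}], 'a': 'text'}
frozen = make_hashable(nested)
cache = {frozen: 'computed once'}  # usable as a key because it is fully frozen
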
Example #5
 def close_menu(self, state, value):
     books = state['user']['books']
     # fully delete deleted bookmarks
     changed_books = OrderedDict()
     for filename in books:
         book = books[filename]
         bookmarks = tuple(bm for bm in book.bookmarks if bm != 'deleted')
         book = book._replace(bookmarks=bookmarks)
         changed_books[filename] = book
     bookmarks_menu = state['bookmarks_menu']
     return state.copy(location='book',
                       bookmarks_menu=bookmarks_menu.copy(page=0),
                       home_menu_visible=False, go_to_page_selection='',
                       help_menu=frozendict({'visible': False, 'page': 0}),
                       user=state['user'].copy(books=FrozenOrderedDict(changed_books)))
Example #6
async def read_user_state(path):
    global prev
    global manual
    book_files = utility.find_files(path, ('brf', 'pef'))
    main_toml = os.path.join(path, 'sd-card', USER_STATE_FILE)
    current_book = manual_filename
    if os.path.exists(main_toml):
        main_state = toml.load(main_toml)
        if 'current_book' in main_state:
            current_book = main_state['current_book']
            if current_book != manual_filename:
                current_book = os.path.join(path, current_book)

    manual_toml = os.path.join(path, to_state_file(manual_filename))
    if os.path.exists(manual_toml):
        t = toml.load(manual_toml)
        if 'current_page' in t:
            manual = manual._replace(page_number=t['current_page'] - 1)
        if 'bookmarks' in t:
            manual = manual._replace(bookmarks=tuple(
                sorted(manual.bookmarks + tuple(bm - 1
                                                for bm in t['bookmarks']))))
    books = OrderedDict({manual_filename: manual})
    for book_file in book_files:
        toml_file = to_state_file(book_file)
        book = BookFile(filename=book_file, width=40, height=9)
        if os.path.exists(toml_file):
            t = toml.load(toml_file)
            if 'current_page' in t:
                book = book._replace(page_number=t['current_page'] - 1)
            if 'bookmarks' in t:
                book = book._replace(bookmarks=tuple(
                    sorted(book.bookmarks + tuple(bm - 1
                                                  for bm in t['bookmarks']))))
        try:
            books[book_file] = await init(book)
        except Exception as e:
            log.warning('could not open {}'.format(book_file))
            log.warning(e)

    if current_book not in books:
        current_book = manual_filename

    user_state = frozendict(books=FrozenOrderedDict(books),
                            current_book=current_book)
    prev = user_state
    return user_state
Example #7
    def go_to_page(self, state, page):
        width, height = utility.dimensions(state)

        if page < 0:
            page = 0

        if state['help_menu']['visible']:
            location = 'help_menu'
        else:
            location = state['location']

        if location == 'library':
            books = state['user']['books']
            max_pages = (len(books) - 1) // (height - 1)
            if page > max_pages:
                page = max_pages
            elif page < 0:
                page = 0
            library = frozendict({'page': page})
            return state.copy(library=library)
        elif location == 'book':
            book_n = state['user']['current_book']
            book = state['user']['books'][book_n]
            books = OrderedDict(state['user']['books'])
            book = book.set_page(page)
            books[book_n] = book
            return state.copy(user=state['user'].copy(
                books=FrozenOrderedDict(books)))
        elif location == 'bookmarks_menu':
            book_n = state['user']['current_book']
            book = state['user']['books'][book_n]
            bookmarks_data = book.bookmarks
            max_pages = (len(bookmarks_data) - 1) // height
            if page > max_pages:
                page = max_pages
            bookmarks_menu = state['bookmarks_menu'].copy(page=page)
            return state.copy(bookmarks_menu=bookmarks_menu)
        elif location == 'help_menu':
            max_pages = 1
            if page >= max_pages:
                page = max_pages - 1

            return state.copy(help_menu=state['help_menu'].copy(page=page))
        return state
Example #8
 def _hashable_one(value):
     """..."""
     if isinstance(value, Hashable):
         return value
     # Bind the errors to local names: Python 3 unbinds "except ... as"
     # targets when the except block exits, so the raise below could not
     # reference them otherwise.
     try:
         return FrozenOrderedDict(value)
     except TypeError as error:
         frozen_ordered_dict_error = error
     try:
         return tuple(value)
     except TypeError as error:
         tuple_error = error
     raise TypeError(
         """
         {} object is neither hashable, a mapping, nor iterable:
         Error when trying to create a FrozenOrderedDict: {}
         Error when trying to create a tuple: {}
         """
         .format(
             value,
             frozen_ordered_dict_error,
             tuple_error))
Example #9
 def delete_bookmark(self, state, n):
     width, height = state_helpers.dimensions(state)
     # adjust for title
     height -= 1
     page = state['bookmarks_menu']['page']
     book_n = state['user']['current_book']
     books = OrderedDict(state['user']['books'])
     book = books[book_n]
     bookmarks = book.bookmarks
     line = page * height
     # don't delete the go-to-start and go-to-end bookmarks
     if (line + n) == 0 or (line + n) == (len(bookmarks) - 1):
         return state
     changed_bookmarks = list(bookmarks[line:line + height])
     if n >= len(changed_bookmarks):
         return state
     changed_bookmarks[n] = 'deleted'
     bookmarks = bookmarks[0:line] + tuple(changed_bookmarks) \
         + bookmarks[line + height:len(bookmarks)]
     books[book_n] = book._replace(bookmarks=bookmarks)
     return state.copy(user=state['user'].copy(
         books=FrozenOrderedDict(books)))
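
Note the two-phase delete shared by Examples #1, #5 and #9: delete_bookmark only overwrites the slot with the sentinel string 'deleted', so positions on the open menu page stay stable, and the sentinel is filtered out later when the menu closes or the page changes. A sketch of the round trip (the bookmark values, including 'end', are illustrative):

bookmarks = (0, 4, 9, 'end')
marked = bookmarks[:1] + ('deleted',) + bookmarks[2:]   # what delete_bookmark does
purged = tuple(bm for bm in marked if bm != 'deleted')  # what close_menu does
assert purged == (0, 9, 'end')
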
Example #10
class DataObject(object):
    def __init__(self, **kwargs):
        for column in self.__class__.columns:
            if column.name in kwargs:
                try:
                    converted_value = column.convert(kwargs[column.name])
                except Exception:
                    # print_exc() takes no exception argument; it prints the
                    # exception currently being handled.
                    traceback.print_exc()
                    try:
                        error_string = 'Unable to convert value %s for field %s' % (
                            kwargs[column.name], column.name)
                    except Exception:
                        error_string = 'Unable to convert value for field %s' % (
                            column.name)
                    raise ValueError(error_string)
                setattr(self, column.name, converted_value)
                del kwargs[column.name]
            else:
                setattr(self, column.name, None)
        if kwargs:
            raise TypeError('Unexpected argument(s) initializing %s: %s' %
                            (self.__class__.__name__, str(kwargs)))
        for k, v in self.__class__.relationships.items():
            if v[1]:
                setattr(self, k, [])
            else:
                setattr(self, k, None)

    @classmethod
    def to_sqa_table(cls, metadata, name, **kwargs):
        '''
        Create a SQLAlchemy Table object corresponding to this class. Use kwargs
        to override any desired columns.
        '''
        bad_kwargs = set(kwargs.keys()) - set(
            map(lambda col: col.name, cls.columns))
        if bad_kwargs:
            raise ValueError(
                'Optional keyword argument names must correspond to field names.  The following names are not compliant: %s'
                % str(sorted(bad_kwargs)))
        cols = [
            col.to_sqa() if col.name not in kwargs else kwargs[col.name]
            for col in cls.columns
        ]
        return Table(name, metadata, *cols)

    def __getstate__(self):
        state = {}
        for col in self.columns:
            if hasattr(self, col.name):
                state[col.name] = getattr(self, col.name)
        for k in sorted(self.relationships.keys()):
            state[k] = getattr(self, k)
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __eq__(self, other):
        # TODO: This could be made much faster by a custom implementation
        return self.__class__ is other.__class__ and self.__getstate__(
        ) == other.__getstate__()

    def __hash__(self):
        return hash((self.__class__,
                     frozendict(valmap(freeze, self.__getstate__()).items())))

    def to_row(self):
        return [
            col.unconvert(getattr(self, col.name, None))
            for col in self.columns
        ]

    @classmethod
    def header(cls):
        return [col.name for col in cls.columns]

    @classmethod
    def init_schema(cls):
        '''
        Called after the schema has been added. Allows the class to do any
        necessary initialization steps that require schema information.
        '''

    @classmethod
    def init_relationships(cls):
        '''
        Called after a relationship is added. Allows the class to do any
        necessary initialization steps that require relationship information.
        '''

    relationships = FrozenOrderedDict()

    @classmethod
    def subtypes(cls):
        stack = [cls]
        result = []
        while stack:
            item = stack.pop()
            stack.extend(item.__subclasses__())
            result.append(item)
        return result

    @classmethod
    def typerank(cls):
        if cls.concrete():
            d = {cls: 0}
        else:
            d = {t: i for i, t in enumerate(cls.subtypes())}

        def _typerank(obj):
            return d[obj]

        return _typerank

    @classmethod
    def objrank(cls):
        tr = cls.typerank()

        def _objrank(obj):
            return tr(type(obj))

        return _objrank

    @classmethod
    def relationship_sort_key(cls, relationship):
        klass = cls.relationships[relationship][0]
        typerank = klass.typerank()

        def sort_key(obj):
            return (typerank(type(obj)), obj.sort_key())

        return sort_key

    @classproperty
    @classmethod
    @abstractmethod
    def partition_attribute(cls):
        print(cls)
        raise NotImplementedError

    def set_container_key(self, key):
        translation = self.translate_container_key(key)
        for k, v in translation.items():
            setattr(self, k, v)

    @classmethod
    def translate_container_key(cls, key):
        result = {}
        for i, (k, v) in enumerate(cls.container_key_):
            result[v] = key[i]
        return result

    def set_identity_key(self, key):
        translation = self.translate_identity_key(key)
        for k, v in translation.items():
            setattr(self, k, v)

    @classmethod
    def translate_identity_key(cls, key):
        result = {}
        for i, (k, v) in enumerate(cls.identity_key_):
            result[v] = key[i]
        return result

    def container_key(self):
        return tuple(getattr(self, v) for k, v in self.container_key_)

    def identity_key(self):
        return tuple(getattr(self, v) for k, v in self.identity_key_)

    def sort_key(self):
        return tuple(getattr(self, key) for key in self.sort_key_)

    @classproperty
    @classmethod
    def sort_column_numbers(cls):
        return [cls.header().index(key) for key in cls.sort_key_]

    @classproperty
    @classmethod
    def sort_column_names(cls):
        return cls.sort_key_

#     def __richcmp__(DataObject self, DataObject other, int op):
#         if op == 0:# <
#             return self.sort_key() < other.sort_key()
#         elif op == 1:# ==
#             return self.__class__ is other.__class__ and self.__getstate__() == other.__getstate__()
#         elif op == 2:# >
#             return self.sort_key() > other.sort_key()
#         elif op == 3:# <=
#             return self.sort_key() <= other.sort_key()
#         elif op == 4:# !=
#             return not (self.__class__ is other.__class__ and self.__getstate__() == other.__getstate__())
#         elif op == 5:# >=
#             return self.sort_key() >= other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, DataObject):
            return NotImplemented
        try:
            return self.sort_key() < other.sort_key()
        except AttributeError:
            return NotImplemented

    def __le__(self, other):
        if not isinstance(other, DataObject):
            return NotImplemented
        try:
            return self.sort_key() <= other.sort_key()
        except AttributeError:
            return NotImplemented

    def __gt__(self, other):
        if not isinstance(other, DataObject):
            return NotImplemented
        try:
            return self.sort_key() > other.sort_key()
        except AttributeError:
            return NotImplemented

    def __ge__(self, other):
        if not isinstance(other, DataObject):
            return NotImplemented
        try:
            return self.sort_key() >= other.sort_key()
        except AttributeError:
            return NotImplemented

    @classmethod
    def concrete(cls):
        return not cls.__subclasses__()

    @classmethod
    def reader_class(cls, config):
        if cls.__subclasses__():
            assert cls not in config
            return PolymorphicReader
        elif cls.relationships:
            if cls in config:
                return CompoundReader
            else:
                return ImplicitReader
        else:
            assert cls in config
            assert isinstance(config[cls], SimpleReaderConfig
                              ), 'No config found for class %s' % cls.__name__
            return SimpleReader

    @classmethod
    def writer_class(cls, config):
        if cls.__subclasses__():
            assert cls not in config
            return PolymorphicWriter
        elif cls.relationships:
            if cls in config:
                return CompoundWriter
            else:
                return ImplicitWriter
        else:
            assert cls in config
            assert isinstance(config[cls], SimpleWriterConfig)
            return SimpleWriter

    @classmethod
    def reader(cls, config):
        return cls.reader_class(config)(cls, config)

    @classmethod
    def writer(cls, config):
        return cls.writer_class(config)(cls, config)
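
__hash__ above freezes the mutable state so that equal objects hash equally; valmap comes from toolz, while freeze is project-specific and not shown here. A self-contained approximation of the idea, with a hypothetical freeze():

from toolz import valmap


def freeze(value):
    # Hypothetical stand-in for the project's freeze(): convert mutable
    # containers into hashable equivalents, recursively.
    if isinstance(value, list):
        return tuple(freeze(v) for v in value)
    if isinstance(value, dict):
        return tuple(sorted((k, freeze(v)) for k, v in value.items()))
    return value


state = {'name': 'a', 'children': [1, 2]}
print(hash(tuple(sorted(valmap(freeze, state).items()))))
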
Example #11
    def go_to_page(self, state, page):
        width, height = state_helpers.dimensions(state)

        if page < 0:
            page = 0

        if state['help_menu']['visible']:
            location = 'help_menu'
        else:
            location = state['location']

        if location == 'library':
            books = state['user']['books']
            max_pages = (len(books) - 1) // (height - 1)
            if page > max_pages:
                page = max_pages
            elif page < 0:
                page = 0
            library = frozendict({'page': page})
            return state.copy(library=library)
        elif location == 'book':
            book_n = state['user']['current_book']
            book = state['user']['books'][book_n]
            books = OrderedDict(state['user']['books'])
            book = book.set_page(page)
            books[book_n] = book
            return state.copy(user=state['user'].copy(books=FrozenOrderedDict(books)))
        elif location == 'bookmarks_menu':
            book_n = state['user']['current_book']
            book = state['user']['books'][book_n]
            bookmarks_data = book.bookmarks
            max_pages = (len(bookmarks_data) - 1) // height
            if page > max_pages:
                page = max_pages
            bookmarks_menu = state['bookmarks_menu'].copy(page=page)
            return state.copy(bookmarks_menu=bookmarks_menu)
        elif location == 'language':
            # Treat the requested page as an index into the available languages.
            languages = list(state['languages']['available'].keys())
            if page >= len(languages):
                page = len(languages) - 1
            lang = languages[page]
            return state.copy(user=state['user'].copy(current_language=lang))
        elif location == 'help_menu':

            # To calculate help page bounds and ensure we stay within
            # them, do a dummy render of the help and count the pages.

            mapping = {
                'book': render_book_help,
                'library': render_library_help,
                'system_menu': render_system_help,
                'language': render_language_help,
                'bookmarks_menu': render_bookmarks_help,
                'go_to_page': render_gtp_help,
            }
            if state['home_menu_visible']:
                mapping['book'] = render_home_menu_help
            help_getter = mapping.get(state['location'])
            num_pages = 1
            if help_getter:
                num_pages = len(help_getter(width, height)) // height
            if page >= num_pages:
                page = num_pages - 1
            return state.copy(help_menu=state['help_menu'].copy(page=page))
        return state
Example #12
 def remove_book(self, state, book):
     books = OrderedDict(state['user']['books'])
     if book.filename in books:
         del books[book.filename]
     books = FrozenOrderedDict(books)
     return state.copy(user=state['user'].copy(books=books))
Example #13
def sort_books(books):
    return FrozenOrderedDict(
        sorted(books.items(), key=lambda x: x[1].title.lower()))
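
Usage sketch (Book is a stand-in for the real book type, which only needs a title attribute):

from collections import namedtuple

Book = namedtuple('Book', ['title'])
books = FrozenOrderedDict({'b.brf': Book('Zebra'), 'a.brf': Book('alpha')})
print(list(sort_books(books)))  # ['a.brf', 'b.brf'] - ordered case-insensitively by title
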
Example #14
    def produce(
            self,
            *,
            inputs: container.DataFrame,
            timeout: float = None,
            iterations: int = None) -> base.CallResult[container.DataFrame]:

        cols = ["idx", "name", "rank"]

        # Make sure the target column is of a valid type and return no ranked features if it isn't.
        target_idx = self.hyperparams["target_col_index"]
        if not self._can_use_column(inputs.metadata, target_idx):
            return base.CallResult(container.DataFrame(data={}, columns=cols))

        # check if target is discrete or continuous
        semantic_types = inputs.metadata.query_column(
            target_idx)["semantic_types"]
        discrete = len(set(semantic_types).intersection(
            self._discrete_types)) > 0

        # make a copy of the inputs and clean out any missing data
        feature_df = inputs.copy()
        if self.hyperparams["sub_sample"]:
            sub_sample_size = (self.hyperparams["sub_sample_size"]
                               if self.hyperparams["sub_sample_size"] <
                               inputs.shape[0] else inputs.shape[0])
            rows = random.sample_without_replacement(inputs.shape[0],
                                                     sub_sample_size)
            feature_df = feature_df.iloc[rows, :]
        # make sure that if an entire column is NA we remove that column, so the
        # dropna below doesn't remove ALL rows
        cols_to_drop = feature_df.columns[feature_df.isna().sum() ==
                                          feature_df.shape[0]]
        feature_df.drop(columns=cols_to_drop, inplace=True)
        feature_df.dropna(inplace=True)

        # split out the target feature
        target_df = feature_df.iloc[:,
                                    feature_df.columns.
                                    get_loc(inputs.columns[target_idx])]

        # drop features that are not compatible with ranking
        feature_indices = set(
            inputs.metadata.list_columns_with_semantic_types(
                self._semantic_types))
        role_indices = set(
            inputs.metadata.list_columns_with_semantic_types(self._roles))
        feature_indices = feature_indices.intersection(role_indices)
        feature_indices.remove(target_idx)
        for categ_ind in inputs.metadata.list_columns_with_semantic_types(
            ("https://metadata.datadrivendiscovery.org/types/CategoricalData",
             )):
            if categ_ind in feature_indices:
                if (np.unique(inputs[inputs.columns[categ_ind]]).shape[0] ==
                        inputs.shape[0]):
                    feature_indices.remove(categ_ind)
                elif (inputs.metadata.query(
                    (metadata_base.ALL_ELEMENTS,
                     categ_ind))["structural_type"] == str):
                    feature_df[inputs.columns[categ_ind]] = pd.to_numeric(
                        feature_df[inputs.columns[categ_ind]])
        text_indices = inputs.metadata.list_columns_with_semantic_types(
            self._text_semantic)

        tfv = TfidfVectorizer(max_features=20)
        column_to_text_features = {}
        text_feature_indices = []
        for text_index in text_indices:
            if (text_index not in feature_indices
                    and text_index in role_indices
                    and text_index != target_idx):
                word_features = tfv.fit_transform(
                    feature_df[inputs.columns[text_index]])
                if issparse(word_features):
                    column_to_text_features[inputs.columns[
                        text_index]] = pd.DataFrame.sparse.from_spmatrix(
                            word_features)
                else:
                    column_to_text_features[
                        inputs.columns[text_index]] = word_features
                text_feature_indices.append(text_index)
        text_feature_indices = set(text_feature_indices)

        # return an empty result if all features were incompatible
        numeric_features = len(feature_indices) > 0
        if not numeric_features and len(column_to_text_features) == 0:
            return base.CallResult(container.DataFrame(data={}, columns=cols))

        all_indices = set(range(0, inputs.shape[1]))
        skipped_indices = all_indices.difference(
            feature_indices.union(text_feature_indices))
        # remove columns that were dropped
        feature_indices = feature_indices - set(
            [inputs.columns.get_loc(c) for c in cols_to_drop])
        for v in skipped_indices:
            feature_df.drop(inputs.columns[v], axis=1, inplace=True)

        # figure out the discrete and continuous feature indices and create an array
        # that flags them
        feature_columns = inputs.columns[list(feature_indices)]
        numeric_data = feature_df[feature_columns]
        discrete_indices = inputs.metadata.list_columns_with_semantic_types(
            self._discrete_types)
        discrete_flags = [False] * numeric_data.shape[1]
        for v in discrete_indices:
            col_name = inputs.columns[v]
            if col_name in numeric_data:
                # only mark columns with at least 1 duplicate value as discrete when
                # predicting a continuous target - there's a check in the bowels of
                # the MI code that will throw an exception otherwise
                if numeric_data[col_name].duplicated().any() and not discrete:
                    col_idx = numeric_data.columns.get_loc(col_name)
                    discrete_flags[col_idx] = True

        target_np = target_df.values

        # compute mutual information for discrete or continuous target
        ranked_features_np = np.empty([0])
        text_ranked_features_np = np.empty((len(column_to_text_features), ))
        if discrete:
            if numeric_features:
                ranked_features_np = mutual_info_classif(
                    numeric_data.values,
                    target_np,
                    discrete_features=discrete_flags,
                    n_neighbors=self.hyperparams["k"],
                    random_state=self._random_seed,
                )
            for i, column in enumerate(column_to_text_features):
                text_rankings = mutual_info_classif(
                    column_to_text_features[column],
                    target_np,
                    discrete_features=[False] *
                    column_to_text_features[column].shape[1],
                    n_neighbors=self.hyperparams["k"],
                    random_state=self._random_seed,
                )
                sum_text_rank = np.sum(text_rankings)
                text_ranked_features_np[i] = sum_text_rank
        else:
            if numeric_features:
                ranked_features_np = mutual_info_regression(
                    numeric_data.values,
                    target_np,
                    discrete_features=discrete_flags,
                    n_neighbors=self.hyperparams["k"],
                    random_state=self._random_seed,
                )
            for i, column in enumerate(column_to_text_features):
                text_rankings = mutual_info_regression(
                    column_to_text_features[column],
                    target_np,
                    discrete_features=[False] *
                    column_to_text_features[column].shape[1],
                    n_neighbors=self.hyperparams["k"],
                    random_state=self._random_seed,
                )
                sum_text_rank = np.sum(text_rankings)
                text_ranked_features_np[i] = sum_text_rank

        ranked_features_np, target_entropy = self._normalize(
            ranked_features_np,
            feature_df[feature_columns],
            target_np,
            discrete,
            discrete_flags,
        )
        text_ranked_features_np = self._normalize_text(
            text_ranked_features_np, column_to_text_features, target_entropy)

        if self.hyperparams["return_as_metadata"]:
            ranked_features_np = np.append(ranked_features_np,
                                           text_ranked_features_np)
            for i, f in enumerate(feature_indices.union(text_feature_indices)):
                column_metadata = inputs.metadata.query(
                    (metadata_base.ALL_ELEMENTS, f))
                rank_dict = dict(column_metadata)
                rank_dict["rank"] = ranked_features_np[i]
                inputs.metadata = inputs.metadata.update(
                    (metadata_base.ALL_ELEMENTS, f),
                    FrozenOrderedDict(rank_dict.items()),
                )
            return base.CallResult(inputs)

        # merge back into a single list of col idx / rank value tuples
        data: typing.List[typing.Tuple[int, str, float]] = []
        data = self._append_rank_info(inputs, data, ranked_features_np,
                                      feature_df[feature_columns])
        data = self._append_rank_info(
            inputs,
            data,
            text_ranked_features_np,
            feature_df[inputs.columns[list(text_feature_indices)]],
        )

        # wrap as a D3M container - metadata should be auto generated
        results = container.DataFrame(data=data,
                                      columns=cols,
                                      generate_metadata=True)
        results = results.sort_values(by=["rank"],
                                      ascending=False).reset_index(drop=True)
        return base.CallResult(results)
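
The ranking itself rests on scikit-learn's mutual information estimators. A stripped-down call with the same keyword arguments used above, on toy data (values illustrative):

import numpy as np
from sklearn.feature_selection import mutual_info_classif

X = np.array([[0, 1.2], [1, 0.3], [0, 1.1], [1, 0.4],
              [0, 1.3], [1, 0.2], [0, 1.0], [1, 0.5]])
y = np.array([0, 1, 0, 1, 0, 1, 0, 1])
# Flag the first column as discrete, treat the second as continuous.
scores = mutual_info_classif(X, y, discrete_features=[True, False],
                             n_neighbors=3, random_state=0)
print(scores)  # one non-negative score per feature; higher means more informative
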
Example #15
    def _create_layer_dict(self):
        layer_dict = OrderedDict()

        num_flatten_layers = 0

        # Make sure that the first layer after input is a net input layer.
        if (self.protonet.layer[0].type in self._LAYER_TYPES['input']
                and self.protonet.layer[1].type
                in self._LAYER_TYPES['net_input_layers']):

            last_caffe_layer = self._caffenet.layers[1]
            last_proto_layer = self.protonet.layer[1]
        else:
            raise ParsingError('First layer is not net input layer.')

        for caffe_layer, proto_layer in zip(self._caffenet.layers,
                                            self.protonet.layer):

            # Check whether the layer is considered a layer or just input.
            if caffe_layer.type in self._LAYER_TYPES_TO_IGNORE:
                continue

            # Check that if we have a Pooling layer, we are really dealing with a max pooling layer.
            # Allow no other types of pooling right now.
            if caffe_layer.type == 'Pooling':
                # pooling_param.pool holds the configured pooling method; compare
                # it against the MAX enum constant.
                if proto_layer.pooling_param.pool != proto_layer.pooling_param.MAX:
                    raise ParsingError(
                        'Only max pooling is allowed, but was not used in layer {}'
                        .format(str(proto_layer)))
            # Check whether the layer has a separate activation function, i.e.
            # whether the net input can be computed separately or not.
            if caffe_layer.type in self._TRANSFORMATION_LAYER_TYPES:
                # Transformation layers are just by themselves, they can be added right away.
                layer_dict[proto_layer.name] = self._LAYER_TYPES_TO_CLASSES[
                    caffe_layer.type](self, caffe_layer, proto_layer)
            elif caffe_layer.type in self._LAYER_TYPES['net_input_layers']:
                # Check whether an implicit flatten operation occurs between two
                # layers and an artificial flatten layer has to be added. That is
                # the case if we now have a dense layer and the previous layer
                # still had NCHW dimensions.
                if (caffe_layer.type == 'InnerProduct' and len(
                        self._caffenet.blobs[last_proto_layer.name].data.shape)
                        > 2):
                    # Keep a count of the number of added Flatten layers, to name them properly.
                    num_flatten_layers += 1
                    layer_dict[
                        'flatten_' +
                        str(num_flatten_layers
                            )] = self._LAYER_TYPES_TO_CLASSES['Flatten'](
                                self, last_caffe_layer, last_proto_layer,
                                caffe_layer, proto_layer)
                # Conv and dense layers should wait for their activation function.
                # If, however, the last layer was a net input layer as well, it
                # needs to be added now. The activation in this case would be the
                # layer itself.
                if last_caffe_layer.type in self._LAYER_TYPES[
                        'net_input_layers']:
                    layer_dict[
                        last_proto_layer.name] = self._LAYER_TYPES_TO_CLASSES[
                            last_caffe_layer.type](self, last_caffe_layer,
                                                   last_proto_layer,
                                                   last_caffe_layer,
                                                   last_proto_layer)
            elif caffe_layer.type in self._LAYER_TYPES['activation_functions']:
                # If we are dealing with an activation function, last layer before needs to be a net input layer.
                if last_caffe_layer.type in self._LAYER_TYPES[
                        'net_input_layers']:
                    layer_dict[
                        last_proto_layer.name] = self._LAYER_TYPES_TO_CLASSES[
                            last_caffe_layer.type](
                                self,
                                last_caffe_layer,
                                last_proto_layer,
                                caffe_layer,
                                proto_layer,
                            )
                else:
                    raise ParsingError(
                        'Activation function not after conv or dense layer.')

            # Save the values of the current iteration to compare with the next.
            last_caffe_layer = caffe_layer
            last_proto_layer = proto_layer

        return FrozenOrderedDict(layer_dict)
Example #16
 def test_copy_with_keys(self):
     frozen = FrozenOrderedDict(a=0, b=1)
     copied = frozen.copy(b=2, c=3)
     self.assertIs(type(copied), type(frozen))
     self.assertMappingEqual(copied, dict(a=0, b=2, c=3))
Example #17
 def __init__(self, **kwargs):
     self._data_format = kwargs['data_format']
     self.layer_dict = FrozenOrderedDict(input_layer=MockLayer(
         input_shape=kwargs['input_shape']))
Example #18
async def read_user_state(path):
    global prev
    global manual
    current_book = manual_filename
    current_language = None
    book_files = utility.find_files(path, ('brf', 'pef'))
    config = config_loader.load()
    state_sources = ['sd_card_dir']
    if config.has_option('files', 'additional_lib_1'):
        state_sources.append('additional_lib_1')
    if config.has_option('files', 'additional_lib_2'):
        state_sources.append('additional_lib_2')
    for state_source in state_sources:
        _dir = config.get('files', state_source)
        main_toml = os.path.join(path, _dir, USER_STATE_FILE)
        if os.path.exists(main_toml):
            main_state = toml.load(main_toml)
            if 'current_book' in main_state:
                current_book = main_state['current_book']
                if current_book != manual_filename:
                    current_book = os.path.join(path, current_book)
            if 'current_language' in main_state:
                current_language = main_state['current_language']
            break

    if not current_language or current_language == OLD_DEFAULT_LOCALE:
        current_language = DEFAULT_LOCALE.code

    install(current_language)
    manual = Manual.create()

    manual_toml = os.path.join(path, to_state_file(manual_filename))
    if os.path.exists(manual_toml):
        t = toml.load(manual_toml)
        if 'current_page' in t:
            manual = manual._replace(page_number=t['current_page'] - 1)
        if 'bookmarks' in t:
            manual = manual._replace(bookmarks=tuple(
                sorted(manual.bookmarks + tuple(bm - 1
                                                for bm in t['bookmarks']))))

    books = OrderedDict({manual_filename: manual})
    for book_file in book_files:
        toml_file = to_state_file(book_file)
        book = BookFile(filename=book_file, width=40, height=9)
        if os.path.exists(toml_file):
            t = toml.load(toml_file)
            if 'current_page' in t:
                book = book._replace(page_number=t['current_page'] - 1)
            if 'bookmarks' in t:
                book = book._replace(bookmarks=tuple(
                    sorted(book.bookmarks + tuple(bm - 1
                                                  for bm in t['bookmarks']))))
        books[book_file] = book
    books[cleaning_filename] = CleaningAndTesting.create()

    if current_book not in books:
        current_book = manual_filename

    user_state = frozendict(books=FrozenOrderedDict(books),
                            current_book=current_book,
                            current_language=current_language)
    prev = user_state
    return user_state.copy(books=user_state['books'])
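
The per-book state files parsed above are small TOML documents. A sketch of the keys the loader reads ('current_page' and 'bookmarks', both 1-based on disk and shifted to 0-based in memory; values illustrative):

import toml

t = toml.loads("""
current_page = 12
bookmarks = [3, 7, 20]
""")
assert t['current_page'] - 1 == 11           # page number, shifted to 0-based
assert tuple(bm - 1 for bm in t['bookmarks']) == (2, 6, 19)
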
Example #19
 def test_copy(self):
     frozen = FrozenOrderedDict(a=0, b=1)
     copied = frozen.copy()
     self.assertIs(type(copied), type(frozen))
     self.assertMappingEqual(copied, frozen)
Example #20
def download_files(primitive_metadata: frozendict.FrozenOrderedDict,
                   output: str, redownload: bool) -> None:
    for installation_entry in primitive_metadata.get('installation', []):
        if installation_entry['type'] not in ['FILE', 'TGZ']:
            continue

        # We store files under their digest. In this way we deduplicate the same
        # file used by multiple primitives.
        output_path = os.path.join(output, installation_entry['file_digest'])

        if installation_entry['type'] == 'FILE':
            if os.path.isfile(output_path) and not redownload:
                print(
                    "File for volume {type}/{key} for primitive {python_path} ({primitive_id}) already exists, skipping: {file_uri}"
                    .format(
                        python_path=primitive_metadata['python_path'],
                        primitive_id=primitive_metadata['id'],
                        type=installation_entry['type'],
                        key=installation_entry['key'],
                        file_uri=installation_entry['file_uri'],
                    ),
                    flush=True)
                continue
        elif installation_entry['type'] == 'TGZ':
            if os.path.isdir(output_path) and not redownload:
                print(
                    "Directory for volume {type}/{key} for primitive {python_path} ({primitive_id}) already exists, skipping: {file_uri}"
                    .format(
                        python_path=primitive_metadata['python_path'],
                        primitive_id=primitive_metadata['id'],
                        type=installation_entry['type'],
                        key=installation_entry['key'],
                        file_uri=installation_entry['file_uri'],
                    ),
                    flush=True)
                continue

        # Cleanup.
        if os.path.isdir(output_path):
            shutil.rmtree(output_path)
        elif os.path.exists(output_path):
            os.remove(output_path)

        print(
            "Downloading file for volume {type}/{key} for primitive {python_path} ({primitive_id}): {file_uri}"
            .format(
                python_path=primitive_metadata['python_path'],
                primitive_id=primitive_metadata['id'],
                type=installation_entry['type'],
                key=installation_entry['key'],
                file_uri=installation_entry['file_uri'],
            ),
            flush=True)

        output_file_obj: typing.Optional[typing.BinaryIO] = None
        output_tar_process = None

        try:
            if installation_entry['type'] == 'FILE':
                output_file_obj = open(output_path, 'wb')
            elif installation_entry['type'] == 'TGZ':
                os.makedirs(output_path, mode=0o755, exist_ok=True)
                output_tar_process = subprocess.Popen(
                    ['tar', '-xz', '-C', output_path], stdin=subprocess.PIPE)
                output_file_obj = typing.cast(typing.BinaryIO,
                                              output_tar_process.stdin)

            digest = hashlib.sha256()
            downloaded = 0
            start = time.time()
            last_progress_report = None

            while True:
                try:
                    headers = {}
                    if downloaded:
                        headers['Range'] = 'bytes={downloaded}-'.format(
                            downloaded=downloaded)
                    with requests.get(
                            installation_entry['file_uri'],
                            stream=True,
                            headers=headers,
                            allow_redirects=True,
                            timeout=30,
                    ) as response:
                        response.raise_for_status()

                        # We require the server to support partial requests. If it
                        # returns code 200 when we have sent a "Range" header (when
                        # downloaded is not zero), we raise an exception.
                        if (downloaded and response.status_code != 206) or (
                                not downloaded
                                and response.status_code != 200):
                            raise requests.HTTPError(
                                "Unexpected status: {status_code}".format(
                                    status_code=response.status_code),
                                response=response)

                        download_total = response.headers.get('Content-Length')

                        for data in response.iter_content(chunk_size=1024 *
                                                          digest.block_size):
                            digest.update(data)
                            downloaded += len(data)
                            output_file_obj.write(data)  # type: ignore

                            # Output at most once every 10 seconds.
                            now = time.time()
                            if last_progress_report is None or now > last_progress_report + 10:
                                last_progress_report = now
                                if download_total:
                                    print(
                                        "Downloaded {downloaded}/{download_total} B"
                                        .format(downloaded=downloaded,
                                                download_total=download_total),
                                        flush=True,
                                    )
                                else:
                                    print(
                                        "Downloaded {downloaded} B".format(
                                            downloaded=downloaded),
                                        flush=True,
                                    )
                    break
                except requests.Timeout:
                    # If timeout, retry/resume.
                    print("Timeout. Retrying.", flush=True)

            end = time.time()

            print("Downloaded {downloaded} B in {seconds} second(s).".format(
                downloaded=downloaded,
                seconds=end - start,
            ),
                  flush=True)

            if output_tar_process is not None:
                # Close the input to the process to signal that we are done.
                output_file_obj.close()  # type: ignore
                output_file_obj = None

                # Wait for 60 seconds to finish writing everything out.
                if output_tar_process.wait(60) != 0:
                    raise subprocess.CalledProcessError(
                        output_tar_process.returncode, output_tar_process.args)
                output_tar_process = None

            if installation_entry['file_digest'] != digest.hexdigest():
                raise ValueError(
                    "Digest for downloaded file does not match one from metadata. Metadata digest: {metadata_digest}. Computed digest: {computed_digest}."
                    .format(
                        metadata_digest=installation_entry['file_digest'],
                        computed_digest=digest.hexdigest(),
                    ))

        except Exception:
            # Cleanup.
            if output_tar_process is not None:
                try:
                    output_tar_process.kill()
                    output_tar_process.wait()
                    output_file_obj = None
                except Exception:
                    # We ignore errors cleaning up.
                    pass
            if os.path.isdir(output_path):
                shutil.rmtree(output_path)
            elif os.path.exists(output_path):
                os.remove(output_path)

            raise

        finally:
            if output_file_obj is not None:
                output_file_obj.close()