コード例 #1
0
ファイル: airbnb.py プロジェクト: Dani-Mora/protodata
    def define_columns(self):
        """ See base class.

        Builds one column per feature reported by ``get_column_info`` for
        ``self.data`` (the individual amenity columns and ``picture_url``
        are excluded from that scan), then appends a single sparse
        ``amenities`` column and the image-specific columns.

        Returns:
            List with the column definitions, in the order described above.

        Raises:
            RuntimeError: If a feature is neither categorical, numeric nor
                boolean.
        """
        excluded_cols = self.amenities_list + ['picture_url']
        col_info = get_column_info(self.data, excluded=excluded_cols)

        columns = []
        for name, ftype in col_info:

            mapped_type = map_feature_type(self.data[name].dtype)

            if is_categorical(ftype):
                # Categorical data: hardcoded buckets take precedence over
                # the categories gathered from the dataset itself
                if name in self.hardcoded_buckets:
                    keys = self.hardcoded_buckets[name]
                else:
                    keys = self.categories[name]

                logger.info('Creating column for feature "{}" with keys "{}"'
                            .format(name, keys))
                columns.append(SparseColumn(name,
                                            keys=keys,
                                            type=mapped_type))

            elif is_numeric(ftype) or is_bool(ftype):
                logger.info('Creating column for numerical feature "{}"'
                            .format(name))
                columns.append(NumericColumn(name, type=mapped_type))

            else:
                # Arguments follow the placeholder order: type first, then
                # the column name
                raise RuntimeError('Unknown column type "{}" for column "{}"'
                                   .format(ftype, name))

        # Add amenities column, again preferring hardcoded buckets if given
        am_keys = (self.hardcoded_buckets['amenities']
                   if 'amenities' in self.hardcoded_buckets
                   else self.amenities_list)

        logger.info('Creating column for "amenities" with keys {}'
                    .format(am_keys))

        am_type = map_feature_type(np.dtype('object'))
        columns.append(SparseColumn('amenities',
                                    keys=am_keys,
                                    type=am_type))

        # Image-specific columns
        columns += [
            ImageColumn('image', format='JPEG'),
            NumericColumn('height', type=map_feature_type(np.dtype('int'))),
            NumericColumn('width', type=map_feature_type(np.dtype('int'))),
            SparseColumn('path', map_feature_type(np.dtype('object'))),
            SparseColumn('format', map_feature_type(np.dtype('object'))),
            SparseColumn('colorspace', map_feature_type(np.dtype('object')))
        ]

        # Summary of every column created, including the image-specific ones
        # that were not logged above
        for c in columns:
            logger.info('Creating column for feature "{}"'.format(c.name))

        return columns
コード例 #2
0
    def define_columns(self):
        """ Returns the column definitions for the image and its pixels.

        The result is the concatenation of three groups, in this order:
        the base columns (``label``, ``image`` and ``colorspace``), one
        sparse column per pixel position and one numeric column per pixel
        position, named like the sparse one plus a ``_num`` suffix. Pixel
        positions are visited with ``range(self.height)`` as the outer loop
        and ``range(self.width)`` as the inner one.
        """
        # Image columns
        base_columns = [
            NumericColumn('label', type=map_feature_type(np.dtype('int'))),
            ImageColumn('image', format='JPEG'),
            SparseColumn('colorspace', map_feature_type(np.dtype('object')))
        ]

        # The mapped types do not depend on the pixel position, so they are
        # resolved once instead of on every iteration
        sparse_type = map_feature_type(np.dtype('object'))
        numeric_type = map_feature_type(np.dtype('float'))

        # Categorical and numerical columns for each pixel position
        pixel_columns, deep_columns = [], []
        for i in range(self.height):
            for j in range(self.width):
                pixel_name = self._get_pixel_name(i, j)

                # Wide column
                pixel_columns.append(SparseColumn(name=pixel_name,
                                                  type=sparse_type,
                                                  keys=256))

                # Deep column
                deep_columns.append(NumericColumn(name=pixel_name + '_num',
                                                  type=numeric_type))

        return base_columns + pixel_columns + deep_columns
コード例 #3
0
    def define_columns(self):
        """ Returns the numeric feature columns followed by the label.

        One numeric column is created per index along axis 1 of
        ``self.features``, named after that index and typed from
        ``np.dtype('float')``. The last column is the ``class`` label, typed
        from ``np.dtype('int')``.
        """
        n_features = self.features.shape[1]

        # One column per feature, named by its position
        feature_cols = [
            NumericColumn(name=str(idx),
                          type=map_feature_type(np.dtype('float')))
            for idx in range(n_features)
        ]

        # Label goes last
        label_type = map_feature_type(np.dtype('int'))
        label_col = NumericColumn(name='class', type=label_type)

        return feature_cols + [label_col]
コード例 #4
0
 def define_columns(self):
     """ Returns the label, image and colorspace columns, in that order. """
     label_type = map_feature_type(np.dtype('int'))
     label_col = NumericColumn('label', type=label_type)

     image_col = ImageColumn('image', format='JPEG')

     colorspace_type = map_feature_type(np.dtype('object'))
     colorspace_col = SparseColumn('colorspace', colorspace_type)

     return [label_col, image_col, colorspace_col]
コード例 #5
0
ファイル: scikit_dataset.py プロジェクト: Dani-Mora/protodata
    def define_columns(self):
        """ See base class.

        Creates one numeric column per index along axis 1 of
        ``self.features``, named with the matching entry of
        ``self.get_column_names()`` and typed from that column's dtype. An
        ``index`` column and the target column (named by
        ``self.get_target_name()``) are appended afterwards, in that order.

        Returns:
            List with the column definitions.
        """
        # Fetch the names once rather than on every iteration
        column_names = self.get_column_names()

        columns = [
            NumericColumn(column_names[i],
                          type=map_feature_type(self.features[:, i].dtype))
            for i in range(self.features.shape[1])
        ]

        # ``np.int`` and ``np.float`` were plain aliases of the builtins and
        # no longer exist in NumPy >= 1.24, so the builtins are used directly

        # Add column for index
        columns.append(NumericColumn('index',
                                     type=map_feature_type(int)))

        # Add column for label
        columns.append(NumericColumn(self.get_target_name(),
                                     type=map_feature_type(float)))

        return columns
コード例 #6
0
ファイル: cifar10.py プロジェクト: Dani-Mora/protodata
 def define_columns(self):
     """ Returns the label column followed by the JPEG image column. """
     label_type = map_feature_type(np.dtype('int'))
     label_col = NumericColumn('label', type=label_type)
     image_col = ImageColumn('image', format='JPEG')
     return [label_col, image_col]