Code Example #1
 def from_row(cls, row, imageable: ImageableType, alias=None):
     table = cls.Options.db_table if alias is None else alias
     return cls(
         id=t.Int().check(row[table.c.id]),
         title=t.String().check(row[table.c.title]),
         uri=str(t.URL.check(row[table.c.uri])),
         imageable=t.Or(t.Type(Author),
                        t.Type(Book),
                        t.Type(Series)).check(imageable),
         created_at=DateTime().check(row[table.c.created_at]),
         updated_at=t.Or(DateTime, t.Null).check(row[table.c.updated_at]),
         is_populated=True
     )
Code Example #2
 def from_row(cls, row, books: List['Book'], photos: List['Photo'],
              alias=None):
     table = cls.Options.db_table if alias is None else alias
     return cls(
         id=t.Int().check(row[table.c.id]),
         name=t.String().check(row[table.c.name]),
         date_of_birth=Date().check(row[table.c.date_of_birth]),
         date_of_death=t.Or(Date, t.Null).check(row[table.c.date_of_death]),
         books=t.List(t.Type(Book)).check(books),
         photos=t.List(t.Type(Photo)).check(photos),
         created_at=DateTime().check(row[table.c.created_at]),
         updated_at=t.Or(DateTime, t.Null).check(row[table.c.updated_at]),
         is_populated=True
     )
Code Example #3
 def from_row(cls, row, author: 'Author', photos: List['Photo'],
              chapters: List['Chapter'], series: 'Series' = None,
              alias=None):
     table = cls.Options.db_table if alias is None else alias
     return cls(
         id=t.Int().check(row[table.c.id]),
         title=t.String().check(row[table.c.title]),
         date_published=Date().check(row[table.c.date_published]),
         author=t.Type(Author).check(author),
         photos=t.List(t.Type(Photo)).check(photos),
         chapters=t.List(t.Type(Chapter)).check(chapters),
         series=t.Or(t.Type(Series), t.Null).check(series),
         created_at=DateTime().check(row[table.c.created_at]),
         updated_at=t.Or(DateTime, t.Null).check(row[table.c.updated_at]),
         is_populated=True
     )
Code Example #4
File: client.py  Project: vladz/drafts
class MsgBrokerClient:
    msg_schema_validator = t.Dict({
        t.Key('user'): t.Int(gte=0),
        t.Key('cmd'): t.String(),
        t.Key('data'): t.Type(dict),
        t.Key('priority', default=100): t.Int(gte=0)
    })

    def __init__(self, server_addr: str, service_name: str):
        self.server_addr = server_addr
        self.service_name = service_name
        self.client = Client()
        self.subscriber_queue = asyncio.Queue()

    async def run_client(self) -> None:
        await self.client.connect(servers=[f'nats://{self.server_addr}'],
                                  max_reconnect_attempts=-1)

    async def subscribe(self) -> int:
        return await self.client.subscribe_async(self.service_name,
                                                 cb=self._handler)

    async def publish(self, receiver, msg: Dict[str, Any], reply=None) -> None:
        try:
            msg = self.pack_msg(msg)
        except t.DataError:
            logger.exception(f'Bad message: {msg}')
        else:
            if reply:
                # TODO: add a timeout check
                return await self.client.publish_request(receiver, reply, msg)
            else:
                asyncio.ensure_future(self.client.publish(receiver, msg))

    @classmethod
    def pack_msg(cls, data: Dict[str, Any]) -> bytes:
        data = cls.msg_schema_validator(data)
        return msgpack.packb(data)

    @classmethod
    def unpack_msg(cls, data: bytes) -> Dict[str, Any]:
        data = msgpack.unpackb(data, encoding='utf-8')
        logger.debug(f'Received message:{data}')
        return cls.msg_schema_validator(data)

    async def _handler(self, msg: 'Msg') -> None:
        try:
            subject = msg.subject
            reply = msg.reply
            data = self.unpack_msg(msg.data)
            msg = Message(subject, data, reply)
        except t.DataError:
            logger.exception(f'Bad message: {msg}')
        else:
            asyncio.ensure_future(self.subscriber_queue.put(msg))

    @property
    def is_connected(self) -> bool:
        return self.client.is_connected
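
A quick sketch of what the msg_schema_validator above accepts (a standalone example, assuming only trafaret is installed; the msgpack step from pack_msg is left out and the message values are made up). 'user', 'cmd' and 'data' are required, while 'priority' falls back to 100 when omitted:

import trafaret as t

msg_schema_validator = t.Dict({
    t.Key('user'): t.Int(gte=0),
    t.Key('cmd'): t.String(),
    t.Key('data'): t.Type(dict),
    t.Key('priority', default=100): t.Int(gte=0)
})

# Valid message: the default priority is filled in by the validator.
print(msg_schema_validator.check({'user': 1, 'cmd': 'status', 'data': {}}))
# -> {'user': 1, 'cmd': 'status', 'data': {}, 'priority': 100}

# Invalid message: 'data' is not a dict, so t.Type(dict) fails with t.DataError.
try:
    msg_schema_validator.check({'user': 1, 'cmd': 'status', 'data': []})
except t.DataError as exc:
    print(exc.as_dict())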
Code Example #5
 def test_type(self):
     res = t.Type(int)
     self.assertEqual(repr(res), '<Type(int)>')
     c = t.Type[int]
     res = c.check(1)
     self.assertEqual(res, 1)
     res = extract_error(c, "foo")
     self.assertEqual(res, 'value is not int')
Code Example #6
File: models.py  Project: blackrabbit99/kievjs
class User(EmbeddedDocument, UserMixin):
    structure = t.Dict({
        'email': t.Email,
        'password': t.String,
        'first_name': t.String,
        'last_name': t.String,
        'roles': t.List[t.Type(Role)],
        t.Key('active', default=True): t.Bool,
    })
    required_fields = ['email', 'password', 'active']
Code Example #7
 def from_row(cls, row, book: 'Book', alias=None):
     table = cls.Options.db_table if alias is None else alias
     return cls(
         id=t.Int().check(row[table.c.id]),
         title=t.String().check(row[table.c.title]),
         ordering=t.Int().check(row[table.c.ordering]),
         book=t.Type(Book).check(book),
         created_at=DateTime().check(row[table.c.created_at]),
         updated_at=t.Or(DateTime, t.Null).check(row[table.c.updated_at]),
         is_populated=True
     )
Code Example #8
def construct(arg):
    '''
    Shortcut syntax to define trafarets.

    - int, str, float and bool will return t.Int, t.String, t.Float and t.Bool
    - one element list will return t.List
    - tuple or list with several args will return t.Tuple
    - dict will return t.Dict. If a key ends with '?' it will be optional and the '?' will be removed
    - any callable will be t.Call
    - otherwise it will be returned as is

    construct is recursive and will try to construct all list, tuple and dict args
    '''
    if isinstance(arg, t.Trafaret):
        return arg
    elif isinstance(arg, tuple) or (isinstance(arg, list) and len(arg) > 1):
        return t.Tuple(*(construct(a) for a in arg))
    elif isinstance(arg, list):
        # if len(arg) == 1
        return t.List(construct(arg[0]))
    elif isinstance(arg, dict):
        return t.Dict({
            construct_key(key): construct(value)
            for key, value in arg.items()
        })
    elif isinstance(arg, str):
        return t.Atom(arg)
    elif isinstance(arg, type):
        if arg is int:
            return t.Int()
        elif arg is float:
            return t.Float()
        elif arg is str:
            return t.String()
        elif arg is bool:
            return t.Bool()
        else:
            return t.Type(arg)
    elif callable(arg):
        return t.Call(arg)
    else:
        return arg
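
A short usage sketch of the shortcut syntax the docstring describes. The schema below is hypothetical, and construct_key is not shown in this snippet, but per the docstring it strips a trailing '?' and marks that key optional:

schema = construct({
    'name': str,                 # -> t.String()
    'age?': int,                 # optional key -> t.Int()
    'tags': [str],               # one-element list -> t.List(t.String())
    'point': (float, float),     # tuple -> t.Tuple(t.Float(), t.Float())
})

print(schema.check({'name': 'Ada', 'tags': ['math'], 'point': (1.0, 2.0)}))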
Code Example #9
 def test_repr(self):
     res = t.Type(int)
     assert repr(res) == '<Type(int)>'
Code Example #10
File: models.py  Project: blackrabbit99/kievjs
class Visitor(EmbeddedDocument):
    confirm_fields = {
        "cid": t.String,
        "sent": t.Bool,
        "letter_id": t.String,
        "confirmed_at": t.Float,
        "confirmed": t.Bool
    }

    structure = t.Dict({
        'name': t.String,
        'email': t.Email,
        'position': t.String,
        'company': t.String,
        'created_at': t.Type(datetime),
        'confirms': t.List(t.Dict(confirm_fields)),
        'tshirt_size': t.String(allow_blank=False),
        t.Key('is_approved', default=False): t.Bool,
        t.Key('is_declined', default=False): t.Bool,
        t.Key('is_confirmed', default=False): t.Bool,
    })

    required_fields = [
        'name', 'email', 'is_approved', 'is_declined', 'confirms'
    ]

    def confirmations(self):
        if not hasattr(self, "confirms"):
            self.confirms = []

        return zip(range(1, len(self.confirms) + 1), self.confirms)
        #map(lambda o: {key: val for key, val in o.as_dict().items()
        #               if key != "id"}, self.confirms))

    @classmethod
    def confirmations_stats(cls):
        confirms = {}
        for visitor in cls.query.find({'confirms.confirmed': True}):
            for n, confirm in visitor.confirmations():
                if n not in confirms:
                    confirms[n] = 0

                if confirm["confirmed"] is True:
                    confirms[n] += 1
        return confirms

    @classmethod
    def tshirt_matrix(cls):
        return {}

    def one_confirm(self, letter_id):
        for n, confirm in self.confirmations():
            if confirm["letter_id"] == str(letter_id):
                return confirm

        return None

    def save(self, *args, **kwargs):
        self.save_confirmation(None, commit=True)

    def save_confirmation(self, confirm, index=None, commit=True):
        to_save = []

        for n, c in self.confirmations():
            if n == index:
                to_save.append(confirm)
            else:
                to_save.append(c)

        if index is None and confirm is not None:
            to_save.append(confirm)

        self.confirms = to_save

        if commit is True:
            super(Visitor, self).save()

    def send_confirmation(self, letter, id=None):
        if id is None:
            id = str(uuid.uuid1())

        for n, conf in self.confirmations():
            if "letter_id" in conf and \
                    conf["letter_id"] == str(letter.id):
                return False

        tasks.send_email(
            self.email,
            letter.subject,
            None, {
                'visitor': self,
                'id': id,
                'letter_id': letter.id,
                'link': "{}/confirm/{}/{}/".format(DOMAIN, self.id, id)
            },
            template_text=letter.content)

        self.save_confirmation({
            "cid": id,
            "letter_id": str(letter.id),
            "sent": True,
            "confirmed": False,
            "confirmed_at": time.time()
        })

        return True

    def save_registered(self):
        if self.query.find_one({'email': self.email}) is None:
            self.created_at = datetime.utcnow()
            return self.save()
        else:
            return None
Code Example #11
It allows users to featurize the data with model.predict. It also lets the featurizer write the
featurized data to the csv containing the images, appending the features as additional columns
in line with each image row. An "image_missing" column is also added automatically for each
image column, holding binary values that indicate whether the image in that row is missing.
"""

import logging

import trafaret as t
import numpy as np
import pandas as pd

from keras.models import Model


@t.guard(model=t.Type(Model), array=t.Type(np.ndarray))
def featurize_data(model, array):
    """
    Given a model and an array, perform error checking and return the prediction
    of the full feature array.

    Parameters:
    ----------
        model : keras.models.Model
            The featurizer model performing predictions

        array : np.ndarray
            The vectorized array of images being converted into features

    Returns:
    --------
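
The t.guard decorator on featurize_data above is where t.Type does its work: keyword arguments are validated before the function body runs. A minimal stand-alone sketch of the same pattern (FakeModel is a made-up stand-in class, since the real keras Model is not needed to show the check):

import trafaret as t

class FakeModel:
    pass

@t.guard(model=t.Type(FakeModel), depth=t.Int(gte=1))
def decapitate(model, depth):
    return depth

print(decapitate(model=FakeModel(), depth=2))    # -> 2

try:
    decapitate(model='not a model', depth=2)     # rejected before the body runs
except t.DataError as exc:
    print(exc.as_dict())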
Code Example #12
        # if model_str == 'squeezenet':
        #     # Special case for squeezenet - we already have weights for it
        #     this_dir, this_filename = os.path.split(__file__)
        #     model_path = os.path.join(this_dir,
        #                               'saved_models',
        #                               'squeezenet_weights_tf_dim_ordering_tf_kernels.h5')
        #     if not os.path.isfile(model_path):
        #         raise ValueError('Could not find the weights. Download another model'
        #                          ' or replace the SqueezeNet weights in the model folder.')
        #     model.load_weights(model_path)

    logging.info('Model successfully initialized.')
    return model


@t.guard(model=t.Type(Model), depth=t.Int(gte=1))
def _decapitate_model(model, depth):
    """
    Cut off end layers of a model equal to the depth of the desired outputs,
    and then remove the links connecting the new outer layer to the old ones.
    Parameters:
    ----------
    model: keras.models.Model
        The model being decapitated. Note: original model is not changed, method returns new model.
    depth: int
        The number of layers to pop off the top of the network
    Returns:
    -------
    model: keras.models.Model
        Decapitated model.
    """
Code Example #13
class ImageFeaturizer:
    """
    This object can load images, rescale, crop, and vectorize them into a
    uniform batch, and then featurize the images for use with custom classifiers.

          Methods
    ------------------
        __init__(depth, autosample,
                 downsample_size):
            --------------------------------
            Initialize the ImageFeaturizer. Build the featurizer model with the
            depth and feature downsampling specified by the inputs.



        featurize_data(image_columns, image_path,
                       csv_path, new_csv_path, scaled_size, grayscale):
            --------------------------------
            Loads image directory and/or csv into the model, and
            featurizes the images



        load_data(image_columns, image_path, csv_path,
                  scaled_size, grayscale):
            --------------------------------
            Loads image directory and/or csv into the model, and vectorize the
            images for input into the featurizer



        featurize_preloaded_data():
            --------------------------------
            Featurize the loaded data, append the features to the csv, and
            return the full dataframe


    """

    @t.guard(depth=t.Int(gte=1, lte=4),
             autosample=t.Bool,
             downsample_size=t.Int(gte=0),
             model=t.Enum(*supported_model_types.keys()))
    def __init__(self,
                 depth=1,
                 autosample=False,
                 downsample_size=0,
                 model='squeezenet'
                 ):
        """
        Initializer.

        Loads an initial InceptionV3 pretrained network, decapitates it and
        downsamples according to user specifications.

        Parameters:
        ----------
            depth : int
                How deep to decapitate the model. Deeper means less specific but
                also less complex

            autosample : bool
                If True, feature layer is automatically downsampled to the right size.

            downsample_size: int
                The number of features to downsample the featurizer to

        Returns:
        --------
        None. Initializes and saves the featurizer object attributes.

        """
        # BUILDING THE MODEL #
        logging.info("Building the featurizer.")

        featurizer = build_featurizer(depth, autosample,
                                      downsample_size, model_str=model.lower())

        # Saving initializations of model
        self.depth = depth
        self.autosample = autosample
        self.downsample_size = downsample_size
        self.num_features = featurizer.layers[-1].output_shape[-1]

        # Save the model
        self.model_name = model.lower()
        self.featurizer = featurizer
        self.visualize = featurizer.summary

        # Initializing preprocessing variables for after we load and featurize the images
        self.data = np.zeros((1))
        self.features = pd.DataFrame()
        self.df_original = pd.DataFrame()
        self.full_dataframe = pd.DataFrame()
        self.df_features = pd.DataFrame()
        self.csv_path = ''
        self.image_dict = {}
        self.image_columns = ''
        self.image_path = ''

        # Image scaling and cropping
        self.scaled_size = (0, 0)
        self.crop_size = (0, 0)
        self.number_crops = 0
        self.isotropic_scaling = False

    def load_data(self,
                  image_columns,
                  image_path='',
                  image_dict='',
                  csv_path='',
                  grayscale=False,
                  save_data=True,
                  # crop_size = (299, 299),
                  # number_crops = 0,
                  # random_crop = False,
                  # isotropic_scaling = True
                  ):
        """
        Load image directory and/or csv, and vectorize the images for input into the featurizer.

        Parameters:
        ----------
            image_columns : str
                the name of the column holding the image data, if a csv exists,
                or what the name of the column will be, if generating the csv
                from a directory

            image_path : str
                the path to the folder containing the images. If using URLs, leave blank

            csv_path : str
                the path to the csv. If just using a directory, leave blank.
                If csv exists, this is the path where the featurized csv will be
                generated.

            # These features haven't been implemented yet.
            # grayscale : bool
            #     Flags the image as grayscale
            #
            # isotropic_scaling : bool
            #     If True, images are scaled keeping proportions and then cropped
            #
            # crop_size: tuple
            #     If the image gets cropped, decides the size of the crop
            #
            # random_crop: bool
            #     If False, only take the center crop. If True, take random crop
            #

        """
        # Fix column headers and image path if they haven't been done, build path for new csv
        image_columns, image_path = _input_fixer(image_columns, image_path)

        # If there's no dataframe, build it!
        if csv_path == '':
            if len(image_columns) > 1:
                raise ValueError('If building the dataframe from an image directory, the featurizer '
                                 'can only create a single image column. If two image columns are '
                                 'needed, please create a csv to pass in.')

        # If the image_dict hasn't been passed in (which only happens in batch processing),
        # build the full image dict and save the original dataframe
        if not image_dict:
            image_dict, df = _build_image_dict(image_path, csv_path,
                                               image_columns)
            self.df_original = df
            self.full_dataframe = df
            self.image_columns = image_columns
            self.image_dict = image_dict

        scaled_size, full_image_data = \
            self._load_data_helper(self.model_name, image_columns,
                                   image_path, image_dict, csv_path, grayscale)

        # Save all of the necessary data to the featurizer
        if save_data:
            self.data = full_image_data

        self.csv_path = csv_path
        self.image_path = image_path
        self.scaled_size = scaled_size
        return full_image_data

    @t.guard(batch_data=t.Type(np.ndarray),
             image_columns=t.List(t.String(allow_blank=True)) | t.String(allow_blank=True),
             batch_processing=t.Bool,
             features_only=t.Bool,
             save_features=t.Bool,
             save_csv=t.Bool,
             new_csv_path=t.String(allow_blank=True),
             omit_model=t.Bool,
             omit_depth=t.Bool,
             omit_output=t.Bool,
             omit_time=t.Bool,
             )
    def featurize_preloaded_data(self, batch_data=np.zeros((1)), image_columns='',
                                 batch_processing=False, features_only=False,
                                 save_features=False, save_csv=False, new_csv_path='',
                                 omit_model=False, omit_depth=False, omit_output=False,
                                 omit_time=False):
        """
        Featurize the loaded data, returning the dataframe and writing the features
        and the full combined data to csv

        Parameters
        ----------


        Returns
        -------
            full_dataframe or df_features: pandas.DataFrame
                If features_only, this returns a Dataframe containing the features.
                Otherwise, it returns a DataFrame containing the features appended to the
                original csv. If save_csv is set to True, it also writes csv's
                to the same path as the csv containing the list of names.

        """

        # If the batch data isn't passed in, then load the full data from the attributes
        if np.array_equal(batch_data, np.zeros((1))):
            batch_data = self.data
        if image_columns == '':
            image_columns = self.image_columns
        if isinstance(image_columns, str):
            image_columns = [image_columns]

        # Check data has been loaded, and that the data was vectorized correctly
        if np.array_equal(batch_data, np.zeros((1))):
            raise IOError('Must load data into the model first. Call load_data.')

        # If batch processing, make sure we're only doing a single column at a time.
        # Otherwise, make sure the number of columns matches the first dimension of the data
        if batch_processing:
            assert len(image_columns) == 1 or isinstance(image_columns, str)
        else:
            assert len(image_columns) == batch_data.shape[0]
        logging.info("Trying to featurize data.")

        # Initialize featurized data vector with appropriate size
        features = np.zeros((batch_data.shape[1],
                             self.num_features * len(image_columns)))

        # Get the image features
        df_features = self._featurize_helper(
            features, image_columns, batch_data)

        # Save features if boolean set to True
        if save_features:
            self.features = df_features

        # If called with features_only, returns only the features
        if features_only:
            return df_features

        # Save the image features with the original dataframe
        full_dataframe = pd.concat([self.df_original, df_features], axis=1)

        # If batch processing, this is only the batch dataframe. Otherwise, this is the actual
        # full dataframe.
        if not batch_processing:
            self.full_dataframe = full_dataframe

        # Save csv if called
        if save_csv:
            self.save_csv(new_csv_path=new_csv_path, omit_model=omit_model, omit_depth=omit_depth,
                          omit_output=omit_output, omit_time=omit_time, save_features=save_features)

        return full_dataframe

    @t.guard(image_columns=t.List(t.String(allow_blank=True)) | t.String(allow_blank=True),
             image_path=t.String(allow_blank=True),
             csv_path=t.String(allow_blank=True),
             new_csv_path=t.String(allow_blank=True),
             batch_processing=t.Bool,
             batch_size=t.Int,
             save_data=t.Bool,
             save_features=t.Bool,
             save_csv=t.Bool,
             omit_time=t.Bool,
             omit_model=t.Bool,
             omit_depth=t.Bool,
             omit_output=t.Bool,
             verbose=t.Bool,
             grayscale=t.Bool
             )
    def featurize(self,
                  image_columns,
                  image_path='',
                  csv_path='',
                  new_csv_path='',
                  batch_processing=True,
                  batch_size=1000,
                  save_data=False,
                  save_features=False,
                  save_csv=False,
                  omit_time=False,
                  omit_model=False,
                  omit_depth=False,
                  omit_output=False,
                  verbose=True,
                  grayscale=False
                  # crop_size = (299, 299),
                  # number_crops = 0,
                  # random_crop = False,
                  # isotropic_scaling = True
                  ):
        """
        Load image directory and/or csv, and vectorize the images for input into the featurizer.
        Then, featurize the data.

        Parameters:
        ----------
            image_columns : list of str
                list of the names of the column holding the image data, if a csv exists,
                or what the name of the column will be, if generating the csv
                from a directory

            image_path : str
                the path to the folder containing the images. If using URLs, leave blank

            csv_path : str
                the path to the csv. If just using a directory, leave blank, and
                specify the path for the generated csv in new_csv_path.
                If csv exists, this is the path where the featurized csv will be
                generated.

            new_csv_path : str
                the path to the new csv, if one is being generated from a directory.
                If no csv exists, this is the path where the featurized csv will
                be generated

            grayscale : bool
                Decides if image is grayscale or not. May get deprecated. Don't
                think it works on the InceptionV3 model due to input size.

            # These features haven't been implemented yet.
            # isotropic_scaling : bool
            #     if True, images are scaled keeping proportions and then cropped
            #
            # crop_size: tuple
            #     if the image gets cropped, decides the size of the crop
            #
            # random_crop: bool
            #    If False, only take the center crop. If True, take random crop
            #

        Returns:
        --------
            full_dataframe :
                Dataframe containing the features appended to the original csv.
                Also writes csvs containing the features only and the full dataframe
                to the same path as the csv containing the list of names

        """
        if not image_path and not csv_path:
            raise ValueError("Must specify either image_path or csv_path as input.")

        # Set logging level
        if verbose:
            logger.setLevel(logging.INFO)

        # Fix column headers and image path if necessary
        image_columns, image_path = _input_fixer(image_columns, image_path)

        # Find the full image dict and save the original dataframe. This is required early to know
        # how many images exist in total, to control batch processing.
        full_image_dict, df_original = _build_image_dict(image_path, csv_path,
                                                         image_columns)
        # Save the fixed inputs and full image dict
        self.df_original = df_original
        self.image_columns = image_columns
        self.image_dict = full_image_dict

        # Users can turn off batch processing by either setting batch_processing to false, or
        # setting batch_size to 0
        if batch_processing and batch_size:
            # Perform batch processing, and save the full dataframe and the full features dataframe
            features_df = self._batch_processing(full_image_dict, image_columns,
                                                 image_path, csv_path,
                                                 batch_size, grayscale)

        # If batch processing is turned off, load the images in one big batch and featurize them all
        else:
            logger.info("Loading full data tensor without batch processing. If you "
                        "experience a memory error, make sure batch processing is enabled.")

            full_data = self.load_data(image_columns, image_path, full_image_dict, csv_path,
                                       grayscale, save_data)

            features_df = \
                self.featurize_preloaded_data(full_data, image_columns=image_columns,
                                              features_only=True)

        # Save the full dataframe with the features
        full_df = pd.concat([df_original, features_df], axis=1)
        self.full_dataframe = full_df

        # Save features and csv if flags are enabled
        if save_features:
            self.features = features_df
        if save_csv:
            self.save_csv(new_csv_path=new_csv_path, omit_model=omit_model, omit_depth=omit_depth,
                          omit_output=omit_output, omit_time=omit_time, save_features=save_features)

        # Return the full featurized dataframe
        return full_df

    def save_csv(self, new_csv_path='', omit_model=False, omit_depth=False,
                 omit_output=False, omit_time=False, save_features=False):
        """
        """
        if self.full_dataframe.empty:
            raise AttributeError('No dataframe has been featurized.')

        # Save the name and extension separately, for robust naming
        if not new_csv_path:
            new_csv_path = self.csv_path or DEFAULT_NEW_CSV_PATH

            csv_name, ext = os.path.splitext(new_csv_path)
            name_path = _named_path_finder("{}_featurized".format(csv_name), self.model_name,
                                           self.depth, self.num_features, omit_model, omit_depth,
                                           omit_output, omit_time)
        else:
            name_path, ext = os.path.splitext(new_csv_path)

        _create_csv_path(name_path)
        logger.warning("Saving full dataframe to csv as {}{}".format(name_path, ext))
        self.full_dataframe.to_csv("{}{}".format(name_path, ext), index=False)

        if save_features:
            logger.warning("Saving features to csv as {}_features_only{}".format(name_path, ext))
            self.df_features.to_csv("{}_features_only{}".format(name_path, ext),
                                    index=False)

    @t.guard(confirm=t.Bool)
    def clear_input(self, confirm=False):
        """
        Clear all input for the model. Requires the user to confirm with an additional "confirm"
        argument in order to run.

        Parameters:
        ----------
        confirm : bool
            Users are required to modify this to true in order to clear all attributes
            from the featurizer
        """
        if not confirm:
            raise ValueError('If you\'re sure you would like to clear the inputs of this model, '
                             'rerun the function with the following argument: '
                             'clear_input(confirm=True). This operation cannot be reversed.')

        self.data = np.zeros((1))
        self.features = pd.DataFrame()
        self.full_dataframe = pd.DataFrame()
        self.csv_path = ''
        self.image_list = ''
        self.image_columns = ''
        self.image_path = ''

    # ###################
    # Helper Functions! #
    # ###################

    def _load_data_helper(self,
                          model_name,
                          image_columns,
                          image_path,
                          image_dict,
                          csv_path,
                          grayscale):
        """
        This function helps load the image data from the image directory and/or csv.
        It can be called by either batch processing, where each column is handled separately in the
        parent function and the data is loaded in batches, or it can be called without batch
        processing, where the columns must each be loaded and concatenated here.

        Parameters:
        ----------
        model_name : str
            The name of the model type, which determines scaling size

        image_columns : list
            A list of the image column headers

        image_path : str
            Path to the image directory

        image_dict : dict
            This is a dictionary containing the names of each image column as a key, along with
            all of the image paths for that column.

        csv_path : str
            Path to the csv

        grayscale : bool
            Whether the images are grayscale or not
        """

        # Save size that model scales to
        scaled_size = SIZE_DICT[model_name]

        # Save the full image tensor, the path to the csv, and the list of image paths
        image_data, list_of_image_paths = \
            preprocess_data(image_columns[0], model_name,
                            image_dict[image_columns[0]],
                            image_path, csv_path, scaled_size, grayscale)

        image_data_list = [np.expand_dims(image_data, axis=0)]

        # If there is more than one image column, repeat this process for each
        if len(image_columns) > 1:
            for column in image_columns[1:]:
                image_data, list_of_image_paths = \
                    preprocess_data(column, model_name, image_dict[column], image_path,
                                    csv_path, scaled_size, grayscale)

                image_data_list.append(np.expand_dims(image_data, axis=0))

        full_image_data = np.concatenate(image_data_list)

        return scaled_size, full_image_data

    def _featurize_helper(self, features, image_columns, batch_data):
        """
        This function featurizes the data for each image column, and creates the features array
        from all of the featurized columns

        Parameters:
        ----------
        features : array
            Array of features already computed

        image_columns : list
            A list of the image column headers

        batch_data : array
            The batch loaded image data (which may be the full array if not running with batches)
        """
        # Save the initial features list
        features_list = []

        # For each image column, perform the full featurization and add the features to the df
        for column in range(batch_data.shape[0]):
            # Featurize the data, and save it to the appropriate columns
            partial_features = featurize_data(self.featurizer, batch_data[column])

            features[:, self.num_features * column:self.num_features * column + self.num_features]\
                = partial_features

            # Save the full dataframe
            df_features = \
                create_features(batch_data[column],
                                partial_features,
                                image_columns[column])

            features_list.append(df_features)

        df_features = pd.concat(features_list, axis=1)

        return df_features

    def _batch_processing(self,
                          full_image_dict,
                          image_columns,
                          image_path='',
                          csv_path='',
                          batch_size=1000,
                          grayscale=False):
        """
        This function handles batch processing. It takes the full list of images that need
        to be processed and loads/featurizes the images in batches.

        Parameters:
        ----------
        full_image_dict : dict
            This is a dictionary containing the names of each image column as a key, along with
            all of the image paths for that column.

        image_columns : list
            A list of the image column headers

        df_original : pandas.DataFrame
            The original dataframe (not containing the image features)

        image_path : str
            Path to the image directory

        csv_path : str
            Path to the csv

        batch_size : int
            The number of images processed per batch

        grayscale : bool
            Whether the images are grayscale or not

        """

        features_df = pd.DataFrame()
        features_df_columns_list = []
        # Iterate through each image column
        for column_index in range(len(image_columns)):
            # Initialize the batch index and save the column name
            index = 0
            batch_number = 0
            column = image_columns[column_index]
            batch_features_df = pd.DataFrame()

            # Get the list of image paths and the number of images in this column
            list_of_image_paths = full_image_dict[column]
            num_images = len(list_of_image_paths)

            batch_features_list = []
            # Loop through the images, featurizing each batch
            if len(image_columns) > 1:
                logger.info("Featurizing column #{}".format(column_index + 1))

            while index < num_images:
                tic = time.clock()

                # Cap the batch size against the total number of images left to prevent overflow
                if index + batch_size > num_images:
                    batch_size = num_images - index

                # Create a dictionary for just the batch of images
                batch_image_dict = {column: full_image_dict[column][index:index + batch_size]}

                # Load the images
                logger.info("Loading image batch.")

                batch_data = self.load_data(column, image_path,
                                            batch_image_dict, csv_path,
                                            grayscale, save_data=False)
                logger.info("\nFeaturizing image batch.")

                # Featurize this batch and collect its features; all of the column's
                # batches are concatenated after the loop
                batch_features_list.append(self.featurize_preloaded_data(batch_data, column,
                                                                         features_only=True,
                                                                         batch_processing=True))

                # Increment index by batch size
                index += batch_size
                batch_number += 1

                # Give update on time and number of images left in column
                remaining_batches = int(math.ceil((num_images - index) / batch_size))

                logger.info("Featurized batch #{}. Number of images left: {}\n"
                            "Estimated total time left: {} seconds\n".format(
                                batch_number, num_images - index,
                                int((time.clock() - tic) * remaining_batches))
                            )

            # After the full column's features are calculated, concatenate them all and append them
            # to the full DataFrame list
            batch_features_df = pd.concat(batch_features_list, ignore_index=True)
            features_df_columns_list.append(batch_features_df)

        # Once all the features are created for each column, concatenate them together for both
        # the features dataframe and the full dataframe
        features_df = pd.concat(features_df_columns_list, axis=1)

        # Return the full dataframe and features dataframe
        return features_df
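
A hypothetical end-to-end call, pieced together from the signatures and docstrings above. The column name, image directory and csv path are made up, and build_featurizer plus its pretrained weights must be available for this to actually run:

featurizer = ImageFeaturizer(depth=2, autosample=True, model='squeezenet')

full_df = featurizer.featurize(image_columns='images',
                               image_path='downloaded_images/',
                               csv_path='listings.csv',
                               save_features=True,
                               save_csv=True)
print(full_df.head())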
Code Example #14
    unique_strings_list,
    ensure_list,
)
from .decimal import Decimal
from .format import format_trafaret


__VERSION__ = (0, 2, 1)


check_number = t.OnError(t.Float() | Decimal(), 'Not a number')

json_schema_type = (
    t.Atom('null') & just(t.Null())
    | t.Atom('boolean') & just(t.Bool())
    | t.Atom('object') & just(t.Type(dict))
    | t.Atom('array') & just(t.Type(list))
    | t.Atom('number') & just(check_number)
    | t.Atom('integer') & just(t.Int())
    | t.Atom('string') & just(t.String())
)


def multipleOf(multiplier):
    def check(value):
        if value % multiplier != 0:
            return t.DataError('%s is not divisible by %s' % (value, multiplier))
        return value
    return check
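
multipleOf follows the trafaret convention for plain check functions: return the value on success and return (not raise) a t.DataError on failure, so it can be composed with other checks. A hedged usage sketch (assuming the & operator wraps a plain callable in t.Call, as trafaret does for its other operands):

positive_multiple_of_5 = t.Int(gte=0) & multipleOf(5)

print(positive_multiple_of_5.check(25))    # -> 25

try:
    positive_multiple_of_5.check(7)
except t.DataError as exc:
    print(exc)    # 7 is not divisible by 5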

Code Example #15
        'data':
        t.Or(t.List(t.Any),
             t.String),  # check if data is optional, and possible values
        'meta':
        t.Dict(
            {
                'rc': t.Enum('ok', 'error'),  # check if other rc's could exist
                t.Key('msg', optional=True): t.String,
            },
            ignore_extra='*'),
    },
    ignore_extra='*')

_base_time_params = t.Dict({
    'start':
    t.Or(t.Float, t.Type(datetime), t.Atom(None)),
    'end':
    t.Or(t.Float, t.Type(datetime), t.Atom(None)),
})

_base_time_site_params = _base_time_params.merge({'site': SiteName})
_base_time_op_site_params = _base_time_params.merge(
    {'site': t.Or(SiteName, t.Atom(None))})

_inner_stats_extras = t.Dict({
    'gran': t.Enum('5minutes', 'hourly', 'daily'),
    'def_range': t.Int,
})

# will not check for port in base_url for now
init_params = t.Dict({
Code Example #16
import pydash as _
import trafaret as t
import datetime
from functools import partial

from jinja2.utils import import_string
from trafaret.contrib.object_id import MongoId
from trafaret.contrib.rfc_3339 import DateTime

Optional = partial(t.Key, optional=True)
SimpleType = t.IntRaw | t.Bool | t.String | t.FloatRaw

DateTimeType = DateTime | t.Type(datetime.datetime)
NumericType = t.Float | t.Int >> (lambda val: float(val))
URLType = t.Regexp(r'^([a-z]{2,5}:)?(\/\/?)?[a-z][a-z0-9\.\-\/]+$')

OptionValue = t.String(
    allow_blank=True) | t.Bool | t.Float | t.Int | t.Type(dict)
Optional = partial(t.Key, optional=True)

SimpleDoc = t.Dict({
    t.Key('id', optional=True) >> '_id': MongoId,
    Optional('_id'): MongoId
})

TimestampDoc = SimpleDoc + t.Dict({
    Optional('created', default=datetime.datetime.now):
    DateTimeType | t.Null,
    Optional('modified'):
    DateTimeType
})