Example 1
def test_log():
    """Calling log() on a configured ManyHotEncoder must not raise."""
    # Silence logger output for the duration of the call.
    with dcase_util.utils.DisableLogger():
        encoder = ManyHotEncoder(
            label_list=['A', 'B', 'C'],
            time_resolution=1.0,
            filename='test.cpickle'
        )
        encoder.log()
Example 2
def test_construction():
    """Encoding a single label yields the expected binary matrix and shape.

    Fixes: removed unused copy-paste locals (`minimal_event_list`, `meta`)
    and corrected the second case's comment label ("Test #2").
    """
    # Test #1: only 'A' active over three 1-second frames
    target_binary_matrix = numpy.array([
        [1., 0., 0.],  # 0
        [1., 0., 0.],  # 1
        [1., 0., 0.],  # 2
    ]).T

    binary_matrix = ManyHotEncoder(
        label_list=['A', 'B', 'C'],
        time_resolution=1.0
    ).encode(
        label_list=['A'],
        length_seconds=3,
    )

    numpy.testing.assert_array_equal(target_binary_matrix, binary_matrix.data)
    nose.tools.assert_equal(binary_matrix.shape[0], target_binary_matrix.shape[0])
    nose.tools.assert_equal(binary_matrix.shape[1], target_binary_matrix.shape[1])

    # Test #2: only 'B' active over three 1-second frames
    target_binary_matrix = numpy.array([
        [0., 1., 0.],  # 0
        [0., 1., 0.],  # 1
        [0., 1., 0.],  # 2
    ]).T

    binary_matrix = ManyHotEncoder(
        label_list=['A', 'B', 'C'],
        time_resolution=1.0
    ).encode(
        label_list=['B'],
        length_seconds=3,
    )

    numpy.testing.assert_array_equal(target_binary_matrix, binary_matrix.data)
    nose.tools.assert_equal(binary_matrix.shape[0], target_binary_matrix.shape[0])
    nose.tools.assert_equal(binary_matrix.shape[1], target_binary_matrix.shape[1])
Example 3
def test_unknown_label():
    """Encoding a label absent from label_list must complete without raising."""
    encoder_args = dict(
        label_list=['A', 'B', 'C'],
        time_resolution=1.0,
        filename='test.cpickle'
    )
    # Suppress the warning the encoder logs for the unknown label 'BB'.
    with dcase_util.utils.DisableLogger():
        ManyHotEncoder(**encoder_args).encode(
            label_list=['BB'],
            length_seconds=3,
        )
Example 4
    def __init__(self,
                 label_list=None,
                 focus_field='tags',
                 time_resolution=None,
                 length_frames=None,
                 length_seconds=None,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        label_list : list
            List of labels in correct order

        focus_field : str
            Field from the meta data item to be used in encoding

        time_resolution : float > 0.0
            Time resolution used when converting event into event roll.

        length_frames : int
            Length of encoded segment in frames

        length_seconds : float > 0.0
            Length of encoded segment in seconds

        """

        # Feed the explicit constructor arguments back into kwargs so the
        # mixin chain receives them as initialization parameters.
        kwargs['label_list'] = label_list
        kwargs['time_resolution'] = time_resolution
        kwargs['length_frames'] = length_frames
        kwargs['length_seconds'] = length_seconds

        # Initialize mixins with the collected parameters.
        super(ManyHotEncodingProcessor, self).__init__(**kwargs)

        self.focus_field = focus_field

        # Encoder configured from the parameters gathered during init.
        self.encoder = ManyHotEncoder(**self.init_parameters)
Example 5
class ManyHotEncodingProcessor(Processor):
    """Many hot encoding processor"""
    input_type = ProcessingChainItemType.METADATA  #: Input data type
    output_type = ProcessingChainItemType.DATA_CONTAINER  #: Output data type

    def __init__(self,
                 label_list=None,
                 focus_field='tags',
                 time_resolution=None,
                 length_frames=None,
                 length_seconds=None,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        label_list : list
            List of labels in correct order

        focus_field : str
            Field from the meta data item to be used in encoding

        time_resolution : float > 0.0
            Time resolution used when converting event into event roll.

        length_frames : int
            Length of encoded segment in frames

        length_seconds : float > 0.0
            Length of encoded segment in seconds

        """

        # Inject initialization parameters back to kwargs
        kwargs.update({
            'label_list': label_list,
            'time_resolution': time_resolution,
            'length_frames': length_frames,
            'length_seconds': length_seconds,
        })

        # Run super init to call init of mixins too
        super(ManyHotEncodingProcessor, self).__init__(**kwargs)

        self.focus_field = focus_field
        self.encoder = ManyHotEncoder(**self.init_parameters)

    def process(self,
                data=None,
                focus_field=None,
                length_frames=None,
                length_seconds=None,
                store_processing_chain=False,
                **kwargs):
        """Encode metadata

        Parameters
        ----------
        data : MetaDataContainer
            Meta data to encode.

        focus_field : str
            Field from the meta data item to be used in encoding. If None, one given as parameter for
            class constructor is used.

        length_frames : int
            Length of encoded segment in frames. If None, one given as parameter for class constructor is used.

        length_seconds : float > 0.0
            Length of encoded segment in seconds. If None, one given as parameter for class constructor is used.

        store_processing_chain : bool
            Store processing chain to data container returned
            Default value False

        Returns
        -------
        BinaryMatrixContainer

        Raises
        ------
        ValueError
            If data is not a MetaDataContainer.

        """

        from dcase_util.containers import MetaDataContainer

        if focus_field is None:
            focus_field = self.focus_field

        if isinstance(data, MetaDataContainer):
            # Fix: default to an empty label list so that an empty container
            # (or an item missing the focus field) encodes to an all-zero
            # matrix instead of raising NameError / passing None onward.
            label_list = []
            if len(data) > 0:
                label_list = data[0].get(focus_field)
                if isinstance(label_list, str):
                    label_list = [label_list]

                elif label_list is None:
                    label_list = []

            # Do processing
            self.encoder.encode(label_list=label_list,
                                length_frames=length_frames,
                                length_seconds=length_seconds)

            if store_processing_chain:
                # Get processing chain item
                processing_chain_item = self.get_processing_chain_item()

                if 'process_parameters' not in processing_chain_item:
                    processing_chain_item['process_parameters'] = {}

                processing_chain_item['process_parameters'][
                    'focus_field'] = focus_field
                processing_chain_item['process_parameters'][
                    'length_frames'] = length_frames

                # Store length_seconds too, for consistency with length_frames,
                # so the chain fully captures the encoding parameters used.
                processing_chain_item['process_parameters'][
                    'length_seconds'] = length_seconds

                # Create processing chain to be stored in the container, and push chain item into it
                if hasattr(data, 'processing_chain'):
                    data.processing_chain.push_processor(
                        **processing_chain_item)
                    processing_chain = data.processing_chain

                else:
                    processing_chain = ProcessingChain().push_processor(
                        **processing_chain_item)

            else:
                processing_chain = None

            from dcase_util.containers import BinaryMatrix2DContainer
            container = BinaryMatrix2DContainer(
                data=self.encoder.data,
                label_list=self.encoder.label_list,
                time_resolution=self.encoder.time_resolution,
                processing_chain=processing_chain)

            return container

        else:
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__, input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)
Example 6
def main(config='config/ReLU/0Pool/crnn_maxpool.yaml', **kwargs):
    """Trains a model on the given features and vocab.

    :config: A training configuration (YAML file). Note that all parameters in the config can also be manually adjusted with --ARG=VALUE
    :returns: str: Path to the output directory where logs and model checkpoints are stored
    """

    config_parameters = parse_config_or_kwargs(config, **kwargs)
    # Timestamped run directory: <outputpath>/<model>/<datetime>
    outputdir = os.path.join(
        config_parameters['outputpath'],
        config_parameters['model'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%f'))
    try:
        os.makedirs(outputdir)
    except IOError:
        # Directory may already exist. NOTE(review): IOError is an alias of
        # OSError in Python 3, so FileExistsError is covered here.
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)

    # Kaldi pipeline string used to stream features from the features file
    kaldi_string = parsecopyfeats(
        config_parameters['features'], **config_parameters['feature_args'])

    # Instantiate the scaler class named in the config (from module `pre`)
    scaler = getattr(
        pre, config_parameters['scaler'])(
        **config_parameters['scaler_args'])
    inputdim = -1
    logger.info(
        "<== Estimating Scaler ({}) ==>".format(
            scaler.__class__.__name__))
    # One full pass over the feature stream: fit the scaler incrementally and
    # discover the feature dimensionality from the last axis.
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    logger.info(
        "Features: {} Input dimension: {}".format(
            config_parameters['features'],
            inputdim))
    logger.info("<== Labels ==>")
    # Labels: TSV with a 'filename' column and comma-separated 'event_labels'
    label_df = pd.read_csv(config_parameters['labels'], sep='\t')
    label_df.event_labels = label_df.event_labels.str.split(',')
    label_df = label_df.set_index('filename')
    # Sorted vocabulary of all event labels seen in the label file
    uniquelabels = list(np.unique(
        [item
         for row in label_df.event_labels.values
         for item in row]))
    many_hot_encoder = ManyHotEncoder(
        label_list=uniquelabels,
        time_resolution=1
    )
    # One flattened many-hot target vector per utterance
    label_df['manyhot'] = label_df['event_labels'].apply(
        lambda x: many_hot_encoder.encode(x, 1).data.flatten())

    utt_labels = label_df.loc[:, 'manyhot'].to_dict()

    train_dataloader, cv_dataloader = create_dataloader_train_cv(
        kaldi_string,
        utt_labels,
        transform=scaler.transform,
        **config_parameters['dataloader_args'])
    # Model class is looked up by name in the project `models` module
    model = getattr(
        models,
        config_parameters['model'])(
        inputdim=inputdim,
        output_size=len(uniquelabels),
        **config_parameters['model_args'])
    logger.info("<== Model ==>")
    for line in pformat(model).split('\n'):
        logger.info(line)
    optimizer = getattr(
        torch.optim, config_parameters['optimizer'])(
        model.parameters(),
        **config_parameters['optimizer_args'])

    scheduler = getattr(
        torch.optim.lr_scheduler,
        config_parameters['scheduler'])(
        optimizer,
        **config_parameters['scheduler_args'])
    criterion = getattr(losses, config_parameters['loss'])(
        **config_parameters['loss_args'])

    # Path for the best-so-far checkpoint (overwritten on each improvement)
    trainedmodelpath = os.path.join(outputdir, 'model.th')

    model = model.to(device)
    # NOTE(review): presumably returns a callable that reports True when the
    # monitored criterion improves — confirm against criterion_improver.
    criterion_improved = criterion_improver(
        config_parameters['improvecriterion'])
    header = [
        'Epoch',
        'UttLoss(T)',
        'UttLoss(CV)',
        "UttAcc(T)",
        "UttAcc(CV)",
        "mAUC(CV)"]
    for line in tp.header(
        header,
            style='grid').split('\n'):
        logger.info(line)

    poolingfunction_name = config_parameters['poolingfunction']
    pooling_function = parse_poolingfunction(poolingfunction_name)
    for epoch in range(1, config_parameters['epochs']+1):
        # One training pass and one cross-validation pass per epoch
        train_utt_loss_mean_std, train_utt_acc, train_auc_utt = runepoch(
            train_dataloader, model, criterion, optimizer, dotrain=True, poolfun=pooling_function)
        cv_utt_loss_mean_std, cv_utt_acc, cv_auc_utt = runepoch(
            cv_dataloader, model,  criterion, dotrain=False, poolfun=pooling_function)
        logger.info(
            tp.row(
                (epoch,) +
                (train_utt_loss_mean_std[0],
                 cv_utt_loss_mean_std[0],
                 train_utt_acc, cv_utt_acc, cv_auc_utt),
                style='grid'))
        epoch_meanloss = cv_utt_loss_mean_std[0]
        # Periodic checkpoint, independent of whether the loss improved
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': many_hot_encoder,
                        'config': config_parameters},
                       os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceOnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        # Best-so-far checkpoint whenever the CV mean loss improves
        if criterion_improved(epoch_meanloss):
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': many_hot_encoder,
                        'config': config_parameters},
                       trainedmodelpath)
        # Early stop once the learning rate has decayed to a negligible value
        if optimizer.param_groups[0]['lr'] < 1e-7:
            break
    logger.info(tp.bottom(len(header), style='grid'))
    logger.info("Results are in: {}".format(outputdir))
    return outputdir