Python DataSet.freeze 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: audeep.backend.data.data_set

클래스/타입: DataSet

메소드/함수: freeze

hotexamples.com에서의 예제들: 2

Python DataSet.freeze - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python audeep.backend.data.data_set.DataSet.freeze의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

partitions(3)

freeze(2)

scaled(2)

split(2)

copy(1)

shuffled(1)

with_cv_folds(1)

with_feature_dimensions(1)

예제 #1

파일 보기

파일: eval_tools.py 프로젝트: zeroQiaoba/auDeep

def create_cv_setup(data_set: DataSet, num_folds: int) -> DataSet:
    """
    Add a randomly created cross-validation setup to the specified data set.

    If the specified data set contains multiple chunks per filename, chunks from the same filename are always placed in
    the same cross-validation split. If there additionally is full label information available, this method ensures
    that classes are balanced between folds. Please note, however, that this method does not take into account any
    further requirements such as ensuring that samples from the same original recording are placed in the same split, if
    that original recording has been split into multiple audio files.

    Parameters
    ----------
    data_set: DataSet
        The data set to which a cross-validation setup should be added
    num_folds: int
        The number of cross-validation folds to create. If zero, no fold assignment is performed and the shuffled
        copy is frozen and returned as is.

    Returns
    -------
    DataSet
        A frozen, shuffled copy of the specified data set, with a cross-validation setup
    """
    log = logging.getLogger(__name__)

    data_set = data_set.with_cv_folds(num_folds).shuffled()

    if num_folds == 0:
        # nothing to assign without folds
        data_set.freeze()

        return data_set

    # use pandas to get indices of instances of the same filename; each entry of chunk_indices is the list of
    # instance indices belonging to one filename
    # NOTE(review): groupby sorts group keys by default, so the chunk order may follow the sorted filenames instead
    # of the shuffled instance order - confirm whether that is intended
    df = pd.DataFrame({"filenames": data_set.filenames})

    chunk_indices = [
        indices.tolist()
        for indices in df.groupby(df.filenames).groups.values()
    ]

    if data_set.is_fully_labeled:
        log.info(
            "label information available - balancing classes between folds")

        # in a valid data set, all chunks of the same filename have the same label, and there is at least one chunk
        # per filename
        chunk_labels = np.array(
            [data_set.labels_numeric[indices[0]] for indices in chunk_indices])

        # for each fold, collect the chunks which are used for validation in that fold, distributing the chunks of
        # each label round-robin over the folds
        valid_chunks_per_fold = [[] for _ in range(num_folds)]

        for label in np.unique(chunk_labels):
            label_chunk_positions = np.nonzero(chunk_labels == label)[0]

            for fold in range(num_folds):
                fold_positions = label_chunk_positions[fold::num_folds].tolist()

                valid_chunks_per_fold[fold].extend(
                    chunk_indices[position] for position in fold_positions)
    else:
        log.info(
            "no label information available - randomly splitting into folds")

        # distribute the chunks round-robin over the folds
        valid_chunks_per_fold = [
            chunk_indices[fold::num_folds] for fold in range(num_folds)
        ]

    for fold, fold_chunks in enumerate(valid_chunks_per_fold):
        # instances of these chunks are validated in this fold, and trained on in all other folds
        cv_folds = [Split.TRAIN] * num_folds
        cv_folds[fold] = Split.VALID

        # each chunk is already a list of instance indices, so iterate it directly instead of using it as an index
        # into chunk_indices
        for chunk in fold_chunks:
            for index in chunk:
                data_set[index].cv_folds = cv_folds

    data_set.freeze()

    return data_set

예제 #2

파일 보기

파일: eval_tools.py 프로젝트: zeroQiaoba/auDeep

def create_partitioning(data_set: DataSet, partitions: Sequence[Partition]) -> DataSet:
    """
    Add a randomly created partitioning setup to the specified data set.

    If the specified data set contains multiple chunks per filename, chunks from the same filename are always placed in
    the same partition. If there additionally is full label information available, this method ensures that classes are
    balanced between partitions. Please note, however, that this method does not take into account any further
    requirements such as ensuring that samples from the same original recording are placed in the same partition, if
    that original recording has been split into multiple audio files.

    Parameters
    ----------
    data_set: DataSet
        The data set to which a partitioning setup should be added
    partitions: list of Partition
        The partitions which should be created.

    Returns
    -------
    DataSet
        A frozen, shuffled copy of the specified data set, with a partitioning setup
    """
    log = logging.getLogger(__name__)

    data_set = data_set.copy().shuffled()

    num_partitions = len(partitions)

    # use pandas to get indices of instances of the same filename; each entry of chunk_indices is the list of
    # instance indices belonging to one filename
    # NOTE(review): groupby sorts group keys by default, so the chunk order may follow the sorted filenames instead
    # of the shuffled instance order - confirm whether that is intended
    df = pd.DataFrame({"filenames": data_set.filenames})

    chunk_indices = [
        indices.tolist()
        for indices in df.groupby(df.filenames).groups.values()
    ]

    if data_set.is_fully_labeled:
        log.info(
            "label information available - balancing classes between partitions"
        )

        # in a valid data set, all chunks of the same filename have the same label, and there is at least one chunk
        # per filename
        chunk_labels = np.array(
            [data_set.labels_numeric[indices[0]] for indices in chunk_indices])

        # for each partition, collect the chunks assigned to it, distributing the chunks of each label round-robin
        # over the partitions
        chunks_per_partition = [[] for _ in range(num_partitions)]

        for label in np.unique(chunk_labels):
            label_chunk_positions = np.nonzero(chunk_labels == label)[0]

            for partition_index in range(num_partitions):
                positions = label_chunk_positions[partition_index::num_partitions].tolist()

                # the positions index into chunk_indices, not into the data set, so resolve them to chunks first
                chunks_per_partition[partition_index].extend(
                    chunk_indices[position] for position in positions)
    else:
        log.info(
            "no label information available - randomly splitting into partitions"
        )

        # distribute the chunks round-robin over the partitions
        chunks_per_partition = [
            chunk_indices[i::num_partitions] for i in range(num_partitions)
        ]

    for partition, chunks in zip(partitions, chunks_per_partition):
        # assign the partition to every instance of every chunk, so that chunks of one filename stay together
        for chunk in chunks:
            for index in chunk:
                data_set[index].partition = partition

    data_set.freeze()

    return data_set