Ejemplo n.º 1
0
'''
JSON output module (for general JSON writing operations).

.. moduleauthor:: Chris Fournier <*****@*****.**>
'''
from __future__ import absolute_import
import json
import os
import codecs
from segeval.util.lang import enum

# Field names declared through the project's ``enum`` helper; every keyword is
# mapped to an identical string.  Presumably these are the keys used in the
# JSON documents this module writes -- TODO confirm against the callers.
Field = enum(
    # Property fields
    segmentation_type='segmentation_type',
    # Structural fields
    items='items')

# Segmentation types declared by this module; only ``linear`` is listed here.
SegmentationType = enum(linear='linear')


def __write_json__(filepath, data):
    '''
    Write a JSON file using the given data.
    '''
    # Create a default filename if a dir is specified
    if os.path.isdir(filepath):
        filepath = os.path.join(filepath, 'output.json')
    # Open file
    json_file = codecs.open(filepath, 'w+', 'utf-8')
    try:
        json.dump(data, fp=json_file, sort_keys=True, indent=4)
Ejemplo n.º 2
0
"""
Segmentation encoding format conversion utilities.

.. moduleauthor:: Chris Fournier <*****@*****.**>
"""
from __future__ import absolute_import
from itertools import groupby
from segeval.util.lang import enum


# Names of the boundary encoding formats (position, mass, sets, nltk), declared
# through the project's ``enum`` helper; each keyword maps to an identical
# string.  NOTE(review): presumably these select between the conversions in
# this module -- confirm against the callers.
BoundaryFormat = enum(position="position", mass="mass", sets="sets", nltk="nltk")


def convert_positions_to_masses(positions):
    """
    Collapse an ordered sequence of per-unit segment labels into a sequence
    of segment masses, e.g., ``[1,1,1,1,1,2,2,2,3,3,3,3,3]`` becomes
    ``(5,3,5)``.

    Every run of consecutive equal labels counts as one segment, and the mass
    of that segment is the length of the run.  A label that reappears after a
    different label therefore starts a new segment.

    :param positions: Ordered sequence of which segment each unit belongs to.
    :type positions: tuple
    :returns: Mass of each segment, in order of appearance.
    :rtype: tuple

    .. deprecated:: 1.0
    """
    masses = []
    for _label, run in groupby(positions):
        # ``run`` is a one-shot iterator over the units of this segment, so
        # it is counted right away, before groupby advances to the next run.
        masses.append(sum(1 for _unit in run))
    return tuple(masses)


def convert_masses_to_positions(masses):
    """
    Converts a sequence of segment masses into an ordered sequence of section
    labels for each unit, e.g., ``[5,3,5]`` becomes
Ejemplo n.º 3
0
'''
Machine learning metric package.  This package provides a variety of
traditional machine learning metrics that have been adapted for use in
segmentation.

.. moduleauthor:: Chris Fournier <*****@*****.**>
'''
from __future__ import absolute_import, division
from decimal import Decimal
from collections import defaultdict
from segeval.util import SegmentationMetricError
from segeval.util.math import mean
from segeval.util.lang import enum


# Averaging modes, declared through the project's ``enum`` helper.
# ``__value_micro_macro__`` takes one of these as its ``version`` argument and
# defaults to ``Average.micro``.
Average = enum('micro', 'macro')


def __value_micro_macro__(fnc, arguments, classification=None,
                          version=Average.micro):

    def __compute__(fnc, classes, arguments, classification, version):
        if classification is None:
            if version is Average.micro:
                # Micro-average
                numerator, denominator = 0, 0
                for classification in classes:
                    arguments['classification'] = classification
                    arguments['return_parts'] = True
                    class_numerator, class_denominator = fnc(**arguments)
                    numerator += class_numerator
                    denominator += class_denominator
Ejemplo n.º 4
0
'''
Machine learning metric package.  This package provides a variety of
traditional machine learning metrics that have been adapted for use in
segmentation.

.. moduleauthor:: Chris Fournier <*****@*****.**>
'''
from __future__ import absolute_import, division
from decimal import Decimal
from collections import defaultdict
from segeval.util import SegmentationMetricError
from segeval.util.math import mean
from segeval.util.lang import enum

# Averaging modes (micro / macro) created with the project's ``enum`` helper;
# used as the ``version`` argument of ``__value_micro_macro__``, whose default
# is ``Average.micro``.
Average = enum('micro', 'macro')


def __value_micro_macro__(fnc,
                          arguments,
                          classification=None,
                          version=Average.micro):
    def __compute__(fnc, classes, arguments, classification, version):
        if classification is None:
            if version is Average.micro:
                # Micro-average
                numerator, denominator = 0, 0
                for classification in classes:
                    arguments['classification'] = classification
                    arguments['return_parts'] = True
                    class_numerator, class_denominator = fnc(**arguments)
                    numerator += class_numerator
                    denominator += class_denominator
Ejemplo n.º 5
0
'''
JSON output module (for general JSON writing operations).

.. moduleauthor:: Chris Fournier <*****@*****.**>
'''
from __future__ import absolute_import
import json
import os
import codecs
from segeval.util.lang import enum


# Field names created with the project's ``enum`` helper; each keyword maps to
# the same string.  NOTE(review): these look like the keys of the JSON
# documents written by this module -- confirm against the callers.
Field = enum(
    # Property fields
    segmentation_type='segmentation_type',
    # Structural fields
    items='items'
)

# Segmentation types declared here; ``linear`` is the only member.
SegmentationType = enum(linear='linear')


def __write_json__(filepath, data):
    '''
    Write a JSON file using the given data.
    '''
    # Create a default filename if a dir is specified
    if os.path.isdir(filepath):
        filepath = os.path.join(filepath, 'output.json')
    # Open file
    json_file = codecs.open(filepath, 'w+', 'utf-8')
Ejemplo n.º 6
0
'''
Segmentation encoding format conversion utilities.

.. moduleauthor:: Chris Fournier <*****@*****.**>
'''
from __future__ import absolute_import
from itertools import groupby
from segeval.util.lang import enum


# Boundary encoding format names (position, mass, sets, nltk) created with the
# project's ``enum`` helper; each keyword maps to the same string.
# NOTE(review): presumably used to pick a conversion -- confirm with callers.
BoundaryFormat = enum(position='position', mass='mass', sets='sets', nltk='nltk')


def convert_positions_to_masses(positions):
    '''
    Turn an ordered sequence of per-unit segment labels into the masses of
    the segments it describes, e.g., ``[1,1,1,1,1,2,2,2,3,3,3,3,3]`` becomes
    ``(5,3,5)``.

    Consecutive equal labels are grouped into one segment; the mass of a
    segment is the number of units in its group.  Labels are only compared
    with their neighbours, so a label that returns later forms a new segment.

    :param positions: Ordered sequence of which segment each unit belongs to.
    :type positions: tuple
    :returns: Mass of each segment, in order of appearance.
    :rtype: tuple

    .. deprecated:: 1.0
    '''
    # Each group is materialised and measured before groupby moves on, because
    # the generator is consumed one item at a time by ``tuple``.
    run_lengths = (len(tuple(members)) for _key, members in groupby(positions))
    return tuple(run_lengths)


def convert_masses_to_positions(masses):
    '''
    Converts a sequence of segment masses into an ordered sequence of section
    labels for each unit, e.g., ``[5,3,5]`` becomes