''' JSON output module (for general JSON writing operations). .. moduleauthor:: Chris Fournier <*****@*****.**> ''' from __future__ import absolute_import import json import os import codecs from segeval.util.lang import enum Field = enum( # Property fields segmentation_type='segmentation_type', # Structural fields items='items') SegmentationType = enum(linear='linear') def __write_json__(filepath, data): ''' Write a JSON file using the given data. ''' # Create a default filename if a dir is specified if os.path.isdir(filepath): filepath = os.path.join(filepath, 'output.json') # Open file json_file = codecs.open(filepath, 'w+', 'utf-8') try: json.dump(data, fp=json_file, sort_keys=True, indent=4)
""" Segmentation encoding format converstion utilities. .. moduleauthor:: Chris Fournier <*****@*****.**> """ from __future__ import absolute_import from itertools import groupby from segeval.util.lang import enum BoundaryFormat = enum(position="position", mass="mass", sets="sets", nltk="nltk") def convert_positions_to_masses(positions): """ Convert an ordered sequence of boundary position labels into a sequence of segment masses, e.g., ``[1,1,1,1,1,2,2,2,3,3,3,3,3]`` becomes ``[5,3,5]``. :param segments: Ordered sequence of which segments a unit belongs to. :type segments: tuple .. deprecated:: 1.0 """ return tuple([len(list(group)) for _, group in groupby(positions)]) def convert_masses_to_positions(masses): """ Converts a sequence of segment masses into an ordered sequence of section labels for each unit, e.g., ``[5,3,5]`` becomes
''' Machine learning metric package. This package provides a variety of traditional machine learning metrics that have been adapted for use in segmentation. .. moduleauthor:: Chris Fournier <*****@*****.**> ''' from __future__ import absolute_import, division from decimal import Decimal from collections import defaultdict from segeval.util import SegmentationMetricError from segeval.util.math import mean from segeval.util.lang import enum Average = enum('micro', 'macro') def __value_micro_macro__(fnc, arguments, classification=None, version=Average.micro): def __compute__(fnc, classes, arguments, classification, version): if classification is None: if version is Average.micro: # Micro-average numerator, denominator = 0, 0 for classification in classes: arguments['classification'] = classification arguments['return_parts'] = True class_numerator, class_denominator = fnc(**arguments) numerator += class_numerator denominator += class_denominator
''' Machine learning metric package. This package provides a variety of traditional machine learning metrics that have been adapted for use in segmentation. .. moduleauthor:: Chris Fournier <*****@*****.**> ''' from __future__ import absolute_import, division from decimal import Decimal from collections import defaultdict from segeval.util import SegmentationMetricError from segeval.util.math import mean from segeval.util.lang import enum Average = enum('micro', 'macro') def __value_micro_macro__(fnc, arguments, classification=None, version=Average.micro): def __compute__(fnc, classes, arguments, classification, version): if classification is None: if version is Average.micro: # Micro-average numerator, denominator = 0, 0 for classification in classes: arguments['classification'] = classification arguments['return_parts'] = True class_numerator, class_denominator = fnc(**arguments) numerator += class_numerator denominator += class_denominator
''' JSON output module (for general JSON writing operations). .. moduleauthor:: Chris Fournier <*****@*****.**> ''' from __future__ import absolute_import import json import os import codecs from segeval.util.lang import enum Field = enum( # Property fields segmentation_type='segmentation_type', # Structural fields items='items' ) SegmentationType = enum(linear='linear') def __write_json__(filepath, data): ''' Write a JSON file using the given data. ''' # Create a default filename if a dir is specified if os.path.isdir(filepath): filepath = os.path.join(filepath, 'output.json') # Open file json_file = codecs.open(filepath, 'w+', 'utf-8')
''' Segmentation encoding format conversion utilities. .. moduleauthor:: Chris Fournier <*****@*****.**> ''' from __future__ import absolute_import from itertools import groupby from segeval.util.lang import enum BoundaryFormat = enum(position='position', mass='mass', sets='sets', nltk='nltk') def convert_positions_to_masses(positions): ''' Convert an ordered sequence of boundary position labels into a sequence of segment masses, e.g., ``[1,1,1,1,1,2,2,2,3,3,3,3,3]`` becomes ``[5,3,5]``. :param positions: Ordered sequence of which segment each unit belongs to. :type positions: tuple :returns: Segment masses, i.e., the length of each run of consecutive equal labels. :rtype: tuple .. deprecated:: 1.0 ''' return tuple([len(list(group)) for _, group in groupby(positions)]) def convert_masses_to_positions(masses): ''' Converts a sequence of segment masses into an ordered sequence of section labels for each unit, e.g., ``[5,3,5]`` becomes