def compile_metadata(dirty_metadata):
    """Collapse ``name[index]``-style keys into lists and drop empty values.

    Keys are grouped by the text before their first ``[``.  When two or more
    keys share that base name, their non-empty values are merged into one
    list stored under the base name, ordered by the number found after the
    ``[`` (a key with no number counts as index 0).  Keys whose base name is
    unique are copied through unchanged, brackets included.

    Values equal to ``''`` or ``None`` are skipped, ``None`` items are
    removed from list values, and any entry whose final value is falsy
    (for example an empty list, but also ``0`` or ``False``) is left out of
    the result.

    :param dirty_metadata: mapping of string keys to raw values.
    :return: a new dict containing the cleaned metadata.
    """
    number_pattern = re.compile(r'[0-9]+')

    # Base names that occur more than once get merged into a list.
    key_count = Counter(key.split('[')[0] for key in dirty_metadata)

    # One index -> value map per merged key.  A map (instead of a list sized
    # by the number of colliding keys) accepts 1-based or sparse indices
    # without running off the end.
    indexed = dict((base, {}) for base, count in key_count.items() if count > 1)

    # Seed the merged keys first so they precede the pass-through keys in the
    # result.
    metadata = dict((base, None) for base in indexed)

    for key, value in dirty_metadata.items():
        if value == '' or value is None:
            continue
        base, _, suffix = key.partition('[')
        if base not in indexed:
            metadata[key] = value
            continue
        # Only look for the index after the bracket, so digits that are part
        # of the base name (e.g. ``track2[0]``) are not mistaken for it.
        number_match = number_pattern.search(suffix)
        position = int(number_match.group()) if number_match else 0
        indexed[base][position] = value

    for base, by_position in indexed.items():
        metadata[base] = [by_position[i] for i in sorted(by_position)]

    # Strip None items from every list value, including pass-through ones.
    for key, value in metadata.items():
        if isinstance(value, list):
            metadata[key] = [item for item in value if item is not None]

    # Drop entries that ended up empty (falsy).
    return dict((key, value) for key, value in metadata.items() if value)
Esempio n. 2
0
def format_text(dict_format, entries, accent=None, phoneset=None, encoding='windows-1252'):
	"""Print dictionary entries using the layout registered for ``dict_format``.

	``dict_formats[dict_format]`` supplies the defaults for ``accent`` and
	``phoneset`` (used when the arguments are falsy), a ``'word'`` callable
	applied to each non-empty word, and one format per combination of the
	components ``entry``, ``context`` and ``comment`` joined with ``-``.

	:param dict_format: key into ``dict_formats``.
	:param entries: iterable of ``(word, context, phonemes, comment,
		metadata, error)`` tuples.  ``metadata`` may be ``None`` or a mapping
		from key to an iterable of values.
	:param accent: accent passed to ``load_phonemes``.
	:param phoneset: phoneset passed to ``load_phonemes``.
	:param encoding: encoding handed to ``printf``; replaced by ``'utf-8'``
		when the phoneset is ``'ipa'``.
	"""
	fmt = dict_formats[dict_format]
	accent = accent or fmt['accent']
	phoneset = phoneset or fmt['phoneset']
	if phoneset == 'ipa':
		encoding = 'utf-8'
	phonemeset = load_phonemes(accent, phoneset)
	for word, context, phonemes, comment, metadata, error in entries:
		if error:
			# Report the problem on stderr and skip the entry.
			print(error, file=sys.stderr)
			continue
		components = []
		if word:
			components.append('entry')
			word = fmt['word'](word)
		if context:
			components.append('context')
		if comment is not None:
			# Metadata is only emitted as a prefix of an existing comment.
			if metadata is not None:
				meta = []
				for key, values in sorted(metadata.items()):
					meta.extend('{0}={1}'.format(key, value) for value in values)
				comment = '@@ {0} @@{1}'.format(' '.join(meta), comment)
			components.append('comment')
		if phonemes:
			phonemes = phonemeset.format(phonemes)
		if not components:
			# No word, context or comment: print an empty line.
			print()
		else:
			printf(fmt['-'.join(components)], encoding, word, context, phonemes, comment)
Esempio n. 3
0
def format_text(dict_format,
                entries,
                accent=None,
                phoneset=None,
                encoding='windows-1252'):
    """Print dictionary entries using the layout registered for ``dict_format``.

    ``dict_formats[dict_format]`` supplies the defaults for ``accent`` and
    ``phoneset`` (used when the arguments are falsy), a ``'word'`` callable
    applied to each non-empty word, and one format per combination of the
    components ``entry``, ``context`` and ``comment`` joined with ``-``.

    :param dict_format: key into ``dict_formats``.
    :param entries: iterable of ``(word, context, phonemes, comment,
        metadata, error)`` tuples.  ``metadata`` may be ``None`` or a mapping
        from key to an iterable of values.
    :param accent: accent passed to ``load_phonemes``.
    :param phoneset: phoneset passed to ``load_phonemes``.
    :param encoding: encoding handed to ``printf``; replaced by ``'utf-8'``
        when the phoneset is ``'ipa'``.
    """
    fmt = dict_formats[dict_format]
    accent = accent or fmt['accent']
    phoneset = phoneset or fmt['phoneset']
    if phoneset == 'ipa':
        encoding = 'utf-8'
    phonemeset = load_phonemes(accent, phoneset)
    for word, context, phonemes, comment, metadata, error in entries:
        if error:
            # Report the problem on stderr and skip the entry.
            print(error, file=sys.stderr)
            continue
        components = []
        if word:
            components.append('entry')
            word = fmt['word'](word)
        if context:
            components.append('context')
        if comment is not None:
            # Metadata is only emitted as a prefix of an existing comment.
            if metadata is not None:
                meta = []
                for key, values in sorted(metadata.items()):
                    meta.extend(
                        '{0}={1}'.format(key, value) for value in values)
                comment = '@@ {0} @@{1}'.format(' '.join(meta), comment)
            components.append('comment')
        if phonemes:
            phonemes = phonemeset.format(phonemes)
        if not components:
            # No word, context or comment: print an empty line.
            print()
        else:
            printf(fmt['-'.join(components)], encoding, word, context,
                   phonemes, comment)
Esempio n. 4
0
def merge(metadatas):
    """Return the union of several metadata mappings.

    Entries of ``metadatas`` that have no ``items`` attribute (for example
    ``None``) are ignored.  The same key may appear in several mappings as
    long as every occurrence carries an equal value.

    Keys and values are compared for equality only, so they do not need to
    be orderable.  Keys appear in the result in order of first appearance.

    :param metadatas: iterable of mappings (or objects to be ignored).
    :return: a dict with every key/value pair, or ``None`` when no pair was
        found at all.
    :raises MergeConflict: when a key is mapped to two unequal values; the
        arguments are the earlier and the later ``(key, value)`` pair.
    """
    merged = {}
    for metadata in metadatas:
        try:
            items = metadata.items()
        except AttributeError:
            # Not a mapping: skip it.
            continue
        for key, value in items:
            if key not in merged:
                merged[key] = value
                continue
            existing = merged[key]
            # Identity first, then equality, mirroring tuple comparison.
            if not (existing is value or existing == value):
                raise MergeConflict((key, existing), (key, value))
    return merged or None
def merge(metadatas):
    """Return the union of several metadata mappings.

    Entries of ``metadatas`` that have no ``items`` attribute (for example
    ``None``) are ignored.  The same key may appear in several mappings as
    long as every occurrence carries an equal value.

    Keys and values are compared for equality only, so they do not need to
    be orderable.  Keys appear in the result in order of first appearance.

    :param metadatas: iterable of mappings (or objects to be ignored).
    :return: a dict with every key/value pair, or ``None`` when no pair was
        found at all.
    :raises MergeConflict: when a key is mapped to two unequal values; the
        arguments are the earlier and the later ``(key, value)`` pair.
    """
    union = {}
    for candidate in metadatas:
        try:
            pairs = candidate.items()
        except AttributeError:
            # Not a mapping: skip it.
            continue
        for key, value in pairs:
            if key in union:
                previous = union[key]
                # Identity first, then equality, mirroring tuple comparison.
                if previous is value or previous == value:
                    continue
                raise MergeConflict((key, previous), (key, value))
            union[key] = value
    if not union:
        return None
    return union