コード例 #1
0
ファイル: test_commit.py プロジェクト: indexphonemica/data
def v(str):
    """Parse the INI text in *str* and return what ``commit.validate`` returns for it."""
    parsed = iphon_configparser.parser()
    parsed.read_string(string=str)
    result = commit.validate(parsed)
    return result
コード例 #2
0
	if args.bibkey:
		ref_info.update(from_bibkey(args.glottocode, args.bibkey))
	if args.sil_pacific:
		ref_info.update(from_sil_pacific(args.sil_pacific))

	if args.phon_invs_tb:
		args.simple = True
		args.bibkey = 'hh:hld:Namkung:Tibeto-Burman'
		ref_info.update(from_bibkey(args.glottocode, args.bibkey))
		ref_info['url'] = 'https://stedt.berkeley.edu/pubs_and_prods/STEDT_Monograph3_Phonological-Inv-TB.pdf'
		ref_info['author'] = 'Namkung, Ju'

	ini_path = find_path(args.glottocode)

	# build the file
	ini = iphon_configparser.parser()
	ini['core'] = {
		'name': args.name
	,	'glottocode': args.glottocode
	,	'dialect': 'OPTIONAL'
	,	'dialect_name': args.dialect_name
	}

	ini['source'] = {
		'glottolog': (args.bibkey if args.bibkey else "IDEAL")
	,	'url':       maybe(ref_info, 'url',       "IDEAL")
	,	'doi':		 maybe(ref_info, 'doi',       "OPTIONAL")
	,	'author':    maybe(ref_info, 'author',    "OPTIONAL (but REQUIRED if there's no glottolog ID)")
	,	'title':     maybe(ref_info, 'title',     "OPTIONAL (but REQUIRED if there's no glottolog ID)")
	,	'publisher': maybe(ref_info, 'publisher', "OPTIONAL")
	,	'volume':    maybe(ref_info, 'volume',    "OPTIONAL")
コード例 #3
0
ファイル: commit.py プロジェクト: indexphonemica/data
	if len(phonemes) == 0 or phonemes[0] == 'required':
		raise MissingPropertyError('No phonemes given')

	if 'y' in phonemes:
		print('Warning: /y/ listed in phonemes - make sure you don\'t mean /j/!')

	canonical_phonemes = [get_canonical(phoneme) for phoneme in phonemes]

	# -- Allophonic rules tests --
	for rule_raw in doculect['allophonic_rules']:
		rule = parse_allophonic_rule(rule_raw)
		for phoneme in rule['phonemes']:
			if phoneme not in canonical_phonemes:
				raise InvalidPropertyError('Phoneme {} in rule {} not listed as canonical in phonemes section'.format(phoneme, rule))

	return True

if __name__ == '__main__':
	# Script entry point: validate doculects/<name>.ini, then stage it with git.
	# <name> is the first command-line argument, given without the .ini suffix.
	filename = sys.argv[1]
	doculect = iphon_configparser.parser()
	file_path = path.join('doculects', filename + '.ini')

	# The path above is relative, so the working directory must contain doculects/.
	if not path.isdir('doculects'):
		raise NotFoundError('Doculects directory not found - this script must be run from the main IPHON directory')
	if not path.isfile(file_path):
		raise NotFoundError('File not found')

	doculect.read(file_path, encoding='utf-8')
	# validate() raises on an invalid doculect, so the file is only staged when it passes.
	validate(doculect)
	staging_command = ['git', 'add', file_path]
	subprocess.run(staging_command)
コード例 #4
0
def read_ini(path, sql):
    """Load one doculect .ini file and insert its contents through ``sql``.

    The file is parsed and handed to ``validate`` first; any exception that
    raises propagates to the caller. After that, one row is inserted into
    ``doculects``, one row into ``DOC_SEG_JOIN_TBL`` per phoneme, and one row
    into ``allophones`` per (phoneme, allophone) pair of every allophonic
    rule. Non-canonical phoneme forms are stored as additional allophonic
    rules with a fixed placeholder environment.

    Args:
        path: Path of the .ini file. The file name minus its last four
            characters (the ``.ini`` suffix) becomes ``inventory_id``.
        sql: Handle forwarded to ``insert`` and the ``find_or_create_*``
            helpers; ``fetchone()`` is called on it after every insert made
            with ``return_id=True``. Presumably a database cursor - confirm
            against the caller.

    Raises:
        KeyError: If an allophonic rule names a phoneme for which no
            doculect segment was inserted.
    """
    ini = iphon_configparser.parser()
    ini.read(path, encoding='utf-8')

    validate(ini)

    doculect = OrderedDict()

    # File name minus the 4-character '.ini' suffix.
    doculect['inventory_id'] = os.path.basename(path)[:-4]

    core = ini['core']
    doculect[DOCULECT_NAME_COL] = core['name']
    doculect['glottocode'] = core['glottocode']
    doculect['dialect'] = maybe(core, 'dialect')
    doculect['dialect_name'] = maybe(core, 'dialect_name')

    # An empty notes section is stored as None rather than ''.
    notes = '\n'.join(maybe(ini, 'notes', []))
    doculect['notes'] = notes or None

    # (doculect column, key in the [source] section), in column order.
    source_fields = (
        ('source_bibkey', 'glottolog'),
        ('source_url', 'url'),
        ('source_author', 'author'),
        ('source_title', 'title'),
        ('source_publisher', 'publisher'),
        ('source_volume', 'volume'),
        ('source_number', 'number'),
        ('source_year', 'year'),
        ('source_pages', 'pages'),
        ('source_doi', 'doi'),
    )
    source = ini['source']
    for column, ini_key in source_fields:
        doculect[column] = maybe(source, ini_key, filters=INI_DEFAULTS)

    if doculect['source_year'] == 'Unknown':
        doculect['source_year'] = None

    phonemes = list(ini['phonemes'])
    allophonic_rules = [
        parse_allophonic_rule(a) for a in ini['allophonic_rules']
    ]

    # Start writing - first the doculect...
    insert('doculects', doculect, return_id=True, sql=sql)
    doculect_id = sql.fetchone()[0]
    # ...then the language, if necessary...
    language_id = find_or_create_language(doculect['glottocode'], sql=sql)
    # ...then the segments...
    # We'll handle featuralization later.
    # We'll also figure out how to store alternate forms later;
    # for now, we'll mostly mirror PSMITH's db structure,
    # and store non-canonical forms (that aren't listed as allophones) as conditionless allophonic rules.
    # (The eventual goal is to have everything in Haskell, so this is good enough for v0.1,
    # but it's not strictly correct.)
    # Another thing we'll figure out later is phoneme junctions in allophonic rules -
    # e.g. s+i > s̩ / unstressed.
    # These will be stored as:
    # s > s̩ / unstressed (_+i)
    # i > s̩ / unstressed (s+_)
    # Which also isn't strictly correct, but is good enough.
    # Rules with + in the output are just ignored, because I don't know how they should be handled, and see above re: deadlines.
    # Maybe we want NoSQL for the final DB.

    # Non-canonical forms are turned into rule text and run through the regular
    # rule parser, so they end up shaped exactly like the rules read from the file.
    form_rule_str = '{} > {} / (non-canonical form - i.e. alternate representation of the phoneme)'

    doculect_segment_ids = {}
    for phoneme_txt in phonemes:
        phoneme = parse_phoneme(phoneme_txt)
        canonical_form = phoneme['canonical_form']
        canonical_form_id = find_or_create_segment(canonical_form, sql=sql)

        allophonic_rules += [
            parse_allophonic_rule(
                form_rule_str.format(canonical_form, noncanonical_form))
            for noncanonical_form in phoneme['noncanonical_forms']
        ]

        # Then build the doculect_segments, and save them in a mapping of phoneme -> doculect_segment_id for this doculect.
        insert(DOC_SEG_JOIN_TBL,
               OrderedDict({
                   'doculect_id': doculect_id,
                   'segment_id': canonical_form_id,
                   'marginal': phoneme['marginal'],
                   'loan': phoneme['loan']
               }),
               return_id=True,
               sql=sql)
        doculect_segment_ids[canonical_form] = sql.fetchone()[0]

    # ...then the allophones...
    for allophonic_rule in allophonic_rules:
        # Note that 'variation' refers to whether the rule itself is obligatory or optional,
        # not whether there's variance in outputs (all of which are distinct from the input) of the rule.
        # That is, the presence of variation means the rule may output its input,
        # and the absence of variation means it may not.
        # That's the ideal, anyway. What +variation means here *in practice* is the presence of a ~ right after the >.

        # If it's a compound, store the whole compound as text, I guess.
        # This depends only on the rule, so it is computed once per rule.
        rule_phonemes = allophonic_rule['phonemes']
        compound = '+'.join(rule_phonemes) if len(rule_phonemes) > 1 else None

        for allophone in allophonic_rule['allophones']:
            # Skip anything with + in the output because we can't handle that in v0.1
            if '+' in allophone:
                continue

            allophone_segment_id = find_or_create_segment(allophone, sql=sql)

            # One allophones row per input phoneme of the rule.
            for phoneme in rule_phonemes:
                rule = OrderedDict({
                    'doculect_segment_id': doculect_segment_ids[phoneme],
                    'allophone_id': allophone_segment_id,
                    'variation': allophonic_rule['rule_type'] == 'variant',
                    'compound': compound,
                    'environment': allophonic_rule['environment']
                })
                insert('allophones', rule, sql=sql)