Example #1
import re
import operator
from collections import OrderedDict
from functools import reduce
import xml.etree.ElementTree as ET  # assumed; the original may use a project-local ElementTree module


def search_queries(I, queries, lines, output_file):

    # initialize the output doc creating the root
    attrs = OrderedDict()
    attrs['kwlist_filename'] = 'IARPA-babel202b-v1.0d_conv-dev.kwlist.xml'
    attrs['language'] = 'swahili'
    attrs['system_id'] = ''
    root = ET.Element('kwslist', attrs)

    # open query file and get all the hits (over all queries)
    doc = ET.parse(queries)
    kws = doc.getroot().findall('kw')

    # for each hit in the query file
    for kw in kws:
        # get id and text (split in words and save in a list q) of the query
        kwid = kw.get('kwid')
        q = re.split(r'\s+', kw.find('kwtext').text)
        # ensure all words in the query are lowercase
        q = [w.lower() for w in q]

        # if the first word is in the transcription, then search for the whole query
        if q[0] in I:
            root, detected_kwsl = kw_detected(root, kwid)
            # get info of current word
            qlen = len(q)

            # check all occurrences of the first word in the query
            for i in I[q[0]]:
                # check if query corresponds to current block in reference and time intervals are valid
                if match_query(lines, i, qlen, q) and valid_time_gap(
                        lines, i, qlen):
                    firstinfo = re.split(r'\s+', lines[i])
                    lastinfo = re.split(r'\s+', lines[i + qlen - 1])
                    durs = [
                        float(re.split(r'\s+', lines[x])[3])
                        for x in range(i, i + qlen)
                    ]
                    durtot = sum(durs)
                    scores = [
                        float(re.split(r'\s+', lines[x])[5])
                        for x in range(i, i + qlen)
                    ]
                    # multiply the score of the words in the query
                    finalscore = reduce(operator.mul, scores, 1)
                    info = OrderedDict()
                    info['file'] = firstinfo[0]
                    info['channel'] = firstinfo[1]
                    info['tbeg'] = firstinfo[2]
                    info['dur'] = str(round(durtot, 2))
                    info['score'] = str(finalscore)
                    info['decision'] = 'YES'
                    root, detected_kwsl = append_query_result(
                        root, detected_kwsl, info)

    outdoc = ET.ElementTree(root)
    return outdoc
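
The helpers kw_detected, match_query, valid_time_gap and append_query_result come from elsewhere in the project and are not shown. A minimal sketch of what they plausibly do, assuming each transcription line has the CTM-like form `file channel tbeg dur word score` (field positions inferred from the indices 3 and 5 used above) and a hypothetical 0.5 s limit on the gap between consecutive words:

def kw_detected(root, kwid):
    # open a detected_kwlist node for this keyword under the root
    detected_kwsl = ET.SubElement(root, 'detected_kwlist', OrderedDict([('kwid', kwid)]))
    return root, detected_kwsl


def append_query_result(root, detected_kwsl, info):
    # append one hit as an empty <kw> element carrying the attributes in info
    ET.SubElement(detected_kwsl, 'kw', info)
    return root, detected_kwsl


def match_query(lines, i, qlen, q):
    # the qlen lines starting at i must spell out the query q
    if i + qlen > len(lines):
        return False
    return all(re.split(r'\s+', lines[i + k])[4].lower() == q[k] for k in range(qlen))


def valid_time_gap(lines, i, qlen, max_gap=0.5):
    # consecutive words must be separated by at most max_gap seconds
    for k in range(qlen - 1):
        cur = re.split(r'\s+', lines[i + k])
        nxt = re.split(r'\s+', lines[i + k + 1])
        if float(nxt[2]) - (float(cur[2]) + float(cur[3])) > max_gap:
            return False
    return True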
Example #2
import sys
import xml.etree.ElementTree as ET  # assumed; the original may use a project-local ElementTree module
from collections import OrderedDict

if len(sys.argv) < 3:
	raise RuntimeError('Run script as:\n\tpython scoreNormalization.py path_to/input_file.xml path_to/output_file.xml [gamma]')

# gamma can be tuned
gamma = 1

input_file = sys.argv[1]
output_file = sys.argv[2]
if len(sys.argv)>3:
	gamma = float(sys.argv[3])


# sum over all hits of a query:
# open input file with original scores
doc = ET.parse(input_file)
detected_kwlists = doc.getroot().findall('detected_kwlist')
# for each query detected in the file
for dkw in detected_kwlists:
	# get all the hits and sum of all their scores
	kws = dkw.findall('kw')
	sum_scores = sum([pow(float(kw.get('score')),gamma) for kw in kws])
	# for each hit, update the score by dividing by the sum of the scores
	for kw in kws:
		new_score = str(pow(float(kw.attrib['score']), gamma) / sum_scores)
		att = OrderedDict()
		att['file'] = kw.attrib['file']
		att['channel'] = kw.attrib['channel']
		att['tbeg'] = kw.attrib['tbeg']
		att['dur'] = kw.attrib['dur']
		att['score'] = new_score
		att['decision'] = kw.attrib['decision']
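
The excerpt is cut off before att is used: this is sum-to-one score normalization, where each hit's score becomes score**gamma divided by the sum of score**gamma over all hits of the same keyword. A plausible continuation, assuming the new attributes are written back in place and the tree is then saved (hypothetical, not part of the original excerpt):

		# still inside the inner loop: overwrite the hit's attributes
		kw.attrib.clear()
		kw.attrib.update(att)

# after both loops: write the normalized scores out
doc.write(output_file)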
Example #3
            indent(elem, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i
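
The lines above are the tail of a pretty-printing helper that the excerpt cuts into mid-way; they match the widely used ElementTree indent recipe, whose complete form is:

def indent(elem, level=0):
    # add whitespace to the tree in place so the serialized XML is readable
    i = '\n' + level * '  '
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + '  '
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for elem in elem:
            indent(elem, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i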


''' ----------------- MAIN ----------------- '''

file1 = sys.argv[1]
file2 = sys.argv[2]
output_file = sys.argv[3]

# load file for system 1
doc1 = ET.parse(file1)
detected_kwl_1 = doc1.getroot()

# load file for system 2
tree2 = ET.parse(file2)
detected_kwl_2 = tree2.getroot()

# get all the queries in system2
queries_2 = detected_kwl_2.findall('detected_kwlist')
kwids = [kw.get('kwid') for kw in queries_2]

# for each query in system1 find the one with same kwid in system2
for query_1 in detected_kwl_1:

    kwid = query_1.get("kwid")
    query_2 = []
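
The excerpt stops right after initializing query_2. Judging from the setup, the script merges the detected hit lists of two systems; a hypothetical continuation under that assumption:

    # find the detected_kwlist with the same kwid in system 2
    if kwid in kwids:
        query_2 = queries_2[kwids.index(kwid)]
        # append every hit of system 2 to the list of system 1
        for hit in query_2.findall('kw'):
            query_1.append(hit)

# pretty-print and write the merged list
indent(detected_kwl_1)
doc1.write(output_file)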
Example #4
import re
import sys
import xml.etree.ElementTree as ET  # assumed; the original may use a project-local ElementTree module

input_queries = sys.argv[1]
ref = sys.argv[2]
output_queries = sys.argv[3]
#print input_queries, ref, output_queries

# generate iv dictionary from the transcription ref
IV = iv_dict(ref)

# load the graphemic mapping and build the grapheme-confusion matrix CM
grph_map = 'lib/kws/grapheme.map'
with open(grph_map, 'r') as f:
    lines_map = f.readlines()
CM = generate_CM(lines_map)

# get all the hits of all the queries from the query file
doc = ET.parse(input_queries)
kws = doc.getroot().findall('kw')

# keep track of an OOV dictionary of the oov words you already encountered
# it will contain, for all the oov words, the closest iv word and the distance
OOV = {}

# for each query in the file
for kw in kws:
    kwtext = re.split(r'\s+', kw.find('kwtext').text)
    for i in range(len(kwtext)):
        w = kwtext[i]
        # check only the oov
        if w not in IV:
            # if w is already seen in this run we already have the info
            if w in OOV:
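
The excerpt is truncated here, and the helpers iv_dict and generate_CM are defined elsewhere in the project. Judging by how the dictionary is used (I[q[0]] in Example #1 yields line indices), a minimal sketch of iv_dict, assuming the reference transcription is a CTM file whose fifth field is the word (a hypothetical format):

def iv_dict(ref):
    # map each lowercase word in the reference to the indices of the lines containing it
    iv = {}
    with open(ref) as f:
        for idx, line in enumerate(f):
            fields = re.split(r'\s+', line.strip())
            if len(fields) > 4:
                iv.setdefault(fields[4].lower(), []).append(idx)
    return iv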
Example #5
import myetree.ElementTree as ET
import re
import sys

query_file = 'lib/kws/queries.xml'
outmap_file = 'querylength.map'

doc = ET.parse(query_file)
kws = doc.getroot().findall('kw')

# build a dictionary that stores the number (count) of queries of length n:
#    counter[n] = count
counter = {}

with open(outmap_file, 'w') as f:

    # for each query in the file
    for kw in kws:
        # get the query id: KW202-id
        idx = re.split('-', kw.get('kwid'))[-1]
        # load the list of words
        query = [x.lower() for x in re.split(r'\s+', kw.find('kwtext').text)]
        # evaluate the number of words
        n = len(query)
        if n not in counter:
            counter[n] = 0
        counter[n] += 1
        line = ' '.join([str(n), str(idx), str(counter[n])])
        f.write(line + '\n')

print('counter', counter)
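
Each output line has the form "<query length> <query index> <running count>". A minimal sketch of a consumer that reads the map back (hypothetical, not part of the original scripts):

# read querylength.map back into {query_index: query_length}
length_by_query = {}
with open(outmap_file) as f:
    for line in f:
        n, idx, cnt = line.split()
        length_by_query[idx] = int(n)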
Example #6
if error:
	raise RuntimeError('Run script as:\n\tpython morpoDecomposition.py path_to/input_file.{ctm, xml} path_to/dict_file.dct path_to/output_file.{ctm, xml}')


# read morphological file and build dictionary
with open(dct) as d:
	dct_lines = d.readlines()
D = make_dict(dct_lines)

extension = input_f.split('.')[-1]

if extension=='xml':
	# ---- file xml: it's the query.xml
	# open query file and get all hits for all queries
	doc = ET.parse(input_f)
	kws = doc.getroot().findall('kw')

	# for each hit in the file
	for kw in kws:
		kwtext = re.split(r'\s+', kw.find('kwtext').text)
		text = ''
		# split each word
		for w in kwtext:
			# get decomposition for w from morphological dictionary D
			decomposition = D[w]
			# accumulate the decomposed text for the tree node
			for s in decomposition:
				text += s+' '
		# remove the trailing space, if any
		if text[-1] == ' ':
			text = text[:-1]
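
make_dict is defined elsewhere; assuming each line of the .dct file maps a word to its space-separated morphological decomposition (a hypothetical format), a minimal sketch:

def make_dict(dct_lines):
    # map each word to the list of morphs it decomposes into
    D = {}
    for line in dct_lines:
        fields = line.split()
        if fields:
            D[fields[0]] = fields[1:]
    return D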